1. What format does MongoDB use to store data internally?
- JSON
- BSON
- XML
- CSV
Correct Answer: BSON
// BSON document example
{
_id: ObjectId("64fe1234567890abcdef1234"),
name: "John Doe",
age: 30,
joinDate: ISODate("2024-01-15")
}
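BSON (Binary JSON) is a binary-encoded serialization of JSON-like documents that adds types JSON lacks, such as ObjectId, Date, and binary data. As a quick illustration (a minimal sketch; the $bsonSize operator needs MongoDB 4.4+, and the users collection is only an example), you can check how many bytes a document occupies in its BSON form:
// Report the BSON size of one document from the users collection
db.users.aggregate([
{ $limit: 1 },
{ $project: { _id: 0, sizeInBytes: { $bsonSize: "$$ROOT" } } }
])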
Correct Answer: _id
// Auto-generated _id
db.users.insertOne({ name: "John" })
// Result: { _id: ObjectId("507f1f77bcf86cd799439011"), name: "John" }
// Custom _id
db.users.insertOne({ _id: 100, name: "Alice" })
Correct Answer: Float32
// Valid MongoDB data types
{
name: "Alice", // String
age: 30, // Number (int)
salary: 75000.50, // Number (double)
active: true, // Boolean
joinDate: new Date(), // Date
skills: ["Java", "Python"], // Array
_id: ObjectId() // ObjectId
}
Correct Answer: Documents in a collection can have different structures
// Same collection, different structures - both valid
db.products.insertOne({
name: "Laptop",
price: 999,
specs: { ram: "16GB", cpu: "Intel i7" }
})
db.products.insertOne({
name: "Book",
price: 29,
author: "John Doe",
pages: 350
})
Correct Answer: insertMany()
// Insert multiple documents
db.users.insertMany([
{ name: "Alice", age: 25 },
{ name: "Bob", age: 30 },
{ name: "Charlie", age: 35 }
])
// Returns:
{
acknowledged: true,
insertedIds: [ObjectId(), ObjectId(), ObjectId()]
}
Correct Answer: $set
// Update existing field
db.users.updateOne(
{ name: "John" },
{ $set: { age: 31 } }
)
// Add new field
db.users.updateOne(
{ name: "John" },
{ $set: { email: "john@example.com" } }
)
Correct Answer: use mydb
// Switch to database (creates if doesn't exist)
use mydatabase
// Database is actually created when you insert data
db.users.insertOne({ name: "Alice" })
// Show all databases
show dbs
Correct Answer: An object with acknowledged and deletedCount properties
// Delete one document
db.users.deleteOne({ name: "John" })
// Returns:
{
acknowledged: true,
deletedCount: 1
}
// If no match found:
{
acknowledged: true,
deletedCount: 0
}
// Document example
{
_id: 1,
name: "Alice",
email: "alice@example.com"
}
// Another document in same collection
{
_id: 2,
name: "Bob",
phone: "123-456-7890"
}
// MongoDB - Embedded relationship
{
_id: 1,
name: "John",
address: {
city: "New York",
zip: "10001"
}
}
// SQL - Separate tables with JOIN
SELECT users.name, address.city
FROM users
JOIN address ON users.id = address.user_id
// Create
db.users.insertOne({ name: "John", age: 28 })
// Read
db.users.find({ age: { $gte: 20 } })
// Update
db.users.updateOne({ name: "John" }, { $set: { age: 29 } })
// Delete
db.users.deleteOne({ name: "John" })
// Good use cases:
// 1. Product catalog with varying attributes
{ type: "laptop", ram: "16GB", screen: "15 inch" }
{ type: "book", pages: 300, author: "John" }
// 2. User activity logs (time-series)
{ userId: 123, action: "login", timestamp: ISODate() }
// 3. Geospatial data
{ location: { type: "Point", coordinates: [50, 2] } }
// find() - returns cursor to multiple documents
db.users.find({ age: { $gt: 25 } })
// Returns: cursor to all users older than 25
// findOne() - returns single document
db.users.findOne({ name: "Alice" })
// Returns: { _id: 1, name: "Alice", age: 30 }
// Horizontal scaling example
// Data distributed across 3 shards:
// Shard 1: users with _id 1-1000
// Shard 2: users with _id 1001-2000
// Shard 3: users with _id 2001-3000
// Query automatically routed to correct shard
db.users.find({ _id: 1500 })
// MongoDB routes to Shard 2
// Embedded document example
{
_id: 1,
name: "John Doe",
email: "john@example.com",
address: {
street: "123 Main St",
city: "New York",
zip: "10001",
country: "USA"
},
phones: [
{ type: "home", number: "555-1234" },
{ type: "work", number: "555-5678" }
]
}
Correct Answer: $gt
// Find users older than 25
db.users.find({ age: { $gt: 25 } })
// Find products with price greater than 100
db.products.find({ price: { $gt: 100 } })
// Combine with other operators
db.users.find({ age: { $gt: 25, $lt: 40 } })
Correct Answer: Index on the _id field
// View indexes on a collection
db.users.getIndexes()
// Returns:
[
{
"v": 2,
"key": { "_id": 1 },
"name": "_id_"
}
]
Correct Answer: $all
// Find users with both JavaScript and Python skills
db.users.find({
skills: { $all: ["JavaScript", "Python"] }
})
// This matches:
{ skills: ["JavaScript", "Python", "Java"] }
{ skills: ["Python", "JavaScript"] }
// But not:
{ skills: ["JavaScript", "Java"] }Correct Answer: db.collection.createIndex()
// Create single-field index
db.users.createIndex({ email: 1 })
// Create descending index
db.products.createIndex({ price: -1 })
// Create compound index
db.orders.createIndex({ userId: 1, orderDate: -1 })
Correct Answer: Returns name and age fields, excludes _id
// Return only name and age, exclude _id
db.users.find(
{ age: { $gt: 25 } },
{ name: 1, age: 1, _id: 0 }
)
// Returns:
{ "name": "Alice", "age": 30 }
{ "name": "Bob", "age": 28 }Correct Answer: Text index
// Create text index
db.articles.createIndex({ title: "text", body: "text" })
// Perform text search
db.articles.find({
$text: { $search: "mongodb database" }
})
// Search with exact phrase
db.articles.find({
$text: { $search: "\"NoSQL database\"" }
})
Correct Answer: Both B and C
// Compound index
db.users.createIndex({ age: 1, city: 1 })
// Efficient - uses index
db.users.find({ age: 30 })
db.users.find({ age: 30, city: "NYC" })
// Inefficient - cannot use this index
db.users.find({ city: "NYC" })// $in operator - single field, multiple values
db.users.find({
status: { $in: ["active", "pending", "verified"] }
})
// $or operator - multiple conditions
db.users.find({
$or: [
{ age: { $gt: 30 } },
{ city: "New York" },
{ status: "premium" }
]
})
// Without index - scans all documents
db.users.find({ email: "alice@example.com" })
// docsExamined: 100000, executionTimeMillis: 150
// Create index
db.users.createIndex({ email: 1 })
// With index - fast lookup
db.users.find({ email: "alice@example.com" })
// docsExamined: 1, executionTimeMillis: 2
// Compound index on age, city, status
db.users.createIndex({ age: 1, city: 1, status: 1 })
// Can use index (prefix rule):
db.users.find({ age: 30 })
db.users.find({ age: 30, city: "NYC" })
db.users.find({ age: 30, city: "NYC", status: "active" })
// Cannot use index efficiently:
db.users.find({ city: "NYC" })
db.users.find({ status: "active" })
db.users.find({ city: "NYC", status: "active" })// Check query performance
db.users.find({ age: { $gt: 25 } }).explain("executionStats")
// Optimize with index
db.users.createIndex({ age: 1 })
// Use projection to reduce data transfer
db.users.find(
{ age: { $gt: 25 } },
{ name: 1, email: 1, _id: 0 }
)
// Covered query (all fields in index)
db.users.createIndex({ age: 1, name: 1 })
db.users.find(
{ age: 30 },
{ name: 1, _id: 0 }
)
// Simple array match
db.users.find({ skills: "JavaScript" })
// $elemMatch - multiple conditions on same element
db.students.find({
grades: {
$elemMatch: {
subject: "Math",
score: { $gt: 80 }
}
}
})
// Without $elemMatch (may match wrong docs)
db.students.find({
"grades.subject": "Math",
"grades.score": { $gt: 80 }
})
// Basic explain
db.users.find({ age: 30 }).explain()
// Execution statistics mode
db.users.find({ age: 30 }).explain("executionStats")
// Key output:
{
executionStats: {
executionTimeMillis: 2,
totalDocsExamined: 1,
totalDocsReturned: 1,
executionStages: {
stage: "IXSCAN", // Index scan
indexName: "age_1"
}
}
}
Correct Answer: $match
// Filter completed orders
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $group: { _id: "$customerId", total: { $sum: "$amount" } } }
])
// Multiple conditions
db.orders.aggregate([
{ $match: { status: "completed", amount: { $gt: 100 } } }
])
Correct Answer: $avg
// Calculate average salary by department
db.employees.aggregate([
{
$group: {
_id: "$department",
avgSalary: { $avg: "$salary" },
count: { $sum: 1 }
}
}
])
Correct Answer: $match before $group
// Optimized pipeline
db.orders.aggregate([
{ $match: { status: "completed" } }, // Filter first
{ $group: { _id: "$customerId", total: { $sum: "$amount" } } },
{ $sort: { total: -1 } }, // Sort grouped results
{ $limit: 10 } // Limit final output
])
Correct Answer: Performs a left outer join with another collection
// Join orders with customer details
db.orders.aggregate([
{
$lookup: {
from: "customers",
localField: "customerId",
foreignField: "_id",
as: "customerInfo"
}
}
])
// Result includes customerInfo array
Correct Answer: To reshape documents by including, excluding, or computing fields
// Reshape employee documents
db.employees.aggregate([
{
$project: {
fullName: { $concat: ["$firstName", " ", "$lastName"] },
salary: 1,
department: 1,
_id: 0
}
}
])
Correct Answer: Deconstructs an array field creating a document for each element
// Unwind order items
db.orders.aggregate([
{ $unwind: "$items" },
{
$group: {
_id: "$items.productId",
totalQuantity: { $sum: "$items.quantity" }
}
}
])
// Before: { items: ["A", "B", "C"] }
// After: 3 docs with items: "A", items: "B", items: "C"
// Find query - simple retrieval
db.orders.find({ status: "completed" }, { customer: 1, total: 1 })
// Aggregation - complex analysis
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $group: { _id: "$customer", totalSpent: { $sum: "$total" } } },
{ $sort: { totalSpent: -1 } },
{ $limit: 10 }
])
// Complete pipeline example
db.sales.aggregate([
{ $match: { date: { $gte: ISODate("2024-01-01") } } },
{ $unwind: "$items" },
{ $group: { _id: "$items.category", revenue: { $sum: "$items.price" } } },
{ $sort: { revenue: -1 } },
{ $limit: 5 },
{ $project: { category: "$_id", revenue: 1, _id: 0 } }
])
db.orders.aggregate([
// Stage 1: Filter completed orders
{ $match: { status: "completed" } },
// Stage 2: Group by customer and sum amounts
{
$group: {
_id: "$customerId",
totalAmount: { $sum: "$amount" },
orderCount: { $sum: 1 }
}
},
// Stage 3: Sort by total in descending order
{ $sort: { totalAmount: -1 } },
// Stage 4: Get top 5
{ $limit: 5 },
// Stage 5: Lookup customer details
{
$lookup: {
from: "customers",
localField: "_id",
foreignField: "_id",
as: "customerInfo"
}
}
])
// $push - includes duplicates
db.orders.aggregate([
{
$group: {
_id: "$customerId",
allDates: { $push: "$orderDate" }
}
}
])
// Result: allDates: ["2024-01-01", "2024-01-01", "2024-02-01"]
// $addToSet - unique values only
db.orders.aggregate([
{
$group: {
_id: "$customerId",
categories: { $addToSet: "$category" }
}
}
])
// Result: categories: ["Electronics", "Books"]
// Basic $lookup
db.orders.aggregate([
{
$lookup: {
from: "customers",
localField: "customerId",
foreignField: "_id",
as: "customerDetails"
}
}
])
// With unwind for one-to-one
db.orders.aggregate([
{ $lookup: { from: "customers", localField: "customerId", foreignField: "_id", as: "customer" } },
{ $unwind: "$customer" }
])
// Better approach: Embed data
{
orderId: 1,
customer: { name: "John", email: "john@example.com" },
items: [...]
}
// Old Map-Reduce approach (deprecated)
db.orders.mapReduce(
function() { emit(this.customerId, this.amount); },
function(key, values) { return Array.sum(values); },
{ out: "customer_totals" }
)
// Modern Aggregation approach (recommended)
db.orders.aggregate([
{
$group: {
_id: "$customerId",
total: { $sum: "$amount" }
}
},
{ $out: "customer_totals" }
})
// Optimized pipeline
db.orders.aggregate([
// 1. Filter early with indexed field
{ $match: { status: "completed", date: { $gte: ISODate("2024-01-01") } } },
// 2. Project only needed fields
{ $project: { customerId: 1, amount: 1, _id: 0 } },
// 3. Group and calculate
{ $group: { _id: "$customerId", total: { $sum: "$amount" } } },
// 4. Sort with limit (uses top-K sort)
{ $sort: { total: -1 } },
{ $limit: 100 }
], {
allowDiskUse: true // For large datasets
})
// Create index to support pipeline
db.orders.createIndex({ status: 1, date: 1 })
// $out - replaces entire collection
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $group: { _id: "$customerId", total: { $sum: "$amount" } } },
{ $out: "customer_totals" } // Replaces customer_totals collection
])
// $merge - merges into existing collection
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $group: { _id: "$customerId", total: { $sum: "$amount" } } },
{
$merge: {
into: "customer_totals",
on: "_id",
whenMatched: "replace",
whenNotMatched: "insert"
}
}
])
Correct Answer: A group of servers maintaining identical data with one primary and multiple secondaries
// Replica set configuration
rs.initiate({
_id: "myReplicaSet",
members: [
{ _id: 0, host: "mongodb0.example.net:27017" },
{ _id: 1, host: "mongodb1.example.net:27017" },
{ _id: 2, host: "mongodb2.example.net:27017" }
]
})
// Check replica set status
rs.status()
Correct Answer: The primary node receives all write operations
// Connect to replica set
mongo "mongodb://mongodb0.example.net,mongodb1.example.net,mongodb2.example.net/?replicaSet=myReplicaSet"
// Write operations go to primary only
db.users.insertOne({ name: "Alice" })
// Check which node is primary
rs.isMaster()
Correct Answer: A capped collection that records all write operations on the primary
// View oplog size
use local
db.oplog.rs.stats().maxSize
// View recent oplog entries
db.oplog.rs.find().sort({ $natural: -1 }).limit(5)
// Sample oplog entry
{
ts: Timestamp(1640000000, 1),
op: "i", // insert operation
ns: "mydb.users",
o: { _id: 1, name: "Alice" }
}
Correct Answer: When the primary node becomes unavailable
// Force an election by stepping down primary
rs.stepDown(60) // Step down for 60 seconds
// Check election status
rs.status().members.forEach(m => {
print(m.name + ": " + m.stateStr)
})
// Set member priority (higher priority more likely to be elected)
var cfg = rs.conf()
cfg.members[0].priority = 2
rs.reconfig(cfg)
Correct Answer: Writes are acknowledged by the majority of replica set members
// Write with majority concern
db.users.insertOne(
{ name: "Alice" },
{ writeConcern: { w: "majority", wtimeout: 5000 } }
)
// Set default write concern for database
db.adminCommand({
setDefaultRWConcern: 1,
defaultWriteConcern: { w: "majority" }
})
Correct Answer: secondary
// Set read preference to secondary
db.users.find().readPref("secondary")
// In connection string
mongo "mongodb://host1,host2,host3/?replicaSet=myRS&readPreference=secondary"
// With tags for specific secondaries
db.users.find().readPref(
"secondary",
[{ datacenter: "east" }]
)
// Three-member replica set
// Primary: handles all writes
// Secondary 1: replicates data, can serve reads
// Secondary 2: replicates data, can serve reads
// If primary fails:
// 1. Secondaries detect primary is unreachable
// 2. Election starts automatically
// 3. One secondary becomes new primary
// 4. Applications reconnect to new primary
// 5. Failed node rejoins as secondary when recovered
// Replication flow:
// 1. Write operation on primary
db.users.insertOne({ name: "Alice" }) // On primary
// 2. Primary writes to oplog
// local.oplog.rs: { ts: Timestamp(...), op: "i", ns: "db.users", o: {...} }
// 3. Secondaries query oplog
// Secondary runs: db.oplog.rs.find({ ts: { $gt: lastApplied } })
// 4. Secondaries apply operations
// Secondary executes same insert
// 5. Secondaries update their sync state
// Track timestamp of last applied operation
// Configure member priorities
var cfg = rs.conf()
// Member 0: high priority, preferred primary
cfg.members[0].priority = 2
// Member 1: normal priority
cfg.members[1].priority = 1
// Member 2: priority 0, never becomes primary
cfg.members[2].priority = 0
rs.reconfig(cfg)
// During election:
// 1. Primary becomes unavailable
// 2. Secondaries detect loss of primary (heartbeat timeout)
// 3. Eligible members call for election
// 4. Members vote based on priority and data recency
// 5. Member with majority votes becomes new primary
// Check current oplog size and window
use local
db.oplog.rs.stats(1024*1024) // Size in MB
// Check oplog time window
var first = db.oplog.rs.find().sort({$natural:1}).limit(1).next()
var last = db.oplog.rs.find().sort({$natural:-1}).limit(1).next()
var window = (last.ts.getTime() - first.ts.getTime()) / 3600 // getTime() returns seconds
print("Oplog window: " + window + " hours")
// Resize oplog (requires replSetResizeOplog command)
db.adminCommand({
replSetResizeOplog: 1,
size: 16000 // Size in MB
})
// w:1 - primary only (default, fastest)
db.orders.insertOne(
{ item: "book", qty: 1 },
{ writeConcern: { w: 1 } }
)
// w:'majority' - majority of members (durable)
db.orders.insertOne(
{ item: "book", qty: 1 },
{ writeConcern: { w: "majority", wtimeout: 5000 } }
)
// w:3 - all three members of this replica set (slowest, most durable)
db.orders.insertOne(
{ item: "book", qty: 1 },
{ writeConcern: { w: 3, wtimeout: 5000 } }
)
// Read concern 'local' - fastest, may return rollback data
db.inventory.find({ qty: { $gt: 0 } })
.readConcern("local")
// Read concern 'majority' - durable reads, slower
db.inventory.find({ qty: { $gt: 0 } })
.readConcern("majority")
// Read concern 'linearizable' - strongest consistency
db.inventory.findOne({ _id: 1 })
.readConcern("linearizable")
// In transactions with snapshot isolation
const session = client.startSession()
session.startTransaction({
readConcern: { level: "snapshot" },
writeConcern: { w: "majority" }
})
// Timeline of failover:
// Time 0: Primary node crashes
// Time 2-10s: Secondaries detect primary unreachable (heartbeat timeout)
// Time 10s: Secondary initiates election
// Time 10-15s: Election process, members vote
// Time 15s: New primary elected, begins accepting writes
// Time 16s: Applications reconnect to new primary
// Application connection with automatic failover
const client = new MongoClient(
'mongodb://host1:27017,host2:27017,host3:27017/?replicaSet=myRS',
{
retryWrites: true, // Automatic retry on failover
w: 'majority' // Wait for replication
}
)
// After failover completes
// Old primary (when recovered): joins as secondary
// New primary: handles all writes
// Other secondary: continues replicating
// Add arbiter to replica set
rs.addArb("mongodb3.example.net:27017")
// Replica set with arbiter (not recommended)
// Primary: full data, accepts writes
// Secondary: full data, replicates
// Arbiter: no data, votes in elections
// Better approach: use data-bearing member
// Primary: full data, accepts writes
// Secondary 1: full data, replicates, can serve reads
// Secondary 2: full data, replicates, can serve reads
// Check arbiter status
rs.status().members.forEach(m => {
print(m.name + ": " + m.stateStr)
})
Correct Answer: A method for distributing data horizontally across multiple servers
// Enable sharding on database
sh.enableSharding("myDatabase")
// Shard a collection
sh.shardCollection(
"myDatabase.users",
{ userId: 1 } // Shard key
)
// Data distributed across shards:
// Shard 1: userId 1-1000
// Shard 2: userId 1001-2000
// Shard 3: userId 2001-3000
Correct Answer: High cardinality to ensure even data distribution
// Good shard key - high cardinality
sh.shardCollection("db.orders", { userId: 1, orderDate: 1 })
// Many unique combinations of userId and orderDate
// Poor shard key - low cardinality
sh.shardCollection("db.orders", { status: 1 })
// Only a few status values like 'pending', 'completed'
// Results in uneven distribution
Correct Answer: Acts as a query router directing operations to the appropriate shards
// Application connects to mongos, not shards
mongo "mongodb://mongos1.example.net:27017"
// Mongos routes query to correct shard(s)
db.users.find({ userId: 12345 })
// Mongos checks shard key range
// Routes to Shard 2 (contains userId 10000-20000)
// Query spanning multiple shards
db.users.find({ age: { $gt: 25 } })
// Mongos sends to all shards, merges results
Correct Answer: Metadata about the cluster configuration and data distribution
// Config server replica set stores:
// - Shard information and locations
// - Chunk ranges for each shard
// - Database and collection metadata
// View chunk distribution
use config
db.chunks.find({ ns: "mydb.users" }).pretty()
// Example chunk metadata
{
_id: "mydb.users-userId_1000",
ns: "mydb.users",
min: { userId: 1000 },
max: { userId: 2000 },
shard: "shard0001"
}
Correct Answer: The balancer detects uneven distribution of chunks across shards
// Check balancer status
sh.isBalancerRunning()
// View balancer settings
sh.getBalancerState()
// Configure balancer window (run only at night)
use config
db.settings.updateOne(
{ _id: "balancer" },
{ $set: { activeWindow: { start: "23:00", stop: "06:00" } } },
{ upsert: true }
)
// Disable balancer temporarily
sh.stopBalancer()
Correct Answer: More even distribution of data, especially for monotonically increasing keys
// Range-based sharding (default)
sh.shardCollection("db.orders", { orderId: 1 })
// orderId 1-1000 → Shard 1
// orderId 1001-2000 → Shard 2
// New inserts always go to last shard (hotspot)
// Hashed sharding
sh.shardCollection("db.orders", { orderId: "hashed" })
// Hash of orderId determines shard
// Even distribution, no hotspots
// But range queries scan all shards
// When to shard:
// 1. Data size > single server storage (500GB-1TB+)
// 2. Working set > available RAM
// 3. Write throughput > single server capacity
// 4. Need to distribute data geographically
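// A rough mongosh sketch for the first two criteria above (data size and working set vs RAM);
// field names come from db.stats() and db.serverStatus(), the thresholds are your call
var dbStats = db.stats(1024 * 1024 * 1024) // scale sizes to GB
print("data size (GB): " + dbStats.dataSize)
print("index size (GB): " + dbStats.indexSize)
print("resident RAM in use (MB): " + db.serverStatus().mem.resident)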
// Sharded cluster components:
// Config Servers: store metadata (replica set)
// Mongos Routers: query routing (multiple instances)
// Shards: store data (each is replica set)
// Basic sharding setup
sh.enableSharding("myDatabase")
sh.shardCollection("myDatabase.users", { userId: 1 })// Good shard key: compound with high cardinality
sh.shardCollection("db.orders", { userId: 1, orderDate: 1 })
// Pros: high cardinality, supports user queries, time-based queries
// Query: db.orders.find({ userId: 123, orderDate: {...} })
// Routes to specific shard
// Poor shard key: low cardinality
sh.shardCollection("db.orders", { status: 1 })
// Cons: only 3-4 values (pending, completed, cancelled)
// Cannot distribute beyond 3-4 chunks
// Poor shard key: monotonically increasing
sh.shardCollection("db.orders", { _id: 1 })
// Cons: all new writes to one shard (hotspot)
// Better: { _id: "hashed" }
// Queries without shard key (slow)
db.orders.find({ productId: 456 })
// Mongos broadcasts to all shards
// Sharded cluster architecture:
//
// Application
// ↓
// Mongos (Query Router) ← queries config servers for metadata
// ↓
// Config Servers (Metadata) - replica set
// ↓
// Shards (Data Storage) - each is replica set
// Shard 1: userId 1-10000
// Shard 2: userId 10001-20000
// Shard 3: userId 20001-30000
// Query flow:
// 1. App sends: db.users.find({ userId: 15000 })
// 2. Mongos checks config: "userId 15000 is on Shard 2"
// 3. Mongos routes query to Shard 2 only
// 4. Shard 2 returns results
// 5. Mongos returns to application
// Vertical scaling (single server)
// Year 1: 16GB RAM, 4 cores, 500GB storage
// Year 2: 64GB RAM, 16 cores, 2TB storage
// Year 3: 256GB RAM, 32 cores, 8TB storage
// Eventually: Cannot scale further, very expensive
// Horizontal scaling (sharding)
// Year 1: 3 shards, 16GB RAM each
// Year 2: 6 shards, 16GB RAM each
// Year 3: 12 shards, 16GB RAM each
// Can continue adding shards indefinitely
// Sharding setup for horizontal scaling
sh.addShard("shard1/host1:27017")
sh.addShard("shard2/host2:27017")
sh.addShard("shard3/host3:27017")
// Add more shards as needed
// View chunks for a collection
use config
db.chunks.find({ ns: "mydb.users" }).pretty()
// Example chunk
{
_id: "mydb.users-userId_1000",
ns: "mydb.users",
min: { userId: 1000 },
max: { userId: 2000 },
shard: "shard0001"
}
// Chunk lifecycle:
// 1. Chunk grows beyond 64MB
// 2. MongoDB splits chunk at midpoint
// Chunk A: userId 1000-1500
// Chunk B: userId 1501-2000
// 3. Balancer detects imbalance
// 4. Balancer migrates Chunk B to another shard
// Manual chunk split (rarely needed)
sh.splitAt("mydb.users", { userId: 5000 })
// Change chunk size
use config
db.settings.updateOne(
{ _id: "chunksize" },
{ $set: { value: 128 } }, // 128MB chunks
{ upsert: true }
)
// Shard key: { userId: 1 }
// Targeted query - includes shard key
db.orders.find({ userId: 12345, status: "completed" })
// Mongos knows userId 12345 is on Shard 2
// Routes to Shard 2 only - FAST
// Broadcast query - no shard key
db.orders.find({ status: "completed" })
// Mongos doesn't know which shards have completed orders
// Queries all shards, merges results - SLOW
// Explain shows broadcast
db.orders.find({ status: "completed" }).explain()
// Shows: SHARD_MERGE stage (broadcast to all shards)
// Best practice: include shard key
db.orders.find({
userId: { $in: [123, 456, 789] }, // Shard key
status: "completed"
})
// Targeted to specific shards - FAST
// Geographic zone sharding
// 1. Add shards to zones
sh.addShardToZone("shard-us-west", "US")
sh.addShardToZone("shard-us-east", "US")
sh.addShardToZone("shard-eu-west", "EU")
// 2. Define shard key ranges for zones
sh.updateZoneKeyRange(
"mydb.users",
{ userId: 1000000, country: "US" },
{ userId: 1999999, country: "US" },
"US"
)
sh.updateZoneKeyRange(
"mydb.users",
{ userId: 2000000, country: "EU" },
{ userId: 2999999, country: "EU" },
"EU"
)
// 3. Balancer migrates chunks to appropriate shards
// US users' data stays on US shards
// EU users' data stays on EU shards
// Result: reduced latency for users
// US user queries hit US shards (low latency)
// EU user queries hit EU shards (low latency)
// Shard key limitations
// Cannot change shard key after sharding
sh.shardCollection("db.users", { email: 1 })
// Later realize email is poor choice
// Must drop collection and reshard - significant downtime
// Unique indexes require shard key
// Shard key: { userId: 1 }
db.users.createIndex({ email: 1 }, { unique: true })
// ERROR: cannot create unique index on field without shard key
// Must include shard key
db.users.createIndex({ userId: 1, email: 1 }, { unique: true })
// OK: includes shard key
// Scatter-gather queries are slow
db.users.find({ age: { $gt: 25 } }) // No shard key
// Queries all shards, slow with many shards
// Before sharding: consider alternatives
// - Better indexing
// - Vertical scaling
// - Read replicas for read distribution
// - Application-level caching
Correct Answer: MongoDB 4.0 or higher with replica set or sharded cluster
// Start a transaction
const session = client.startSession()
session.startTransaction()
try {
// Multiple operations in transaction
await db.accounts.updateOne(
{ _id: 1 },
{ $inc: { balance: -100 } },
{ session }
)
await db.accounts.updateOne(
{ _id: 2 },
{ $inc: { balance: 100 } },
{ session }
)
// Commit transaction
await session.commitTransaction()
} catch (error) {
// Rollback on error
await session.abortTransaction()
} finally {
session.endSession()
}
Correct Answer: WiredTiger
// Check current storage engine
db.serverStatus().storageEngine
// Output:
{
name: "wiredTiger",
supportsCommittedReads: true,
persistent: true
}
// WiredTiger features:
// - Document-level concurrency
// - Compression (snappy default)
// - Checkpoints for crash recovery
// - Write-ahead logging (journaling)
Correct Answer: For files larger than 16MB BSON document size limit
// Upload file to GridFS (Node.js driver)
const { GridFSBucket } = require('mongodb')
const fs = require('fs')
const bucket = new GridFSBucket(db)
const uploadStream = bucket.openUploadStream('video.mp4')
fs.createReadStream('./video.mp4').pipe(uploadStream)
// GridFS creates two collections:
// fs.files - file metadata
{
_id: ObjectId(),
filename: "video.mp4",
length: 52428800, // 50MB
chunkSize: 261120,
uploadDate: ISODate()
}
// fs.chunks - file data chunks
{
_id: ObjectId(),
files_id: ObjectId(),
n: 0, // Chunk number
data: BinData(...) // 255KB chunk
}
Correct Answer: Documents are automatically deleted
// Create TTL index - expire after 1 hour
db.sessions.createIndex(
{ createdAt: 1 },
{ expireAfterSeconds: 3600 }
)
// Insert document
db.sessions.insertOne({
userId: 123,
token: "abc123",
createdAt: new Date()
})
// Document automatically deleted 1 hour after createdAt
// TTL index for logs - expire after 30 days
db.logs.createIndex(
{ timestamp: 1 },
{ expireAfterSeconds: 2592000 }
)
Correct Answer: They have a fixed size and automatically overwrite oldest documents
// Create capped collection - max 100MB
db.createCollection("logs", {
capped: true,
size: 100000000 // 100MB in bytes
})
// With max document count
db.createCollection("recentActivity", {
capped: true,
size: 10000000, // 10MB
max: 5000 // Max 5000 documents
})
// Query in insertion order
db.logs.find().sort({ $natural: 1 })
// Query in reverse insertion order
db.logs.find().sort({ $natural: -1 })
Correct Answer: Listen for real-time changes to data
// Watch for changes on a collection
const changeStream = db.collection('orders').watch()
changeStream.on('change', (change) => {
console.log('Change detected:', change)
if (change.operationType === 'insert') {
// Handle new order
notifyWarehouse(change.fullDocument)
} else if (change.operationType === 'update') {
// Handle order update
updateInventory(change.documentKey._id)
}
})
// Watch with filter
const pipeline = [
{ $match: { 'fullDocument.status': 'completed' } }
]
db.orders.watch(pipeline)
// Bank transfer example - ACID transaction
const session = client.startSession()
try {
session.startTransaction({
readConcern: { level: 'snapshot' },
writeConcern: { w: 'majority' }
})
// Deduct from account 1
const result1 = await db.accounts.updateOne(
{ _id: 'account1', balance: { $gte: 100 } },
{ $inc: { balance: -100 } },
{ session }
)
if (result1.modifiedCount === 0) {
throw new Error('Insufficient funds')
}
// Add to account 2
await db.accounts.updateOne(
{ _id: 'account2' },
{ $inc: { balance: 100 } },
{ session }
)
// Record transaction
await db.transfers.insertOne(
{ from: 'account1', to: 'account2', amount: 100 },
{ session }
)
// Commit - all or nothing
await session.commitTransaction()
} catch (error) {
// Rollback all changes
await session.abortTransaction()
throw error
} finally {
session.endSession()
}
// WiredTiger configuration
storage:
engine: wiredTiger
wiredTiger:
engineConfig:
cacheSizeGB: 4 // Internal cache size
journalCompressor: snappy
collectionConfig:
blockCompressor: snappy // Data compression
indexConfig:
prefixCompression: true // Index compression
// WiredTiger features:
// - Document-level locking (high write concurrency)
// - Compression (60% space savings typical)
// - Checkpoints every 60 seconds
// - Write-ahead logging (journal)
// - Configurable cache
// MMAPv1 (deprecated):
// - Collection-level locking (write bottleneck)
// - No compression
// - Power-of-2 sized allocations (wasted space)
// - Relies on OS file system cache
// Create 2dsphere index for GPS coordinates
db.places.createIndex({ location: "2dsphere" })
// Insert location using GeoJSON
db.places.insertOne({
name: "Coffee Shop",
location: {
type: "Point",
coordinates: [-73.97, 40.77] // [longitude, latitude]
}
})
// Find places near a point (within 1000 meters)
db.places.find({
location: {
$near: {
$geometry: {
type: "Point",
coordinates: [-73.98, 40.76]
},
$maxDistance: 1000 // meters
}
}
})
// Find places within a polygon
db.places.find({
location: {
$geoWithin: {
$geometry: {
type: "Polygon",
coordinates: [[[-74, 40], [-73, 40], [-73, 41], [-74, 41], [-74, 40]]]
}
}
}
})
// Method 1: mongodump and mongorestore
// Backup entire database
mongodump --host=localhost --port=27017 --db=mydb --out=/backup/
// Restore from backup
mongorestore --host=localhost --port=27017 --db=mydb /backup/mydb/
// Method 2: File system snapshot (example with LVM)
lvcreate --size 10G --snapshot --name mongodb-snap /dev/vg0/mongodb
tar czf /backup/mongodb-snap.tar.gz /mnt/mongodb-snap
lvremove /dev/vg0/mongodb-snap
// Method 3: Delayed replica set member
// Configure secondary with 4-hour delay
var cfg = rs.conf()
cfg.members[2].priority = 0
cfg.members[2].hidden = true
cfg.members[2].slaveDelay = 14400 // 4 hours in seconds
rs.reconfig(cfg)
// Recovery from delayed member:
// 1. Stop delayed secondary
// 2. Copy its data files
// 3. Restore primary from these files
// Method 4: Continuous backup (conceptual)
// MongoDB Atlas automated backups:
// - Snapshots every 6-24 hours
// - Oplog between snapshots
// - Point-in-time restore to any second
// 1. Enable authentication
// Start MongoDB with --auth flag
mongod --auth --port 27017 --dbpath /data/db
// Create admin user
use admin
db.createUser({
user: "admin",
pwd: "SecurePassword123!",
roles: [{ role: "userAdminAnyDatabase", db: "admin" }]
})
// 2. Create application user with limited permissions
use myapp
db.createUser({
user: "appUser",
pwd: "AppPassword456!",
roles: [
{ role: "readWrite", db: "myapp" }
]
})
// 3. Custom role with specific permissions
use admin
db.createRole({
role: "reportsReader",
privileges: [
{
resource: { db: "analytics", collection: "reports" },
actions: ["find"]
}
],
roles: []
})
// 4. Network security in config file
net:
bindIp: 127.0.0.1,10.0.0.5 // Specific IPs only
port: 27017
tls:
mode: requireTLS
certificateKeyFile: /path/to/cert.pem
CAFile: /path/to/ca.pem
// 5. Enable encryption at rest (Enterprise)
security:
enableEncryption: true
encryptionKeyFile: /path/to/keyfile
// 6. Enable auditing (Enterprise)
auditLog:
destination: file
format: JSON
path: /var/log/mongodb/audit.json
// 1. Check current operations
db.currentOp({
"active": true,
"secs_running": { "$gt": 5 } // Running > 5 seconds
})
// Kill long-running operation
db.killOp(12345)
// 2. Enable profiling for slow queries (> 100ms)
db.setProfilingLevel(1, { slowms: 100 })
// Analyze slow queries
db.system.profile.find().sort({ ts: -1 }).limit(10)
// 3. Get server statistics
db.serverStatus()
// Key metrics to monitor:
// - connections.current (connection count)
// - opcounters (operations per second)
// - mem.resident (RAM usage)
// - wiredTiger.cache (cache statistics)
// - network.bytesIn/bytesOut
// - repl.lag (replication lag)
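// A small sketch that pulls the metrics listed above from one db.serverStatus() call
// (field names follow the serverStatus output; replication lag is read separately)
var status = db.serverStatus()
print("connections: " + status.connections.current)
print("opcounters: " + JSON.stringify(status.opcounters))
print("resident memory (MB): " + status.mem.resident)
print("cache bytes in use: " + status.wiredTiger.cache["bytes currently in the cache"])
print("network in/out (bytes): " + status.network.bytesIn + " / " + status.network.bytesOut)
// For replication lag, use rs.printSecondaryReplicationInfo() on the primary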
// 4. Check database statistics
db.stats()
// 5. Analyze query performance
db.orders.find({ customerId: 123 }).explain("executionStats")
// Look for:
// - executionTimeMillis (total time)
// - totalDocsExamined vs totalDocsReturned
// - stage: "IXSCAN" (using index) vs "COLLSCAN" (full scan)
// 6. Check index usage statistics
db.orders.aggregate([{ $indexStats: {} }])
// 7. Monitor with mongostat (external command)
mongostat --host localhost:27017 5
// Shows operations, memory, connections every 5 seconds
// Watch entire collection
const changeStream = db.collection('products').watch()
changeStream.on('change', (change) => {
console.log('Change event:', change)
switch(change.operationType) {
case 'insert':
handleNewProduct(change.fullDocument)
break
case 'update':
invalidateCache(change.documentKey._id)
break
case 'delete':
removeFromSearch(change.documentKey._id)
break
}
})
// Watch with filter - only completed orders
const pipeline = [
{
$match: {
'operationType': 'insert',
'fullDocument.status': 'completed'
}
}
]
const orderStream = db.orders.watch(pipeline)
orderStream.on('change', (change) => {
// Trigger fulfillment process
processOrder(change.fullDocument)
})
// Resume from token (after restart)
const resumeToken = getLastProcessedToken()
const resumableStream = db.orders.watch([], {
resumeAfter: resumeToken
})
// Use case: Real-time dashboard
db.sales.watch().on('change', (change) => {
if (change.operationType === 'insert') {
updateDashboard(change.fullDocument)
websocket.broadcast('newSale', change.fullDocument)
}
})
// Production configuration example (mongod.conf)
// Network settings
net:
port: 27017
bindIp: 10.0.0.5,127.0.0.1 // Private IP + localhost
maxIncomingConnections: 1000
tls:
mode: requireTLS
certificateKeyFile: /etc/ssl/mongodb.pem
// Security
security:
authorization: enabled
keyFile: /etc/mongodb/keyfile // Replica set auth
// Storage
storage:
dbPath: /data/mongodb
engine: wiredTiger
wiredTiger:
engineConfig:
cacheSizeGB: 8 // 50% of RAM
journalCompressor: snappy
collectionConfig:
blockCompressor: snappy
// Replication
replication:
replSetName: "production-rs"
oplogSizeMB: 10240 // 10GB oplog
// System resource limits
processManagement:
fork: true
pidFilePath: /var/run/mongodb/mongod.pid
// Operational logging
systemLog:
destination: file
path: /var/log/mongodb/mongod.log
logAppend: true
logRotate: reopen
// Profiling (disable in production, enable when troubleshooting)
operationProfiling:
mode: slowOp
slowOpThresholdMs: 100
// Best practices checklist:
// ✓ Replica set with 3+ members
// ✓ Authentication and authorization enabled
// ✓ TLS encryption
// ✓ Monitoring and alerting configured
// ✓ Automated backups
// ✓ Proper hardware sizing (working set in RAM)
// ✓ Connection limits configured
// ✓ Regular security updates
// ✓ Documented procedures
Correct Answer: Use validator option with JSON Schema when creating collections
// Create collection with schema validation
db.createCollection("users", {
validator: {
$jsonSchema: {
bsonType: "object",
required: ["name", "email", "age"],
properties: {
name: {
bsonType: "string",
description: "must be a string and is required"
},
email: {
bsonType: "string",
pattern: "^.+@.+$",
description: "must be a valid email"
},
age: {
bsonType: "int",
minimum: 18,
maximum: 120
}
}
}
},
validationLevel: "strict",
validationAction: "error"
})
Correct Answer: It reduces network round trips by batching operations
// Bulk write with multiple operations
db.users.bulkWrite([
{
insertOne: {
document: { name: "Alice", age: 30 }
}
},
{
updateOne: {
filter: { name: "Bob" },
update: { $set: { age: 35 } }
}
},
{
deleteOne: {
filter: { name: "Charlie" }
}
},
{
replaceOne: {
filter: { name: "David" },
replacement: { name: "David", age: 40, city: "NYC" }
}
}
], { ordered: false })
// Result includes counts
{
insertedCount: 1,
matchedCount: 2,
modifiedCount: 2,
deletedCount: 1
}
Correct Answer: mongoimport and mongoexport
// Export collection to JSON
mongoexport --db=mydb --collection=users --out=users.json
// Export to CSV with specific fields
mongoexport --db=mydb --collection=users \
  --type=csv --fields=name,email,age --out=users.csv
// Import JSON data
mongoimport --db=mydb --collection=users --file=users.json
// Import CSV with header row
mongoimport --db=mydb --collection=users \
  --type=csv --headerline --file=users.csv
// Import with upsert (update existing or insert new)
mongoimport --db=mydb --collection=users \
  --file=users.json --mode=upsert
Correct Answer: A fully managed cloud database service
// Connect to MongoDB Atlas
const uri = "mongodb+srv://username:password@cluster0.mongodb.net/mydb?retryWrites=true&w=majority"
const client = new MongoClient(uri, {
useNewUrlParser: true,
useUnifiedTopology: true
})
// Atlas features:
// - Automated backups (continuous or snapshot)
// - Automatic scaling (vertical and horizontal)
// - Multi-region clusters
// - Built-in monitoring and alerts
// - VPC peering and private endpoints
// - Integrated security (encryption, authentication)
// - Performance advisor recommendations
Correct Answer: A graphical user interface for visualizing and managing MongoDB data
// MongoDB Compass features:
// 1. Schema Analysis
// - Visualize document structure
// - See field types and frequencies
// - Identify data patterns
// 2. Query Builder
// - Build queries visually
// - Filter: { age: { $gte: 25, $lte: 40 } }
// - Project: { name: 1, email: 1 }
// - Sort: { age: -1 }
// 3. Aggregation Pipeline Builder
// - Drag and drop pipeline stages
// - Preview results at each stage
// - Export pipeline to code
// 4. Performance Analysis
// - View explain plans graphically
// - Identify missing indexes
// - Analyze slow queries
// 5. Index Management
// - Create indexes with GUI
// - View index usage statistics
// - Drop unused indexes
Correct Answer: To ensure data durability through write-ahead logging
// Journaling configuration
storage:
journal:
enabled: true
commitIntervalMs: 100 // Flush journal every 100ms
// WiredTiger journal behavior:
// - Write operations first go to journal
// - Journal flushed to disk every 50-100ms
// - Checkpoints written every 60 seconds
// - On crash: replay journal from last checkpoint
// Journal ensures durability:
// Time 0: Write operation
// Time 50ms: Written to journal (durable)
// Time 2s: Written to data files (checkpoint)
// Crash at 1s: Data recovered from journal
// Check journal status
db.serverStatus().wiredTiger.log
// EMBEDDING APPROACH
// Good for: one-to-one, one-to-few, always accessed together
{
_id: 1,
name: "John Doe",
email: "john@example.com",
address: { // Embedded document
street: "123 Main St",
city: "New York",
zip: "10001"
},
orders: [ // Embedded array (bounded)
{ orderId: 1, item: "Laptop", amount: 999 },
{ orderId: 2, item: "Mouse", amount: 29 }
]
}
// Single query gets everything
db.users.findOne({ _id: 1 })
// REFERENCING APPROACH
// Good for: one-to-many (unbounded), many-to-many, independent access
// Users collection
{
_id: 1,
name: "John Doe",
email: "john@example.com"
}
// Orders collection (references user)
{
_id: 101,
userId: 1, // Reference to user
items: [...],
total: 1500
}
// Requires multiple queries or $lookup
const user = db.users.findOne({ _id: 1 })
const orders = db.orders.find({ userId: 1 })
// HYBRID APPROACH
// Embed frequently used data, reference full document
{
_id: 101,
author: { // Embed key info
id: 1,
name: "John Doe"
},
title: "MongoDB Guide",
content: "..."
}
// Get author details when needed
db.authors.findOne({ _id: 1 })
// ORDERED bulk operations (default)
try {
const result = await db.users.bulkWrite([
{ insertOne: { document: { name: "Alice", age: 25 } } },
{ insertOne: { document: { name: "Bob", age: 30 } } },
{ insertOne: { document: { name: "Alice" } } }, // Error: duplicate
{ insertOne: { document: { name: "David", age: 35 } } } // Not executed
], { ordered: true })
} catch (error) {
// First 2 succeed, 3rd fails, 4th never executed
console.log(error.writeErrors)
}
// UNORDERED bulk operations
try {
const result = await db.users.bulkWrite([
{ insertOne: { document: { name: "Alice", age: 25 } } },
{ insertOne: { document: { name: "Bob", age: 30 } } },
{ insertOne: { document: { name: "Alice" } } }, // Error: duplicate
{ insertOne: { document: { name: "David", age: 35 } } } // Still executed
], { ordered: false })
} catch (error) {
// 3 operations succeed, 1 fails, all errors reported
console.log(error.writeErrors)
}
// Mixed operations bulk write
const operations = [
{ insertOne: { document: { name: "Eve", age: 28 } } },
{ updateOne: {
filter: { name: "Alice" },
update: { $set: { age: 26 } }
}},
{ deleteOne: { filter: { name: "Bob" } } },
{ replaceOne: {
filter: { name: "Charlie" },
replacement: { name: "Charlie", age: 40, city: "NYC" }
}}
]
const result = await db.users.bulkWrite(operations, { ordered: false })
console.log(result)
// {
// insertedCount: 1,
// matchedCount: 2,
// modifiedCount: 2,
// deletedCount: 1,
// upsertedCount: 0
// }
// Atlas Connection String
const uri = "mongodb+srv://user:pass@cluster0.mongodb.net/mydb?retryWrites=true&w=majority"
// Atlas Exclusive Features:
// 1. Global Clusters (multi-region)
// - Data in US, Europe, Asia
// - Zone sharding by region
// - Low-latency worldwide access
// 2. Atlas Search (full-text search)
db.products.aggregate([
{
$search: {
index: "default",
text: {
query: "laptop gaming",
path: ["title", "description"]
}
}
}
])
// 3. Atlas Data Lake (query S3 data)
// - Analyze data in cloud storage
// - Federated queries across MongoDB and S3
// 4. Automated Backup Schedule
// - Snapshots: every 6-24 hours
// - Oplog: continuous
// - Retention: 7 days to indefinite
// - Point-in-time restore to any second
// 5. Performance Advisor
// - Suggests missing indexes
// - Identifies slow queries
// - Recommends schema improvements
// 6. Atlas Triggers (serverless functions)
// - React to database changes
// - Scheduled functions
// - Event-driven architecture
// 7. Charts (data visualization)
// - Build dashboards
// - Embedded analytics
// - No coding required
// MONGODUMP / MONGORESTORE (Binary BSON)
// Backup entire database
mongodump --host=localhost --port=27017 \
--db=mydb --out=/backup/
// Creates: /backup/mydb/ with .bson and .metadata.json files
// Backup specific collection
mongodump --db=mydb --collection=users --out=/backup/
// Backup with query filter
mongodump --db=mydb --collection=orders \
--query='{"status": "completed"}' --out=/backup/
// Restore entire database
mongorestore --host=localhost --port=27017 \
--db=mydb /backup/mydb/
// Restore with different name
mongorestore --db=mydb_copy /backup/mydb/
// MONGOEXPORT / MONGOIMPORT (Human-readable)
// Export to JSON
mongoexport --db=mydb --collection=users \
--out=users.json
// Creates readable JSON file
// Export to CSV with specific fields
mongoexport --db=mydb --collection=users \
--type=csv --fields=name,email,age --out=users.csv
// Export with query
mongoexport --db=mydb --collection=orders \
--query='{"total": {"$gt": 100}}' --out=large_orders.json
// Import JSON
mongoimport --db=mydb --collection=users \
--file=users.json
// Import CSV with header
mongoimport --db=mydb --collection=users \
--type=csv --headerline --file=users.csv
// Import with upsert (update or insert)
mongoimport --db=mydb --collection=products \
--file=products.json --mode=upsert \
--upsertFields=productId
// WHEN TO USE EACH:
// mongodump/restore: Production backups, full migrations, preserve everything
// mongoexport/import: Data sharing, CSV integration, subset exports, human-readable
// Sample employees collection structure
{
_id: 1,
name: "Alice Johnson",
age: 32,
department: "Engineering",
salary: 95000,
address: {
street: "123 Main St",
city: "San Francisco",
zip: "94102"
},
skills: ["JavaScript", "Python", "MongoDB"],
projects: [
{ name: "Project A", role: "Lead", duration: 6 },
{ name: "Project B", role: "Developer", duration: 3 }
]
}
// Query 1: Find employees in San Francisco with salary > 80000
db.employees.find({
"address.city": "San Francisco",
salary: { $gt: 80000 }
})
// Query 2: Find employees with both JavaScript AND Python skills
db.employees.find({
skills: { $all: ["JavaScript", "Python"] }
})
// Query 3: Find employees who were Lead in any project for > 5 months
db.employees.find({
projects: {
$elemMatch: {
role: "Lead",
duration: { $gt: 5 }
}
}
})
// Query 4: Count employees by department
db.employees.aggregate([
{
$group: {
_id: "$department",
count: { $sum: 1 },
avgSalary: { $avg: "$salary" }
}
},
{ $sort: { avgSalary: -1 } }
])
// Query 5: Find top 3 highest paid employees by department
db.employees.aggregate([
{ $sort: { department: 1, salary: -1 } },
{
$group: {
_id: "$department",
topEmployees: {
$push: {
name: "$name",
salary: "$salary"
}
}
}
},
{
$project: {
department: "$_id",
topEmployees: { $slice: ["$topEmployees", 3] },
_id: 0
}
}
])
// Query 6: Employees with more than 3 skills, sorted by skill count
db.employees.aggregate([
{
$addFields: {
skillCount: { $size: "$skills" }
}
},
{ $match: { skillCount: { $gt: 3 } } },
{ $sort: { skillCount: -1 } },
{
$project: {
name: 1,
skillCount: 1,
skills: 1
}
}
])
// Query 7: Employees in SF or NYC with specific skills
db.employees.find({
$or: [
{ "address.city": "San Francisco" },
{ "address.city": "New York" }
],
skills: { $in: ["MongoDB", "PostgreSQL"] }
})
// MIGRATION PROCESS
// Current: Single replica set
// Target: 3-shard cluster
// Step 1: Deploy infrastructure
// - Config servers (3-member replica set)
// - Shard 1 (existing replica set)
// - Shard 2 (new 3-member replica set)
// - Shard 3 (new 3-member replica set)
// - Mongos routers (2+ instances)
// Step 2: Initialize config server replica set
rs.initiate({
_id: "configReplSet",
configsvr: true,
members: [
{ _id: 0, host: "cfg1:27019" },
{ _id: 1, host: "cfg2:27019" },
{ _id: 2, host: "cfg3:27019" }
]
})
// Step 3: Start mongos
mongos --configdb configReplSet/cfg1:27019,cfg2:27019,cfg3:27019
// Step 4: Add existing replica set as first shard
sh.addShard("rs0/host1:27017,host2:27017,host3:27017")
// Step 5: Enable sharding on database
sh.enableSharding("mydb")
// Step 6: Shard the collection
sh.shardCollection("mydb.users", { userId: 1 })
// Or hashed: sh.shardCollection("mydb.users", { userId: "hashed" })
// Step 7: Add additional shards
sh.addShard("rs1/shard2-host1:27017,shard2-host2:27017")
sh.addShard("rs2/shard3-host1:27017,shard3-host2:27017")
// Step 8: Monitor chunk migration
sh.status()
db.printShardingStatus()
// Check balancer status
sh.isBalancerRunning()
// View chunk distribution
use config
db.chunks.find({ ns: "mydb.users" }).count()
db.chunks.aggregate([
{ $group: { _id: "$shard", count: { $sum: 1 } } }
])
// Step 9: Update application connection string
// Old: mongodb://host1:27017,host2:27017/?replicaSet=rs0
// New: mongodb://mongos1:27017,mongos2:27017/
// Step 10: Verify data
db.users.countDocuments() // Should match original count
// ROLLBACK PLAN (if needed)
// 1. Stop application writes
// 2. Remove shards (except original)
// 3. Disable sharding on database
// 4. Reconnect app to original replica set
// 5. Resume operations
// MONITORING DURING MIGRATION
db.currentOp() // Watch for chunk migrations
db.serverStatus().sharding // Shard statistics
mongotop 5 // Monitor collection activity
mongostat 5 // Monitor operations