MongoDB Aggregation Pipeline Guide

MongoDB's aggregation pipeline processes documents through stages. Here's how to use it effectively.

Basic Stages

// $match - Filter documents
// Passes through only the orders whose status is 'completed' AND whose
// total is greater than or equal to 100.
db.orders.aggregate([
  { $match: { status: 'completed', total: { $gte: 100 } } }
]);

// $project - Shape output
// Emits two fields per user: fullName (firstName and lastName joined by a
// space) and email. `_id: 0` suppresses _id, which $project would otherwise
// include by default.
db.users.aggregate([
  {
    $project: {
      fullName: { $concat: ['$firstName', ' ', '$lastName'] },
      email: 1,
      _id: 0
    }
  }
]);

// $sort - Order results
// Sort keys apply in the order written: price descending (-1) first, then
// name ascending (1) to break ties between equally priced products.
db.products.aggregate([
  { $sort: { price: -1, name: 1 } }
]);

// $limit and $skip - Pagination
// Returns the third page of 10 posts, newest first.
// createdAt alone is not unique, so posts sharing a timestamp could come
// back in a different order on each run and be repeated or skipped across
// pages. Adding the unique _id as a tiebreaker makes the order stable.
db.posts.aggregate([
  { $sort: { createdAt: -1, _id: -1 } },
  { $skip: 20 },   // skip pages 1 and 2
  { $limit: 10 }   // page size
]);

// $count - Count documents
// Filters to pending orders, then emits a single document of the form
// { pendingOrders: <number of matching orders> }.
db.orders.aggregate([
  { $match: { status: 'pending' } },
  { $count: 'pendingOrders' }
]);

Grouping and Accumulation

// Basic $group
// Produces one document per distinct customerId (stored in _id) with that
// customer's summed order total, number of orders, and average order total.
db.orders.aggregate([
  {
    $group: {
      _id: '$customerId',
      totalSpent: { $sum: '$total' },
      // $sum: 1 adds 1 for every document in the group, i.e. counts them
      orderCount: { $sum: 1 },
      avgOrder: { $avg: '$total' }
    }
  }
]);

// Group by multiple fields
// The compound _id groups sales by the year and month extracted from
// `date`, yielding revenue and transaction count per calendar month.
db.sales.aggregate([
  {
    $group: {
      _id: {
        year: { $year: '$date' },
        month: { $month: '$date' }
      },
      revenue: { $sum: '$amount' },
      transactions: { $sum: 1 }
    }
  },
  // Sub-fields of the compound key are addressed with dot notation;
  // sorting by year then month puts the groups in chronological order.
  { $sort: { '_id.year': 1, '_id.month': 1 } }
]);

// Accumulator operators
// Summarises each product category: names, distinct brands, price range,
// average price, and the creation dates of the oldest and newest products.
db.products.aggregate([
  // $first and $last take their value from the first/last document that
  // reaches each group, so they are only meaningful on sorted input.
  // Sorting by createdAt makes firstAdded the earliest date and lastAdded
  // the latest (and gives the $push array a defined order as well).
  { $sort: { createdAt: 1 } },
  {
    $group: {
      _id: '$category',
      products: { $push: '$name' },           // Array of all names (duplicates kept)
      uniqueBrands: { $addToSet: '$brand' },  // Distinct brands, order unspecified
      cheapest: { $min: '$price' },
      expensive: { $max: '$price' },
      avgPrice: { $avg: '$price' },
      firstAdded: { $first: '$createdAt' },
      lastAdded: { $last: '$createdAt' }
    }
  }
]);

// $push with object
// For each customerId, builds an `orders` array holding one
// { orderId, total, date } sub-document per order in the group.
db.orders.aggregate([
  {
    $group: {
      _id: '$customerId',
      orders: {
        $push: {
          orderId: '$_id',
          total: '$total',
          date: '$createdAt'
        }
      }
    }
  }
]);

Lookups (Joins)

// Basic $lookup
// Joins each order to the customers documents whose _id equals the order's
// customerId. $lookup always stores its matches as an array in `customer`.
db.orders.aggregate([
  {
    $lookup: {
      from: 'customers',
      localField: 'customerId',
      foreignField: '_id',
      as: 'customer'
    }
  },
  // Replace the one-element array with the matched document itself.
  // Without preserveNullAndEmptyArrays, orders that matched no customer
  // (empty array) are dropped from the output here.
  { $unwind: '$customer' }  // Convert array to object
]);

// Pipeline lookup (more flexible)
// For each order, fetches the name and price of every product referenced by
// its items. `let` exposes values from the order to the sub-pipeline, where
// they are read with the $$ prefix.
db.orders.aggregate([
  {
    $lookup: {
      from: 'products',
      // Default to [] when the order has no items field: $in raises an
      // error if its second argument is not an array, which would abort the
      // whole aggregation. With the default, such orders simply get an
      // empty productDetails array.
      let: { orderItems: { $ifNull: ['$items', []] } },
      pipeline: [
        {
          $match: {
            // '$$orderItems.productId' resolves to the array of productId
            // values taken from every element of the order's items.
            $expr: { $in: ['$_id', '$$orderItems.productId'] }
          }
        },
        { $project: { name: 1, price: 1 } }
      ],
      as: 'productDetails'
    }
  }
]);

// Multiple lookups
// Chains two joins: the first attaches the customer (unwound to a single
// embedded document), the second attaches the products referenced by the
// order's items.
db.orders.aggregate([
  {
    $lookup: {
      from: 'customers',
      localField: 'customerId',
      foreignField: '_id',
      as: 'customer'
    }
  },
  { $unwind: '$customer' },
  {
    $lookup: {
      from: 'products',
      // localField points into an array of sub-documents: a product is
      // joined when its _id equals the productId of any element of items.
      localField: 'items.productId',
      foreignField: '_id',
      as: 'products'
    }
  }
]);

Array Operations

// $unwind - Flatten arrays
// Emits one document per element of `items`, so the following $group can
// total the quantity ordered for each productId across all orders.
db.orders.aggregate([
  { $unwind: '$items' },
  {
    $group: {
      _id: '$items.productId',
      totalQuantity: { $sum: '$items.quantity' }
    }
  }
]);

// $unwind with preserveNullAndEmptyArrays
// By default $unwind drops documents whose array field is null, missing, or
// empty. With preserveNullAndEmptyArrays: true those users are still
// emitted once instead of being removed.
db.users.aggregate([
  {
    $unwind: {
      path: '$addresses',
      preserveNullAndEmptyArrays: true
    }
  }
]);

// Array expressions
// Derives three values from each user's `emails` array: its first element,
// its length, and whether any element has verified === true.
// $size and $in raise an error when their array argument is missing or not
// an array, so both fall back to [] via $ifNull; a single user without an
// emails field would otherwise abort the whole pipeline.
db.users.aggregate([
  {
    $project: {
      name: 1,
      primaryEmail: { $arrayElemAt: ['$emails', 0] },  // element at index 0
      emailCount: { $size: { $ifNull: ['$emails', []] } },
      // '$emails.verified' resolves to the array of each element's
      // `verified` value; $in checks whether true is among them.
      hasVerified: { $in: [true, { $ifNull: ['$emails.verified', []] }] }
    }
  }
]);

// $filter array elements
// expensiveItems is the subset of `items` whose price is >= 100.
// `as` names the per-element variable, referenced in `cond` as $$item.
db.orders.aggregate([
  {
    $project: {
      expensiveItems: {
        $filter: {
          input: '$items',
          as: 'item',
          cond: { $gte: ['$$item.price', 100] }
        }
      }
    }
  }
]);

// $map array transformation
// Builds discountedPrices by turning each element of `variants` into a
// { size, salePrice } object. `as` names the per-element variable,
// referenced inside `in` as $$variant.
db.products.aggregate([
  {
    $project: {
      name: 1,
      discountedPrices: {
        $map: {
          input: '$variants',
          as: 'variant',
          in: {
            size: '$$variant.size',
            // 0.9 × price, i.e. a 10% discount
            salePrice: { $multiply: ['$$variant.price', 0.9] }
          }
        }
      }
    }
  }
]);

// $reduce
// Folds `items` into a single number: the sum of price × quantity over all
// items. $$value is the running total (starting at initialValue, 0) and
// $$this is the element currently being processed.
db.orders.aggregate([
  {
    $project: {
      orderTotal: {
        $reduce: {
          input: '$items',
          initialValue: 0,
          in: {
            $add: [
              '$$value',
              { $multiply: ['$$this.price', '$$this.quantity'] }
            ]
          }
        }
      }
    }
  }
]);

Date Operations

// Extract date parts
// Splits createdAt into numeric components. No timezone is passed, so the
// operators use their default of UTC.
db.orders.aggregate([
  {
    $project: {
      year: { $year: '$createdAt' },
      month: { $month: '$createdAt' },
      day: { $dayOfMonth: '$createdAt' },
      hour: { $hour: '$createdAt' },
      // $dayOfWeek runs from 1 (Sunday) to 7 (Saturday)
      dayOfWeek: { $dayOfWeek: '$createdAt' }
    }
  }
]);

// Date formatting
// Renders `timestamp` as a 'YYYY-MM-DD HH:MM' string (24-hour clock),
// converted to the America/New_York timezone before formatting.
db.events.aggregate([
  {
    $project: {
      formattedDate: {
        $dateToString: {
          format: '%Y-%m-%d %H:%M',
          date: '$timestamp',
          timezone: 'America/New_York'
        }
      }
    }
  }
]);

// Date calculations
// daysRemaining is the difference, in days, from the current time to
// expiresAt. $$NOW is the system variable holding the current datetime;
// the result is negative once expiresAt lies in the past.
db.subscriptions.aggregate([
  {
    $project: {
      daysRemaining: {
        $dateDiff: {
          startDate: '$$NOW',
          endDate: '$expiresAt',
          unit: 'day'
        }
      }
    }
  }
]);

// Group by date period
// Uses the 'YYYY-MM-DD' string form of `date` as the group key, giving one
// total per calendar day.
db.sales.aggregate([
  {
    $group: {
      _id: {
        $dateToString: { format: '%Y-%m-%d', date: '$date' }
      },
      dailyTotal: { $sum: '$amount' }
    }
  },
  // Zero-padded year-month-day strings sort chronologically
  { $sort: { _id: 1 } }
]);

Conditional Logic

// $cond (if-then-else)
// Labels each product 'premium' when its price is >= 100 and 'standard'
// otherwise.
db.products.aggregate([
  {
    $project: {
      name: 1,
      priceCategory: {
        $cond: {
          if: { $gte: ['$price', 100] },
          then: 'premium',
          else: 'standard'
        }
      }
    }
  }
]);

// $switch (multiple conditions)
// Maps the textual status to a numeric priority: urgent → 1, high → 2,
// normal → 3. Branches are evaluated in order and the first whose `case`
// is true wins; any other status gets the default, 4.
db.orders.aggregate([
  {
    $project: {
      status: 1,
      priority: {
        $switch: {
          branches: [
            { case: { $eq: ['$status', 'urgent'] }, then: 1 },
            { case: { $eq: ['$status', 'high'] }, then: 2 },
            { case: { $eq: ['$status', 'normal'] }, then: 3 }
          ],
          default: 4
        }
      }
    }
  }
]);

// $ifNull
// displayName is the user's nickname, falling back to firstName when
// nickname is null or missing.
db.users.aggregate([
  {
    $project: {
      displayName: { $ifNull: ['$nickname', '$firstName'] }
    }
  }
]);

Text Search

// Text search (requires text index)
// A single text index covering both title and content backs the $text
// query below.
db.articles.createIndex({ title: 'text', content: 'text' });

// Returns the 10 articles most relevant to the search terms.
db.articles.aggregate([
  // A $match that uses $text must be the first stage of the pipeline
  { $match: { $text: { $search: 'mongodb aggregation' } } },
  // Expose the relevance score computed by the text search as a field
  { $addFields: { score: { $meta: 'textScore' } } },
  { $sort: { score: -1 } },
  { $limit: 10 }
]);

Faceted Search

// Multiple facets in one query
// $facet runs each sub-pipeline over the same input documents and returns a
// single document with one array field per facet: categoryCounts,
// priceBuckets and topRated.
db.products.aggregate([
  {
    $facet: {
      // Number of products in each category
      categoryCounts: [
        { $group: { _id: '$category', count: { $sum: 1 } } }
      ],
      priceBuckets: [
        {
          $bucket: {
            groupBy: '$price',
            // Buckets are [0,50), [50,100), [100,200), [200,500): the lower
            // bound is inclusive and the upper bound exclusive.
            boundaries: [0, 50, 100, 200, 500],
            // Receives every product whose price falls in no bucket above,
            // so a price of exactly 500 lands here.
            default: '500+',
            output: { count: { $sum: 1 } }
          }
        }
      ],
      // The five highest-rated products, reduced to name and rating
      topRated: [
        { $sort: { rating: -1 } },
        { $limit: 5 },
        { $project: { name: 1, rating: 1 } }
      ]
    }
  }
]);

// $bucket for ranges
// Groups employees into salary bands. Each band includes its lower boundary
// and excludes its upper one; the output _id is the band's lower boundary.
db.employees.aggregate([
  {
    $bucket: {
      groupBy: '$salary',
      boundaries: [30000, 50000, 75000, 100000, 150000],
      // Catch-all for salaries below 30000 or at/above 150000
      default: 'Other',
      output: {
        count: { $sum: 1 },
        employees: { $push: '$name' }
      }
    }
  }
]);

Output Stages

// $out - Write to collection (replaces)
// Computes total spending per customer over completed orders and writes the
// result to customer_spending. $out must be the final stage; it replaces
// the target collection's existing contents.
db.orders.aggregate([
  { $match: { status: 'completed' } },
  {
    $group: {
      _id: '$customerId',
      totalSpent: { $sum: '$total' }
    }
  },
  { $out: 'customer_spending' }
]);

// $merge - Upsert to collection
// Totals quantity sold per (date, product) pair and merges the results into
// salesSummary, leaving documents that are not part of this run untouched.
db.dailySales.aggregate([
  {
    $group: {
      _id: { date: '$date', product: '$productId' },
      totalSold: { $sum: '$quantity' }
    }
  },
  {
    $merge: {
      into: 'salesSummary',
      // Match result documents to existing ones by the compound _id
      on: '_id',
      whenMatched: 'replace',      // overwrite the existing summary document
      whenNotMatched: 'insert'     // otherwise add a new one
    }
  }
]);

Performance

// Use indexes with $match first
// Placing $match at the start lets the server use an index on the filtered
// fields (status, date) and hands $group only the documents it needs.
db.orders.aggregate([
  { $match: { status: 'completed', date: { $gte: ISODate('2023-01-01') } } },
  { $group: { _id: '$customerId', total: { $sum: '$amount' } } }
]);

// Explain aggregation
// Returns the query plan for the pipeline instead of its results; the
// 'executionStats' verbosity also runs it and reports execution statistics.
db.orders.explain('executionStats').aggregate([
  { $match: { status: 'completed' } },
  // $sum: 1 makes `total` the number of completed orders per customer
  { $group: { _id: '$customerId', total: { $sum: 1 } } }
]);

// Use $project early to reduce document size
// Every field a later stage reads must survive the projection. $group keys
// on category and sums value, so exactly those two fields are kept; if
// category were projected away, '$category' would be missing and all
// documents would collapse into a single group with _id: null.
db.largeCollection.aggregate([
  { $match: { active: true } },
  { $project: { category: 1, value: 1 } },  // Keep only what $group reads
  { $group: { _id: '$category', total: { $sum: '$value' } } }
]);

// Allow disk use for large aggregations
// Options go in the second argument to aggregate(). allowDiskUse: true lets
// stages such as this $group spill to temporary files on disk instead of
// failing when they exceed the per-stage memory limit.
db.bigData.aggregate(
  [{ $group: { _id: '$field', count: { $sum: 1 } } }],
  { allowDiskUse: true }
);

Best Practices

Pipeline Order:
✓ $match early to filter
✓ $project early to reduce size
✓ $sort immediately after $match so the sort can use an index
✓ $limit after $sort

Performance:
✓ Create indexes for $match fields
✓ Use $project to limit fields
✓ Avoid $unwind when possible
✓ Use allowDiskUse for large data

Design:
✓ Break complex pipelines into steps
✓ Test each stage independently
✓ Use explain() to analyze
✓ Consider $merge for materialized views

MongoDB's aggregation pipeline is powerful for data transformation and analysis. Structure pipelines with filtering first, use indexes effectively, and break complex operations into readable stages. For production, always analyze with explain() and consider creating materialized views for frequently-run aggregations.