Database Index Optimization Techniques

Proper indexing can make your database queries 100x faster. Poor indexing wastes storage and slows down writes. Here's how to get it right.

Understanding Query Plans

-- Show the plan the planner picks for a lookup by user_id.
-- Note: EXPLAIN ANALYZE actually executes the statement to collect timings.
EXPLAIN ANALYZE
SELECT *
FROM orders
WHERE user_id = 123;

-- How to read the scan node in the output:
--   Seq Scan        -> whole table read, no index used (bad for large tables)
--   Index Scan      -> rows located through an index, then fetched from the table
--   Index Only Scan -> answered from the index alone (best)
--   Bitmap Scan     -> index used to filter down to the matching rows

-- List every index defined on the orders table, with its definition
SELECT
    indexname,
    indexdef
FROM pg_indexes
WHERE tablename = 'orders';

Index Selection Guidelines

// Typical query shapes, each annotated with the index that serves it
const queryPatterns = {
  // Equality lookup on one column -> single-column index:
  //   CREATE INDEX idx_users_email ON users(email)
  findByEmail: "SELECT * FROM users WHERE email = $1",

  // Range filter -> index on the column being ranged over:
  //   CREATE INDEX idx_orders_created ON orders(created_at)
  recentOrders: "SELECT * FROM orders WHERE created_at > $1",

  // Several equality conditions -> one composite index over all of them:
  //   CREATE INDEX idx_orders_user_status ON orders(user_id, status)
  userOrders: "SELECT * FROM orders WHERE user_id = $1 AND status = $2",

  // Filter plus ORDER BY -> filter column first, sort column (with direction) second:
  //   CREATE INDEX idx_orders_user_created ON orders(user_id, created_at DESC)
  sortedOrders: "SELECT * FROM orders WHERE user_id = $1 ORDER BY created_at DESC",
};

Composite Index Ordering

-- Column order in a composite index decides which queries can use it.
-- The examples below assume an index on (user_id, status, created_at).

-- ✅ All three index columns are used
SELECT *
FROM orders
WHERE user_id = 1
    AND status = 'pending'
    AND created_at > '2024-01-01';

-- ✅ The leading two columns are used
SELECT *
FROM orders
WHERE user_id = 1
    AND status = 'pending';

-- ✅ Only the leading column is used
SELECT *
FROM orders
WHERE user_id = 1;

-- ❌ Leading column is missing: the index cannot be used efficiently
SELECT *
FROM orders
WHERE status = 'pending';

-- ❌ Leading two columns are missing: the index cannot be used
SELECT *
FROM orders
WHERE created_at > '2024-01-01';

-- Rule of thumb: equality columns first, range columns last
CREATE INDEX idx_orders_optimized
    ON orders (user_id, status, created_at);

Covering Indexes

-- A covering index carries every column a query reads, so the query
-- can be answered from the index without visiting the table rows.
-- The INCLUDE clause used below requires PostgreSQL 11+.

-- Query to serve
SELECT
    email,
    name
FROM users
WHERE status = 'active';

-- Covering index: status is the search key, email and name ride along as payload
CREATE INDEX idx_users_status_covering
    ON users (status)
    INCLUDE (email, name);

-- The query above can now run as an "Index Only Scan" - fastest possible

-- Same pattern for orders looked up by user
CREATE INDEX idx_orders_covering
    ON orders (user_id)
    INCLUDE (total, status, created_at);

-- Every referenced column lives in the index, so no table access is needed
SELECT
    total,
    status
FROM orders
WHERE user_id = 123;

Partial Indexes

-- A partial index covers only the rows matching its WHERE clause,
-- which keeps it small: faster to query and cheaper to store.

-- Emails of active users only
CREATE INDEX idx_users_active_email
    ON users (email)
    WHERE status = 'active';

-- Orders created after the start of 2024 only
CREATE INDEX idx_orders_recent
    ON orders (user_id, created_at)
    WHERE created_at > '2024-01-01';

-- Only rows that actually have a phone number
CREATE INDEX idx_users_phone
    ON users (phone)
    WHERE phone IS NOT NULL;

-- A partial index is usable only when the query's WHERE clause
-- matches the index predicate.

-- ✅ status = 'active' is present, so idx_users_active_email applies
SELECT *
FROM users
WHERE email = 'test@example.com'
    AND status = 'active';

-- ❌ No status filter, so the partial index cannot be used
SELECT *
FROM users
WHERE email = 'test@example.com';

Expression Indexes

-- Expression indexes store the result of a computation instead of a raw column.
-- A query benefits only when it repeats the indexed expression exactly.

-- Case-insensitive lookup by email
CREATE INDEX idx_users_email_lower
    ON users (LOWER(email));

SELECT *
FROM users
WHERE LOWER(email) = 'test@example.com';

-- Bucketing orders by calendar month
-- NOTE(review): assumes created_at is timestamp WITHOUT time zone; DATE_TRUNC on
-- timestamptz is not IMMUTABLE and cannot be indexed directly - confirm column type.
CREATE INDEX idx_orders_month
    ON orders (DATE_TRUNC('month', created_at));

SELECT *
FROM orders
WHERE DATE_TRUNC('month', created_at) = '2024-01-01';

-- Value extracted from a JSON document (note the extra parentheses in the index)
CREATE INDEX idx_users_plan
    ON users ((metadata->>'plan'));

SELECT *
FROM users
WHERE (metadata->>'plan') = 'pro';

Index Maintenance

-- Find unused indexes: non-unique indexes with zero recorded scans, largest first.
-- Unique indexes are skipped because they enforce a constraint even when never scanned.
SELECT
    stats.schemaname || '.' || stats.relname AS table,
    stats.indexrelname AS index,
    pg_size_pretty(pg_relation_size(idx.indexrelid)) AS size,
    stats.idx_scan AS index_scans
FROM pg_stat_user_indexes AS stats
INNER JOIN pg_index AS idx
    ON idx.indexrelid = stats.indexrelid
WHERE stats.idx_scan = 0
    AND NOT idx.indisunique
ORDER BY pg_relation_size(idx.indexrelid) DESC;

-- Find duplicate indexes: pairs of indexes on the same table that have the same
-- columns, operator classes, per-column sort options, expressions and predicate.
-- Comparing indkey alone is not enough: it would also pair up partial indexes
-- with different WHERE clauses, and any two expression indexes (an expression
-- key is stored as 0 in indkey, so unrelated expressions look identical).
SELECT
    a.indrelid::regclass AS table_name,
    a.indexrelid::regclass AS index1,
    b.indexrelid::regclass AS index2
FROM pg_index AS a
INNER JOIN pg_index AS b
    ON a.indrelid = b.indrelid
    AND a.indexrelid < b.indexrelid  -- report each pair once, never an index with itself
    AND a.indkey = b.indkey
    AND a.indclass = b.indclass
    AND a.indoption = b.indoption
    -- indexprs / indpred are NULL for plain, non-partial indexes; compare as text
    AND COALESCE(a.indexprs::text, '') = COALESCE(b.indexprs::text, '')
    AND COALESCE(a.indpred::text, '') = COALESCE(b.indpred::text, '');

-- Check index bloat candidates: the 20 largest indexes with their scan counts.
-- This reports size, not bloat itself; a large index with few scans is the
-- first place to look for bloat or for an index that can be dropped.
-- pg_stat_user_indexes exposes relname / indexrelname / indexrelid (it has no
-- tablename / indexname columns), and sizing by indexrelid avoids a
-- name-to-regclass cast that fails for indexes outside the search_path.
SELECT
    relname AS tablename,
    indexrelname AS indexname,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
    idx_scan AS times_used
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC
LIMIT 20;

-- Rebuild the index to reduce bloat. CONCURRENTLY is used so the rebuild does
-- not hold locks that block normal reads and writes on the table.
-- NOTE(review): REINDEX ... CONCURRENTLY is available from PostgreSQL 12 -
-- confirm the server version before relying on it.
REINDEX INDEX CONCURRENTLY idx_orders_user_id;

-- Update the planner statistics for the orders table
ANALYZE orders;

Index for Sorting

-- An index whose order matches the ORDER BY lets the planner skip the Sort step.

-- Before: no matching index, so the plan contains
-- -> Sort (expensive for large result sets)
EXPLAIN
SELECT *
FROM orders
WHERE user_id = 1
ORDER BY created_at DESC;

-- Matching index: filter column first, then the sort column in its sort direction
CREATE INDEX idx_orders_user_created_desc
    ON orders (user_id, created_at DESC);

-- After: the same query runs as an Index Scan with no Sort step
EXPLAIN
SELECT *
FROM orders
WHERE user_id = 1
ORDER BY created_at DESC;

-- Sorting on more than one column
CREATE INDEX idx_orders_user_status_date
    ON orders (user_id, status, created_at DESC);
-- Supports: ORDER BY status ASC, created_at DESC

Monitoring Index Usage

// Track slow queries
import { Pool } from 'pg';

const pool = new Pool();

// Queries that take longer than this many milliseconds are logged
const SLOW_QUERY_THRESHOLD_MS = 100;

/**
 * Runs a query through the shared pool and logs it when it is slow.
 *
 * pg's Pool does not emit a 'query' event (it emits connect, acquire,
 * release, remove and error), so a listener registered for 'query' never
 * fires. Timing therefore has to wrap the query call itself.
 *
 * @param text   SQL text, using $1, $2, ... placeholders for values
 * @param params Values bound to the placeholders
 * @returns The result object returned by pool.query
 * @throws Whatever pool.query rejects with; failed queries are still timed
 */
async function timedQuery(text: string, params?: unknown[]) {
  const start = Date.now();
  let rowCount: number | undefined;

  try {
    const result = await pool.query(text, params);
    rowCount = result.rows?.length;
    return result;
  } finally {
    // Runs on success and on failure, so slow failing queries are reported too
    const duration = Date.now() - start;
    if (duration > SLOW_QUERY_THRESHOLD_MS) {
      console.warn({
        query: text,
        duration,
        rows: rowCount,
      });
    }
  }
}

/**
 * Periodic index analysis: fetches the 20 indexes with the fewest recorded
 * scans from pg_stat_user_indexes and prints them to the console.
 */
async function analyzeIndexUsage(): Promise<void> {
  const leastUsedIndexesSql = `
    SELECT
      schemaname,
      relname as table_name,
      indexrelname as index_name,
      idx_scan as times_used,
      pg_size_pretty(pg_relation_size(indexrelid)) as size
    FROM pg_stat_user_indexes
    ORDER BY idx_scan ASC
    LIMIT 20
  `;

  const { rows } = await pool.query(leastUsedIndexesSql);

  console.log('Least used indexes:', rows);
}

Best Practices

Creating Indexes:
✓ Index columns in WHERE clauses
✓ Index foreign keys
✓ Use composite indexes for multi-column queries
✓ Order: equality columns, then range, then sort

Avoiding Problems:
✗ Don't index low-cardinality columns alone
✗ Don't create redundant indexes
✗ Don't ignore write performance impact
✗ Don't forget to ANALYZE after bulk changes

Maintenance:
✓ Monitor index usage regularly
✓ Remove unused indexes
✓ Reindex periodically
✓ Update statistics

Effective indexing requires understanding your query patterns. Use EXPLAIN ANALYZE to verify index usage, create targeted indexes, and regularly audit for unused or duplicate indexes. The right indexes can transform query performance from seconds to milliseconds.