PostgreSQL Indexing Strategies

Proper indexing dramatically improves query performance. Here's how to index effectively.

Index Types

-- B-tree (the default access method): equality and range queries
CREATE INDEX idx_users_email ON users (email);
CREATE INDEX idx_orders_date ON orders (created_at);

-- Hash: equality comparisons only (rarely used)
CREATE INDEX idx_users_hash ON users USING hash (email);

-- GIN: arrays, JSONB, full-text search
CREATE INDEX idx_posts_tags ON posts USING gin (tags);
-- Named after the indexed column (metadata) so it cannot be confused with
-- idx_users_data, the name this guide uses for the index on users.data.
CREATE INDEX idx_users_metadata ON users USING gin (metadata);

-- GiST: geometric data, full-text search
CREATE INDEX idx_locations_point ON locations USING gist (coordinates);

-- BRIN: large tables whose rows are naturally ordered on the indexed column
CREATE INDEX idx_logs_created ON logs USING brin (created_at);

Basic Indexes

-- One column
CREATE INDEX idx_users_email
    ON users (email);

-- Several columns (compound index).
-- Column order is significant: the leftmost columns are used first.
CREATE INDEX idx_orders_user_date
    ON orders (user_id, created_at);

-- Index that also enforces uniqueness
CREATE UNIQUE INDEX idx_users_email_unique
    ON users (email);

-- Index stored in descending order
CREATE INDEX idx_posts_created_desc
    ON posts (created_at DESC);

-- Covering index: the INCLUDE columns are carried in the index as payload,
-- which allows index-only scans
CREATE INDEX idx_orders_covering
    ON orders (user_id)
    INCLUDE (total, status);

Partial Indexes

-- Only rows for active users are indexed
CREATE INDEX idx_users_active
    ON users (email)
    WHERE status = 'active';

-- Only orders created after the fixed cutoff date are indexed
CREATE INDEX idx_orders_recent
    ON orders (created_at)
    WHERE created_at > '2024-01-01';

-- Rows with a NULL phone are left out of the index
CREATE INDEX idx_users_phone
    ON users (phone)
    WHERE phone IS NOT NULL;

-- The predicate may combine several conditions
CREATE INDEX idx_orders_pending
    ON orders (user_id, created_at)
    WHERE status = 'pending'
      AND total > 100;

Expression Indexes

-- Index the result of a function call rather than the raw column
CREATE INDEX idx_users_email_lower
    ON users (lower(email));

-- A query must use the same expression to match the index
SELECT *
FROM users
WHERE lower(email) = 'john@example.com';

-- Bucket orders by month
-- NOTE(review): index expressions must be IMMUTABLE. date_trunc() on a
-- timestamptz column is not, so this presumably assumes created_at is
-- timestamp without time zone - confirm the column type.
CREATE INDEX idx_orders_month
    ON orders (date_trunc('month', created_at));

-- Index a single key extracted from a JSONB column
-- (the extra parentheses around the expression are required)
CREATE INDEX idx_users_country
    ON users ((metadata->>'country'));

-- One index may contain several expressions
CREATE INDEX idx_products_search
    ON products (lower(name), lower(brand));

JSONB Indexes

-- GIN index over the whole JSONB document, used for containment tests
CREATE INDEX idx_users_data
    ON users USING gin (data);

-- Containment query (@>)
SELECT *
FROM users
WHERE data @> '{"role": "admin"}';

-- Index on one specific path inside the document
CREATE INDEX idx_users_role
    ON users ((data->>'role'));

-- Query against that path
SELECT *
FROM users
WHERE data->>'role' = 'admin';

-- GIN with the jsonb_path_ops operator class (smaller, faster for @>)
CREATE INDEX idx_users_data_ops
    ON users USING gin (data jsonb_path_ops);

Full-Text Search Indexes

-- Create tsvector column
ALTER TABLE posts ADD COLUMN search_vector tsvector;

-- Populate tsvector for every existing row (no WHERE clause on purpose).
-- coalesce() is required: to_tsvector(NULL) yields NULL and NULL || tsvector
-- is NULL, so a row with a NULL title or content would otherwise end up with
-- a NULL search_vector and never match any search.
UPDATE posts SET search_vector =
  setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
  setweight(to_tsvector('english', coalesce(content, '')), 'B');

-- Create GIN index
CREATE INDEX idx_posts_search ON posts USING gin(search_vector);

-- Query
SELECT * FROM posts
WHERE search_vector @@ to_tsquery('english', 'postgresql & indexing');

-- Auto-update with trigger
-- Row-level trigger function: rebuilds NEW.search_vector from the row's title
-- (weight A) and content (weight B) and returns the modified row.
-- coalesce() keeps a NULL title or content from turning the whole
-- concatenated vector into NULL.
CREATE OR REPLACE FUNCTION posts_search_trigger() RETURNS trigger AS $$
BEGIN
  NEW.search_vector :=
    setweight(to_tsvector('english', coalesce(NEW.title, '')), 'A') ||
    setweight(to_tsvector('english', coalesce(NEW.content, '')), 'B');
  RETURN NEW;
END
$$ LANGUAGE plpgsql;

-- Fire before every insert or update so the stored vector always matches the row
CREATE TRIGGER posts_search_update
BEFORE INSERT OR UPDATE ON posts
FOR EACH ROW EXECUTE FUNCTION posts_search_trigger();

Array Indexes

-- GIN index for arrays
CREATE INDEX idx_posts_tags ON posts USING gin(tags);

-- Query for a single element
-- Written as containment because the GIN array operator class only supports
-- @>, <@, && and =. The equivalent form 'javascript' = ANY(tags) returns the
-- same rows but cannot use this index.
SELECT * FROM posts WHERE tags @> ARRAY['javascript'];

-- Query with containment (all listed tags present)
SELECT * FROM posts WHERE tags @> ARRAY['javascript', 'react'];

-- Query with overlap (at least one listed tag present)
SELECT * FROM posts WHERE tags && ARRAY['javascript', 'typescript'];

Query Analysis

-- Plain EXPLAIN prints the query plan
EXPLAIN
SELECT *
FROM users
WHERE email = 'john@example.com';

-- With ANALYZE the statement is actually run
EXPLAIN ANALYZE
SELECT *
FROM users
WHERE email = 'john@example.com';

-- More detail: run the statement and also report buffer usage, as text
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT *
FROM users
WHERE email = 'john@example.com';

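-- Understanding output (abridged example; figures are illustrative)
/*
Index Scan using idx_users_email on users
  Index Cond: (email = 'john@example.com')
  Buffers: shared hit=3
Planning Time: 0.1 ms
Execution Time: 0.05 ms

A "Rows Removed by Filter" line appears under a plan node only when that node
applied a Filter that discarded rows; a pure index match like this shows none.
*/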

-- Check index usage, most-scanned indexes first.
-- pg_stat_user_indexes names its columns relname / indexrelname (it has no
-- tablename / indexname columns); they are aliased to keep the familiar
-- output column names.
SELECT
  schemaname,
  relname AS tablename,
  indexrelname AS indexname,
  idx_scan,
  idx_tup_read,
  idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;

Index Maintenance

-- Check index size for the public schema, largest first.
-- The index name column in pg_stat_user_indexes is indexrelname; it is
-- aliased to indexname for the output.
SELECT
  indexrelname AS indexname,
  pg_size_pretty(pg_relation_size(indexrelid)) AS size
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY pg_relation_size(indexrelid) DESC;

-- List indexes in the public schema whose scan counter is still zero
SELECT indexrelname, idx_scan, idx_tup_read
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
  AND idx_scan = 0;

-- Rebuild a single index
REINDEX INDEX idx_users_email;

-- Rebuild every index on a table
REINDEX TABLE users;

-- Rebuild without blocking (CONCURRENTLY)
REINDEX INDEX CONCURRENTLY idx_users_email;

-- Find bloat candidates: indexes larger than 10,000,000 bytes (about 10 MB),
-- shown with their scan counts.
-- This uses only the built-in statistics view and needs no extension. It does
-- not measure bloat itself; for real bloat figures use a dedicated tool such
-- as the pgstattuple extension.
-- Column names are relname / indexrelname in pg_stat_user_indexes, aliased
-- here for readable output.
SELECT
  relname AS tablename,
  indexrelname AS indexname,
  pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
  idx_scan AS scans
FROM pg_stat_user_indexes
WHERE pg_relation_size(indexrelid) > 10000000;

Common Patterns

-- Keyset pagination: the index matches the ORDER BY of the page query
CREATE INDEX idx_posts_pagination
    ON posts (created_at DESC, id DESC);

-- Fetch the next page: rows that sort after the last (created_at, id) seen
SELECT *
FROM posts
WHERE (created_at, id) < ('2024-01-01', 'abc123')
ORDER BY
    created_at DESC,
    id DESC
LIMIT 20;

-- Soft deletes: index only the rows that have not been deleted.
-- Given its own name because idx_users_active is already used in this guide
-- for the partial index on users(email) WHERE status = 'active'.
CREATE INDEX idx_users_not_deleted ON users(id)
WHERE deleted_at IS NULL;

-- Status lookups: index only the statuses named in the predicate
CREATE INDEX idx_orders_status
    ON orders (status)
    WHERE status IN ('pending', 'processing');

-- Date-range lookups on events
CREATE INDEX idx_events_date
    ON events (start_date, end_date);

-- Trigram search (similarity)
-- IF NOT EXISTS keeps this step from failing when pg_trgm is already installed
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX idx_users_name_trgm ON users USING gin(name gin_trgm_ops);

-- % is pg_trgm's similarity operator
SELECT * FROM users WHERE name % 'john';

When NOT to Index

-- Small tables (full scan is fast)
-- Tables rarely queried
-- Columns with low cardinality (few unique values)
-- Frequently updated columns (index maintenance overhead)

-- Example: boolean status with 50/50 distribution
-- Index won't help much
-- (named idx_users_is_active because idx_users_active is already used in this
-- guide for a different partial index on users)
CREATE INDEX idx_users_is_active ON users(is_active); -- Usually not useful

-- Better: partial index for the minority
CREATE INDEX idx_users_inactive ON users(id)
WHERE is_active = false; -- Only if rare

Best Practices

Design:
✓ Index columns in WHERE clauses
✓ Index columns in JOIN conditions
✓ Index columns in ORDER BY
✓ Consider column order in compound indexes

Optimization:
✓ Use EXPLAIN ANALYZE
✓ Create partial indexes for subsets
✓ Use covering indexes for hot queries
✓ Remove unused indexes

Maintenance:
✓ Monitor index usage
✓ Reindex periodically
✓ Check for bloat
✓ Use CONCURRENTLY for production

Effective indexing requires understanding query patterns and PostgreSQL's index types. Use B-tree for most cases, GIN for arrays and JSONB, and partial indexes for filtered queries. Monitor usage with pg_stat_user_indexes and always test with EXPLAIN ANALYZE.