Proper indexing dramatically improves query performance. Here's how to index effectively.
Index Types#
1-- B-tree (default) - equality and range queries
2CREATE INDEX idx_users_email ON users(email);
3CREATE INDEX idx_orders_date ON orders(created_at);
4
5-- Hash - equality only (rarely used)
6CREATE INDEX idx_users_hash ON users USING hash(email);
7
8-- GIN - arrays, JSONB, full-text search
9CREATE INDEX idx_posts_tags ON posts USING gin(tags);
10CREATE INDEX idx_users_data ON users USING gin(metadata);
11
12-- GiST - geometric data, full-text search
13CREATE INDEX idx_locations_point ON locations USING gist(coordinates);
14
15-- BRIN - large tables with natural ordering
16CREATE INDEX idx_logs_created ON logs USING brin(created_at);Basic Indexes#
1-- Single column
2CREATE INDEX idx_users_email ON users(email);
3
4-- Multi-column (compound)
5CREATE INDEX idx_orders_user_date ON orders(user_id, created_at);
6-- Order matters! Leftmost columns used first
7
8-- Unique index
9CREATE UNIQUE INDEX idx_users_email_unique ON users(email);
10
11-- Descending order
12CREATE INDEX idx_posts_created_desc ON posts(created_at DESC);
13
14-- Include columns (covering index)
15CREATE INDEX idx_orders_covering ON orders(user_id)
16INCLUDE (total, status);
17-- Allows index-only scansPartial Indexes#
1-- Index only active users
2CREATE INDEX idx_users_active ON users(email)
3WHERE status = 'active';
4
5-- Index recent orders
6CREATE INDEX idx_orders_recent ON orders(created_at)
7WHERE created_at > '2024-01-01';
8
9-- Index non-null values
10CREATE INDEX idx_users_phone ON users(phone)
11WHERE phone IS NOT NULL;
12
13-- Combined conditions
14CREATE INDEX idx_orders_pending ON orders(user_id, created_at)
15WHERE status = 'pending' AND total > 100;Expression Indexes#
1-- Index on function result
2CREATE INDEX idx_users_email_lower ON users(lower(email));
3
4-- Use in queries
5SELECT * FROM users WHERE lower(email) = 'john@example.com';
6
7-- Date truncation
8CREATE INDEX idx_orders_month ON orders(date_trunc('month', created_at));
9
10-- JSONB expression
11CREATE INDEX idx_users_country ON users((metadata->>'country'));
12
13-- Multiple expressions
14CREATE INDEX idx_products_search ON products(
15 lower(name),
16 lower(brand)
17);JSONB Indexes#
1-- GIN index for JSONB containment
2CREATE INDEX idx_users_data ON users USING gin(data);
3
4-- Query with containment
5SELECT * FROM users WHERE data @> '{"role": "admin"}';
6
7-- Path-specific index
8CREATE INDEX idx_users_role ON users((data->>'role'));
9
10-- Query with path
11SELECT * FROM users WHERE data->>'role' = 'admin';
12
13-- GIN with jsonb_path_ops (smaller, faster for @>)
14CREATE INDEX idx_users_data_ops ON users
15USING gin(data jsonb_path_ops);Full-Text Search Indexes#
1-- Create tsvector column
2ALTER TABLE posts ADD COLUMN search_vector tsvector;
3
4-- Populate tsvector
5UPDATE posts SET search_vector =
6 setweight(to_tsvector('english', title), 'A') ||
7 setweight(to_tsvector('english', content), 'B');
8
9-- Create GIN index
10CREATE INDEX idx_posts_search ON posts USING gin(search_vector);
11
12-- Query
13SELECT * FROM posts
14WHERE search_vector @@ to_tsquery('english', 'postgresql & indexing');
15
16-- Auto-update with trigger
17CREATE FUNCTION posts_search_trigger() RETURNS trigger AS $$
18BEGIN
19 NEW.search_vector :=
20 setweight(to_tsvector('english', NEW.title), 'A') ||
21 setweight(to_tsvector('english', NEW.content), 'B');
22 RETURN NEW;
23END
24$$ LANGUAGE plpgsql;
25
26CREATE TRIGGER posts_search_update
27BEFORE INSERT OR UPDATE ON posts
28FOR EACH ROW EXECUTE FUNCTION posts_search_trigger();Array Indexes#
1-- GIN index for arrays
2CREATE INDEX idx_posts_tags ON posts USING gin(tags);
3
4-- Query with ANY
5SELECT * FROM posts WHERE 'javascript' = ANY(tags);
6
7-- Query with containment
8SELECT * FROM posts WHERE tags @> ARRAY['javascript', 'react'];
9
10-- Query with overlap
11SELECT * FROM posts WHERE tags && ARRAY['javascript', 'typescript'];Query Analysis#
1-- EXPLAIN shows query plan
2EXPLAIN SELECT * FROM users WHERE email = 'john@example.com';
3
4-- EXPLAIN ANALYZE runs the query
5EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'john@example.com';
6
7-- Verbose output
8EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
9SELECT * FROM users WHERE email = 'john@example.com';
10
11-- Understanding output
12/*
13Index Scan using idx_users_email on users
14 Index Cond: (email = 'john@example.com')
15 Rows Removed by Filter: 0
16 Buffers: shared hit=3
17 Planning Time: 0.1 ms
18 Execution Time: 0.05 ms
19*/
20
21-- Check index usage
22SELECT
23 schemaname,
24 tablename,
25 indexname,
26 idx_scan,
27 idx_tup_read,
28 idx_tup_fetch
29FROM pg_stat_user_indexes
30ORDER BY idx_scan DESC;Index Maintenance#
1-- Check index size
2SELECT
3 indexname,
4 pg_size_pretty(pg_relation_size(indexrelid)) as size
5FROM pg_stat_user_indexes
6WHERE schemaname = 'public'
7ORDER BY pg_relation_size(indexrelid) DESC;
8
9-- Check for unused indexes
10SELECT
11 indexrelname,
12 idx_scan,
13 idx_tup_read
14FROM pg_stat_user_indexes
15WHERE idx_scan = 0
16 AND schemaname = 'public';
17
18-- Reindex
19REINDEX INDEX idx_users_email;
20REINDEX TABLE users;
21
22-- Concurrent reindex (no blocking)
23REINDEX INDEX CONCURRENTLY idx_users_email;
24
25-- Check bloat (requires pg_stat_statements)
26SELECT
27 tablename,
28 indexname,
29 pg_size_pretty(pg_relation_size(indexrelid)) as index_size,
30 idx_scan as scans
31FROM pg_stat_user_indexes
32WHERE pg_relation_size(indexrelid) > 10000000;Common Patterns#
1-- Pagination with keyset
2CREATE INDEX idx_posts_pagination ON posts(created_at DESC, id DESC);
3
4SELECT * FROM posts
5WHERE (created_at, id) < ('2024-01-01', 'abc123')
6ORDER BY created_at DESC, id DESC
7LIMIT 20;
8
9-- Soft deletes
10CREATE INDEX idx_users_active ON users(id)
11WHERE deleted_at IS NULL;
12
13-- Status queries
14CREATE INDEX idx_orders_status ON orders(status)
15WHERE status IN ('pending', 'processing');
16
17-- Date range queries
18CREATE INDEX idx_events_date ON events(start_date, end_date);
19
20-- Trigram search (similarity)
21CREATE EXTENSION pg_trgm;
22CREATE INDEX idx_users_name_trgm ON users USING gin(name gin_trgm_ops);
23
24SELECT * FROM users WHERE name % 'john';When NOT to Index#
1-- Small tables (full scan is fast)
2-- Tables rarely queried
3-- Columns with low cardinality (few unique values)
4-- Frequently updated columns (index maintenance overhead)
5
6-- Example: boolean status with 50/50 distribution
7-- Index won't help much
8CREATE INDEX idx_users_active ON users(is_active); -- Usually not useful
9
10-- Better: partial index for the minority
11CREATE INDEX idx_users_inactive ON users(id)
12WHERE is_active = false; -- Only if rareBest Practices#
Design:
✓ Index columns in WHERE clauses
✓ Index columns in JOIN conditions
✓ Index columns in ORDER BY
✓ Consider column order in compound indexes
Optimization:
✓ Use EXPLAIN ANALYZE
✓ Create partial indexes for subsets
✓ Use covering indexes for hot queries
✓ Remove unused indexes
Maintenance:
✓ Monitor index usage
✓ Reindex periodically
✓ Check for bloat
✓ Use CONCURRENTLY for production
Conclusion#
Effective indexing requires understanding query patterns and PostgreSQL's index types. Use B-tree for most cases, GIN for arrays and JSONB, and partial indexes for filtered queries. Monitor usage with pg_stat_user_indexes and always test with EXPLAIN ANALYZE.