Partitioning and sharding distribute data for better performance and scalability.
Partitioning vs Sharding#
Partitioning: Single database, multiple tables/partitions
┌─────────────────────────────────────┐
│ PostgreSQL │
│ ┌──────┐ ┌──────┐ ┌──────┐ │
│ │2024Q1│ │2024Q2│ │2024Q3│ │
│ └──────┘ └──────┘ └──────┘ │
└─────────────────────────────────────┘
Sharding: Multiple databases
┌──────────┐ ┌──────────┐ ┌──────────┐
│ Shard 1 │ │ Shard 2 │ │ Shard 3 │
│ Users A-I│ │Users J-R │ │Users S-Z │
└──────────┘ └──────────┘ └──────────┘
PostgreSQL Partitioning#
1-- Range partitioning by date
2CREATE TABLE orders (
3 id SERIAL,
4 order_date DATE NOT NULL,
5 customer_id INT,
6 total DECIMAL(10,2)
7) PARTITION BY RANGE (order_date);
8
9CREATE TABLE orders_2024_q1 PARTITION OF orders
10 FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');
11
12CREATE TABLE orders_2024_q2 PARTITION OF orders
13 FOR VALUES FROM ('2024-04-01') TO ('2024-07-01');
14
15-- List partitioning
16CREATE TABLE customers (
17 id SERIAL,
18 name VARCHAR(100),
19 region VARCHAR(50)
20) PARTITION BY LIST (region);
21
22CREATE TABLE customers_us PARTITION OF customers
23 FOR VALUES IN ('US', 'CA', 'MX');
24
25CREATE TABLE customers_eu PARTITION OF customers
26 FOR VALUES IN ('UK', 'DE', 'FR');
27
28-- Hash partitioning
29CREATE TABLE sessions (
30 id UUID,
31 user_id INT,
32 data JSONB
33) PARTITION BY HASH (user_id);
34
35CREATE TABLE sessions_0 PARTITION OF sessions
36 FOR VALUES WITH (MODULUS 4, REMAINDER 0);
37CREATE TABLE sessions_1 PARTITION OF sessions
38 FOR VALUES WITH (MODULUS 4, REMAINDER 1);Sharding Strategy#
1import crypto from 'crypto';
2
3const SHARD_COUNT = 4;
4
5// Hash-based shard selection
6function getShardId(key: string): number {
7 const hash = crypto.createHash('md5').update(key).digest('hex');
8 return parseInt(hash.slice(0, 8), 16) % SHARD_COUNT;
9}
10
11// Shard manager
12class ShardManager {
13 private pools: Map<number, Pool> = new Map();
14
15 constructor(configs: DatabaseConfig[]) {
16 configs.forEach((config, index) => {
17 this.pools.set(index, new Pool(config));
18 });
19 }
20
21 async query<T>(shardKey: string, sql: string, params: unknown[]): Promise<T[]> {
22 const shardId = getShardId(shardKey);
23 const pool = this.pools.get(shardId)!;
24 const result = await pool.query(sql, params);
25 return result.rows;
26 }
27
28 async queryAll<T>(sql: string, params: unknown[]): Promise<T[]> {
29 const results = await Promise.all(
30 Array.from(this.pools.values()).map(pool =>
31 pool.query(sql, params).then(r => r.rows)
32 )
33 );
34 return results.flat();
35 }
36}Sharded Repository#
1class UserRepository {
2 constructor(private shardManager: ShardManager) {}
3
4 async findById(userId: string): Promise<User | null> {
5 const users = await this.shardManager.query<User>(
6 userId,
7 'SELECT * FROM users WHERE id = $1',
8 [userId]
9 );
10 return users[0] || null;
11 }
12
13 async create(user: CreateUserInput): Promise<User> {
14 const id = generateId();
15 const result = await this.shardManager.query<User>(
16 id,
17 `INSERT INTO users (id, email, name) VALUES ($1, $2, $3) RETURNING *`,
18 [id, user.email, user.name]
19 );
20 return result[0];
21 }
22
23 // Cross-shard query (expensive)
24 async findByEmail(email: string): Promise<User | null> {
25 const users = await this.shardManager.queryAll<User>(
26 'SELECT * FROM users WHERE email = $1',
27 [email]
28 );
29 return users[0] || null;
30 }
31}Consistent Hashing#
1class ConsistentHash {
2 private ring: Map<number, string> = new Map();
3 private sortedKeys: number[] = [];
4 private virtualNodes = 150;
5
6 addNode(node: string) {
7 for (let i = 0; i < this.virtualNodes; i++) {
8 const hash = this.hash(`${node}:${i}`);
9 this.ring.set(hash, node);
10 this.sortedKeys.push(hash);
11 }
12 this.sortedKeys.sort((a, b) => a - b);
13 }
14
15 removeNode(node: string) {
16 for (let i = 0; i < this.virtualNodes; i++) {
17 const hash = this.hash(`${node}:${i}`);
18 this.ring.delete(hash);
19 this.sortedKeys = this.sortedKeys.filter(k => k !== hash);
20 }
21 }
22
23 getNode(key: string): string {
24 const hash = this.hash(key);
25 for (const ringKey of this.sortedKeys) {
26 if (hash <= ringKey) {
27 return this.ring.get(ringKey)!;
28 }
29 }
30 return this.ring.get(this.sortedKeys[0])!;
31 }
32
33 private hash(key: string): number {
34 const hash = crypto.createHash('md5').update(key).digest('hex');
35 return parseInt(hash.slice(0, 8), 16);
36 }
37}Cross-Shard Joins#
1// Avoid cross-shard joins when possible
2// Denormalize or use application-level joins
3
4async function getUserWithOrders(userId: string): Promise<UserWithOrders> {
5 const user = await userRepo.findById(userId);
6
7 // If orders are on same shard (same shard key)
8 const orders = await orderRepo.findByUserId(userId);
9
10 return { ...user, orders };
11}
12
13// For aggregations across shards
14async function getTotalRevenue(): Promise<number> {
15 const shardTotals = await shardManager.queryAll<{ total: number }>(
16 'SELECT SUM(amount) as total FROM orders',
17 []
18 );
19 return shardTotals.reduce((sum, s) => sum + (s.total || 0), 0);
20}Partitioning improves single-database performance; sharding enables horizontal scaling across databases.