When vertical scaling hits limits, horizontal sharding distributes data across multiple databases. Here's how to shard effectively.
When to Shard#
Consider sharding when:
- Single database can't handle write load
- Data size exceeds single server capacity
- Read replicas aren't enough
- Geographic distribution needed
Before sharding, try:
- Query optimization
- Caching
- Read replicas
- Vertical scaling
Sharding Strategies#
Range-Based:
- Shard by value ranges (user_id 1-1M, 1M-2M)
- Simple to implement
- Can cause hotspots
Hash-Based:
- Shard by hash(key) % num_shards
- Even distribution
- Harder to range query
Directory-Based:
- Lookup table maps keys to shards
- Most flexible
- Additional lookup overhead
Geographic:
- Shard by region/location
- Reduces latency
- Compliance friendly
Shard Key Selection#
1// Good shard keys
2const goodKeys = {
3 // High cardinality, evenly distributed
4 userId: 'hash(user_id)',
5 tenantId: 'tenant_id', // Multi-tenant apps
6 orderId: 'hash(order_id)',
7};
8
9// Bad shard keys
10const badKeys = {
11 // Low cardinality - few distinct values
12 country: 'Most users in few countries',
13 status: 'Only a few statuses',
14
15 // Monotonically increasing - hotspot on latest shard
16 createdAt: 'All new writes to one shard',
17 autoIncrementId: 'Same problem',
18};Hash-Based Sharding#
1import crypto from 'crypto';
2
3class ShardRouter {
4 constructor(private shardCount: number) {}
5
6 // Consistent hashing for even distribution
7 getShard(key: string): number {
8 const hash = crypto.createHash('md5').update(key).digest('hex');
9 const hashInt = parseInt(hash.substring(0, 8), 16);
10 return hashInt % this.shardCount;
11 }
12
13 // Get database connection for key
14 getConnection(key: string): Database {
15 const shardId = this.getShard(key);
16 return shardConnections[shardId];
17 }
18}
19
20const router = new ShardRouter(4);
21
22// Usage
23async function getUser(userId: string): Promise<User> {
24 const db = router.getConnection(userId);
25 return db.user.findUnique({ where: { id: userId } });
26}
27
28async function createUser(data: CreateUserInput): Promise<User> {
29 const userId = generateId();
30 const db = router.getConnection(userId);
31 return db.user.create({ data: { id: userId, ...data } });
32}Consistent Hashing#
1// Consistent hashing for dynamic shard count
2class ConsistentHashRing {
3 private ring: Map<number, string> = new Map();
4 private sortedKeys: number[] = [];
5 private virtualNodes: number;
6
7 constructor(shards: string[], virtualNodes = 150) {
8 this.virtualNodes = virtualNodes;
9
10 for (const shard of shards) {
11 this.addShard(shard);
12 }
13 }
14
15 private hash(key: string): number {
16 const hash = crypto.createHash('md5').update(key).digest('hex');
17 return parseInt(hash.substring(0, 8), 16);
18 }
19
20 addShard(shard: string): void {
21 for (let i = 0; i < this.virtualNodes; i++) {
22 const key = this.hash(`${shard}:${i}`);
23 this.ring.set(key, shard);
24 this.sortedKeys.push(key);
25 }
26 this.sortedKeys.sort((a, b) => a - b);
27 }
28
29 removeShard(shard: string): void {
30 for (let i = 0; i < this.virtualNodes; i++) {
31 const key = this.hash(`${shard}:${i}`);
32 this.ring.delete(key);
33 this.sortedKeys = this.sortedKeys.filter((k) => k !== key);
34 }
35 }
36
37 getShard(key: string): string {
38 const hash = this.hash(key);
39
40 // Find first node >= hash
41 for (const nodeKey of this.sortedKeys) {
42 if (nodeKey >= hash) {
43 return this.ring.get(nodeKey)!;
44 }
45 }
46
47 // Wrap around to first node
48 return this.ring.get(this.sortedKeys[0])!;
49 }
50}
51
52const ring = new ConsistentHashRing(['shard-1', 'shard-2', 'shard-3']);
53
54// Adding a shard only moves ~1/n of keys
55ring.addShard('shard-4');Directory-Based Sharding#
1// Lookup table approach
2class DirectoryShardRouter {
3 private directory: Database;
4 private cache: Map<string, string> = new Map();
5
6 constructor(directoryDb: Database) {
7 this.directory = directoryDb;
8 }
9
10 async getShard(tenantId: string): Promise<string> {
11 // Check cache
12 if (this.cache.has(tenantId)) {
13 return this.cache.get(tenantId)!;
14 }
15
16 // Lookup in directory
17 const mapping = await this.directory.shardMapping.findUnique({
18 where: { tenantId },
19 });
20
21 if (mapping) {
22 this.cache.set(tenantId, mapping.shardId);
23 return mapping.shardId;
24 }
25
26 // Assign new tenant to shard
27 const shardId = await this.assignShard(tenantId);
28 this.cache.set(tenantId, shardId);
29 return shardId;
30 }
31
32 private async assignShard(tenantId: string): Promise<string> {
33 // Find shard with lowest load
34 const shards = await this.directory.shard.findMany({
35 orderBy: { tenantCount: 'asc' },
36 });
37
38 const targetShard = shards[0];
39
40 await this.directory.shardMapping.create({
41 data: { tenantId, shardId: targetShard.id },
42 });
43
44 await this.directory.shard.update({
45 where: { id: targetShard.id },
46 data: { tenantCount: { increment: 1 } },
47 });
48
49 return targetShard.id;
50 }
51}Cross-Shard Queries#
1// Fan-out query across shards
2async function searchUsers(query: string): Promise<User[]> {
3 const results = await Promise.all(
4 shardConnections.map((db) =>
5 db.user.findMany({
6 where: {
7 OR: [
8 { name: { contains: query } },
9 { email: { contains: query } },
10 ],
11 },
12 take: 10,
13 })
14 )
15 );
16
17 // Merge and sort results
18 return results
19 .flat()
20 .sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime())
21 .slice(0, 10);
22}
23
24// Aggregation across shards
25async function getUserCount(): Promise<number> {
26 const counts = await Promise.all(
27 shardConnections.map((db) => db.user.count())
28 );
29
30 return counts.reduce((sum, count) => sum + count, 0);
31}Global Tables#
1// Some data needs to be on all shards
2class GlobalTableSync {
3 async sync(tableName: string, data: any[]): Promise<void> {
4 await Promise.all(
5 shardConnections.map(async (db) => {
6 await db.$transaction([
7 db[tableName].deleteMany(),
8 db[tableName].createMany({ data }),
9 ]);
10 })
11 );
12 }
13
14 // Or use a central database for global data
15 async getConfig(key: string): Promise<any> {
16 return globalDb.config.findUnique({ where: { key } });
17 }
18}
19
20// Examples of global data:
21// - Configuration
22// - Feature flags
23// - Reference data (countries, currencies)
24// - User directory for cross-shard lookupsShard Rebalancing#
1// Move data between shards
2async function rebalanceShard(
3 sourceDb: Database,
4 targetDb: Database,
5 tenantId: string
6): Promise<void> {
7 // 1. Mark tenant as migrating
8 await globalDb.tenant.update({
9 where: { id: tenantId },
10 data: { status: 'migrating' },
11 });
12
13 // 2. Copy data
14 const users = await sourceDb.user.findMany({
15 where: { tenantId },
16 });
17
18 await targetDb.user.createMany({ data: users });
19
20 // 3. Update directory
21 await directoryDb.shardMapping.update({
22 where: { tenantId },
23 data: { shardId: targetDb.shardId },
24 });
25
26 // 4. Delete from source
27 await sourceDb.user.deleteMany({
28 where: { tenantId },
29 });
30
31 // 5. Mark migration complete
32 await globalDb.tenant.update({
33 where: { id: tenantId },
34 data: { status: 'active' },
35 });
36}Schema Management#
1// Apply migrations to all shards
2async function migrateAllShards(migration: Migration): Promise<void> {
3 const results = await Promise.allSettled(
4 shardConnections.map(async (db, index) => {
5 console.log(`Migrating shard ${index}...`);
6 await db.$executeRaw(migration.sql);
7 console.log(`Shard ${index} migrated`);
8 })
9 );
10
11 // Check for failures
12 const failures = results.filter((r) => r.status === 'rejected');
13 if (failures.length > 0) {
14 console.error('Migration failures:', failures);
15 throw new Error('Some shards failed to migrate');
16 }
17}Best Practices#
Design:
✓ Choose shard key carefully
✓ Plan for cross-shard queries
✓ Keep global data separate
✓ Design for rebalancing
Operations:
✓ Monitor shard sizes
✓ Automate migrations
✓ Test failover
✓ Document shard topology
Avoid:
✗ Sharding too early
✗ Cross-shard transactions
✗ Hotspot shard keys
✗ Manual shard management
Conclusion#
Sharding adds complexity but enables horizontal scale. Choose the right shard key, plan for cross-shard operations, and automate management. Consider managed solutions like CockroachDB or Vitess that handle sharding transparently.