Scaling applications requires distributing load effectively across multiple instances. This guide covers load balancing strategies, scaling patterns, and high availability architectures.
Load Balancing Fundamentals#
Load Balancing Algorithms#
Round Robin
├── Request 1 → Server A
├── Request 2 → Server B
├── Request 3 → Server C
└── Request 4 → Server A (cycles back)
Least Connections
├── Server A: 10 connections
├── Server B: 5 connections ← Next request
└── Server C: 8 connections
Weighted Round Robin
├── Server A (weight: 3) → Gets 3x traffic
├── Server B (weight: 2) → Gets 2x traffic
└── Server C (weight: 1) → Gets 1x traffic
IP Hash
└── Same client IP → Always same server (session affinity)
NGINX Configuration#
# /etc/nginx/nginx.conf
upstream backend {
    # Pick the backend with the fewest active connections
    least_conn;

    server backend1.example.com:3000 weight=3;
    server backend2.example.com:3000 weight=2;
    server backend3.example.com:3000 weight=1 backup;

    # Connection reuse: keep up to 32 idle upstream connections open.
    # (This is pooling, not a health check — open-source nginx only does
    # passive checks via max_fails/fail_timeout on each server line.)
    keepalive 32;
}

server {
    listen 80;
    server_name api.example.com;

    location / {
        proxy_pass http://backend;

        # HTTP/1.1 with an empty Connection header is required for
        # upstream keepalive to work
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Timeouts
        proxy_connect_timeout 5s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;

        # Retry the next upstream on connection errors and 5xx
        proxy_next_upstream error timeout http_502 http_503 http_504;
        proxy_next_upstream_tries 3;
    }

    # Lightweight endpoint for external load-balancer checks
    location /health {
        access_log off;
        add_header Content-Type text/plain;
        return 200 "healthy\n";
    }
}
HAProxy Configuration#
# /etc/haproxy/haproxy.cfg
global
    maxconn 50000
    log stdout format raw local0

defaults
    mode http
    timeout connect 5s
    timeout client 30s
    timeout server 30s
    option httplog
    option dontlognull

frontend http_front
    bind *:80
    bind *:443 ssl crt /etc/ssl/certs/cert.pem

    # Route based on path
    acl is_api path_beg /api
    use_backend api_servers if is_api
    default_backend web_servers

backend web_servers
    balance roundrobin
    # Active health check: mark a server down after 3 failed probes,
    # up again after 2 successes
    option httpchk GET /health
    http-check expect status 200

    server web1 10.0.0.1:3000 check inter 5s fall 3 rise 2
    server web2 10.0.0.2:3000 check inter 5s fall 3 rise 2
    server web3 10.0.0.3:3000 check inter 5s fall 3 rise 2 backup

backend api_servers
    balance leastconn
    option httpchk GET /api/health
    # Match web_servers: only a 200 counts as healthy (without this,
    # any HTTP response — even a 500 — passes the check)
    http-check expect status 200

    server api1 10.0.1.1:4000 check weight 100
    server api2 10.0.1.2:4000 check weight 100
    server api3 10.0.1.3:4000 check weight 50

listen stats
    bind *:8404
    stats enable
    stats uri /stats
    stats refresh 10s
    # LOCALHOST is a predefined ACL (src 127.0.0.1/8)
    stats admin if LOCALHOST
Horizontal Scaling#
Stateless Application Design#
// Bad: In-memory session storage — sessions vanish on restart and are
// invisible to the other instances behind the load balancer
const sessions = new Map<string, Session>();

app.use((req, res, next) => {
  const sessionId = req.cookies.sessionId;
  req.session = sessions.get(sessionId);
  next();
});

// Good: External session storage (Redis) shared by every instance
import RedisStore from 'connect-redis';
import session from 'express-session';
import { createClient } from 'redis';

const redisClient = createClient({ url: process.env.REDIS_URL });
await redisClient.connect();

// Fail fast at startup — process.env values are `string | undefined`,
// and express-session must never run with an undefined secret
const sessionSecret = process.env.SESSION_SECRET;
if (!sessionSecret) {
  throw new Error('SESSION_SECRET environment variable must be set');
}

app.use(session({
  store: new RedisStore({ client: redisClient }),
  secret: sessionSecret,
  resave: false,
  saveUninitialized: false,
  cookie: {
    secure: true,     // HTTPS only
    httpOnly: true,   // not readable from client-side JS
    maxAge: 24 * 60 * 60 * 1000, // 24 hours
  },
}));
Kubernetes Horizontal Pod Autoscaler#
# deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api-server
spec:
  replicas: 3
  selector:
    matchLabels:
      app: api-server
  template:
    metadata:
      labels:
        app: api-server
    spec:
      containers:
        - name: api
          # Prefer an immutable tag (e.g. api-server:1.4.2) over :latest
          # so rollouts and rollbacks are deterministic
          image: api-server:latest
          resources:
            # Requests drive scheduling and HPA utilization math;
            # limits cap a runaway container
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi
          ports:
            - containerPort: 3000
          # Readiness gates traffic; liveness restarts a wedged process
          readinessProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 15
            periodSeconds: 20

---
# hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: api-server-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: api-server
  minReplicas: 3
  maxReplicas: 20
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    # Scale down conservatively: wait 5 minutes of sustained low load,
    # then remove at most 10% of pods per minute
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    # Scale up aggressively: no stabilization window; allow doubling
    # or +4 pods per 15s, whichever adds more
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 15
        - type: Pods
          value: 4
          periodSeconds: 15
      selectPolicy: Max
Database Scaling#
Read Replicas#
import { Pool } from 'pg';

// Primary: all writes, plus reads that must see the latest data
const primaryPool = new Pool({
  host: process.env.DB_PRIMARY_HOST,
  database: 'myapp',
  max: 20,
});

// Replica: read-only traffic; larger pool because reads dominate.
// NOTE: replicas lag the primary — do not use readQuery for
// read-your-own-writes flows.
const replicaPool = new Pool({
  host: process.env.DB_REPLICA_HOST,
  database: 'myapp',
  max: 50,
});

class Database {
  /**
   * Run a query, routing to the replica only when the caller
   * explicitly opts in via `options.readonly`.
   */
  async query(sql: string, params?: unknown[], options?: { readonly?: boolean }) {
    const pool = options?.readonly ? replicaPool : primaryPool;
    return pool.query(sql, params);
  }

  /** Convenience: read from the replica (eventually consistent). */
  async readQuery(sql: string, params?: unknown[]) {
    return this.query(sql, params, { readonly: true });
  }

  /** Convenience: write (or consistent read) against the primary. */
  async writeQuery(sql: string, params?: unknown[]) {
    return this.query(sql, params, { readonly: false });
  }
}

// Usage
const db = new Database();

// Reads go to replica
const users = await db.readQuery('SELECT * FROM users WHERE active = true');

// Writes go to primary
await db.writeQuery('INSERT INTO users (name) VALUES ($1)', ['John']);
Connection Pooling with PgBouncer#
# pgbouncer.ini
[databases]
myapp = host=primary.db.example.com port=5432 dbname=myapp
myapp_readonly = host=replica.db.example.com port=5432 dbname=myapp

[pgbouncer]
listen_addr = 0.0.0.0
listen_port = 6432
# md5 is legacy; prefer scram-sha-256 on PostgreSQL 10+
auth_type = md5
auth_file = /etc/pgbouncer/userlist.txt

# Connection pooling.
# transaction mode releases the server connection at transaction end —
# highest reuse, but session features (LISTEN, prepared statements,
# advisory locks) will not work across transactions.
pool_mode = transaction
max_client_conn = 1000
default_pool_size = 20
min_pool_size = 5
reserve_pool_size = 5

# Timeouts (seconds)
server_connect_timeout = 3
server_login_retry = 3
query_timeout = 300
Caching Layer#
Multi-Level Caching#
import Redis from 'ioredis';
import { LRUCache } from 'lru-cache';

// L1: per-instance in-memory cache — fastest, but not shared, so it
// gets a short TTL and explicit cross-instance invalidation below
const localCache = new LRUCache<string, any>({
  max: 1000,
  ttl: 60 * 1000, // 1 minute
});

// L2: distributed cache shared by all instances
const redis = new Redis(process.env.REDIS_URL);

/**
 * Read-through cache: L1 -> L2 -> fetcher. Misses are written back to
 * both layers.
 *
 * @param ttl L2 TTL in seconds (L1 keeps its own shorter TTL)
 */
async function getCached<T>(
  key: string,
  fetcher: () => Promise<T>,
  ttl: number = 300
): Promise<T> {
  // L1 hit
  const localValue = localCache.get(key);
  if (localValue !== undefined) {
    return localValue;
  }

  // L2 hit — promote into L1 for subsequent reads on this instance
  const redisValue = await redis.get(key);
  if (redisValue !== null) {
    const parsed = JSON.parse(redisValue);
    localCache.set(key, parsed);
    return parsed;
  }

  // Miss — fetch from the source of truth
  const value = await fetcher();

  // Populate both layers. Skip undefined: JSON.stringify(undefined)
  // is not valid JSON and would poison the Redis entry.
  if (value !== undefined) {
    localCache.set(key, value);
    await redis.setex(key, ttl, JSON.stringify(value));
  }

  return value;
}

// Invalidation: clear both layers here, then tell every other
// instance to drop its L1 copy
async function invalidateCache(key: string) {
  localCache.delete(key);
  await redis.del(key);

  await redis.publish('cache-invalidation', key);
}

// A connection in subscribe mode cannot issue regular commands, so
// pub/sub gets its own dedicated client
const subscriber = new Redis(process.env.REDIS_URL);
subscriber.subscribe('cache-invalidation');
subscriber.on('message', (channel, key) => {
  localCache.delete(key);
});
Health Checks#
import express from 'express';

const app = express();

interface HealthStatus {
  status: 'healthy' | 'degraded' | 'unhealthy';
  checks: Record<string, {
    status: 'pass' | 'fail';
    latency?: number;
    message?: string;
  }>;
  version: string;
  uptime: number;
}

// Under strict mode catch variables are `unknown`, so narrow before
// reading `.message`
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

app.get('/health', async (req, res) => {
  const checks: HealthStatus['checks'] = {};
  let overallStatus: HealthStatus['status'] = 'healthy';

  // Database check — a failure makes this instance unhealthy
  try {
    const start = Date.now();
    await db.query('SELECT 1');
    checks.database = { status: 'pass', latency: Date.now() - start };
  } catch (error) {
    checks.database = { status: 'fail', message: errorMessage(error) };
    overallStatus = 'unhealthy';
  }

  // Redis check — degraded rather than unhealthy: the app can still
  // serve traffic without its cache
  try {
    const start = Date.now();
    await redis.ping();
    checks.redis = { status: 'pass', latency: Date.now() - start };
  } catch (error) {
    checks.redis = { status: 'fail', message: errorMessage(error) };
    overallStatus = overallStatus === 'healthy' ? 'degraded' : overallStatus;
  }

  // External service check — informational only; bounded so a hung
  // dependency cannot stall the health endpoint itself
  try {
    const start = Date.now();
    const response = await fetch('https://api.external.com/health', {
      signal: AbortSignal.timeout(2000),
    });
    checks.externalApi = {
      status: response.ok ? 'pass' : 'fail',
      latency: Date.now() - start,
    };
  } catch (error) {
    checks.externalApi = { status: 'fail', message: errorMessage(error) };
  }

  const health: HealthStatus = {
    status: overallStatus,
    checks,
    version: process.env.APP_VERSION || 'unknown',
    uptime: process.uptime(),
  };

  // Degraded still returns 200 so the load balancer keeps routing here
  const statusCode = overallStatus === 'unhealthy' ? 503 : 200;

  res.status(statusCode).json(health);
});

// Kubernetes probes
app.get('/ready', async (req, res) => {
  // Readiness: can this instance accept traffic right now?
  const ready = await checkDatabaseConnection();
  res.status(ready ? 200 : 503).send(ready ? 'ready' : 'not ready');
});

app.get('/live', (req, res) => {
  // Liveness: is the process responsive at all?
  res.status(200).send('alive');
});
Best Practices#
- Design for failure: Assume any component can fail
- Use health checks: Let load balancers route around failures
- Implement graceful shutdown: Drain connections before stopping
- Monitor everything: Metrics, logs, and traces
- Test at scale: Load test before production
- Plan for capacity: Know your limits
Conclusion#
Effective scaling requires stateless design, proper load balancing, and careful attention to data consistency. Start with horizontal scaling for web servers, add caching layers, and scale databases with read replicas. Monitor continuously and adjust based on real traffic patterns.