Load balancers distribute traffic across multiple servers, improving availability and performance. Here's how to choose and configure the right strategy.
Load Balancing Algorithms#
Round Robin:
- Requests distributed sequentially
- Simple and fair
- Doesn't consider server capacity
Weighted Round Robin:
- Servers assigned weights
- Higher weights get more traffic
- Good for mixed server capacities
Least Connections:
- Routes to server with fewest active connections
- Better for varying request durations
- More complex tracking
IP Hash:
- Routes based on client IP
- Same client always hits same server
- Enables session persistence
Least Response Time:
- Routes to fastest responding server
- Accounts for server load and latency
- Requires active monitoring
NGINX Load Balancing#
# Basic round robin (NGINX default distribution)
upstream api_servers {
    server api1.example.com:3000;
    server api2.example.com:3000;
    server api3.example.com:3000;

    # Keep idle connections open to the upstreams. Requires HTTP/1.1 and an
    # empty Connection header in the location block below to take effect.
    keepalive 32;
}

# Weighted distribution — higher weight receives proportionally more traffic
upstream api_servers_weighted {
    server api1.example.com:3000 weight=5;
    server api2.example.com:3000 weight=3;
    server api3.example.com:3000 weight=2;
}

# Least connections — route each request to the server with the fewest
# active connections (better when request durations vary widely)
upstream api_servers_least {
    least_conn;
    server api1.example.com:3000;
    server api2.example.com:3000;
    server api3.example.com:3000;
}

# IP hash for sticky sessions — the same client IP always maps to the
# same server, giving session persistence without shared session storage
upstream api_servers_sticky {
    ip_hash;
    server api1.example.com:3000;
    server api2.example.com:3000;
    server api3.example.com:3000;
}

# Passive health checks: after max_fails failures within fail_timeout the
# server is considered down for fail_timeout. A 'backup' server receives
# traffic only when all primary servers are unavailable.
upstream api_servers_health {
    server api1.example.com:3000 max_fails=3 fail_timeout=30s;
    server api2.example.com:3000 max_fails=3 fail_timeout=30s;
    server api3.example.com:3000 backup;
}

server {
    listen 80;

    location /api/ {
        proxy_pass http://api_servers;
        proxy_http_version 1.1;
        # Clear the Connection header so upstream keepalive actually works;
        # otherwise NGINX sends "Connection: close" to each upstream.
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Timeouts
        proxy_connect_timeout 5s;
        proxy_read_timeout 60s;
        proxy_send_timeout 60s;

        # Retry on failure. Note: by default NGINX will not retry
        # non-idempotent methods (POST, etc.) unless 'non_idempotent' is added.
        proxy_next_upstream error timeout http_502 http_503;
        proxy_next_upstream_tries 3;
    }
}
Health Checks#
// Liveness endpoint for the load balancer's probes: cheap, no dependencies.
app.get('/health', (req, res) => {
  res.json({ status: 'healthy' });
});

// Detailed health check: aggregates dependency probes. Returns 200 only
// when every check reports healthy, otherwise 503 so the LB drains traffic.
app.get('/health/detailed', async (req, res) => {
  // The async probes are independent — run them concurrently instead of
  // awaiting them one after another.
  const [database, redisStatus] = await Promise.all([
    checkDatabase(),
    checkRedis(),
  ]);

  const checks = {
    database,
    redis: redisStatus,
    memory: checkMemory(),
    cpu: checkCPU(),
  };

  const healthy = Object.values(checks).every((c) => c.healthy);

  res.status(healthy ? 200 : 503).json({
    status: healthy ? 'healthy' : 'unhealthy',
    checks,
    uptime: process.uptime(),
    timestamp: new Date().toISOString(),
  });
});

// Probe the database with the cheapest possible round trip.
async function checkDatabase(): Promise<HealthCheck> {
  try {
    await db.$queryRaw`SELECT 1`;
    return { healthy: true };
  } catch (error) {
    // catch variables are `unknown` under strict mode — narrow before
    // reading .message instead of assuming an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    return { healthy: false, error: message };
  }
}

// Report heap pressure; unhealthy above 90% of the current heap limit.
function checkMemory(): HealthCheck {
  const used = process.memoryUsage();
  const heapUsedPercent = (used.heapUsed / used.heapTotal) * 100;

  return {
    healthy: heapUsedPercent < 90,
    heapUsedPercent: heapUsedPercent.toFixed(2),
  };
}
Session Persistence#
1// Cookie-based session affinity
2// HAProxy configuration
3const haproxyConfig = `
4backend api_servers
5 balance roundrobin
6 cookie SERVERID insert indirect nocache
7 server api1 api1:3000 check cookie api1
8 server api2 api2:3000 check cookie api2
9 server api3 api3:3000 check cookie api3
10`;
11
12// Application-level: Store sessions in Redis
13import session from 'express-session';
14import RedisStore from 'connect-redis';
15
16app.use(
17 session({
18 store: new RedisStore({ client: redis }),
19 secret: process.env.SESSION_SECRET!,
20 resave: false,
21 saveUninitialized: false,
22 cookie: {
23 secure: process.env.NODE_ENV === 'production',
24 httpOnly: true,
25 maxAge: 24 * 60 * 60 * 1000,
26 },
27 })
28);
29
30// Now any server can handle any request
31// Session data is centralized in RedisAWS Application Load Balancer#
// CloudFormation template for an internet-facing ALB with sticky sessions,
// health checks against /health, and fast deregistration for deploys.
const albTemplate = {
  AWSTemplateFormatVersion: '2010-09-09',
  Resources: {
    // The load balancer itself, spanning two subnets for availability.
    LoadBalancer: {
      Type: 'AWS::ElasticLoadBalancingV2::LoadBalancer',
      Properties: {
        Name: 'api-alb',
        Scheme: 'internet-facing',
        Type: 'application',
        Subnets: ['subnet-1', 'subnet-2'],
        SecurityGroups: ['sg-alb'],
      },
    },
    // Target group: where traffic lands, plus health-check behavior.
    TargetGroup: {
      Type: 'AWS::ElasticLoadBalancingV2::TargetGroup',
      Properties: {
        Name: 'api-targets',
        Port: 3000,
        Protocol: 'HTTP',
        VpcId: 'vpc-123',
        TargetType: 'ip',
        HealthCheckEnabled: true,
        HealthCheckPath: '/health',
        HealthCheckIntervalSeconds: 30,
        HealthyThresholdCount: 2,
        UnhealthyThresholdCount: 3,
        TargetGroupAttributes: [
          // Drain targets for 30s on deregistration (default is 300s).
          { Key: 'deregistration_delay.timeout_seconds', Value: '30' },
          // ALB-managed cookie stickiness, 24h duration.
          { Key: 'stickiness.enabled', Value: 'true' },
          { Key: 'stickiness.type', Value: 'lb_cookie' },
          { Key: 'stickiness.lb_cookie.duration_seconds', Value: '86400' },
        ],
      },
    },
    // HTTPS listener terminating TLS at the load balancer.
    Listener: {
      Type: 'AWS::ElasticLoadBalancingV2::Listener',
      Properties: {
        LoadBalancerArn: { Ref: 'LoadBalancer' },
        Port: 443,
        Protocol: 'HTTPS',
        Certificates: [{ CertificateArn: 'arn:aws:acm:...' }],
        DefaultActions: [
          { Type: 'forward', TargetGroupArn: { Ref: 'TargetGroup' } },
        ],
      },
    },
  },
};
Graceful Shutdown#
// Handle graceful shutdown for zero-downtime deployments
const server = app.listen(3000);

let isShuttingDown = false;

// During shutdown the health check reports 503 so the load balancer stops
// routing new traffic here while in-flight requests finish.
app.get('/health', (req, res) => {
  if (isShuttingDown) {
    return res.status(503).json({ status: 'shutting_down' });
  }
  res.json({ status: 'healthy' });
});

async function gracefulShutdown(signal: string): Promise<void> {
  console.log(`${signal} received, starting graceful shutdown`);
  isShuttingDown = true;

  // Hard deadline: if draining stalls, exit non-zero. unref() so this
  // timer alone never keeps the process alive.
  const forceExit = setTimeout(() => {
    console.error('Forced shutdown after timeout');
    process.exit(1);
  }, 30000);
  forceExit.unref();

  // Stop accepting new connections; the callback runs once existing
  // connections have drained.
  server.close(async () => {
    console.log('HTTP server closed');

    try {
      // Close database connections
      await db.$disconnect();

      // Close Redis
      await redis.quit();

      console.log('Graceful shutdown complete');
      process.exit(0);
    } catch (err) {
      // A failed cleanup must still terminate the process — otherwise the
      // rejection is unhandled and the process lingers until the deadline.
      console.error('Error during shutdown', err);
      process.exit(1);
    }
  });
}

// `void` marks the fire-and-forget promise as intentional.
process.on('SIGTERM', () => void gracefulShutdown('SIGTERM'));
process.on('SIGINT', () => void gracefulShutdown('SIGINT'));
Connection Draining#
# NGINX connection draining
# NOTE(review): active draining (the `drain` server parameter) and
# `slow_start` are NGINX Plus (commercial) features — confirm your NGINX
# build supports them before relying on this example. Open-source NGINX
# drains only implicitly, by removing a server and reloading config.
upstream api_servers {
    server api1.example.com:3000;
    server api2.example.com:3000;
}

# Slow start: ramp a recovered/new server up to full traffic over 30s
# instead of flooding a cold instance (NGINX Plus only).
upstream api_servers_slow_start {
    server api1.example.com:3000 slow_start=30s;
    server api2.example.com:3000 slow_start=30s;
}
# Kubernetes: Graceful termination
apiVersion: v1
kind: Pod
spec:
  # Time allowed between SIGTERM and SIGKILL — must cover request draining.
  terminationGracePeriodSeconds: 30
  containers:
    - name: api
      lifecycle:
        preStop:
          exec:
            # Brief pause so endpoint controllers and load balancers can
            # observe the pod leaving rotation before SIGTERM is delivered.
            command: ['/bin/sh', '-c', 'sleep 5']
      # Pod only receives traffic while /health returns success.
      readinessProbe:
        httpGet:
          path: /health
          port: 3000
        initialDelaySeconds: 5
        periodSeconds: 5
Monitoring#
// Track load balancer metrics
import { Counter, Histogram, Gauge } from 'prom-client';

// In-flight request count per server, for spotting connection buildup.
const activeConnections = new Gauge({
  name: 'active_connections',
  help: 'Number of active connections',
  labelNames: ['server'],
});

// Request volume by server and HTTP status.
const requestsTotal = new Counter({
  name: 'requests_total',
  help: 'Total requests',
  labelNames: ['server', 'status'],
});

// Latency distribution per server.
const responseTime = new Histogram({
  name: 'response_time_seconds',
  help: 'Response time in seconds',
  labelNames: ['server'],
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 5],
});

// Middleware to track metrics
app.use((req, res, next) => {
  const startedAt = Date.now();
  // Default the label so a missing env var doesn't produce "undefined".
  const serverId = process.env.SERVER_ID ?? 'unknown';
  activeConnections.inc({ server: serverId });

  // Record exactly once. 'finish' fires on normal completion, but aborted
  // requests only fire 'close' — listening solely on 'finish' would leak
  // gauge increments. Modern Node emits 'close' after 'finish' as well,
  // hence the settled guard against double-counting.
  let settled = false;
  const record = (): void => {
    if (settled) return;
    settled = true;
    activeConnections.dec({ server: serverId });
    requestsTotal.inc({
      server: serverId,
      status: res.statusCode.toString(),
    });
    responseTime.observe({ server: serverId }, (Date.now() - startedAt) / 1000);
  };
  res.on('finish', record);
  res.on('close', record);

  next();
});
Best Practices#
Configuration:
✓ Use appropriate algorithm for workload
✓ Configure health checks
✓ Set reasonable timeouts
✓ Enable connection draining
Availability:
✓ Multiple availability zones
✓ Cross-region failover
✓ Backup servers
✓ Circuit breakers
Performance:
✓ Keep-alive connections
✓ Connection pooling
✓ SSL termination at LB
✓ Compression
Monitoring:
✓ Track server health
✓ Monitor response times
✓ Alert on failures
✓ Log access patterns
Conclusion#
Load balancing is essential for scalable, highly available applications. Choose algorithms based on your workload, implement proper health checks, and plan for graceful deployments. Monitor continuously to catch issues before users do.