Load Balancing and Scaling: Building Resilient Systems

Scaling applications requires distributing load effectively across multiple instances. This guide covers load balancing strategies, scaling patterns, and high availability architectures.

Load Balancing Fundamentals

Load Balancing Algorithms

Round Robin
├── Request 1 → Server A
├── Request 2 → Server B
├── Request 3 → Server C
└── Request 4 → Server A (cycles back)

Least Connections
├── Server A: 10 connections
├── Server B: 5 connections  ← Next request
└── Server C: 8 connections

Weighted Round Robin
├── Server A (weight: 3) → Gets 3x traffic
├── Server B (weight: 2) → Gets 2x traffic
└── Server C (weight: 1) → Gets 1x traffic

IP Hash
└── Same client IP → Always same server (session affinity)

NGINX Configuration

# /etc/nginx/nginx.conf
# The upstream and server blocks below belong inside the http { } context.
upstream backend {
    # Send each request to the server with the fewest active connections,
    # taking the weights below into account.
    least_conn;

    # Open-source NGINX performs passive health checking only: a server is
    # skipped for fail_timeout after max_fails failed attempts
    # (defaults: max_fails=1, fail_timeout=10s).
    server backend1.example.com:3000 weight=3;
    server backend2.example.com:3000 weight=2;
    # backup: receives traffic only when the primary servers are unavailable.
    server backend3.example.com:3000 weight=1 backup;

    # Connection reuse (not a health check): keep up to 32 idle upstream
    # connections open per worker process.
    keepalive 32;
}

server {
    listen 80;
    server_name api.example.com;

    location / {
        proxy_pass http://backend;
        # HTTP/1.1 and an empty Connection header are required for the
        # upstream keepalive pool above to be used.
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        # Pass the original host, client address and scheme to the backend.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Timeouts
        proxy_connect_timeout 5s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;

        # Retry on failure: try up to 3 servers in total. Requests with
        # non-idempotent methods (POST, LOCK, PATCH) are not retried unless
        # "non_idempotent" is added to the list.
        proxy_next_upstream error timeout http_502 http_503 http_504;
        proxy_next_upstream_tries 3;
    }

    # Answered by NGINX itself; it does not verify that any backend is up.
    location /health {
        access_log off;
        # Set the type via default_type: add_header would append a second
        # Content-Type header next to the one NGINX already sends.
        default_type text/plain;
        return 200 "healthy\n";
    }
}

HAProxy Configuration

# /etc/haproxy/haproxy.cfg
global
    maxconn 50000
    # Log target for every section that opts in with "log global".
    log stdout format raw local0

defaults
    mode http
    # Inherit the log target from the global section. Without this line the
    # "option httplog" below produces no output.
    log global
    timeout connect 5s
    timeout client 30s
    timeout server 30s
    option httplog
    option dontlognull

frontend http_front
    bind *:80
    # The file given to "crt" must contain the certificate and its private key.
    bind *:443 ssl crt /etc/ssl/certs/cert.pem

    # Route based on path: /api* goes to api_servers, everything else to web_servers
    acl is_api path_beg /api
    use_backend api_servers if is_api
    default_backend web_servers

backend web_servers
    balance roundrobin
    # Active health check: GET /health must answer with status 200.
    option httpchk GET /health
    http-check expect status 200

    # check every 5s; mark down after 3 failures, up again after 2 successes
    server web1 10.0.0.1:3000 check inter 5s fall 3 rise 2
    server web2 10.0.0.2:3000 check inter 5s fall 3 rise 2
    # backup: used only when web1 and web2 are both down
    server web3 10.0.0.3:3000 check inter 5s fall 3 rise 2 backup

backend api_servers
    balance leastconn
    # No "http-check expect" here, so any 2xx or 3xx response counts as healthy.
    option httpchk GET /api/health

    # api3 carries half the weight of api1 and api2
    server api1 10.0.1.1:4000 check weight 100
    server api2 10.0.1.2:4000 check weight 100
    server api3 10.0.1.3:4000 check weight 50

listen stats
    # The stats page listens on all interfaces; restrict access (firewall or
    # "stats auth") before exposing this port.
    bind *:8404
    stats enable
    stats uri /stats
    stats refresh 10s
    # Admin actions are allowed only for requests coming from localhost.
    stats admin if LOCALHOST

Horizontal Scaling

Stateless Application Design

// Bad: In-memory session storage
// The Map lives inside one process, so a session stored by this instance is
// invisible to every other instance behind the load balancer and disappears
// when the process restarts.
const sessions = new Map<string, Session>();

// Looks up the session by the `sessionId` cookie and attaches it to the
// request; `req.session` is undefined when the id is not in the Map.
app.use((req, res, next) => {
  const sessionId = req.cookies.sessionId;
  req.session = sessions.get(sessionId);
  next();
});

// Good: External session storage (Redis)
import RedisStore from 'connect-redis';
import session from 'express-session';
import { createClient } from 'redis';

const redisClient = createClient({ url: process.env.REDIS_URL });
// node-redis reports connection problems as 'error' events; without a
// listener the event is thrown and takes the process down.
redisClient.on('error', (err) => {
  console.error('Redis client error', err);
});
await redisClient.connect();

// Fail fast at startup instead of running with an undefined signing secret.
const sessionSecret = process.env.SESSION_SECRET;
if (!sessionSecret) {
  throw new Error('SESSION_SECRET must be set');
}

// Sessions are stored in Redis, so any instance can serve any request.
app.use(session({
  store: new RedisStore({ client: redisClient }),
  secret: sessionSecret,
  resave: false,
  saveUninitialized: false,
  cookie: {
    // NOTE(review): with TLS terminated at the load balancer, `secure: true`
    // normally also needs Express "trust proxy" enabled — confirm it is set.
    secure: true,
    httpOnly: true,
    maxAge: 24 * 60 * 60 * 1000, // 24 hours in milliseconds
  },
}));

Kubernetes Horizontal Pod Autoscaler

# deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api-server
spec:
  # NOTE(review): when the HorizontalPodAutoscaler manages this Deployment,
  # re-applying a manifest with a fixed replica count resets the scale —
  # consider dropping this field.
  replicas: 3
  selector:
    matchLabels:
      app: api-server
  template:
    metadata:
      labels:
        app: api-server
    spec:
      containers:
        - name: api
          # NOTE(review): ":latest" makes rollouts non-reproducible; prefer a pinned tag.
          image: api-server:latest
          resources:
            # Utilization targets in the autoscaler are percentages of these requests.
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi
          ports:
            - containerPort: 3000
          # Readiness uses /ready (checks the database connection): a pod
          # that cannot reach its database is taken out of rotation.
          readinessProbe:
            httpGet:
              path: /ready
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 10
          # Liveness uses /live, which does not touch dependencies. Pointing
          # it at /health would restart every pod during a database outage,
          # because /health answers 503 when the database check fails.
          livenessProbe:
            httpGet:
              path: /live
              port: 3000
            initialDelaySeconds: 15
            periodSeconds: 20

---
# hpa.yaml
# Scales the api-server Deployment between 3 and 20 replicas based on CPU and
# memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: api-server-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: api-server
  minReplicas: 3
  maxReplicas: 20
  # Utilization is measured against the containers' resource requests.
  metrics:
    # Target: average CPU at 70% of the request
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Target: average memory at 80% of the request
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    # Scale down cautiously: use a 300s stabilization window, then remove
    # at most 10% of the replicas per 60s.
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    # Scale up immediately: per 15s either double the replicas or add 4 pods,
    # whichever allows the larger change (selectPolicy: Max).
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 15
        - type: Pods
          value: 4
          periodSeconds: 15
      selectPolicy: Max

Database Scaling

Read Replicas

import { Pool } from 'pg';

// Settings common to both pools.
const sharedPoolSettings = { database: 'myapp' };

// Connections to the primary host (DB_PRIMARY_HOST), capped at 20 clients.
const primaryPool = new Pool({
  ...sharedPoolSettings,
  host: process.env.DB_PRIMARY_HOST,
  max: 20,
});

// Connections to the replica host (DB_REPLICA_HOST), capped at 50 clients.
const replicaPool = new Pool({
  ...sharedPoolSettings,
  host: process.env.DB_REPLICA_HOST,
  max: 50,
});

/**
 * Thin router over the two connection pools: queries flagged as read-only go
 * to `replicaPool`, everything else goes to `primaryPool`.
 */
class Database {
  /**
   * Runs a parameterized query.
   *
   * @param sql - SQL text with `$1`-style placeholders.
   * @param params - Values bound to the placeholders.
   * @param options - Set `readonly: true` to use the replica pool; omitted or
   *   `false` uses the primary pool.
   * @returns The result of `pool.query` on the selected pool.
   */
  async query(sql: string, params?: unknown[], options?: { readonly?: boolean }) {
    const pool = options?.readonly ? replicaPool : primaryPool;
    return pool.query(sql, params);
  }

  // Convenience methods

  /** Runs `sql` on the replica pool. */
  async readQuery(sql: string, params?: unknown[]) {
    return this.query(sql, params, { readonly: true });
  }

  /** Runs `sql` on the primary pool. */
  async writeQuery(sql: string, params?: unknown[]) {
    return this.query(sql, params, { readonly: false });
  }
}

// Usage
const db = new Database();

// Reads go to replica
// NOTE(review): replicas typically apply changes asynchronously, so a read
// issued right after a write may not see it yet — confirm whether any caller
// needs read-your-writes and should query the primary instead.
const users = await db.readQuery('SELECT * FROM users WHERE active = true');

// Writes go to primary
// The value is passed as a bound parameter ($1), not concatenated into the SQL.
await db.writeQuery('INSERT INTO users (name) VALUES ($1)', ['John']);

Connection Pooling with PgBouncer

# pgbouncer.ini
# Clients connect to PgBouncer on port 6432 and pick a target by database
# name: "myapp" reaches the primary, "myapp_readonly" reaches the replica.
[databases]
myapp = host=primary.db.example.com port=5432 dbname=myapp
myapp_readonly = host=replica.db.example.com port=5432 dbname=myapp

[pgbouncer]
# Listens on all interfaces; restrict access at the network level.
listen_addr = 0.0.0.0
listen_port = 6432
auth_type = md5
auth_file = /etc/pgbouncer/userlist.txt

# Connection pooling
# transaction mode: a server connection is returned to the pool after each
# transaction.
# NOTE(review): session-level features (SET, advisory locks, LISTEN) are
# generally not safe in this mode — confirm the application does not rely on them.
pool_mode = transaction
# Up to 1000 client connections share pools of 20 server connections
# (kept at 5 or more), with 5 extra reserve connections.
max_client_conn = 1000
default_pool_size = 20
min_pool_size = 5
reserve_pool_size = 5

# Timeouts (seconds)
server_connect_timeout = 3
server_login_retry = 3
query_timeout = 300

Caching Layer

Multi-Level Caching

import Redis from 'ioredis';
import { LRUCache } from 'lru-cache';

// L1: process-local LRU cache; every application instance holds its own copy.
const L1_MAX_ENTRIES = 1000;
const L1_TTL_MS = 60 * 1000; // 1 minute
const localCache = new LRUCache<string, any>({ max: L1_MAX_ENTRIES, ttl: L1_TTL_MS });

// L2: Redis cache shared by all instances.
const redis = new Redis(process.env.REDIS_URL);

/**
 * Read-through lookup across two cache levels: the per-instance LRU cache
 * first, then Redis, and finally `fetcher` as the source of truth.
 *
 * @param key - Cache key, used unchanged in both levels.
 * @param fetcher - Loads the value on a miss in both levels. Its result must
 *   be JSON-serializable, because Redis stores `JSON.stringify(value)`.
 * @param ttl - Lifetime of the Redis entry in seconds (default 300).
 * @returns The cached or freshly fetched value.
 *
 * Concurrent misses for the same key each call `fetcher`; nothing here
 * coalesces them.
 */
async function getCached<T>(
  key: string,
  fetcher: () => Promise<T>,
  ttl: number = 300
): Promise<T> {
  // The local cache is configured with a 1-minute TTL. Cap the local lifetime
  // at the caller's ttl so a short-lived entry (ttl < 60s) is not served from
  // memory after its Redis copy has already expired.
  const localTtlMs = Math.min(60 * 1000, ttl * 1000);

  // Check L1 cache
  const localValue: T | undefined = localCache.get(key);
  if (localValue !== undefined) {
    return localValue;
  }

  // Check L2 cache
  const redisValue = await redis.get(key);
  if (redisValue) {
    const parsed: T = JSON.parse(redisValue);
    localCache.set(key, parsed, { ttl: localTtlMs });
    return parsed;
  }

  // Fetch from source
  const value = await fetcher();

  // `undefined` cannot be cached: JSON.stringify(undefined) yields undefined
  // rather than a string, and the L1 lookup treats undefined as a miss.
  if (value === undefined) {
    return value;
  }

  // Store in both caches
  localCache.set(key, value, { ttl: localTtlMs });
  await redis.setex(key, ttl, JSON.stringify(value));

  return value;
}

// Invalidation
/**
 * Removes `key` from this instance's local cache and from Redis, then
 * publishes the key on the `cache-invalidation` channel.
 *
 * @param key - Cache key to invalidate.
 */
async function invalidateCache(key: string) {
  localCache.delete(key);
  // Delete the shared copy before announcing, so an instance that reacts to
  // the message cannot re-read the old value from Redis.
  await redis.del(key);

  // Notify other instances (pub/sub)
  await redis.publish('cache-invalidation', key);
}

// Subscribe to invalidation events
// A dedicated connection is used because a Redis connection in subscriber
// mode cannot issue regular commands such as GET or SETEX.
const subscriber = new Redis(process.env.REDIS_URL);
subscriber.subscribe('cache-invalidation').catch((error: unknown) => {
  // Without this handler a failed subscribe is an unhandled rejection, and
  // this instance would silently keep serving stale local entries.
  console.error('Failed to subscribe to cache-invalidation', error);
});
subscriber.on('message', (channel, key) => {
  // Ignore messages from any other channel this connection may subscribe to.
  if (channel !== 'cache-invalidation') {
    return;
  }
  localCache.delete(key);
});

Health Checks

import express from 'express';

// Express application on which the health and probe routes below are registered.
const app = express();

/** Result of probing one dependency. */
type HealthCheckResult = {
  status: 'pass' | 'fail';
  /** Time the probe took, in milliseconds. */
  latency?: number;
  /** Failure detail, when available. */
  message?: string;
};

/** Body returned by the health endpoint. */
interface HealthStatus {
  /** Aggregate state derived from the individual checks. */
  status: 'healthy' | 'degraded' | 'unhealthy';
  /** Per-dependency results keyed by dependency name. */
  checks: Record<string, HealthCheckResult>;
  version: string;
  /** Process uptime in seconds. */
  uptime: number;
}

/**
 * Upper bound for the external dependency probe so that /health cannot hang
 * on a slow third party. Keep it below the timeout of whatever polls /health.
 */
const EXTERNAL_CHECK_TIMEOUT_MS = 2000;

/** Turns any thrown value into a printable message (catch variables are `unknown`). */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * GET /health — detailed dependency report.
 *
 * - database failure  → overall `unhealthy`, HTTP 503
 * - redis failure     → overall `degraded`, HTTP 200 (unless already unhealthy)
 * - external API failure is reported in `checks` but does not change the
 *   overall status.
 */
app.get('/health', async (req, res) => {
  const checks: HealthStatus['checks'] = {};
  let overallStatus: HealthStatus['status'] = 'healthy';

  // Database check
  try {
    const start = Date.now();
    await db.query('SELECT 1');
    checks.database = { status: 'pass', latency: Date.now() - start };
  } catch (error: unknown) {
    checks.database = { status: 'fail', message: describeError(error) };
    overallStatus = 'unhealthy';
  }

  // Redis check
  try {
    const start = Date.now();
    await redis.ping();
    checks.redis = { status: 'pass', latency: Date.now() - start };
  } catch (error: unknown) {
    checks.redis = { status: 'fail', message: describeError(error) };
    // Never upgrade 'unhealthy' back to 'degraded'.
    overallStatus = overallStatus === 'healthy' ? 'degraded' : overallStatus;
  }

  // External service check
  try {
    const start = Date.now();
    const response = await fetch('https://api.external.com/health', {
      // Abort instead of waiting indefinitely; the abort lands in the catch below.
      signal: AbortSignal.timeout(EXTERNAL_CHECK_TIMEOUT_MS),
    });
    checks.externalApi = {
      status: response.ok ? 'pass' : 'fail',
      latency: Date.now() - start,
    };
  } catch (error: unknown) {
    checks.externalApi = { status: 'fail', message: describeError(error) };
  }

  const health: HealthStatus = {
    status: overallStatus,
    checks,
    version: process.env.APP_VERSION || 'unknown',
    uptime: process.uptime(),
  };

  // Only 'unhealthy' takes the instance out of rotation; 'degraded' still serves.
  const statusCode = overallStatus === 'unhealthy' ? 503 : 200;

  res.status(statusCode).json(health);
});

// Kubernetes probes
/**
 * GET /ready — readiness probe. Responds 200 'ready' when the database
 * connection check succeeds and 503 'not ready' otherwise.
 */
app.get('/ready', async (req, res) => {
  // Check if ready to receive traffic
  let ready = false;
  try {
    ready = Boolean(await checkDatabaseConnection());
  } catch {
    // A throwing check means "not ready". Without this catch the rejection
    // would escape the async handler and the probe request would get no
    // response at all.
    ready = false;
  }
  res.status(ready ? 200 : 503).send(ready ? 'ready' : 'not ready');
});

// GET /live — liveness probe: always answers 200 'alive' without checking
// any dependency.
app.get('/live', (_req, res) => {
  res.status(200);
  res.send('alive');
});

Best Practices

1. Design for failure: Assume any component can fail.
2. Use health checks: Let load balancers route around failures.
3. Implement graceful shutdown: Drain connections before stopping.
4. Monitor everything: Metrics, logs, and traces.
5. Test at scale: Load test before production.
6. Plan for capacity: Know your limits.

Effective scaling requires stateless design, proper load balancing, and careful attention to data consistency. Start with horizontal scaling for web servers, add caching layers, and scale databases with read replicas. Monitor continuously and adjust based on real traffic patterns.