Load balancers distribute traffic across multiple servers, improving availability and performance. Here's how to choose and configure the right strategy.
Load Balancing Algorithms#
Round Robin:
- Requests distributed sequentially
- Simple and fair
- Doesn't consider server capacity
Weighted Round Robin:
- Servers assigned weights
- Higher weights get more traffic
- Good for mixed server capacities
Least Connections:
- Routes to server with fewest active connections
- Better for varying request durations
- More complex tracking
IP Hash:
- Routes based on client IP
- Same client always hits same server
- Enables session persistence
Least Response Time:
- Routes to fastest responding server
- Accounts for server load and latency
- Requires active monitoring
NGINX Load Balancing#
# Basic round robin (NGINX default distribution)
upstream api_servers {
    server api1.example.com:3000;
    server api2.example.com:3000;
    server api3.example.com:3000;

    # Keep idle connections open to the upstreams. Requires HTTP/1.1 and an
    # empty Connection header in the location block below to take effect.
    keepalive 32;
}

# Weighted distribution — higher weight receives proportionally more traffic
upstream api_servers_weighted {
    server api1.example.com:3000 weight=5;
    server api2.example.com:3000 weight=3;
    server api3.example.com:3000 weight=2;
}

# Least connections — route each request to the server with the fewest
# active connections (better when request durations vary widely)
upstream api_servers_least {
    least_conn;
    server api1.example.com:3000;
    server api2.example.com:3000;
    server api3.example.com:3000;
}

# IP hash for sticky sessions — the same client IP always maps to the
# same server, giving session persistence without shared session storage
upstream api_servers_sticky {
    ip_hash;
    server api1.example.com:3000;
    server api2.example.com:3000;
    server api3.example.com:3000;
}

# Passive health checks: after max_fails failures within fail_timeout the
# server is considered down for fail_timeout. A 'backup' server receives
# traffic only when all primary servers are unavailable.
upstream api_servers_health {
    server api1.example.com:3000 max_fails=3 fail_timeout=30s;
    server api2.example.com:3000 max_fails=3 fail_timeout=30s;
    server api3.example.com:3000 backup;
}

server {
    listen 80;

    location /api/ {
        proxy_pass http://api_servers;
        proxy_http_version 1.1;
        # Clear the Connection header so upstream keepalive actually works;
        # otherwise NGINX sends "Connection: close" to each upstream.
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Timeouts
        proxy_connect_timeout 5s;
        proxy_read_timeout 60s;
        proxy_send_timeout 60s;

        # Retry on failure. Note: by default NGINX will not retry
        # non-idempotent methods (POST, etc.) unless 'non_idempotent' is added.
        proxy_next_upstream error timeout http_502 http_503;
        proxy_next_upstream_tries 3;
    }
}
Health Checks#
// Liveness endpoint for the load balancer's probes: cheap, no dependencies.
app.get('/health', (req, res) => {
  res.json({ status: 'healthy' });
});

// Detailed health check: aggregates dependency probes. Returns 200 only
// when every check reports healthy, otherwise 503 so the LB drains traffic.
app.get('/health/detailed', async (req, res) => {
  // The async probes are independent — run them concurrently instead of
  // awaiting them one after another.
  const [database, redisStatus] = await Promise.all([
    checkDatabase(),
    checkRedis(),
  ]);

  const checks = {
    database,
    redis: redisStatus,
    memory: checkMemory(),
    cpu: checkCPU(),
  };

  const healthy = Object.values(checks).every((c) => c.healthy);

  res.status(healthy ? 200 : 503).json({
    status: healthy ? 'healthy' : 'unhealthy',
    checks,
    uptime: process.uptime(),
    timestamp: new Date().toISOString(),
  });
});

// Probe the database with the cheapest possible round trip.
async function checkDatabase(): Promise<HealthCheck> {
  try {
    await db.$queryRaw`SELECT 1`;
    return { healthy: true };
  } catch (error) {
    // catch variables are `unknown` under strict mode — narrow before
    // reading .message instead of assuming an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    return { healthy: false, error: message };
  }
}

// Report heap pressure; unhealthy above 90% of the current heap limit.
function checkMemory(): HealthCheck {
  const used = process.memoryUsage();
  const heapUsedPercent = (used.heapUsed / used.heapTotal) * 100;

  return {
    healthy: heapUsedPercent < 90,
    heapUsedPercent: heapUsedPercent.toFixed(2),
  };
}
Session Persistence#
1// Cookie-based session affinity
2// HAProxy configuration
3const haproxyConfig = `
4backend api_servers
5 balance roundrobin
6 cookie SERVERID insert indirect nocache
7 server api1 api1:3000 check cookie api1
8 server api2 api2:3000 check cookie api2
9 server api3 api3:3000 check cookie api3
10`;
11
12// Application-level: Store sessions in Redis
13import session from 'express-session';
14import RedisStore from 'connect-redis';
15
16app.use(
17 session({
18 store: new RedisStore({ client: redis }),
19 secret: process.env.SESSION_SECRET!,
20 resave: false,
21 saveUninitialized: false,
22 cookie: {
23 secure: process.env.NODE_ENV === 'production',
24 httpOnly: true,
25 maxAge: 24 * 60 * 60 * 1000,
26 },
27 })
28);
29
30// Now any server can handle any request
31// Session data is centralized in RedisAWS Application Load Balancer#
// CloudFormation template for an internet-facing ALB with sticky sessions,
// health checks against /health, and fast deregistration for deploys.
const albTemplate = {
  AWSTemplateFormatVersion: '2010-09-09',
  Resources: {
    // The load balancer itself, spanning two subnets for availability.
    LoadBalancer: {
      Type: 'AWS::ElasticLoadBalancingV2::LoadBalancer',
      Properties: {
        Name: 'api-alb',
        Scheme: 'internet-facing',
        Type: 'application',
        Subnets: ['subnet-1', 'subnet-2'],
        SecurityGroups: ['sg-alb'],
      },
    },
    // Target group: where traffic lands, plus health-check behavior.
    TargetGroup: {
      Type: 'AWS::ElasticLoadBalancingV2::TargetGroup',
      Properties: {
        Name: 'api-targets',
        Port: 3000,
        Protocol: 'HTTP',
        VpcId: 'vpc-123',
        TargetType: 'ip',
        HealthCheckEnabled: true,
        HealthCheckPath: '/health',
        HealthCheckIntervalSeconds: 30,
        HealthyThresholdCount: 2,
        UnhealthyThresholdCount: 3,
        TargetGroupAttributes: [
          // Drain targets for 30s on deregistration (default is 300s).
          { Key: 'deregistration_delay.timeout_seconds', Value: '30' },
          // ALB-managed cookie stickiness, 24h duration.
          { Key: 'stickiness.enabled', Value: 'true' },
          { Key: 'stickiness.type', Value: 'lb_cookie' },
          { Key: 'stickiness.lb_cookie.duration_seconds', Value: '86400' },
        ],
      },
    },
    // HTTPS listener terminating TLS at the load balancer.
    Listener: {
      Type: 'AWS::ElasticLoadBalancingV2::Listener',
      Properties: {
        LoadBalancerArn: { Ref: 'LoadBalancer' },
        Port: 443,
        Protocol: 'HTTPS',
        Certificates: [{ CertificateArn: 'arn:aws:acm:...' }],
        DefaultActions: [
          { Type: 'forward', TargetGroupArn: { Ref: 'TargetGroup' } },
        ],
      },
    },
  },
};
Graceful Shutdown#
// Handle graceful shutdown for zero-downtime deployments
const server = app.listen(3000);

let isShuttingDown = false;

// During shutdown the health check reports 503 so the load balancer stops
// routing new traffic here while in-flight requests finish.
app.get('/health', (req, res) => {
  if (isShuttingDown) {
    return res.status(503).json({ status: 'shutting_down' });
  }
  res.json({ status: 'healthy' });
});

async function gracefulShutdown(signal: string): Promise<void> {
  console.log(`${signal} received, starting graceful shutdown`);
  isShuttingDown = true;

  // Hard deadline: if draining stalls, exit non-zero. unref() so this
  // timer alone never keeps the process alive.
  const forceExit = setTimeout(() => {
    console.error('Forced shutdown after timeout');
    process.exit(1);
  }, 30000);
  forceExit.unref();

  // Stop accepting new connections; the callback runs once existing
  // connections have drained.
  server.close(async () => {
    console.log('HTTP server closed');

    try {
      // Close database connections
      await db.$disconnect();

      // Close Redis
      await redis.quit();

      console.log('Graceful shutdown complete');
      process.exit(0);
    } catch (err) {
      // A failed cleanup must still terminate the process — otherwise the
      // rejection is unhandled and the process lingers until the deadline.
      console.error('Error during shutdown', err);
      process.exit(1);
    }
  });
}

// `void` marks the fire-and-forget promise as intentional.
process.on('SIGTERM', () => void gracefulShutdown('SIGTERM'));
process.on('SIGINT', () => void gracefulShutdown('SIGINT'));
Connection Draining#
# NGINX connection draining
# NOTE(review): active draining (the `drain` server parameter) and
# `slow_start` are NGINX Plus (commercial) features — confirm your NGINX
# build supports them before relying on this example. Open-source NGINX
# drains only implicitly, by removing a server and reloading config.
upstream api_servers {
    server api1.example.com:3000;
    server api2.example.com:3000;
}

# Slow start: ramp a recovered/new server up to full traffic over 30s
# instead of flooding a cold instance (NGINX Plus only).
upstream api_servers_slow_start {
    server api1.example.com:3000 slow_start=30s;
    server api2.example.com:3000 slow_start=30s;
}
# Kubernetes: Graceful termination
apiVersion: v1
kind: Pod
spec:
  # Time allowed between SIGTERM and SIGKILL — must cover request draining.
  terminationGracePeriodSeconds: 30
  containers:
    - name: api
      lifecycle:
        preStop:
          exec:
            # Brief pause so endpoint controllers and load balancers can
            # observe the pod leaving rotation before SIGTERM is delivered.
            command: ['/bin/sh', '-c', 'sleep 5']
      # Pod only receives traffic while /health returns success.
      readinessProbe:
        httpGet:
          path: /health
          port: 3000
        initialDelaySeconds: 5
        periodSeconds: 5
Monitoring#
// Track load balancer metrics
import { Counter, Histogram, Gauge } from 'prom-client';

// In-flight request count per server, for spotting connection buildup.
const activeConnections = new Gauge({
  name: 'active_connections',
  help: 'Number of active connections',
  labelNames: ['server'],
});

// Request volume by server and HTTP status.
const requestsTotal = new Counter({
  name: 'requests_total',
  help: 'Total requests',
  labelNames: ['server', 'status'],
});

// Latency distribution per server.
const responseTime = new Histogram({
  name: 'response_time_seconds',
  help: 'Response time in seconds',
  labelNames: ['server'],
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 5],
});

// Middleware to track metrics
app.use((req, res, next) => {
  const startedAt = Date.now();
  // Default the label so a missing env var doesn't produce "undefined".
  const serverId = process.env.SERVER_ID ?? 'unknown';
  activeConnections.inc({ server: serverId });

  // Record exactly once. 'finish' fires on normal completion, but aborted
  // requests only fire 'close' — listening solely on 'finish' would leak
  // gauge increments. Modern Node emits 'close' after 'finish' as well,
  // hence the settled guard against double-counting.
  let settled = false;
  const record = (): void => {
    if (settled) return;
    settled = true;
    activeConnections.dec({ server: serverId });
    requestsTotal.inc({
      server: serverId,
      status: res.statusCode.toString(),
    });
    responseTime.observe({ server: serverId }, (Date.now() - startedAt) / 1000);
  };
  res.on('finish', record);
  res.on('close', record);

  next();
});
Best Practices#
Configuration:
✓ Use appropriate algorithm for workload
✓ Configure health checks
✓ Set reasonable timeouts
✓ Enable connection draining
Availability:
✓ Multiple availability zones
✓ Cross-region failover
✓ Backup servers
✓ Circuit breakers
Performance:
✓ Keep-alive connections
✓ Connection pooling
✓ SSL termination at LB
✓ Compression
Monitoring:
✓ Track server health
✓ Monitor response times
✓ Alert on failures
✓ Log access patterns
Conclusion#
Load balancing is essential for scalable, highly available applications. Choose algorithms based on your workload, implement proper health checks, and plan for graceful deployments. Monitor continuously to catch issues before users do.