WebSocket connections are stateful, making scaling different from HTTP. Here's how to scale real-time applications effectively.
The Challenge#
WebSocket vs HTTP:
- HTTP: Stateless, any server can handle any request
- WebSocket: Stateful, connection bound to specific server
Scaling challenges:
- Session affinity required
- Connection state management
- Broadcasting across servers
- Graceful server shutdown
Sticky Sessions#
# Nginx configuration

# Pool of WebSocket backends that /ws traffic is proxied to.
upstream websocket_servers {
    ip_hash; # Sticky sessions based on client IP
    server ws1.example.com:3000;
    server ws2.example.com:3000;
    server ws3.example.com:3000;
}

server {
    location /ws {
        proxy_pass http://websocket_servers;

        # The WebSocket handshake is an HTTP/1.1 Upgrade request, so the
        # Upgrade and Connection headers must be forwarded explicitly.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;

        # Allow up to one hour between reads from the backend before the
        # proxied connection is closed.
        proxy_read_timeout 3600s;
    }
}
# AWS ALB with sticky sessions
Resources:
  TargetGroup:
    Type: AWS::ElasticLoadBalancingV2::TargetGroup
    Properties:
      Protocol: HTTP
      Port: 3000
      TargetGroupAttributes:
        # Pin each client to one target using a cookie issued by the load balancer
        - Key: stickiness.enabled
          Value: "true"
        - Key: stickiness.type
          Value: lb_cookie
        # Cookie lifetime in seconds (86400 s = 24 hours)
        - Key: stickiness.lb_cookie.duration_seconds
          Value: "86400"
Redis Pub/Sub for Broadcasting#
1import { createClient } from 'redis';
2import { WebSocketServer, WebSocket } from 'ws';
3
// Pub/sub uses two connections: once a client has subscribed it is in
// subscriber mode and cannot issue regular commands such as PUBLISH.
const publisher = createClient();
const subscriber = createClient();

// node-redis emits 'error' on connection problems; with no listener attached
// the error is thrown and takes the whole process down.
publisher.on('error', (err) => console.error('Redis publisher error:', err));
subscriber.on('error', (err) => console.error('Redis subscriber error:', err));

await publisher.connect();
await subscriber.connect();

const wss = new WebSocketServer({ port: 3000 });

// userId -> socket, for the clients connected to THIS server instance only
const clients = new Map<string, WebSocket>();
12
// Subscribe to broadcast channel: every message published on `broadcast`
// is relayed to all clients connected to this server.
await subscriber.subscribe('broadcast', (message) => {
  let payload: string;
  try {
    // Parse to reject malformed JSON, and serialize once instead of once per client.
    payload = JSON.stringify(JSON.parse(message));
  } catch (err) {
    // A bad message must not throw out of the subscription listener.
    console.error('Dropping malformed broadcast message:', err);
    return;
  }

  // Send to all local clients
  for (const ws of clients.values()) {
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(payload);
    }
  }
});
24
// Subscribe to user-specific channels (`user:<userId>`) and forward each
// message to that user's socket if it is connected to this server.
await subscriber.pSubscribe('user:*', (message, channel) => {
  // Take everything after the prefix so user IDs that themselves contain ':'
  // are not truncated (split(':')[1] would keep only the first segment).
  const userId = channel.slice('user:'.length);
  const ws = clients.get(userId);

  if (ws?.readyState === WebSocket.OPEN) {
    ws.send(message);
  }
});
34
// Register each new socket under its user ID and unregister it on close.
wss.on('connection', (ws, req) => {
  const userId = getUserFromRequest(req);
  clients.set(userId, ws);

  ws.on('close', () => {
    // If the user reconnected, the map already points at the newer socket;
    // only remove the entry when it still refers to the socket that closed.
    if (clients.get(userId) === ws) {
      clients.delete(userId);
    }
  });
});
43
/**
 * Broadcast to all connected clients across all servers.
 *
 * @param data Value to send; it is JSON-serialized and published on the
 *   `broadcast` Redis channel.
 */
async function broadcast(data: object): Promise<void> {
  const payload = JSON.stringify(data);
  await publisher.publish('broadcast', payload);
}
48
/**
 * Send to a specific user (who could be connected to any server).
 *
 * @param userId Recipient; selects the `user:<userId>` Redis channel.
 * @param data Value to send; it is JSON-serialized before publishing.
 */
async function sendToUser(userId: string, data: object): Promise<void> {
  const channel = `user:${userId}`;
  const payload = JSON.stringify(data);
  await publisher.publish(channel, payload);
}
Room-Based Broadcasting#
1import { createClient } from 'redis';
2
// Client for the shared room-membership sets.
// NOTE(review): no connect() call is visible in this snippet — confirm the
// client is connected before the room functions run.
const redis = createClient();

/** This server's view of a single room. */
interface Room {
  /** IDs of the room's members that were joined through this server. */
  members: Set<string>;
  /** True once this server has subscribed to the room's Redis channel. */
  subscribed: boolean;
}

/** roomId -> local room state. */
const rooms: Map<string, Room> = new Map();
11
/**
 * Adds a user to a room, both in Redis (shared across servers) and in this
 * server's local room map, subscribing to the room's channel when this is the
 * first local member.
 *
 * @param userId User joining the room.
 * @param roomId Room being joined.
 * @throws Rethrows any error from Redis; if subscribing fails the room is
 *   left marked as unsubscribed so the next join retries.
 */
async function joinRoom(userId: string, roomId: string): Promise<void> {
  // Track room membership in Redis (shared state)
  await redis.sAdd(`room:${roomId}:members`, userId);

  // Track locally, creating the entry on the first local join
  const existing = rooms.get(roomId);
  const room: Room = existing ?? { members: new Set<string>(), subscribed: false };
  if (!existing) {
    rooms.set(roomId, room);
  }
  room.members.add(userId);

  if (room.subscribed) return;

  // Set the flag before awaiting so overlapping joins do not subscribe twice.
  room.subscribed = true;
  try {
    await subscriber.subscribe(`room:${roomId}`, (message) => {
      // Send to all local room members
      for (const memberId of room.members) {
        const ws = clients.get(memberId);
        if (ws?.readyState === WebSocket.OPEN) {
          ws.send(message);
        }
      }
    });
  } catch (err) {
    // Roll back, otherwise the room would stay flagged as subscribed and
    // never receive messages on this server.
    room.subscribed = false;
    throw err;
  }
}
37
/**
 * Removes a user from a room in Redis and locally, and unsubscribes this
 * server from the room's channel once no local members remain.
 *
 * @param userId User leaving the room.
 * @param roomId Room being left.
 */
async function leaveRoom(userId: string, roomId: string): Promise<void> {
  await redis.sRem(`room:${roomId}:members`, userId);

  const room = rooms.get(roomId);
  if (!room) return;

  room.members.delete(userId);
  if (room.members.size > 0) return;

  // Drop the local entry BEFORE awaiting the unsubscribe. A joinRoom that runs
  // while the unsubscribe is in flight then creates a fresh entry and
  // re-subscribes, instead of adding a member to an entry that is about to be
  // discarded.
  rooms.delete(roomId);
  await subscriber.unsubscribe(`room:${roomId}`);
}
52
/**
 * Publishes a message to a room's Redis channel (`room:<roomId>`).
 *
 * @param roomId Target room.
 * @param data Value to send; it is JSON-serialized before publishing.
 */
async function broadcastToRoom(roomId: string, data: object): Promise<void> {
  const channel = `room:${roomId}`;
  const payload = JSON.stringify(data);
  await publisher.publish(channel, payload);
}
Connection State Management#
1import { createClient } from 'redis';
2
// Client for the shared connection registry.
// NOTE(review): no connect() call is visible in this snippet — confirm the
// client is connected before these functions run.
const redis = createClient();

/** Record of one user's connection, stored in Redis as JSON. */
interface ConnectionState {
  /** User that owns the connection. */
  userId: string;
  /** Server instance the connection was registered on. */
  serverId: string;
  /** ISO-8601 timestamp taken when the connection was registered. */
  connectedAt: string;
  /** ISO-8601 timestamp of the most recent recorded activity. */
  lastActivity: string;
}
11
/**
 * Records a new connection in Redis: a per-user state entry with a one-hour
 * TTL, plus membership in the owning server's connection set.
 *
 * @param userId User that connected.
 * @param serverId Server instance holding the connection.
 */
async function registerConnection(userId: string, serverId: string): Promise<void> {
  const ttlSeconds = 3600; // 1 hour TTL
  const stateKey = `connection:${userId}`;
  const serverSetKey = `server:${serverId}:connections`;

  const snapshot: ConnectionState = {
    userId,
    serverId,
    connectedAt: new Date().toISOString(),
    lastActivity: new Date().toISOString(),
  };

  // Store connection state with TTL
  await redis.setEx(stateKey, ttlSeconds, JSON.stringify(snapshot));

  // Add to server's connection set
  await redis.sAdd(serverSetKey, userId);
}
33
/**
 * Removes a connection from Redis: deletes the per-user state entry, then
 * removes the user from the server's connection set.
 *
 * @param userId User that disconnected.
 * @param serverId Server instance that held the connection.
 */
async function unregisterConnection(userId: string, serverId: string): Promise<void> {
  const stateKey = `connection:${userId}`;
  const serverSetKey = `server:${serverId}:connections`;

  await redis.del(stateKey);
  await redis.sRem(serverSetKey, userId);
}
41
/**
 * Refreshes a user's `lastActivity` timestamp and resets the entry's TTL to
 * one hour. Does nothing when no connection entry exists for the user.
 *
 * @param userId User whose activity is being recorded.
 */
async function updateActivity(userId: string): Promise<void> {
  const key = `connection:${userId}`;

  const raw = await redis.get(key);
  if (!raw) return;

  const record = JSON.parse(raw);
  record.lastActivity = new Date().toISOString();

  // Rewriting with setEx also renews the expiry
  await redis.setEx(key, 3600, JSON.stringify(record));
}
52
/**
 * Reports whether a connection entry currently exists for the user.
 *
 * @param userId User to check.
 * @returns True when the `connection:<userId>` key exists in Redis.
 */
async function isUserOnline(userId: string): Promise<boolean> {
  const found = await redis.exists(`connection:${userId}`);
  return found === 1;
}
56
/**
 * Filters a list of user IDs down to those with a connection entry in Redis,
 * checking all of them in one MULTI/EXEC batch.
 *
 * @param userIds Candidate user IDs.
 * @returns The IDs, in input order, whose `connection:<id>` key exists.
 */
async function getOnlineUsers(userIds: string[]): Promise<string[]> {
  const batch = redis.multi();
  for (const id of userIds) {
    batch.exists(`connection:${id}`);
  }
  const replies = await batch.exec();

  // Replies come back in the order the commands were queued
  const online: string[] = [];
  userIds.forEach((id, index) => {
    if (replies[index] === 1) {
      online.push(id);
    }
  });
  return online;
}
Graceful Shutdown#
1import { WebSocketServer, WebSocket } from 'ws';
2
const wss = new WebSocketServer({ port: 3000 });

// Set once shutdown has started so a second signal does not rerun the sequence
let isShuttingDown = false;

// Handle shutdown signals
for (const signal of ['SIGTERM', 'SIGINT'] as const) {
  process.on(signal, gracefulShutdown);
}
9
/**
 * Drains and stops this server: stops accepting connections, tells the load
 * balancer to drain, closes every client socket (forcing stragglers), cleans
 * up this server's state in Redis, then exits the process.
 *
 * Safe to call more than once; only the first call does any work. The process
 * always exits — with code 0 on success, 1 if any step failed — so a failing
 * step cannot leave the server hung in a half-shut-down state.
 */
async function gracefulShutdown(): Promise<void> {
  if (isShuttingDown) return;
  isShuttingDown = true;

  console.log('Starting graceful shutdown...');

  let exitCode = 0;
  try {
    // Stop accepting new connections
    wss.close();

    // Notify load balancer to stop sending traffic
    await notifyLoadBalancer('draining');

    // Give time for load balancer to update
    await sleep(5000);

    // Close existing connections gracefully
    await Promise.all([...wss.clients].map((ws) => closeClientGracefully(ws)));

    // Clean up server state in Redis
    await cleanupServerState(process.env.SERVER_ID!);

    console.log('Shutdown complete');
  } catch (err) {
    console.error('Graceful shutdown failed:', err);
    exitCode = 1;
  } finally {
    process.exit(exitCode);
  }
}

/**
 * Closes one client socket: sends a shutdown notice, starts a normal close,
 * and terminates the socket if it has not closed within `timeoutMs`.
 * Never rejects; resolves once the socket is closed or forced closed.
 */
function closeClientGracefully(ws: WebSocket, timeoutMs = 5000): Promise<void> {
  return new Promise((resolve) => {
    if (ws.readyState === WebSocket.CLOSED) {
      resolve();
      return;
    }

    // Force close after timeout
    const forceClose = setTimeout(() => {
      if (ws.readyState !== WebSocket.CLOSED) {
        ws.terminate();
      }
      resolve();
    }, timeoutMs);

    // Listen before closing, and cancel the timer once the socket is gone
    ws.once('close', () => {
      clearTimeout(forceClose);
      resolve();
    });

    // Only an open socket can be sent the shutdown notice
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type: 'server_shutdown' }));
    }

    // Close with normal closure code
    ws.close(1000, 'Server shutting down');
  });
}
Heartbeat and Reconnection#
// Server-side heartbeat: ping every client every 30 s and terminate any
// socket that has not answered the previous ping with a pong.
wss.on('connection', (ws) => {
  let isAlive = true;

  const markAlive = (): void => {
    isAlive = true;
  };

  const checkPulse = (): void => {
    if (isAlive) {
      // Assume dead until the pong for this ping arrives
      isAlive = false;
      ws.ping();
    } else {
      ws.terminate();
    }
  };

  ws.on('pong', markAlive);

  const heartbeatInterval = setInterval(checkPulse, 30000);

  ws.on('close', () => clearInterval(heartbeatInterval));
});
23
/**
 * Client-side reconnection: a WebSocket wrapper that reconnects with
 * exponential backoff after any close other than a normal closure (1000).
 *
 * Delays double on every attempt starting from `baseDelay` (1 s, 2 s, 4 s, …)
 * and reconnection stops after `maxReconnectAttempts` consecutive failures.
 * A successful open resets the attempt counter.
 */
class ReconnectingWebSocket {
  private ws: WebSocket | null = null;
  private reconnectAttempts = 0;
  private readonly maxReconnectAttempts = 10;
  private readonly baseDelay = 1000;

  /**
   * @param url WebSocket endpoint to connect (and reconnect) to.
   */
  constructor(private readonly url: string) {}

  /** Opens the socket and wires up the reconnect handlers. */
  connect(): void {
    this.ws = new WebSocket(this.url);

    this.ws.onopen = () => {
      this.reconnectAttempts = 0;
    };

    this.ws.onclose = (event) => {
      // 1000 is a normal closure; anything else is treated as a dropped connection
      if (event.code !== 1000) {
        this.scheduleReconnect();
      }
    };

    this.ws.onerror = () => {
      // Closing here routes error recovery through the onclose handler above
      this.ws?.close();
    };
  }

  /** Schedules the next connect() with exponential backoff, up to the attempt limit. */
  private scheduleReconnect(): void {
    if (this.reconnectAttempts >= this.maxReconnectAttempts) {
      console.error('Max reconnection attempts reached');
      return;
    }

    const delay = this.baseDelay * 2 ** this.reconnectAttempts;
    this.reconnectAttempts++;

    setTimeout(() => this.connect(), delay);
  }
}
Best Practices#
Scaling:
✓ Use Redis pub/sub for cross-server messaging
✓ Implement sticky sessions
✓ Track connection state centrally
✓ Handle server failures gracefully
Reliability:
✓ Implement heartbeat mechanism
✓ Handle client reconnection
✓ Plan for graceful shutdown
✓ Monitor connection counts
Performance:
✓ Batch messages when possible
✓ Use binary protocols for high volume
✓ Limit message size
✓ Compress large payloads
Conclusion#
Scaling WebSockets requires managing stateful connections across servers. Use Redis pub/sub for broadcasting, implement proper connection tracking, and plan for graceful shutdowns. Good architecture enables real-time features at scale.