The cluster module enables creating child processes that share server ports, allowing Node.js applications to utilize multiple CPU cores.
Basic Setup#
1import cluster from 'node:cluster';
2import http from 'node:http';
3import os from 'node:os';
4
5const numCPUs = os.cpus().length;
6
7if (cluster.isPrimary) {
8 console.log(`Primary ${process.pid} is running`);
9
10 // Fork workers
11 for (let i = 0; i < numCPUs; i++) {
12 cluster.fork();
13 }
14
15 cluster.on('exit', (worker, code, signal) => {
16 console.log(`Worker ${worker.process.pid} died`);
17 });
18} else {
19 // Workers share the TCP connection
20 http.createServer((req, res) => {
21 res.writeHead(200);
22 res.end(`Hello from worker ${process.pid}\n`);
23 }).listen(8000);
24
25 console.log(`Worker ${process.pid} started`);
26}Worker Management#
1import cluster from 'node:cluster';
2import os from 'node:os';
3
4if (cluster.isPrimary) {
5 const numCPUs = os.cpus().length;
6
7 // Fork workers
8 for (let i = 0; i < numCPUs; i++) {
9 const worker = cluster.fork();
10
11 // Listen for messages from worker
12 worker.on('message', (message) => {
13 console.log(`Message from worker ${worker.id}:`, message);
14 });
15 }
16
17 // Get all workers
18 for (const id in cluster.workers) {
19 console.log(`Worker ${id} is running`);
20 }
21
22 // Listen for worker events
23 cluster.on('online', (worker) => {
24 console.log(`Worker ${worker.id} is online`);
25 });
26
27 cluster.on('listening', (worker, address) => {
28 console.log(`Worker ${worker.id} listening on ${address.port}`);
29 });
30
31 cluster.on('disconnect', (worker) => {
32 console.log(`Worker ${worker.id} disconnected`);
33 });
34
35 cluster.on('exit', (worker, code, signal) => {
36 if (signal) {
37 console.log(`Worker ${worker.id} killed by signal ${signal}`);
38 } else if (code !== 0) {
39 console.log(`Worker ${worker.id} exited with code ${code}`);
40 }
41 });
42}Auto-Restart Workers#
1import cluster from 'node:cluster';
2import http from 'node:http';
3import os from 'node:os';
4
5if (cluster.isPrimary) {
6 const numCPUs = os.cpus().length;
7
8 for (let i = 0; i < numCPUs; i++) {
9 cluster.fork();
10 }
11
12 // Auto-restart dead workers
13 cluster.on('exit', (worker, code, signal) => {
14 console.log(`Worker ${worker.process.pid} died`);
15
16 // Don't restart if intentionally killed
17 if (!worker.exitedAfterDisconnect) {
18 console.log('Starting a new worker');
19 cluster.fork();
20 }
21 });
22} else {
23 http.createServer((req, res) => {
24 // Simulate occasional crash
25 if (Math.random() < 0.01) {
26 throw new Error('Random failure');
27 }
28 res.end('Hello World');
29 }).listen(8000);
30}Graceful Shutdown#
1import cluster from 'node:cluster';
2import http from 'node:http';
3import os from 'node:os';
4
5if (cluster.isPrimary) {
6 const numCPUs = os.cpus().length;
7
8 for (let i = 0; i < numCPUs; i++) {
9 cluster.fork();
10 }
11
12 // Graceful shutdown
13 process.on('SIGTERM', () => {
14 console.log('Primary received SIGTERM');
15
16 for (const id in cluster.workers) {
17 cluster.workers[id].send('shutdown');
18 }
19
20 // Wait for workers to finish
21 setTimeout(() => {
22 process.exit(0);
23 }, 5000);
24 });
25} else {
26 const server = http.createServer((req, res) => {
27 res.end('Hello World');
28 });
29
30 server.listen(8000);
31
32 process.on('message', (msg) => {
33 if (msg === 'shutdown') {
34 console.log(`Worker ${process.pid} shutting down`);
35
36 server.close(() => {
37 process.exit(0);
38 });
39
40 // Force exit after timeout
41 setTimeout(() => {
42 process.exit(1);
43 }, 5000);
44 }
45 });
46}Inter-Process Communication#
1import cluster from 'node:cluster';
2import os from 'node:os';
3
4if (cluster.isPrimary) {
5 const workers = [];
6
7 for (let i = 0; i < os.cpus().length; i++) {
8 const worker = cluster.fork();
9 workers.push(worker);
10
11 // Receive messages from workers
12 worker.on('message', (message) => {
13 console.log(`Primary received: ${JSON.stringify(message)}`);
14
15 // Broadcast to all workers
16 workers.forEach((w) => {
17 w.send({ type: 'broadcast', data: message });
18 });
19 });
20 }
21
22 // Send task to specific worker
23 setTimeout(() => {
24 workers[0].send({ type: 'task', data: 'Process this' });
25 }, 1000);
26} else {
27 // Worker process
28 process.on('message', (message) => {
29 console.log(`Worker ${process.pid} received:`, message);
30
31 if (message.type === 'task') {
32 // Process task and respond
33 process.send({ type: 'result', worker: process.pid, data: 'Done' });
34 }
35 });
36
37 // Send heartbeat
38 setInterval(() => {
39 process.send({ type: 'heartbeat', worker: process.pid });
40 }, 5000);
41}Load Balancing#
1import cluster from 'node:cluster';
2import http from 'node:http';
3import os from 'node:os';
4
5// Node.js uses round-robin by default on Linux
6// On Windows, it's handled by the OS
7
8if (cluster.isPrimary) {
9 // Custom scheduling policy
10 cluster.schedulingPolicy = cluster.SCHED_RR; // Round-robin
11
12 const numCPUs = os.cpus().length;
13
14 for (let i = 0; i < numCPUs; i++) {
15 cluster.fork();
16 }
17
18 // Track requests per worker
19 const requestCounts = {};
20
21 cluster.on('message', (worker, message) => {
22 if (message.type === 'request') {
23 requestCounts[worker.id] = (requestCounts[worker.id] || 0) + 1;
24 }
25 });
26
27 setInterval(() => {
28 console.log('Request distribution:', requestCounts);
29 }, 5000);
30} else {
31 http.createServer((req, res) => {
32 // Notify primary of request
33 process.send({ type: 'request' });
34
35 res.writeHead(200);
36 res.end(`Handled by worker ${process.pid}`);
37 }).listen(8000);
38}Zero-Downtime Restart#
1import cluster from 'node:cluster';
2import http from 'node:http';
3import os from 'node:os';
4
5if (cluster.isPrimary) {
6 const numCPUs = os.cpus().length;
7
8 for (let i = 0; i < numCPUs; i++) {
9 cluster.fork();
10 }
11
12 // Reload workers one by one
13 process.on('SIGHUP', () => {
14 console.log('Reloading workers...');
15
16 const workers = Object.values(cluster.workers);
17 let index = 0;
18
19 const reloadNext = () => {
20 if (index >= workers.length) {
21 console.log('All workers reloaded');
22 return;
23 }
24
25 const worker = workers[index++];
26 console.log(`Reloading worker ${worker.id}`);
27
28 // Fork new worker
29 const newWorker = cluster.fork();
30
31 // Wait for new worker to be ready
32 newWorker.on('listening', () => {
33 // Gracefully shutdown old worker
34 worker.disconnect();
35
36 worker.on('disconnect', () => {
37 console.log(`Worker ${worker.id} disconnected`);
38 reloadNext();
39 });
40 });
41 };
42
43 reloadNext();
44 });
45
46 cluster.on('exit', (worker, code, signal) => {
47 if (!worker.exitedAfterDisconnect) {
48 console.log(`Worker ${worker.id} crashed, restarting...`);
49 cluster.fork();
50 }
51 });
52} else {
53 const server = http.createServer((req, res) => {
54 res.end(`Worker ${process.pid} - Version 1.0`);
55 });
56
57 server.listen(8000);
58}Shared State with External Store#
1import cluster from 'node:cluster';
2import http from 'node:http';
3import os from 'node:os';
4import { createClient } from 'redis';
5
6if (cluster.isPrimary) {
7 for (let i = 0; i < os.cpus().length; i++) {
8 cluster.fork();
9 }
10} else {
11 // Each worker connects to Redis
12 const redis = createClient();
13 await redis.connect();
14
15 http.createServer(async (req, res) => {
16 if (req.url === '/increment') {
17 const count = await redis.incr('counter');
18 res.end(`Count: ${count} (Worker ${process.pid})`);
19 } else if (req.url === '/count') {
20 const count = await redis.get('counter');
21 res.end(`Count: ${count}`);
22 } else {
23 res.end('Hello');
24 }
25 }).listen(8000);
26}Worker Health Checks#
1import cluster from 'node:cluster';
2import http from 'node:http';
3import os from 'node:os';
4
5if (cluster.isPrimary) {
6 const workerHealth = new Map();
7
8 for (let i = 0; i < os.cpus().length; i++) {
9 const worker = cluster.fork();
10 workerHealth.set(worker.id, { lastHeartbeat: Date.now(), healthy: true });
11
12 worker.on('message', (msg) => {
13 if (msg.type === 'heartbeat') {
14 workerHealth.set(worker.id, {
15 lastHeartbeat: Date.now(),
16 healthy: true,
17 });
18 }
19 });
20 }
21
22 // Check health periodically
23 setInterval(() => {
24 const now = Date.now();
25
26 for (const [id, health] of workerHealth) {
27 if (now - health.lastHeartbeat > 10000) {
28 console.log(`Worker ${id} is unresponsive`);
29 const worker = cluster.workers[id];
30
31 if (worker) {
32 worker.kill();
33 workerHealth.delete(id);
34 }
35 }
36 }
37 }, 5000);
38
39 cluster.on('exit', (worker) => {
40 workerHealth.delete(worker.id);
41 const newWorker = cluster.fork();
42 workerHealth.set(newWorker.id, {
43 lastHeartbeat: Date.now(),
44 healthy: true,
45 });
46 });
47} else {
48 // Send heartbeat
49 setInterval(() => {
50 process.send({ type: 'heartbeat' });
51 }, 3000);
52
53 http.createServer((req, res) => {
54 res.end('OK');
55 }).listen(8000);
56}Best Practices#
Setup:
✓ Fork based on CPU count
✓ Implement auto-restart
✓ Handle graceful shutdown
✓ Use external state storage
Communication:
✓ Keep messages small
✓ Use structured message types
✓ Handle message errors
✓ Implement timeouts
Reliability:
✓ Health checks
✓ Zero-downtime restarts
✓ Crash recovery
✓ Resource limits
Avoid:
✗ Sharing memory between workers
✗ Too many workers
✗ Ignoring worker crashes
✗ Long-running synchronous code
Conclusion#
The cluster module lets a single Node.js application scale across all CPU cores of one machine. The primary process manages worker processes that share server ports. Implement auto-restart for crashed workers, graceful shutdown for deployments, and health checks for reliability. Use external stores like Redis for shared state since workers have separate memory spaces. For production, consider process managers like PM2 that handle clustering automatically.