Node.js runs on a single thread by default. Here's how to utilize all CPU cores with clustering.
Basic Cluster Setup#
import cluster from 'cluster';
import http from 'http';
import os from 'os';

const numCPUs = os.cpus().length;

if (cluster.isPrimary) {
  console.log(`Primary ${process.pid} is running`);
  console.log(`Forking ${numCPUs} workers...`);

  // Fork one worker per CPU core
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  cluster.on('exit', (worker, code, signal) => {
    console.log(`Worker ${worker.process.pid} died`);
    // Only replace workers that died unexpectedly. A worker whose exit
    // followed disconnect() was shut down on purpose; re-forking it here
    // would make the cluster impossible to stop.
    if (!worker.exitedAfterDisconnect) {
      cluster.fork();
    }
  });
} else {
  // Workers share the TCP connection
  http.createServer((req, res) => {
    res.writeHead(200);
    res.end(`Hello from worker ${process.pid}\n`);
  }).listen(3000);

  console.log(`Worker ${process.pid} started`);
}

Express with Clustering#
// cluster.ts
import cluster from 'cluster';
import os from 'os';

/**
 * Runs workerFn in one forked worker per CPU core.
 * The primary process supervises: it logs lifecycle events and
 * replaces any worker that dies unexpectedly.
 */
export function startCluster(workerFn: () => void) {
  const cpuCount = os.cpus().length;

  // Workers just run the application code.
  if (!cluster.isPrimary) {
    workerFn();
    return;
  }

  console.log(`Primary process ${process.pid}`);

  // Fork workers
  for (let index = 0; index < cpuCount; index++) {
    const forked = cluster.fork();
    console.log(`Started worker ${forked.process.pid}`);
  }

  // Handle worker events
  cluster.on('exit', (worker, code, signal) => {
    console.log(`Worker ${worker.process.pid} exited (${signal || code})`);

    // Restart worker unless shutting down
    if (!worker.exitedAfterDisconnect) {
      console.log('Starting replacement worker...');
      cluster.fork();
    }
  });

  cluster.on('online', (worker) => {
    console.log(`Worker ${worker.process.pid} is online`);
  });
}

// app.ts
import express from 'express';
import { startCluster } from './cluster';

function startServer() {
  const app = express();

  app.get('/', (req, res) => {
    res.json({
      message: 'Hello',
      pid: process.pid,
    });
  });

  app.get('/heavy', (req, res) => {
    // Simulate CPU-intensive work
    let sum = 0;
    for (let i = 0; i < 1e8; i++) {
      sum += i;
    }
    res.json({ sum, pid: process.pid });
  });

  app.listen(3000, () => {
    console.log(`Worker ${process.pid} listening on port 3000`);
  });
}

startCluster(startServer);

Graceful Shutdown#
import cluster from 'cluster';
import http from 'http';
import os from 'os';

const numCPUs = os.cpus().length;

if (cluster.isPrimary) {
  // Set once shutdown begins so the crash-restart logic below stops
  // replacing workers that are exiting on purpose.
  let shuttingDown = false;
  let forceExitTimer: NodeJS.Timeout | undefined;

  // Fork workers
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  // Graceful shutdown: ask every worker to drain, then wait for them
  // to exit, with a hard deadline as a safety net.
  function shutdown(signal: string) {
    if (shuttingDown) return; // ignore repeated signals
    shuttingDown = true;
    console.log(`\n${signal} received. Starting graceful shutdown...`);

    // Tell workers to stop accepting new connections
    for (const worker of Object.values(cluster.workers || {})) {
      worker?.send('shutdown');
    }

    // Force-exit if workers do not finish draining in time
    forceExitTimer = setTimeout(() => {
      console.log('Forcing shutdown after timeout');
      process.exit(1);
    }, 30000);
  }

  process.on('SIGTERM', () => shutdown('SIGTERM'));
  process.on('SIGINT', () => shutdown('SIGINT'));

  // Single exit listener handles both modes: during shutdown it counts
  // workers down; otherwise it restarts crashed workers. (Registering a
  // second listener per shutdown call would accumulate handlers.)
  cluster.on('exit', (worker, code, signal) => {
    if (shuttingDown) {
      const remaining = Object.keys(cluster.workers || {}).length;
      if (remaining === 0) {
        if (forceExitTimer) clearTimeout(forceExitTimer);
        console.log('All workers stopped. Exiting.');
        process.exit(0);
      }
    } else if (!worker.exitedAfterDisconnect) {
      console.log(`Worker ${worker.process.pid} crashed. Restarting...`);
      cluster.fork();
    }
  });
} else {
  const server = http.createServer((req, res) => {
    res.writeHead(200);
    res.end(`Worker ${process.pid}\n`);
  });

  server.listen(3000);

  // Handle shutdown message from primary
  process.on('message', (msg) => {
    if (msg === 'shutdown') {
      console.log(`Worker ${process.pid} shutting down...`);

      // Stop accepting connections; exit once in-flight ones finish
      server.close(() => {
        console.log(`Worker ${process.pid} closed all connections`);
        process.exit(0);
      });

      // Force close after timeout. Exit non-zero because connections were
      // cut; unref() so the timer alone cannot keep a drained worker alive.
      setTimeout(() => {
        console.log(`Worker ${process.pid} forcing close`);
        process.exit(1);
      }, 10000).unref();
    }
  });
}

Zero-Downtime Restart#
import cluster, { Worker } from 'cluster';
import http from 'http';
import os from 'os';

const numCPUs = os.cpus().length;

if (cluster.isPrimary) {
  // Note: 'Worker' must be a named import — with only a default import,
  // 'cluster.Worker' is not usable in a type position.
  const workers = new Set<Worker>();
  let restartInProgress = false;

  function forkWorker(): Worker {
    const worker = cluster.fork();
    workers.add(worker);

    worker.on('exit', () => {
      workers.delete(worker);
    });

    return worker;
  }

  // Initial fork
  for (let i = 0; i < numCPUs; i++) {
    forkWorker();
  }

  // Zero-downtime restart: replace workers one at a time, retiring an
  // old worker only after its replacement is accepting traffic.
  async function restartWorkers() {
    if (restartInProgress) return; // don't overlap two rolling restarts
    restartInProgress = true;
    console.log('Starting zero-downtime restart...');

    const currentWorkers = [...workers];

    for (const worker of currentWorkers) {
      // Fork new worker first
      const newWorker = forkWorker();

      // Wait for new worker to be ready. 'once' ensures the promise
      // cannot resolve twice and the listener does not leak.
      await new Promise<void>((resolve) => {
        newWorker.once('listening', () => {
          console.log(`New worker ${newWorker.process.pid} is ready`);

          // Gracefully shutdown old worker
          worker.send('shutdown');
          worker.disconnect();

          // Force kill after timeout; unref() so this timer alone
          // cannot keep the primary process alive.
          setTimeout(() => {
            if (!worker.isDead()) {
              worker.kill();
            }
          }, 10000).unref();

          resolve();
        });
      });

      // Small delay between restarts
      await new Promise((r) => setTimeout(r, 1000));
    }

    console.log('All workers restarted');
    restartInProgress = false;
  }

  // Trigger restart on SIGUSR2
  process.on('SIGUSR2', () => {
    void restartWorkers();
  });

  cluster.on('exit', (worker, code, signal) => {
    if (!worker.exitedAfterDisconnect && code !== 0) {
      console.log(`Worker ${worker.process.pid} crashed. Restarting...`);
      forkWorker();
    }
  });
} else {
  // Worker code
  const server = http.createServer((req, res) => {
    res.writeHead(200);
    res.end(`Worker ${process.pid}\n`);
  });

  server.listen(3000, () => {
    console.log(`Worker ${process.pid} listening`);
  });

  process.on('message', (msg) => {
    if (msg === 'shutdown') {
      server.close(() => {
        process.exit(0);
      });
    }
  });
}

Shared State with IPC#
import cluster from 'cluster';
import http from 'http';
import os from 'os';

interface Message {
  type: string;
  data?: any;
}

if (cluster.isPrimary) {
  // The primary owns the shared counters; workers report over IPC.
  const aggregate = {
    totalRequests: 0,
    requestsByWorker: {} as Record<number, number>,
  };

  const cpuCount = os.cpus().length;
  for (let index = 0; index < cpuCount; index++) {
    const worker = cluster.fork();

    worker.on('message', (msg: Message) => {
      switch (msg.type) {
        case 'request': {
          // Tally a request against this worker's pid
          aggregate.totalRequests++;
          const pid = worker.process.pid!;
          aggregate.requestsByWorker[pid] =
            (aggregate.requestsByWorker[pid] || 0) + 1;
          break;
        }
        case 'getStats':
          // Reply to the asking worker with a snapshot of the counters
          worker.send({ type: 'stats', data: aggregate });
          break;
      }
    });
  }

  // Log stats periodically
  setInterval(() => {
    console.log('Stats:', aggregate);
  }, 5000);
} else {
  const server = http.createServer((req, res) => {
    // Notify primary of request
    process.send?.({ type: 'request' });

    if (req.url !== '/stats') {
      res.writeHead(200);
      res.end(`Worker ${process.pid}\n`);
      return;
    }

    // Ask the primary for aggregated stats; respond once they arrive,
    // then drop the listener so it cannot fire again.
    const onStats = (msg: Message) => {
      if (msg.type === 'stats') {
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify(msg.data));
        process.off('message', onStats);
      }
    };

    process.on('message', onStats);
    process.send?.({ type: 'getStats' });
  });

  server.listen(3000);
}

Sticky Sessions#
import cluster, { Worker } from 'cluster';
import http from 'http';
import net from 'net';
import os from 'os';

const numCPUs = os.cpus().length;

if (cluster.isPrimary) {
  // Note: 'Worker' must be a named import — with only a default import,
  // 'cluster.Worker' is not usable in a type position.
  const workers: Worker[] = [];

  // Fork (or re-fork) the worker for a given routing slot. Replacing a
  // dead worker in-place keeps the IP hash mapping clients to a live
  // worker — without this, connections hashed to a crashed worker were
  // silently dropped.
  function forkAt(index: number) {
    const worker = cluster.fork();
    workers[index] = worker;
    worker.on('exit', () => forkAt(index));
  }

  // Fork workers
  for (let i = 0; i < numCPUs; i++) {
    forkAt(i);
  }

  // Create proxy server. pauseOnConnect defers reading from the socket
  // so no bytes are consumed before the worker takes it over.
  const server = net.createServer({ pauseOnConnect: true }, (connection) => {
    // Get client IP for sticky routing
    const remoteAddress = connection.remoteAddress || '';

    // Simple 31-based string hash to pick a worker deterministically
    let hash = 0;
    for (let i = 0; i < remoteAddress.length; i++) {
      hash = ((hash << 5) - hash) + remoteAddress.charCodeAt(i);
      hash = hash & hash; // force to 32-bit integer
    }

    const workerIndex = Math.abs(hash) % workers.length;
    const worker = workers[workerIndex];

    // Hand the raw socket to the chosen worker over IPC
    worker.send('connection', connection);
  });

  server.listen(3000, () => {
    console.log('Primary listening on port 3000');
  });
} else {
  const server = http.createServer((req, res) => {
    res.writeHead(200);
    res.end(`Handled by worker ${process.pid}\n`);
  });

  // Listen on an ephemeral localhost port only to activate the server;
  // real traffic arrives as sockets handed over by the primary.
  server.listen(0, 'localhost');

  process.on('message', (message, connection) => {
    if (message === 'connection') {
      server.emit('connection', connection);
      // Undo pauseOnConnect so data starts flowing
      (connection as net.Socket).resume();
    }
  });
}

PM2 Alternative#
1// ecosystem.config.js for PM2
2module.exports = {
3 apps: [{
4 name: 'api',
5 script: './dist/server.js',
6 instances: 'max', // Use all CPUs
7 exec_mode: 'cluster',
8 watch: false,
9 max_memory_restart: '1G',
10 env: {
11 NODE_ENV: 'production',
12 },
13 // Zero-downtime restart
14 wait_ready: true,
15 listen_timeout: 10000,
16 kill_timeout: 5000,
17 }],
18};
19
20// In your app, signal ready
21process.send?.('ready');
22
23// Handle graceful shutdown
24process.on('SIGINT', () => {
25 // Close connections, etc.
26 process.exit(0);
27});Load Testing#
# Install autocannon
npm install -g autocannon

# Test single instance
node app-single.js &
autocannon -c 100 -d 10 http://localhost:3000

# Test cluster
node app-cluster.js &
autocannon -c 100 -d 10 http://localhost:3000

Best Practices#
Scaling:
✓ Match workers to CPU cores
✓ Implement graceful shutdown
✓ Handle worker crashes
✓ Use zero-downtime restarts
State:
✓ Keep workers stateless
✓ Use external storage (Redis)
✓ IPC for coordination only
✓ Avoid shared mutable state
Monitoring:
✓ Track per-worker metrics
✓ Monitor memory usage
✓ Log worker lifecycle events
✓ Set up health checks
Production:
✓ Consider PM2 or similar
✓ Use process managers
✓ Configure restart limits
✓ Set memory limits
Conclusion#
Clustering scales Node.js across all CPU cores. Implement graceful shutdown for production reliability, use IPC sparingly for coordination, and keep workers stateless. For production, consider PM2 or Kubernetes for advanced orchestration. Always test with load to verify scaling benefits.