When a service fails, retrying immediately can overwhelm it further. The circuit breaker pattern prevents cascading failures by failing fast when a service is unhealthy.
How It Works
Circuit States:
CLOSED (normal)
↓ failures reach threshold
OPEN (failing fast)
↓ timeout expires
HALF-OPEN (testing)
↓ success → CLOSED
↓ failure → OPEN
Basic Implementation
/**
 * The three states a circuit breaker can be in.
 *
 * String-valued so that a state reads the same in code, logs and JSON.
 */
enum CircuitState {
  /** Normal operation: calls are passed through to the protected service. */
  CLOSED = 'CLOSED',
  /** Failing fast: calls are rejected without invoking the protected service. */
  OPEN = 'OPEN',
  /** Testing recovery: calls are let through again to see whether they succeed. */
  HALF_OPEN = 'HALF_OPEN',
}

/** Tuning parameters for the basic `CircuitBreaker`. */
interface CircuitBreakerOptions {
  /** Number of consecutive failures after which the circuit opens. */
  failureThreshold: number;
  /** Number of successful calls required while HALF_OPEN before the circuit closes again. */
  successThreshold: number;
  /** Milliseconds that must pass since the last failure before an OPEN circuit lets a call through. */
  timeout: number;
}

/**
 * Minimal circuit breaker driven by a count of consecutive failures.
 *
 * - CLOSED: calls pass through; `failureThreshold` failures in a row open the circuit.
 * - OPEN: calls are rejected immediately until `timeout` ms have passed since the
 *   last failure, at which point the next call switches the circuit to HALF_OPEN.
 * - HALF_OPEN: calls pass through; `successThreshold` successes close the circuit,
 *   a single failure re-opens it.
 */
class CircuitBreaker {
  private state: CircuitState = CircuitState.CLOSED;
  private failureCount = 0;
  private successCount = 0;
  private lastFailureTime: number | null = null;

  constructor(private readonly options: CircuitBreakerOptions) {}

  /**
   * Runs `fn` under the protection of the circuit.
   *
   * @param fn The operation to protect.
   * @returns Whatever `fn` resolves to.
   * @throws Error with message `'Circuit breaker is OPEN'` when the call is rejected
   *   without invoking `fn`; otherwise rethrows whatever `fn` rejected with.
   */
  async execute<T>(fn: () => Promise<T>): Promise<T> {
    if (this.state === CircuitState.OPEN) {
      if (!this.shouldAttemptReset()) {
        throw new Error('Circuit breaker is OPEN');
      }
      // Start every recovery attempt from a clean success tally.
      this.successCount = 0;
      this.state = CircuitState.HALF_OPEN;
    }

    try {
      const result = await fn();
      this.onSuccess();
      return result;
    } catch (error) {
      this.onFailure();
      throw error;
    }
  }

  /** Current state of the circuit. */
  getState(): CircuitState {
    return this.state;
  }

  /** Snapshot of the internal counters, e.g. for a health endpoint. */
  getMetrics(): {
    failureCount: number;
    successCount: number;
    lastFailureTime: number | null;
  } {
    return {
      failureCount: this.failureCount,
      successCount: this.successCount,
      lastFailureTime: this.lastFailureTime,
    };
  }

  /** True once `timeout` ms have elapsed since the most recent failure. */
  private shouldAttemptReset(): boolean {
    if (this.lastFailureTime === null) return false;
    return Date.now() - this.lastFailureTime >= this.options.timeout;
  }

  private onSuccess(): void {
    if (this.state === CircuitState.HALF_OPEN) {
      this.successCount++;
      if (this.successCount >= this.options.successThreshold) {
        this.reset();
      }
    } else {
      // Only consecutive failures count, so any success clears the streak.
      this.failureCount = 0;
    }
  }

  private onFailure(): void {
    this.failureCount++;
    this.lastFailureTime = Date.now();

    if (this.state === CircuitState.HALF_OPEN) {
      // A single failed trial call is enough to re-open the circuit.
      this.state = CircuitState.OPEN;
      this.successCount = 0;
    } else if (this.failureCount >= this.options.failureThreshold) {
      this.state = CircuitState.OPEN;
    }
  }

  /** Returns to CLOSED and clears all counters. */
  private reset(): void {
    this.state = CircuitState.CLOSED;
    this.failureCount = 0;
    this.successCount = 0;
    this.lastFailureTime = null;
  }
}

// Usage Example
/** Circuit breaker guarding calls to the payment service. */
const paymentCircuit = new CircuitBreaker({
  failureThreshold: 5, // Open after 5 consecutive failures
  successThreshold: 3, // Close after 3 successes in half-open
  timeout: 30000, // Try again after 30 seconds
});

/**
 * Charges an order through the payment circuit breaker.
 *
 * @param order The order to charge.
 * @returns The payment service's result, or a `pending` placeholder when the
 *   circuit is open and the service was not called.
 * @throws Rethrows any error other than the circuit-open rejection.
 */
async function processPayment(order: Order): Promise<PaymentResult> {
  try {
    return await paymentCircuit.execute(async () => {
      return await paymentService.charge(order);
    });
  } catch (error) {
    // `error` is `unknown`: narrow before reading `.message`, so that a
    // non-Error rejection (including null/undefined) is rethrown untouched.
    if (error instanceof Error && error.message === 'Circuit breaker is OPEN') {
      // Return cached response or fallback
      return { status: 'pending', message: 'Payment processing delayed' };
    }
    throw error;
  }
}

// Advanced Implementation
/** Configuration for `AdvancedCircuitBreaker`. */
interface CircuitBreakerConfig {
  /** Identifier passed to `CircuitOpenError` when a call is rejected. */
  name: string;
  /**
   * Consecutive-failure threshold.
   * NOTE(review): `AdvancedCircuitBreaker` opens on error percentage and does
   * not read this field — confirm whether it is still needed.
   */
  failureThreshold: number;
  /** Successful trial calls required while HALF_OPEN before the circuit closes. */
  successThreshold: number;
  /**
   * Milliseconds since the last failure before an OPEN circuit allows a trial
   * call. Also passed to `RequestMetrics` as its rolling-window length.
   */
  timeout: number;
  /** Minimum requests in the window before the circuit may open. */
  volumeThreshold: number;
  /** Error rate, as a percentage (0–100), at or above which the circuit opens. */
  errorPercentageThreshold: number;
  /** Called on every state transition with the previous and the new state. */
  onStateChange?: (from: CircuitState, to: CircuitState) => void;
  /**
   * Custom failure detection: return `false` for errors that should not count
   * against the circuit. When omitted, every error counts as a failure.
   */
  isFailure?: (error: Error) => boolean;
}

/**
 * Circuit breaker that opens on error rate rather than on a raw failure count.
 *
 * - CLOSED: every call is recorded in `RequestMetrics`; the circuit opens once
 *   the window holds at least `volumeThreshold` requests and the error
 *   percentage reaches `errorPercentageThreshold`.
 * - OPEN: calls are rejected with `CircuitOpenError` until `timeout` ms have
 *   passed since the last recorded failure.
 * - HALF_OPEN: at most `successThreshold` trial calls are admitted. All of them
 *   succeeding closes the circuit; the first failure re-opens it.
 *
 * Half-open bookkeeping, the last-failure timestamp and the rejection counter
 * are kept here, so the only things required of `RequestMetrics` are
 * `recordSuccess`, `recordFailure` and `getStats`.
 */
class AdvancedCircuitBreaker {
  private state: CircuitState = CircuitState.CLOSED;
  private metrics: RequestMetrics;
  private config: CircuitBreakerConfig;

  /** Timestamp (ms) of the most recent call counted as a failure. */
  private lastFailureTime: number | null = null;
  /** Trial calls admitted since the circuit last entered HALF_OPEN. */
  private halfOpenAttempts = 0;
  /** Trial calls that succeeded since the circuit last entered HALF_OPEN. */
  private halfOpenSuccesses = 0;
  /** Calls rejected without being executed. */
  private rejectedCount = 0;

  constructor(config: CircuitBreakerConfig) {
    this.config = config;
    this.metrics = new RequestMetrics(config.timeout);
  }

  /**
   * Runs `fn` under the protection of the circuit.
   *
   * @param fn The operation to protect.
   * @returns Whatever `fn` resolves to.
   * @throws CircuitOpenError when the call is rejected without invoking `fn`;
   *   otherwise rethrows whatever `fn` rejected with.
   */
  async execute<T>(fn: () => Promise<T>): Promise<T> {
    if (!this.allowRequest()) {
      this.rejectedCount++;
      throw new CircuitOpenError(this.config.name);
    }

    const startTime = Date.now();
    let result: T;

    try {
      result = await fn();
    } catch (error) {
      // Errors the caller classifies as benign (e.g. a 404) count as successes.
      // The cast mirrors the `isFailure` signature; the thrown value is passed as-is.
      const isFailure = this.config.isFailure?.(error as Error) ?? true;
      this.recordOutcome(!isFailure, Date.now() - startTime);
      throw error;
    }

    // Recorded outside the try block so that an exception thrown by
    // `onStateChange` is not mistaken for a failure of `fn`.
    this.recordOutcome(true, Date.now() - startTime);
    return result;
  }

  /** Current state of the circuit. */
  getState(): CircuitState {
    return this.state;
  }

  /** Rolling-window statistics plus the number of calls rejected so far. */
  getMetrics(): ReturnType<RequestMetrics['getStats']> & { rejectedCount: number } {
    return { ...this.metrics.getStats(), rejectedCount: this.rejectedCount };
  }

  /** Number of trial calls per recovery attempt; at least one so recovery is always possible. */
  private trialBudget(): number {
    return Math.max(1, this.config.successThreshold);
  }

  /** Decides whether a call may proceed, moving OPEN → HALF_OPEN when the wait is over. */
  private allowRequest(): boolean {
    if (this.state === CircuitState.CLOSED) return true;

    if (this.state === CircuitState.OPEN) {
      if (!this.shouldAttemptReset()) return false;
      this.transitionTo(CircuitState.HALF_OPEN);
    }

    // HALF_OPEN: admit a limited number of trial calls.
    if (this.halfOpenAttempts >= this.trialBudget()) return false;
    this.halfOpenAttempts++;
    return true;
  }

  /** Records one finished call and re-evaluates the circuit state. */
  private recordOutcome(succeeded: boolean, latency: number): void {
    if (succeeded) {
      this.metrics.recordSuccess(latency);
    } else {
      this.metrics.recordFailure(latency);
      this.lastFailureTime = Date.now();
    }
    this.evaluateState(succeeded);
  }

  private evaluateState(lastCallSucceeded: boolean): void {
    if (this.state === CircuitState.HALF_OPEN) {
      if (!lastCallSucceeded) {
        this.transitionTo(CircuitState.OPEN);
      } else if (++this.halfOpenSuccesses >= this.trialBudget()) {
        this.transitionTo(CircuitState.CLOSED);
      }
      return;
    }

    if (this.state === CircuitState.CLOSED) {
      const stats = this.metrics.getStats();
      if (
        stats.totalRequests >= this.config.volumeThreshold &&
        stats.errorPercentage >= this.config.errorPercentageThreshold
      ) {
        this.transitionTo(CircuitState.OPEN);
      }
    }
  }

  private transitionTo(newState: CircuitState): void {
    const oldState = this.state;
    if (newState === CircuitState.HALF_OPEN) {
      // Every recovery attempt starts with a fresh trial tally.
      this.halfOpenAttempts = 0;
      this.halfOpenSuccesses = 0;
    }
    this.state = newState;
    this.config.onStateChange?.(oldState, newState);
  }

  /** True once `timeout` ms have elapsed since the most recent failure. */
  private shouldAttemptReset(): boolean {
    // No failure on record (possible only with a zero error threshold): nothing to wait for.
    if (this.lastFailureTime === null) return true;
    return Date.now() - this.lastFailureTime >= this.config.timeout;
  }
}

// Metrics and Monitoring
/**
 * Rolling-window record of request outcomes and latencies.
 *
 * Only records newer than `windowMs` milliseconds are kept; older ones are
 * dropped whenever a record is added or statistics are read.
 */
class RequestMetrics {
  private requests: RequestRecord[] = [];
  private readonly windowMs: number;

  /** @param windowMs Length of the rolling window in milliseconds. */
  constructor(windowMs: number) {
    this.windowMs = windowMs;
  }

  /** Records a successful request that took `latency` ms. */
  recordSuccess(latency: number): void {
    this.record(true, latency);
  }

  /** Records a failed request that took `latency` ms. */
  recordFailure(latency: number): void {
    this.record(false, latency);
  }

  /**
   * Aggregates the records currently inside the window.
   *
   * @returns Request totals, the error rate as a percentage (0–100) and the
   *   mean latency; all zero when the window is empty.
   */
  getStats() {
    this.cleanup();

    const total = this.requests.length;
    let failures = 0;
    let latencySum = 0;
    for (const request of this.requests) {
      if (!request.success) failures++;
      latencySum += request.latency;
    }

    return {
      totalRequests: total,
      successCount: total - failures,
      failureCount: failures,
      errorPercentage: total > 0 ? (failures / total) * 100 : 0,
      avgLatency: total > 0 ? latencySum / total : 0,
    };
  }

  /** Appends one timestamped record, then prunes the window. */
  private record(success: boolean, latency: number): void {
    this.requests.push({ timestamp: Date.now(), success, latency });
    this.cleanup();
  }

  /** Drops every record whose timestamp is not strictly newer than the window cutoff. */
  private cleanup(): void {
    const cutoff = Date.now() - this.windowMs;
    this.requests = this.requests.filter((r) => r.timestamp > cutoff);
  }
}

// Expose metrics for monitoring: reports the state and metrics of each
// circuit breaker as JSON.
app.get('/health/circuits', (_req, res) => {
  res.json({
    payment: {
      state: paymentCircuit.getState(),
      metrics: paymentCircuit.getMetrics(),
    },
    inventory: {
      state: inventoryCircuit.getState(),
      metrics: inventoryCircuit.getMetrics(),
    },
  });
});

// Fallback Strategies
/**
 * Fetches a product through the product circuit breaker, degrading gracefully
 * when the call fails or the circuit is open.
 *
 * Fallback order: cached copy, then a placeholder marked unavailable.
 *
 * @param id Product identifier; also used to build the cache key `product:<id>`.
 * @returns The live product, a cached copy, or the placeholder. Never rejects
 *   because of a product-service or cache failure.
 */
async function getProductWithFallback(id: string): Promise<Product> {
  try {
    return await productCircuit.execute(async () => {
      return await productService.getProduct(id);
    });
  } catch {
    // Fallback strategies

    // 1. Return cached data
    try {
      const cached = await cache.get(`product:${id}`);
      if (cached) return cached;
    } catch {
      // The cache is a best-effort fallback; if it is down too, fall through
      // to the degraded response instead of failing the request.
    }

    // 2. Try alternative service (optional)
    // return await backupProductService.getProduct(id);

    // 3. Return default/degraded response
    return {
      id,
      name: 'Product Unavailable',
      price: 0,
      available: false,
    };
  }
}

// Using Libraries
// Using opossum library
import CircuitBreaker from 'opossum';

const options = {
  timeout: 3000,
  errorThresholdPercentage: 50,
  resetTimeout: 30000,
};

// Wrap the protected async function; `fire()` invokes it through the breaker.
const breaker = new CircuitBreaker(asyncFunction, options);

breaker.fallback(() => 'Fallback response');

breaker.on('success', (result) => console.log('Success:', result));
breaker.on('timeout', () => console.log('Timeout'));
breaker.on('reject', () => console.log('Rejected'));
breaker.on('open', () => console.log('Circuit opened'));
breaker.on('halfOpen', () => console.log('Circuit half-open'));
breaker.on('close', () => console.log('Circuit closed'));

const result = await breaker.fire(params);

// Best Practices
Configuration:
✓ Set appropriate thresholds for your SLAs
✓ Use volume threshold to avoid false positives
✓ Tune timeout based on recovery time
✓ Monitor state changes
Implementation:
✓ Apply per-service, not globally
✓ Implement meaningful fallbacks
✓ Log state transitions
✓ Expose health endpoints
Avoid:
✗ Opening circuit on all errors
✗ Too short timeout (thrashing)
✗ Ignoring circuit state in responses
✗ Missing fallback strategies
Conclusion
Circuit breakers are essential for resilient distributed systems. They prevent cascading failures by failing fast and giving services time to recover.
Combine with retries, timeouts, and fallbacks for comprehensive fault tolerance.