Canary releases gradually roll out changes to a subset of users before full deployment.
Canary vs Blue-Green
Blue-Green: 100% traffic switch
┌──────────┐ ┌──────────┐
│ v1.0.0 │ ──▶ │ v2.0.0 │
│ 100% │ │ 100% │
└──────────┘ └──────────┘
Canary: Gradual traffic shift
┌──────────┬──────────┐
│ v1.0.0 │ v2.0.0 │
│ 95% │ 5% │ ← Start
└──────────┴──────────┘
↓
┌──────────┬──────────┐
│ v1.0.0 │ v2.0.0 │
│ 50% │ 50% │ ← Progress
└──────────┴──────────┘
↓
┌──────────┬──────────┐
│ v1.0.0 │ v2.0.0 │
│ 0% │ 100% │ ← Complete
└──────────┴──────────┘
Kubernetes with Istio
# destination-rule.yaml
# Declares the two named subsets of the `myapp` host that routing rules can
# refer to. Each subset is identified by the `version` label listed under it.
apiVersion: networking.istio.io/v1beta1
kind: DestinationRule
metadata:
  name: myapp
spec:
  host: myapp
  subsets:
    # Current production version.
    - name: stable
      labels:
        version: v1
    # Version under evaluation.
    - name: canary
      labels:
        version: v2
---
# virtual-service.yaml
# Two HTTP rules, listed in priority order:
#   1. requests carrying the header `x-canary: "true"` always go to the canary subset;
#   2. everything else is split 95% stable / 5% canary.
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
  name: myapp
spec:
  hosts:
    - myapp
  http:
    # Explicit opt-in: header-tagged requests are pinned to the canary.
    - match:
        - headers:
            x-canary:
              exact: "true"
      route:
        - destination:
            host: myapp
            subset: canary
    # Default rule: weighted split (the weights add up to 100).
    - route:
        - destination:
            host: myapp
            subset: stable
          weight: 95
        - destination:
            host: myapp
            subset: canary
          weight: 5

# Flagger Automated Canary
# Flagger Canary resource that automates the rollout of the `myapp` Deployment.
apiVersion: flagger.app/v1beta1
kind: Canary
metadata:
  name: myapp
spec:
  # Workload whose new revisions are rolled out as canaries.
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  service:
    port: 80
  analysis:
    # How often the analysis runs.
    interval: 1m
    # Number of failed checks tolerated before the canary is rolled back.
    threshold: 5
    # Canary traffic is raised in steps of `stepWeight` percent up to `maxWeight` percent.
    maxWeight: 50
    stepWeight: 10
    metrics:
      # Minimum share of successful requests, in percent.
      - name: request-success-rate
        thresholdRange:
          min: 99
        interval: 1m
      # Maximum request duration, in milliseconds.
      - name: request-duration
        thresholdRange:
          max: 500
        interval: 1m
    webhooks:
      # Runs before any traffic is shifted to the canary.
      - name: smoke-test
        type: pre-rollout
        url: http://flagger-loadtester/
        timeout: 30s
        metadata:
          type: bash
          # `-f` makes curl exit non-zero on HTTP 4xx/5xx; with `-s` alone an
          # unhealthy endpoint would still pass the smoke test.
          cmd: "curl -sf http://myapp-canary/health"

# Custom Canary Controller
/**
 * Settings for a staged canary rollout, consumed by `executeCanary`.
 */
interface CanaryConfig {
  /** Canary traffic weights in percent, applied in order, e.g. `[5, 10, 25, 50, 100]`. */
  stages: number[];
  /** Time to wait between applying a stage and checking its metrics, in milliseconds. */
  interval: number;
  /**
   * Success threshold for the rollout.
   * NOTE(review): nothing in the visible controller code reads this field;
   * confirm its intended meaning before relying on it.
   */
  successThreshold: number;
  /** Metric checks evaluated after each stage. */
  metrics: MetricCheck[];
}
7
/**
 * Rolls `newVersion` of `deployment` out as a canary, one traffic stage at a time.
 *
 * The canary is deployed first. Then, for every weight in `config.stages`, the
 * weight is applied, the function waits `config.interval` ms and the
 * `config.metrics` checks are run. Once every stage has passed, the canary is
 * promoted.
 *
 * The canary is rolled back when a stage reports unhealthy metrics and also
 * when applying or checking a stage throws, so a failed rollout does not leave
 * the canary serving traffic at a partial weight.
 *
 * @param deployment - Name of the deployment being rolled out.
 * @param newVersion - Version to deploy as the canary.
 * @param config - Stage weights, wait interval and metric checks.
 * @throws RangeError when `config.stages` is empty or contains a weight outside
 *   0–100; raised before anything is deployed.
 * @throws Error `'Canary deployment failed'` when a stage's metrics are unhealthy.
 * @throws The original error, re-thrown after the rollback, when a stage
 *   operation itself fails.
 */
async function executeCanary(
  deployment: string,
  newVersion: string,
  config: CanaryConfig
): Promise<void> {
  const { stages, interval, metrics } = config;

  // Fail fast: with no stages the loop below would be skipped and a canary
  // that never received traffic would be promoted.
  if (stages.length === 0) {
    throw new RangeError('Canary config must define at least one stage');
  }
  const invalidWeight = stages.find(
    (weight) => !Number.isFinite(weight) || weight < 0 || weight > 100
  );
  if (invalidWeight !== undefined) {
    throw new RangeError(`Invalid canary stage weight: ${invalidWeight}`);
  }

  // Deploy canary with 0 traffic
  await deployCanary(deployment, newVersion);

  for (const weight of stages) {
    let healthy: boolean;

    try {
      console.log(`Setting canary weight to ${weight}%`);
      await setCanaryWeight(deployment, weight);

      // Give the stage time to collect traffic before judging it.
      await sleep(interval);

      healthy = await checkMetrics(deployment, metrics);
    } catch (error: unknown) {
      // An infrastructure failure mid-stage must not strand live traffic on
      // the canary: undo the rollout, then surface the original error.
      console.log(`Canary stage ${weight}% errored, rolling back`);
      await rollbackCanary(deployment);
      throw error;
    }

    if (!healthy) {
      console.log('Canary failed metrics check, rolling back');
      await rollbackCanary(deployment);
      throw new Error('Canary deployment failed');
    }

    console.log(`Stage ${weight}% passed`);
  }

  // Promote canary to stable
  await promoteCanary(deployment);
  console.log('Canary promoted successfully');
}
39
/**
 * Evaluates metric checks one at a time and stops at the first failure.
 *
 * A `'min'` check fails when the queried value is below `check.threshold`; a
 * `'max'` check fails when it is above. A value that is not a number, or is
 * `NaN` (for example a ratio computed over zero requests), fails the check:
 * `NaN` compares false against every threshold and would otherwise pass
 * silently.
 *
 * @param deployment - Deployment under evaluation. Not used by the checks
 *   themselves; kept so the signature stays stable for callers.
 * @param checks - Checks to evaluate, in order.
 * @returns `true` when every check passes (including when `checks` is empty),
 *   `false` as soon as one fails.
 */
async function checkMetrics(
  deployment: string,
  checks: MetricCheck[]
): Promise<boolean> {
  for (const check of checks) {
    const value = await queryMetric(check.query);

    // Fail closed when the backend produced no usable sample.
    if (typeof value !== 'number' || Number.isNaN(value)) {
      console.log(`Metric ${check.name} returned no usable value: ${value}`);
      return false;
    }

    if (check.type === 'min' && value < check.threshold) {
      console.log(`Metric ${check.name} below threshold: ${value}`);
      return false;
    }

    if (check.type === 'max' && value > check.threshold) {
      console.log(`Metric ${check.name} above threshold: ${value}`);
      return false;
    }
  }

  return true;
}

// User-Based Canary
// Route specific users to canary
/**
 * Decides whether a user belongs to the canary cohort.
 *
 * The user id is hashed and reduced to a bucket in the range 0–99; the user is
 * in the cohort when that bucket is below `CANARY_PERCENTAGE`. The same id
 * therefore gets the same answer on every call, as long as `hashCode` is
 * deterministic and the percentage is unchanged.
 *
 * @param userId - Identifier to hash.
 * @returns `true` when the user should be served by the canary.
 */
function shouldUseCanary(userId: string): boolean {
  // Hash user ID for consistent assignment
  const bucket = Math.abs(hashCode(userId)) % 100;

  return bucket < CANARY_PERCENTAGE;
}
9
// Express middleware
// Tags a request with `x-canary: true` when its user (or, failing that, its
// session) falls into the canary cohort, then hands the request on.
app.use((req, res, next) => {
  // Prefer the user id; fall back to the session id when there is none.
  // NOTE(review): express-session exposes the id as `req.sessionID`; confirm
  // that `req.sessionId` is really populated by the session layer in use.
  const userId = req.user?.id || req.sessionId;

  // With no identifier there is nothing stable to hash, so the request is
  // left untagged instead of hashing `undefined`.
  // NOTE(review): an `x-canary` header already sent by the client is left
  // untouched here; confirm whether it should be stripped first.
  if (userId && shouldUseCanary(userId)) {
    req.headers['x-canary'] = 'true';
  }

  next();
});

// Feature Flag Integration
1import { getFeatureFlags } from './feature-flags';
2
/**
 * Picks the backend that should serve a request.
 *
 * Order of precedence:
 *   1. `flags.useNewCheckout` set       → `'checkout-v2'`
 *   2. user falls in the canary cohort  → `'app-canary'`
 *   3. otherwise                        → `'app-stable'`
 *
 * The cohort test hashes the user id into a bucket in the range 0–99 and
 * compares it with `flags.canaryPercentage`. Requests without a user id cannot
 * be bucketed and are served by stable rather than failing on a missing
 * `req.user`.
 *
 * @param req - Incoming request; `req.user` may be absent.
 * @returns Name of the target backend.
 */
async function routeRequest(req: Request): Promise<string> {
  const flags = await getFeatureFlags(req.user);

  // Route based on feature flags
  if (flags.useNewCheckout) {
    return 'checkout-v2';
  }

  // Route based on percentage
  const userId = req.user?.id;
  if (flags.canaryPercentage > 0 && userId != null) {
    const bucket = Math.abs(hashCode(userId)) % 100;
    if (bucket < flags.canaryPercentage) {
      return 'app-canary';
    }
  }

  return 'app-stable';
}

// Monitoring and Alerting
// Prometheus queries for canary health
// Every query selects series labelled `version="canary"` over a 5-minute window.
const CANARY_METRICS = {
  // Share of requests that ended in a 5xx status.
  errorRate: `
    sum(rate(http_requests_total{status=~"5..", version="canary"}[5m]))
    /
    sum(rate(http_requests_total{version="canary"}[5m]))
  `,
  // 99th-percentile request duration, taken from the duration histogram buckets.
  latencyP99: `
    histogram_quantile(0.99,
      sum(rate(http_request_duration_seconds_bucket{version="canary"}[5m]))
      by (le)
    )
  `,
  // Share of requests that ended in a 2xx status.
  successRate: `
    sum(rate(http_requests_total{status=~"2..", version="canary"}[5m]))
    /
    sum(rate(http_requests_total{version="canary"}[5m]))
  `,
};
20
/**
 * Compares the canary's error rate with the stable version's.
 *
 * The stable query is derived from `CANARY_METRICS.errorRate` by replacing
 * every occurrence of `canary` with `stable`. The word appears in both the
 * numerator and the denominator, and a string-pattern `.replace` would swap
 * only the first one, leaving a query that divides stable errors by canary
 * traffic.
 *
 * @returns Both error rates, their difference (`canary - stable`), and
 *   `shouldRollback`, which is `true` when the canary's error rate is more
 *   than 10% higher than the stable one.
 */
async function compareCanaryToStable(): Promise<ComparisonResult> {
  const canaryQuery = CANARY_METRICS.errorRate;
  const stableQuery = canaryQuery.replace(/canary/g, 'stable');

  // The two queries are independent, so issue them together.
  const [canaryError, stableError] = await Promise.all([
    queryPrometheus(canaryQuery),
    queryPrometheus(stableQuery),
  ]);

  return {
    canaryErrorRate: canaryError,
    stableErrorRate: stableError,
    degradation: canaryError - stableError,
    shouldRollback: canaryError > stableError * 1.1, // 10% worse
  };
}

// Canary releases reduce risk by catching issues before they affect all users.