Application Monitoring
Patterns for monitoring, logging, and observability in production.
Overview#
Monitoring helps you understand application health and debug issues. This pattern covers:
- OpenTelemetry setup
- Custom metrics with Prometheus
- Error tracking with Sentry
- Health check endpoints
- Performance monitoring
Prerequisites#
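npm install @opentelemetry/api @opentelemetry/sdk-node @opentelemetry/exporter-trace-otlp-http @opentelemetry/auto-instrumentations-node @opentelemetry/resources @opentelemetry/semantic-conventions @sentry/nextjs prom-client uuid
Code Example#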
OpenTelemetry Setup#
1// instrumentation.ts (Next.js)
2import { NodeSDK } from '@opentelemetry/sdk-node'
3import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http'
4import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'
5import { Resource } from '@opentelemetry/resources'
6import {
7 SEMRESATTRS_SERVICE_NAME,
8 SEMRESATTRS_SERVICE_VERSION
9} from '@opentelemetry/semantic-conventions'
10
/**
 * Next.js instrumentation hook that starts the OpenTelemetry Node SDK.
 *
 * Does nothing unless `NEXT_RUNTIME` is `'nodejs'`. The service version comes
 * from `npm_package_version`, falling back to `'1.0.0'`.
 */
export function register() {
  if (process.env.NEXT_RUNTIME !== 'nodejs') {
    return
  }

  const sdk = new NodeSDK({
    resource: new Resource({
      [SEMRESATTRS_SERVICE_NAME]: 'my-nextjs-app',
      [SEMRESATTRS_SERVICE_VERSION]: process.env.npm_package_version ?? '1.0.0'
    }),
    // No explicit `url`: the exporter resolves OTEL_EXPORTER_OTLP_ENDPOINT on its
    // own and appends the `/v1/traces` signal path. A `url` option is used
    // verbatim, so passing the base endpoint there would post to the wrong path.
    traceExporter: new OTLPTraceExporter(),
    instrumentations: [getNodeAutoInstrumentations()]
  })

  sdk.start()
}
Custom Metrics with Prometheus#
1// lib/metrics.ts
2import { Counter, Histogram, Registry, collectDefaultMetrics } from 'prom-client'
3
// Registry that every metric in this module is attached to; the scrape
// endpoint serializes this registry.
export const register = new Registry()

// Default Node.js process metrics go into the same registry.
collectDefaultMetrics({ register })

// Shared `registers` option for all metrics declared below.
const registers = [register]

// --- HTTP request metrics ---

/** Counts HTTP requests, labelled by method, path and status. */
export const httpRequestsTotal = new Counter({
  name: 'http_requests_total',
  help: 'Total number of HTTP requests',
  labelNames: ['method', 'path', 'status'],
  registers
})

/** Observes HTTP request duration in seconds, labelled by method and path. */
export const httpRequestDuration = new Histogram({
  name: 'http_request_duration_seconds',
  help: 'HTTP request duration in seconds',
  labelNames: ['method', 'path'],
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5],
  registers
})

// --- Business metrics ---

/** Counts user signups, labelled by plan. */
export const userSignups = new Counter({
  name: 'user_signups_total',
  help: 'Total number of user signups',
  labelNames: ['plan'],
  registers
})

/** Counts API calls, labelled by endpoint and status. */
export const apiCalls = new Counter({
  name: 'api_calls_total',
  help: 'Total API calls',
  labelNames: ['endpoint', 'status'],
  registers
})
41
42// API endpoint for Prometheus scraping
43// app/api/metrics/route.ts
44import { NextResponse } from 'next/server'
45import { register } from '@/lib/metrics'
46
// A scrape must always see live values, so opt this route out of static
// rendering and caching.
export const dynamic = 'force-dynamic'

/**
 * Prometheus scrape endpoint: serializes the shared registry.
 *
 * @returns the metrics text with the registry's content type, or a 500
 *          plain-text response if collecting the metrics fails.
 */
export async function GET() {
  try {
    const body = await register.metrics()
    return new NextResponse(body, {
      headers: {
        'Content-Type': register.contentType,
        'Cache-Control': 'no-store'
      }
    })
  } catch (error) {
    const reason = error instanceof Error ? error.message : 'Unknown error'
    return new NextResponse(`Failed to collect metrics: ${reason}`, {
      status: 500,
      headers: { 'Content-Type': 'text/plain; charset=utf-8' }
    })
  }
}
Error Tracking with Sentry#
1// sentry.client.config.ts
2import * as Sentry from '@sentry/nextjs'
3
// Browser-side Sentry setup with session replay.
Sentry.init({
  dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,
  environment: process.env.NODE_ENV,
  // Trace everything outside production; sample in production so tracing
  // volume stays bounded. Tune 0.1 to your traffic.
  tracesSampleRate: process.env.NODE_ENV === 'production' ? 0.1 : 1.0,
  // Record 10% of ordinary sessions, but every session in which an error occurs.
  replaysSessionSampleRate: 0.1,
  replaysOnErrorSampleRate: 1.0,
  integrations: [
    Sentry.replayIntegration({
      // Keep text and media out of replays.
      maskAllText: true,
      blockAllMedia: true
    })
  ]
})
17
18// sentry.server.config.ts
19import * as Sentry from '@sentry/nextjs'
20
// Server-side Sentry setup.
Sentry.init({
  dsn: process.env.SENTRY_DSN,
  environment: process.env.NODE_ENV,
  // Trace everything outside production; sample in production so tracing
  // volume stays bounded. Tune 0.1 to your traffic.
  tracesSampleRate: process.env.NODE_ENV === 'production' ? 0.1 : 1.0
})
26
27// lib/error-tracking.ts
28import * as Sentry from '@sentry/nextjs'
29
/**
 * Sends `error` to Sentry from within a `Sentry.withScope` callback.
 *
 * @param error   the exception to capture
 * @param context optional key/value data set as extras on that scope
 */
export function captureError(error: Error, context?: Record<string, any>) {
  Sentry.withScope(scope => {
    // Extras are only set when the caller supplied some.
    if (context) scope.setExtras(context)

    Sentry.captureException(error)
  })
}
38
/**
 * Sets the Sentry user from the given user's `id` and `email`.
 */
export function setUserContext(user: { id: string; email: string }) {
  const { id, email } = user
  Sentry.setUser({ id, email })
}
42
/**
 * Adds an `info`-level Sentry breadcrumb.
 *
 * @param message breadcrumb text
 * @param data    optional structured data attached to the breadcrumb
 */
export function addBreadcrumb(message: string, data?: Record<string, any>) {
  const breadcrumb = { message, data, level: 'info' as const }
  Sentry.addBreadcrumb(breadcrumb)
}
Health Check Endpoints#
1// app/api/health/route.ts
2import { NextResponse } from 'next/server'
3import { prisma } from '@/lib/db'
4import { redis } from '@/lib/redis'
5
/** Outcome of probing a single dependency. */
type CheckResult = {
  status: 'pass' | 'fail'
  /** Elapsed milliseconds, when measured. */
  responseTime?: number
  /** Failure description, when the probe failed. */
  error?: string
}

/** Body returned by the health endpoint. */
interface HealthCheck {
  /** Overall verdict across all checks. */
  status: 'healthy' | 'unhealthy'
  /** Per-dependency results keyed by dependency name. */
  checks: { [key: string]: CheckResult }
  /** ISO-8601 time at which the report was produced. */
  timestamp: string
  version: string
}
18
// Health must reflect live dependency state, so opt this route out of static
// rendering and caching.
export const dynamic = 'force-dynamic'

/**
 * Runs one dependency probe and converts its outcome into a check entry.
 *
 * @param probe async operation that resolves when the dependency is reachable
 * @returns a `pass` entry on success, or a `fail` entry carrying the error
 *          message; both include the elapsed time in milliseconds.
 */
async function runCheck(
  probe: () => Promise<unknown>
): Promise<HealthCheck['checks'][string]> {
  const startedAt = Date.now()
  try {
    await probe()
    return { status: 'pass', responseTime: Date.now() - startedAt }
  } catch (error) {
    return {
      status: 'fail',
      responseTime: Date.now() - startedAt,
      error: error instanceof Error ? error.message : 'Unknown error'
    }
  }
}

/**
 * Health endpoint: probes the database and Redis and reports the result.
 *
 * @returns the `HealthCheck` JSON, with HTTP 200 when every check passes and
 *          503 otherwise.
 */
export async function GET() {
  // The probes are independent, so run them concurrently: latency is the
  // slowest probe rather than the sum of both.
  const [database, redisResult] = await Promise.all([
    runCheck(() => prisma.$queryRaw`SELECT 1`),
    runCheck(() => redis.ping())
  ])

  const checks: HealthCheck['checks'] = { database, redis: redisResult }
  const allHealthy = Object.values(checks).every(c => c.status === 'pass')

  const response: HealthCheck = {
    status: allHealthy ? 'healthy' : 'unhealthy',
    checks,
    timestamp: new Date().toISOString(),
    version: process.env.npm_package_version ?? '1.0.0'
  }

  return NextResponse.json(response, {
    status: allHealthy ? 200 : 503
  })
}
65
66// Kubernetes probes
67// app/api/health/ready/route.ts
/**
 * Readiness probe. Currently answers `{ ready: true }` unconditionally.
 */
export async function GET() {
  const body = { ready: true }
  return NextResponse.json(body)
}
72
73// app/api/health/live/route.ts
/**
 * Liveness probe. Answers `{ alive: true }` whenever the handler is able to run.
 */
export async function GET() {
  const body = { alive: true }
  return NextResponse.json(body)
}
Request Tracing Middleware#
1// middleware.ts
2import { NextRequest, NextResponse } from 'next/server'
3import { v4 as uuidv4 } from 'uuid'
4
const REQUEST_ID_HEADER = 'X-Request-ID'

// An inbound ID is reused only if it is short and made of safe characters;
// anything else is replaced so arbitrary client text never ends up in logs.
const SAFE_REQUEST_ID = /^[\w.-]{1,128}$/

/**
 * Assigns a correlation ID to every request and logs the request as JSON.
 *
 * A well-formed inbound `X-Request-ID` is reused so an ID issued upstream
 * survives this hop; otherwise a fresh UUID is generated. The ID is set on the
 * headers forwarded to route handlers and on the response.
 */
export function middleware(request: NextRequest) {
  const inboundId = request.headers.get(REQUEST_ID_HEADER)
  const requestId =
    inboundId !== null && SAFE_REQUEST_ID.test(inboundId) ? inboundId : uuidv4()

  // Forward the ID to downstream handlers, not just back to the client.
  const forwardedHeaders = new Headers(request.headers)
  forwardedHeaders.set(REQUEST_ID_HEADER, requestId)

  const response = NextResponse.next({ request: { headers: forwardedHeaders } })
  response.headers.set(REQUEST_ID_HEADER, requestId)

  // Log request
  console.log(JSON.stringify({
    type: 'request',
    requestId,
    method: request.method,
    path: request.nextUrl.pathname,
    timestamp: new Date().toISOString()
  }))

  return response
}
Performance Monitoring#
1// lib/performance.ts
2import { trace, SpanStatusCode } from '@opentelemetry/api'
3
const tracer = trace.getTracer('my-app')

/**
 * Runs `fn` inside a new active span named `name`.
 *
 * On success the span status is set to OK and the result is returned. On
 * failure the span status is set to ERROR, the exception is recorded, and the
 * original value is rethrown. The span is ended in both cases.
 *
 * @param name       span name
 * @param fn         async operation to trace
 * @param attributes optional attributes set on the span before `fn` runs
 * @returns whatever `fn` resolves to
 * @throws whatever `fn` throws or rejects with
 */
export async function withSpan<T>(
  name: string,
  fn: () => Promise<T>,
  attributes?: Record<string, string | number>
): Promise<T> {
  return tracer.startActiveSpan(name, async span => {
    if (attributes) {
      span.setAttributes(attributes)
    }

    try {
      const result = await fn()
      span.setStatus({ code: SpanStatusCode.OK })
      return result
    } catch (error) {
      // Anything can be thrown, so narrow instead of asserting `Error`.
      // Thrown strings keep their text; other values fall back to a generic message.
      let description = 'Unknown error'
      if (error instanceof Error) {
        description = error.message
      } else if (typeof error === 'string') {
        description = error
      }

      span.setStatus({ code: SpanStatusCode.ERROR, message: description })
      span.recordException(error instanceof Error ? error : description)
      throw error
    } finally {
      span.end()
    }
  })
}
32
33// Usage
// Attributes set on the span that wraps the query.
const spanAttributes = { 'db.operation': 'findMany', 'db.table': 'users' }

// Runs the Prisma query inside a span named 'fetch-users'.
const users = await withSpan('fetch-users', () => prisma.user.findMany(), spanAttributes)
Structured Logging#
1// lib/logger.ts
type LogLevel = 'debug' | 'info' | 'warn' | 'error'

/** One structured log record: three core fields plus arbitrary caller data. */
interface LogEntry {
  level: LogLevel
  message: string
  timestamp: string
  [key: string]: unknown
}

/**
 * Serializes an entry to JSON without ever throwing.
 *
 * `JSON.stringify` throws on circular references and BigInt values. A logger
 * must not crash its caller, so in that case only the core fields are emitted,
 * together with a `logError` marker.
 */
function serializeEntry(entry: LogEntry): string {
  try {
    return JSON.stringify(entry)
  } catch {
    const { level, message, timestamp } = entry
    return JSON.stringify({
      level,
      message,
      timestamp,
      logError: 'unserializable log data'
    })
  }
}

/**
 * Writes one log record.
 *
 * In production (`NODE_ENV === 'production'`) the record is printed as a
 * single JSON line via `console.log`; otherwise the message and data go to the
 * console method matching `level`.
 *
 * @param level   severity of the record
 * @param message human-readable message
 * @param data    optional extra fields; keys named `level`, `message` or
 *                `timestamp` are ignored so they cannot overwrite the core fields
 */
function log(level: LogLevel, message: string, data?: Record<string, unknown>) {
  const core = { level, message, timestamp: new Date().toISOString() }

  // The first argument fixes key order (core fields first); re-applying `core`
  // last guarantees caller data cannot clobber the core fields.
  const entry: LogEntry = Object.assign({ ...core }, data, core)

  // In production, send to log aggregator
  if (process.env.NODE_ENV === 'production') {
    console.log(serializeEntry(entry))
  } else {
    console[level](message, data)
  }
}
26
/** Builds a logging function bound to one severity level. */
function forLevel(level: 'debug' | 'info' | 'warn' | 'error') {
  return (msg: string, data?: Record<string, any>) => log(level, msg, data)
}

/** Public logger: one method per severity, each delegating to `log`. */
export const logger = {
  debug: forLevel('debug'),
  info: forLevel('info'),
  warn: forLevel('warn'),
  error: forLevel('error')
}
33
34// Usage
// Informational event with structured context.
const signupDetails = { userId: user.id, plan: 'pro' }
logger.info('User signed up', signupDetails)

// Error event carrying the failure message.
const failureDetails = { userId: user.id, error: err.message }
logger.error('Payment failed', failureDetails)
Dashboard Configuration#
# docker-compose.monitoring.yml
# Local monitoring stack: Prometheus, Grafana and Jaeger.
version: '3.8'
services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - '9090:9090'
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    # Lets `host.docker.internal` (the scrape target in prometheus.yml) resolve
    # on Linux hosts as well as on Docker Desktop.
    extra_hosts:
      - 'host.docker.internal:host-gateway'

  grafana:
    image: grafana/grafana:latest
    ports:
      - '3001:3000'
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      # Set GRAFANA_ADMIN_PASSWORD to override; defaults to `admin` for local use only.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}

  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - '16686:16686'
      - '4317:4317'
      - '4318:4318'

volumes:
  prometheus_data:
  grafana_data:

# prometheus.yml
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'nextjs-app'
    static_configs:
      - targets: ['host.docker.internal:3000']
    metrics_path: '/api/metrics'
Usage Instructions#
- Set up OpenTelemetry for distributed tracing
- Add Prometheus metrics for custom monitoring
- Configure Sentry for error tracking
- Create health check endpoints
- Implement structured logging
Best Practices#
- Correlation IDs - Track requests across services
- Structured logs - Use JSON for easy parsing
- Meaningful metrics - Focus on business-relevant data
- Alert thresholds - Set alerts before users notice
- Sampling - Sample high-volume traces in production
- Retention - Define log retention policies
- Dashboards - Create actionable dashboards
Related Patterns#
- Audit Logging - Track user actions
- CI/CD - Deployment automation
- Profiling - Performance analysis
- Error Handling - Error responses