Application Monitoring

Patterns for monitoring, logging, and observability in production.

Overview

Monitoring helps you understand application health and debug issues. This pattern covers:

  • OpenTelemetry setup
  • Custom metrics with Prometheus
  • Error tracking with Sentry
  • Health check endpoints
  • Performance monitoring

Prerequisites

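npm install @opentelemetry/api @opentelemetry/sdk-node @opentelemetry/exporter-trace-otlp-http @opentelemetry/auto-instrumentations-node @opentelemetry/resources @opentelemetry/semantic-conventions @sentry/nextjs prom-client uuid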

Code Example

OpenTelemetry Setup

// instrumentation.ts (Next.js)
import { NodeSDK } from '@opentelemetry/sdk-node'
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http'
import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'
import { Resource } from '@opentelemetry/resources'
import {
  SEMRESATTRS_SERVICE_NAME,
  SEMRESATTRS_SERVICE_VERSION
} from '@opentelemetry/semantic-conventions'

/**
 * Next.js instrumentation hook: starts the OpenTelemetry Node SDK.
 *
 * Does nothing unless `NEXT_RUNTIME` is `'nodejs'`, because the Node SDK and
 * its auto-instrumentations are only started for the Node.js runtime.
 */
export function register() {
  if (process.env.NEXT_RUNTIME === 'nodejs') {
    const sdk = new NodeSDK({
      resource: new Resource({
        [SEMRESATTRS_SERVICE_NAME]: 'my-nextjs-app',
        [SEMRESATTRS_SERVICE_VERSION]: process.env.npm_package_version ?? '1.0.0'
      }),
      // No explicit `url` on purpose. The exporter reads
      // OTEL_EXPORTER_OTLP_ENDPOINT itself and appends the `/v1/traces` signal
      // path to it. A value passed as `url` is used verbatim, so handing it the
      // base endpoint would post spans to the wrong path.
      traceExporter: new OTLPTraceExporter(),
      instrumentations: [getNodeAutoInstrumentations()]
    })

    sdk.start()
  }
}

Custom Metrics with Prometheus

// lib/metrics.ts
import { Counter, Histogram, Registry, collectDefaultMetrics } from 'prom-client'

// Dedicated registry so only the metrics declared here (plus the Node.js
// defaults below) are exposed by the scrape endpoint.
const register = new Registry()

// Collect default Node.js metrics
collectDefaultMetrics({ register })

// HTTP request metrics
/** Counts HTTP requests, labelled by method, path and status. */
export const httpRequestsTotal = new Counter({
  name: 'http_requests_total',
  help: 'Total number of HTTP requests',
  // NOTE(review): every distinct label value creates a new time series, so
  // `path` should presumably be a route template rather than a raw URL.
  labelNames: ['method', 'path', 'status'],
  registers: [register]
})

/** Observes HTTP request duration in seconds, labelled by method and path. */
export const httpRequestDuration = new Histogram({
  name: 'http_request_duration_seconds',
  help: 'HTTP request duration in seconds',
  labelNames: ['method', 'path'],
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5],
  registers: [register]
})

// Business metrics
/** Counts user signups, labelled by plan. */
export const userSignups = new Counter({
  name: 'user_signups_total',
  help: 'Total number of user signups',
  labelNames: ['plan'],
  registers: [register]
})

/** Counts API calls, labelled by endpoint and status. */
export const apiCalls = new Counter({
  name: 'api_calls_total',
  help: 'Total API calls',
  labelNames: ['endpoint', 'status'],
  registers: [register]
})

export { register }

// API endpoint for Prometheus scraping
// app/api/metrics/route.ts
import { NextResponse } from 'next/server'
import { register } from '@/lib/metrics'

// A GET Route Handler that reads no request data can be statically cached by
// Next.js, which would serve a frozen metrics snapshot on every scrape. Force
// the handler to run per request.
export const dynamic = 'force-dynamic'

/**
 * Returns the current contents of the metrics registry in the Prometheus text
 * format, using the registry's own content type.
 */
export async function GET() {
  const metrics = await register.metrics()
  return new NextResponse(metrics, {
    headers: {
      'Content-Type': register.contentType
    }
  })
}

Error Tracking with Sentry

// sentry.client.config.ts
import * as Sentry from '@sentry/nextjs'

Sentry.init({
  dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,
  environment: process.env.NODE_ENV,
  // Trace everything outside production, but only a sample in production so
  // high-volume traffic does not send a trace for every transaction.
  tracesSampleRate: process.env.NODE_ENV === 'production' ? 0.1 : 1.0,
  replaysSessionSampleRate: 0.1,
  replaysOnErrorSampleRate: 1.0,
  integrations: [
    Sentry.replayIntegration({
      maskAllText: true,
      blockAllMedia: true
    })
  ]
})

// sentry.server.config.ts
import * as Sentry from '@sentry/nextjs'

Sentry.init({
  dsn: process.env.SENTRY_DSN,
  environment: process.env.NODE_ENV,
  // Same policy as the client config: full tracing outside production, sampled
  // tracing in production.
  tracesSampleRate: process.env.NODE_ENV === 'production' ? 0.1 : 1.0
})

// lib/error-tracking.ts
import * as Sentry from '@sentry/nextjs'

/**
 * Reports an exception to Sentry, optionally attaching extra context.
 *
 * The context is set on a temporary scope, so it is attached to this event
 * only and does not leak into later reports.
 *
 * @param error - The caught value. Typed as `unknown` so it can be passed
 *   straight from a `catch` clause without a cast.
 * @param context - Optional key/value pairs attached to the event as extras.
 */
export function captureError(error: unknown, context?: Record<string, any>) {
  Sentry.withScope(scope => {
    if (context) {
      scope.setExtras(context)
    }
    Sentry.captureException(error)
  })
}

/**
 * Associates subsequent Sentry events with the given user.
 *
 * @param user - The user's id and email, both forwarded to Sentry.
 */
export function setUserContext(user: { id: string; email: string }) {
  Sentry.setUser({ id: user.id, email: user.email })
}

/**
 * Records an info-level breadcrumb.
 *
 * @param message - Human-readable description of what happened.
 * @param data - Optional structured details stored with the breadcrumb.
 */
export function addBreadcrumb(message: string, data?: Record<string, any>) {
  Sentry.addBreadcrumb({
    message,
    data,
    level: 'info'
  })
}

Health Check Endpoints

// app/api/health/route.ts
import { NextResponse } from 'next/server'
import { prisma } from '@/lib/db'
import { redis } from '@/lib/redis'

// Health must be evaluated on every request. Without this, Next.js may cache
// the result of a GET Route Handler that reads no request data.
export const dynamic = 'force-dynamic'

/** Upper bound for a single dependency check, so a hung dependency cannot hang the probe. */
const CHECK_TIMEOUT_MS = 2000

interface CheckResult {
  status: 'pass' | 'fail'
  responseTime?: number
  error?: string
}

interface HealthCheck {
  status: 'healthy' | 'unhealthy'
  checks: {
    [key: string]: CheckResult
  }
  timestamp: string
  version: string
}

/**
 * Runs one dependency probe and converts its outcome into a check result.
 *
 * Never rejects: a thrown error, a rejected promise or a timeout all become a
 * `fail` result carrying the error message.
 *
 * @param probe - Function that resolves when the dependency is reachable.
 * @returns The check status and the elapsed time in milliseconds.
 */
async function runCheck(probe: () => Promise<unknown>): Promise<CheckResult> {
  const startedAt = Date.now()
  let timer: ReturnType<typeof setTimeout> | undefined

  try {
    await Promise.race([
      probe(),
      new Promise<never>((_, reject) => {
        timer = setTimeout(
          () => reject(new Error(`Timed out after ${CHECK_TIMEOUT_MS}ms`)),
          CHECK_TIMEOUT_MS
        )
      })
    ])
    return { status: 'pass', responseTime: Date.now() - startedAt }
  } catch (error) {
    return {
      status: 'fail',
      responseTime: Date.now() - startedAt,
      // NOTE(review): driver error messages may reveal infrastructure details;
      // consider returning a generic message if this endpoint is public.
      error: error instanceof Error ? error.message : 'Unknown error'
    }
  } finally {
    // Clear the timer so a fast probe does not leave a pending timeout behind.
    clearTimeout(timer)
  }
}

/**
 * Aggregated health endpoint.
 *
 * Checks the database and Redis in parallel and responds with 200 when every
 * check passes, or 503 otherwise. The body lists each check's result.
 */
export async function GET() {
  const [database, redisCheck] = await Promise.all([
    runCheck(() => prisma.$queryRaw`SELECT 1`),
    runCheck(() => redis.ping())
  ])

  const checks: HealthCheck['checks'] = { database, redis: redisCheck }
  const allHealthy = Object.values(checks).every(c => c.status === 'pass')

  const response: HealthCheck = {
    status: allHealthy ? 'healthy' : 'unhealthy',
    checks,
    timestamp: new Date().toISOString(),
    version: process.env.npm_package_version ?? '1.0.0'
  }

  return NextResponse.json(response, {
    status: allHealthy ? 200 : 503
  })
}

// Kubernetes probes
// app/api/health/ready/route.ts
import { NextResponse } from 'next/server'

// Evaluate the probe on every request instead of serving a cached response.
export const dynamic = 'force-dynamic'

/** Readiness probe: currently always reports ready. */
export async function GET() {
  // Check if app is ready to receive traffic
  return NextResponse.json({ ready: true })
}

// app/api/health/live/route.ts
import { NextResponse } from 'next/server'

// Evaluate the probe on every request instead of serving a cached response.
export const dynamic = 'force-dynamic'

/** Liveness probe: answering at all shows the process is still serving requests. */
export async function GET() {
  // Check if app is alive (not deadlocked)
  return NextResponse.json({ alive: true })
}

Request Tracing Middleware

// middleware.ts
import { NextRequest, NextResponse } from 'next/server'
import { v4 as uuidv4 } from 'uuid'

const REQUEST_ID_HEADER = 'X-Request-ID'

/**
 * Tags every request with a correlation ID and logs it as structured JSON.
 *
 * An ID supplied by the caller is reused so one ID can follow a request across
 * services; otherwise a new UUID is generated. The ID is forwarded to
 * downstream handlers on the request headers and echoed on the response.
 *
 * @param request - The incoming request.
 * @returns A pass-through response carrying the request ID header.
 */
export function middleware(request: NextRequest) {
  // `||` rather than `??`: an empty or blank header must also fall back to a fresh ID.
  const requestId = request.headers.get(REQUEST_ID_HEADER)?.trim() || uuidv4()

  // Forward the ID on the request so route handlers and server components can
  // read it; setting it on the response alone only reaches the client.
  const requestHeaders = new Headers(request.headers)
  requestHeaders.set(REQUEST_ID_HEADER, requestId)

  const response = NextResponse.next({
    request: { headers: requestHeaders }
  })
  response.headers.set(REQUEST_ID_HEADER, requestId)

  // Log request
  console.log(JSON.stringify({
    type: 'request',
    requestId,
    method: request.method,
    path: request.nextUrl.pathname,
    timestamp: new Date().toISOString()
  }))

  return response
}

Performance Monitoring

// lib/performance.ts
import { trace, SpanStatusCode } from '@opentelemetry/api'

const tracer = trace.getTracer('my-app')

/**
 * Runs an async operation inside a new active span.
 *
 * The span is marked OK when the operation resolves. When it throws, the span
 * is marked ERROR with the error message, the exception is recorded, and the
 * error is rethrown. The span is ended in both cases.
 *
 * @param name - Span name.
 * @param fn - The operation to run and time.
 * @param attributes - Optional attributes set on the span before `fn` runs.
 * @returns Whatever `fn` resolves to.
 * @throws Whatever `fn` throws, unchanged.
 */
export async function withSpan<T>(
  name: string,
  fn: () => Promise<T>,
  attributes?: Record<string, string | number>
): Promise<T> {
  return tracer.startActiveSpan(name, async activeSpan => {
    if (attributes) activeSpan.setAttributes(attributes)

    try {
      const outcome = await fn()
      activeSpan.setStatus({ code: SpanStatusCode.OK })
      return outcome
    } catch (caught) {
      const message = caught instanceof Error ? caught.message : 'Unknown error'
      activeSpan.setStatus({ code: SpanStatusCode.ERROR, message })
      activeSpan.recordException(caught as Error)
      throw caught
    } finally {
      // Runs on both paths so the span is always closed.
      activeSpan.end()
    }
  })
}

// Usage
const users = await withSpan(
  'fetch-users',
  () => prisma.user.findMany(),
  { 'db.operation': 'findMany', 'db.table': 'users' }
)

Structured Logging

// lib/logger.ts
type LogLevel = 'debug' | 'info' | 'warn' | 'error'

interface LogEntry {
  level: LogLevel
  message: string
  timestamp: string
  [key: string]: any
}

/**
 * Serializes a log entry to one JSON line without ever throwing.
 *
 * `Error` values are expanded to `{ name, message, stack }`, because
 * `JSON.stringify` would otherwise turn them into `{}`. If the payload cannot
 * be serialized (for example a circular reference), only the core fields are
 * emitted, with a `serializationError` flag.
 */
function serializeEntry(entry: LogEntry): string {
  try {
    return JSON.stringify(entry, (_key, value: unknown) =>
      value instanceof Error
        ? { name: value.name, message: value.message, stack: value.stack }
        : value
    )
  } catch {
    const { level, message, timestamp } = entry
    return JSON.stringify({ level, message, timestamp, serializationError: true })
  }
}

/**
 * Writes one log record.
 *
 * In production the record is a single JSON line on stdout. Otherwise the
 * message (and data, when given) goes to the console method matching `level`.
 *
 * @param level - Severity of the record.
 * @param message - Human-readable message.
 * @param data - Optional structured fields merged into the JSON record.
 */
function log(level: LogLevel, message: string, data?: Record<string, any>) {
  const entry: LogEntry = {
    // Spread caller data first so it can never overwrite the reserved
    // `level`, `message` and `timestamp` fields.
    ...data,
    level,
    message,
    timestamp: new Date().toISOString()
  }

  // In production, send to log aggregator
  if (process.env.NODE_ENV === 'production') {
    console.log(serializeEntry(entry))
  } else if (data === undefined) {
    // Avoid printing a trailing "undefined" when no data was supplied.
    console[level](message)
  } else {
    console[level](message, data)
  }
}

/** Level-specific logging helpers; each forwards to `log` with its level. */
export const logger = {
  debug: (msg: string, data?: Record<string, any>) => log('debug', msg, data),
  info: (msg: string, data?: Record<string, any>) => log('info', msg, data),
  warn: (msg: string, data?: Record<string, any>) => log('warn', msg, data),
  error: (msg: string, data?: Record<string, any>) => log('error', msg, data)
}

// Usage (from a module where `user` and `err` are in scope):
// logger.info('User signed up', { userId: user.id, plan: 'pro' })
// logger.error('Payment failed', { userId: user.id, error: err.message })

Dashboard Configuration

# docker-compose.monitoring.yml
# Local monitoring stack: Prometheus (metrics), Grafana (dashboards) and
# Jaeger (traces).
version: '3.8'
services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - '9090:9090'
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    # host.docker.internal is not defined by default on Linux engines. Map it
    # to the host gateway so the scrape target in prometheus.yml resolves.
    extra_hosts:
      - 'host.docker.internal:host-gateway'

  grafana:
    image: grafana/grafana:latest
    ports:
      # Host port 3001, because the Next.js app itself uses 3000.
      - '3001:3000'
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      # Set GRAFANA_ADMIN_PASSWORD in the environment or an .env file. The
      # "admin" fallback is for local use only.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}

  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - '16686:16686' # Jaeger UI
      - '4317:4317'   # OTLP over gRPC
      - '4318:4318'   # OTLP over HTTP

volumes:
  prometheus_data:
  grafana_data:
# prometheus.yml
# Scrapes the Next.js app's metrics endpoint every 15 seconds.
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'nextjs-app'
    metrics_path: '/api/metrics'
    static_configs:
      - targets:
          - 'host.docker.internal:3000'

Usage Instructions

  1. Set up OpenTelemetry for distributed tracing
  2. Add Prometheus metrics for custom monitoring
  3. Configure Sentry for error tracking
  4. Create health check endpoints
  5. Implement structured logging

Best Practices

  • Correlation IDs - Track requests across services
  • Structured logs - Use JSON for easy parsing
  • Meaningful metrics - Focus on business-relevant data
  • Alert thresholds - Set alerts before users notice
  • Sampling - Sample high-volume traces in production
  • Retention - Define log retention policies
  • Dashboards - Create actionable dashboards