API · Rate Limiting · Security · Performance

API Rate Limiting Implementation Strategies

Protect your API from abuse with proven strategies, from token bucket and sliding window algorithms to distributed rate limiting.

Bootspring Team
Engineering
January 12, 2024
5 min read

Rate limiting protects your API from abuse and ensures fair usage. Here are proven algorithms and implementation patterns for production systems.

Why Rate Limit?

Protection against:

- DDoS attacks
- Brute force attempts
- API abuse
- Resource exhaustion
- Unfair usage

Business benefits:

- Predictable costs
- SLA enforcement
- Monetization tiers
- Quality of service

Token Bucket Algorithm

```typescript
class TokenBucket {
  private tokens: number;
  private lastRefill: number;

  constructor(
    private capacity: number,
    private refillRate: number // tokens per second
  ) {
    this.tokens = capacity;
    this.lastRefill = Date.now();
  }

  consume(tokens: number = 1): boolean {
    this.refill();

    if (this.tokens >= tokens) {
      this.tokens -= tokens;
      return true;
    }

    return false;
  }

  private refill() {
    const now = Date.now();
    const elapsed = (now - this.lastRefill) / 1000;
    const newTokens = elapsed * this.refillRate;

    this.tokens = Math.min(this.capacity, this.tokens + newTokens);
    this.lastRefill = now;
  }

  getTokens(): number {
    this.refill();
    return this.tokens;
  }
}

// Usage
const bucket = new TokenBucket(100, 10); // 100 capacity, 10/sec refill

if (bucket.consume()) {
  // Process request
} else {
  // Rate limited
}
```
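In production you track one bucket per client rather than a single global bucket. A minimal sketch of a per-key wrapper (the `KeyedTokenBucket` name and the lazy per-key map are assumptions, not part of the original; a compact bucket is inlined so the sketch is self-contained):

```typescript
// Compact token bucket, same idea as the class above.
class Bucket {
  private tokens: number;
  private lastRefill: number;

  constructor(private capacity: number, private refillRate: number) {
    this.tokens = capacity;
    this.lastRefill = Date.now();
  }

  consume(n = 1): boolean {
    const now = Date.now();
    const elapsed = (now - this.lastRefill) / 1000;
    this.tokens = Math.min(this.capacity, this.tokens + elapsed * this.refillRate);
    this.lastRefill = now;
    if (this.tokens >= n) {
      this.tokens -= n;
      return true;
    }
    return false;
  }
}

// One bucket per API key / client id, created lazily on first use.
class KeyedTokenBucket {
  private buckets = new Map<string, Bucket>();

  constructor(private capacity: number, private refillRate: number) {}

  consume(key: string, n = 1): boolean {
    let bucket = this.buckets.get(key);
    if (!bucket) {
      bucket = new Bucket(this.capacity, this.refillRate);
      this.buckets.set(key, bucket);
    }
    return bucket.consume(n);
  }
}
```

Each key gets its own burst capacity and refill rate, so one noisy client cannot drain another client's allowance.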

Sliding Window Algorithm

```typescript
class SlidingWindowRateLimiter {
  private requests: Map<string, number[]> = new Map();

  constructor(
    private windowMs: number,
    private maxRequests: number
  ) {}

  isAllowed(key: string): boolean {
    const now = Date.now();
    const windowStart = now - this.windowMs;

    // Get existing requests for this key
    let timestamps = this.requests.get(key) || [];

    // Remove expired timestamps
    timestamps = timestamps.filter((ts) => ts > windowStart);

    if (timestamps.length >= this.maxRequests) {
      return false;
    }

    // Add current request
    timestamps.push(now);
    this.requests.set(key, timestamps);

    return true;
  }

  getRemainingRequests(key: string): number {
    const now = Date.now();
    const windowStart = now - this.windowMs;
    const timestamps = this.requests.get(key) || [];
    const validRequests = timestamps.filter((ts) => ts > windowStart);

    return Math.max(0, this.maxRequests - validRequests.length);
  }
}

// 100 requests per minute
const limiter = new SlidingWindowRateLimiter(60000, 100);
```
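One caveat with the in-memory approach: the `Map` keeps an entry for every key ever seen, so it grows without bound unless stale keys are evicted. A sketch of a `prune()` step (an assumed addition, not part of the original class) that can run periodically, e.g. on a `setInterval`:

```typescript
// Sliding-window limiter with an explicit prune() step to keep the
// in-memory Map from growing without bound. prune() is an assumed
// addition to the article's class, not part of the original.
class PrunableSlidingWindow {
  private requests = new Map<string, number[]>();

  constructor(private windowMs: number, private maxRequests: number) {}

  isAllowed(key: string): boolean {
    const now = Date.now();
    const windowStart = now - this.windowMs;
    const timestamps = (this.requests.get(key) ?? []).filter((ts) => ts > windowStart);
    if (timestamps.length >= this.maxRequests) return false;
    timestamps.push(now);
    this.requests.set(key, timestamps);
    return true;
  }

  // Drop keys whose timestamps have all aged out of the window.
  prune(): void {
    const windowStart = Date.now() - this.windowMs;
    for (const [key, timestamps] of this.requests) {
      if (timestamps.every((ts) => ts <= windowStart)) {
        this.requests.delete(key);
      }
    }
  }

  size(): number {
    return this.requests.size;
  }
}
```

Without some form of eviction, a scanner cycling through random API keys becomes a slow memory leak.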

Redis-Based Distributed Rate Limiting

```typescript
import Redis from 'ioredis';

class RedisRateLimiter {
  constructor(
    private redis: Redis,
    private windowMs: number,
    private maxRequests: number
  ) {}

  async isAllowed(key: string): Promise<{
    allowed: boolean;
    remaining: number;
    resetAt: number;
  }> {
    const now = Date.now();
    const windowKey = `ratelimit:${key}:${Math.floor(now / this.windowMs)}`;

    const multi = this.redis.multi();
    multi.incr(windowKey);
    multi.pexpire(windowKey, this.windowMs);

    const results = await multi.exec();
    const count = results![0][1] as number;

    const resetAt = (Math.floor(now / this.windowMs) + 1) * this.windowMs;

    return {
      allowed: count <= this.maxRequests,
      remaining: Math.max(0, this.maxRequests - count),
      resetAt,
    };
  }
}

// Sliding window with Redis
class RedisSlidingWindowLimiter {
  constructor(
    private redis: Redis,
    private windowMs: number,
    private maxRequests: number
  ) {}

  async isAllowed(key: string): Promise<boolean> {
    const now = Date.now();
    const windowStart = now - this.windowMs;
    const redisKey = `ratelimit:sliding:${key}`;

    // Lua script for an atomic check-and-add
    const script = `
      local key = KEYS[1]
      local now = tonumber(ARGV[1])
      local window_start = tonumber(ARGV[2])
      local max_requests = tonumber(ARGV[3])
      local window_ms = tonumber(ARGV[4])

      -- Remove old entries
      redis.call('ZREMRANGEBYSCORE', key, '-inf', window_start)

      -- Count current entries
      local count = redis.call('ZCARD', key)

      if count < max_requests then
        -- Add new entry
        redis.call('ZADD', key, now, now .. '-' .. math.random())
        redis.call('PEXPIRE', key, window_ms)
        return 1
      else
        return 0
      end
    `;

    const result = await this.redis.eval(
      script,
      1,
      redisKey,
      now,
      windowStart,
      this.maxRequests,
      this.windowMs
    );

    return result === 1;
  }
}
```

Express Middleware

```typescript
import rateLimit from 'express-rate-limit';
import RedisStore from 'rate-limit-redis';

// Assumes an Express `app` and an ioredis `redis` client are already in scope.

// Basic in-memory rate limiting
const basicLimiter = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 100, // 100 requests per window
  message: {
    error: 'Too many requests',
    retryAfter: 900,
  },
  standardHeaders: true, // Return rate limit info in headers
  legacyHeaders: false,
});

// Redis-backed for distributed systems
const redisLimiter = rateLimit({
  windowMs: 15 * 60 * 1000,
  max: 100,
  store: new RedisStore({
    sendCommand: (...args: string[]) => redis.call(...args),
  }),
});

// Different limits for different routes
const authLimiter = rateLimit({
  windowMs: 60 * 60 * 1000, // 1 hour
  max: 5, // 5 login attempts per hour
  skipSuccessfulRequests: true,
});

// Apply to routes
app.use('/api/', basicLimiter);
app.use('/api/auth/login', authLimiter);
```

Response Headers

```typescript
import { Response } from 'express';

function addRateLimitHeaders(
  res: Response,
  limit: number,
  remaining: number,
  resetAt: number
) {
  // De facto standard X- prefixed headers
  res.setHeader('X-RateLimit-Limit', limit);
  res.setHeader('X-RateLimit-Remaining', remaining);
  res.setHeader('X-RateLimit-Reset', Math.ceil(resetAt / 1000));

  // Draft IETF standard headers (draft-ietf-httpapi-ratelimit-headers)
  res.setHeader('RateLimit-Limit', limit);
  res.setHeader('RateLimit-Remaining', remaining);
  res.setHeader('RateLimit-Reset', Math.ceil(resetAt / 1000));
}

// When rate limited: 429 Too Many Requests (RFC 6585) plus Retry-After
function sendRateLimitResponse(res: Response, resetAt: number) {
  const retryAfter = Math.ceil((resetAt - Date.now()) / 1000);
  res.status(429);
  res.setHeader('Retry-After', retryAfter);
  res.json({
    error: 'Too Many Requests',
    message: 'Rate limit exceeded. Please try again later.',
    retryAfter,
  });
}
```
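On the other side of the wire, well-behaved clients should honor `Retry-After` before retrying. HTTP allows the header to be either delta-seconds (`"120"`) or an HTTP-date, so a client helper needs to handle both. A sketch (`parseRetryAfter` is a hypothetical helper name, not from a library):

```typescript
// Parse a Retry-After header value into a wait time in milliseconds.
// HTTP permits either delta-seconds ("120") or an HTTP-date.
function parseRetryAfter(headerValue: string, now: number = Date.now()): number {
  // Delta-seconds form
  const seconds = Number(headerValue);
  if (Number.isFinite(seconds)) {
    return Math.max(0, seconds * 1000);
  }

  // HTTP-date form
  const date = Date.parse(headerValue);
  if (!Number.isNaN(date)) {
    return Math.max(0, date - now);
  }

  // Unparseable: fall back to zero (or apply a default backoff)
  return 0;
}
```

A retry loop can then sleep for `parseRetryAfter(...)` milliseconds, ideally with a cap and some jitter to avoid synchronized retry storms.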

Tiered Rate Limiting

```typescript
import Redis from 'ioredis';

interface RateLimitTier {
  requestsPerMinute: number;
  requestsPerDay: number;
  burstLimit: number;
}

const tiers: Record<string, RateLimitTier> = {
  free: {
    requestsPerMinute: 10,
    requestsPerDay: 1000,
    burstLimit: 20,
  },
  pro: {
    requestsPerMinute: 100,
    requestsPerDay: 50000,
    burstLimit: 200,
  },
  enterprise: {
    requestsPerMinute: 1000,
    requestsPerDay: 1000000,
    burstLimit: 2000,
  },
};

class TieredRateLimiter {
  constructor(private redis: Redis) {}

  async checkLimit(userId: string, tier: string): Promise<boolean> {
    const limits = tiers[tier] || tiers.free;

    // Check minute limit
    const minuteKey = `ratelimit:minute:${userId}`;
    const minuteCount = await this.redis.incr(minuteKey);
    if (minuteCount === 1) {
      await this.redis.expire(minuteKey, 60);
    }

    if (minuteCount > limits.requestsPerMinute) {
      return false;
    }

    // Check daily limit
    const dayKey = `ratelimit:day:${userId}:${this.getDay()}`;
    const dayCount = await this.redis.incr(dayKey);
    if (dayCount === 1) {
      await this.redis.expire(dayKey, 86400);
    }

    if (dayCount > limits.requestsPerDay) {
      return false;
    }

    return true;
  }

  private getDay(): string {
    return new Date().toISOString().split('T')[0];
  }
}
```
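Note that the tier definitions above include a `burstLimit` that the counter checks never enforce. One way to cover it (an assumption, not from the original) is to pair the counters with a per-user token bucket whose capacity is the tier's `burstLimit` and whose refill rate is `requestsPerMinute / 60` tokens per second:

```typescript
interface Tier {
  requestsPerMinute: number;
  burstLimit: number;
}

// Per-user token bucket sized from the tier: capacity = burstLimit,
// refill = requestsPerMinute / 60 tokens per second. A sketch only;
// in a distributed setup this state would live in Redis, not memory.
class BurstLimiter {
  private state = new Map<string, { tokens: number; last: number }>();

  // `now` is injectable for testing; defaults to the wall clock.
  allow(userId: string, tier: Tier, now: number = Date.now()): boolean {
    const refillPerMs = tier.requestsPerMinute / 60_000;
    const s = this.state.get(userId) ?? { tokens: tier.burstLimit, last: now };

    // Refill proportionally to elapsed time, capped at burstLimit.
    s.tokens = Math.min(tier.burstLimit, s.tokens + (now - s.last) * refillPerMs);
    s.last = now;

    const allowed = s.tokens >= 1;
    if (allowed) s.tokens -= 1;
    this.state.set(userId, s);
    return allowed;
  }
}
```

Running this check before the minute/day counters rejects short spikes immediately while the coarser counters guard sustained volume.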

Best Practices

DO:

- ✓ Use distributed storage for multi-instance deployments
- ✓ Return informative headers
- ✓ Implement graceful degradation
- ✓ Set different limits for different endpoints
- ✓ Consider user tiers
- ✓ Log rate limit events

DON'T:

- ✗ Rate limit health checks
- ✗ Rely on IP-based limiting alone
- ✗ Set limits too low for normal usage
- ✗ Forget to handle edge cases
- ✗ Ignore legitimate high-volume users
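On the first DON'T, exempting health checks is usually a one-line predicate; express-rate-limit, for example, accepts a `skip` option. A sketch (the exempt paths are assumptions — use whatever your infrastructure actually probes):

```typescript
// Paths that should never be rate limited: load balancer and
// orchestrator probes. The exact paths are assumptions.
const EXEMPT_PATHS = new Set(['/healthz', '/readyz', '/ping']);

function shouldSkipRateLimit(req: { path: string }): boolean {
  return EXEMPT_PATHS.has(req.path);
}

// With express-rate-limit, wire it in via the `skip` option:
// rateLimit({ windowMs: 60_000, max: 100, skip: shouldSkipRateLimit });
```

Without an exemption like this, a limiter keyed on IP can lock out your own load balancer and make the service flap as unhealthy.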

Conclusion

Rate limiting is essential protection for any public API. Choose the algorithm that fits your traffic — token bucket for bursty workloads, sliding window for smooth limits — and use distributed storage such as Redis once you run more than one instance.

Always communicate limits clearly through headers and error messages.
