Rate Limiting · API · Security · Performance

Rate Limiting and Throttling: Protecting Your APIs

Implement rate limiting that protects your services without frustrating users. From token buckets to sliding windows to distributed limiting.

Bootspring Team
Engineering
March 28, 2025
7 min read

Rate limiting protects your APIs from abuse, ensures fair resource distribution, and maintains service stability. Here's how to implement effective rate limiting strategies.

Why Rate Limit?

Without rate limiting:

- A single client can overwhelm your service
- No protection against DoS attacks
- Unfair resource distribution
- Unpredictable costs

With rate limiting:

- Guaranteed service availability
- Protection from abuse
- Fair usage across clients
- Predictable scaling

Rate Limiting Algorithms

Fixed Window

```typescript
class FixedWindowLimiter {
  private counts = new Map<string, { count: number; windowStart: number }>();

  constructor(
    private maxRequests: number,
    private windowMs: number,
  ) {}

  isAllowed(key: string): boolean {
    const now = Date.now();
    const windowStart = Math.floor(now / this.windowMs) * this.windowMs;

    const record = this.counts.get(key);

    if (!record || record.windowStart !== windowStart) {
      this.counts.set(key, { count: 1, windowStart });
      return true;
    }

    if (record.count >= this.maxRequests) {
      return false;
    }

    record.count++;
    return true;
  }
}

// 100 requests per minute
const limiter = new FixedWindowLimiter(100, 60000);
```
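Fixed windows are simple, but they admit a burst of up to twice the limit when requests straddle a window boundary. A minimal standalone sketch of that weakness (a bare function with hypothetical names rather than the class above, using a 3-requests-per-100ms limit):

```typescript
const windowMs = 100;
const maxRequests = 3;
let windowStart = 0;
let count = 0;

function isAllowed(now: number): boolean {
  const start = Math.floor(now / windowMs) * windowMs;
  if (start !== windowStart) {
    // New window: reset the counter
    windowStart = start;
    count = 0;
  }
  count++;
  return count <= maxRequests;
}

// 3 requests at the end of one window plus 3 at the start of the next:
const times = [97, 98, 99, 100, 101, 102];
const allowed = times.filter(isAllowed).length;
// All 6 pass, even though 6 requests arrived within 5ms.
```

The sliding-window variants below exist precisely to close this gap.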

Sliding Window Log

```typescript
class SlidingWindowLogLimiter {
  private logs = new Map<string, number[]>();

  constructor(
    private maxRequests: number,
    private windowMs: number,
  ) {}

  isAllowed(key: string): boolean {
    const now = Date.now();
    const windowStart = now - this.windowMs;

    let timestamps = this.logs.get(key) || [];

    // Remove entries that have fallen out of the window
    timestamps = timestamps.filter(t => t > windowStart);

    if (timestamps.length >= this.maxRequests) {
      this.logs.set(key, timestamps);
      return false;
    }

    timestamps.push(now);
    this.logs.set(key, timestamps);
    return true;
  }
}
```
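Run against the same boundary burst, the log-based approach blocks it. A compact standalone sketch (hypothetical names, same 3-per-100ms limit as before):

```typescript
const windowMs = 100;
const maxRequests = 3;
let log: number[] = [];

function isAllowed(now: number): boolean {
  // Keep only timestamps inside the trailing window
  log = log.filter(t => t > now - windowMs);
  if (log.length >= maxRequests) return false;
  log.push(now);
  return true;
}

const times = [97, 98, 99, 100, 101, 102];
const allowed = times.filter(isAllowed).length;
// Only 97, 98, 99 pass; each of 100-102 sees three requests in the last 100ms.
```

The trade-off is memory: the log stores one timestamp per request per key, which the counter variant below avoids.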

Sliding Window Counter

```typescript
class SlidingWindowCounterLimiter {
  private windows = new Map<string, { current: number; previous: number; currentStart: number }>();

  constructor(
    private maxRequests: number,
    private windowMs: number,
  ) {}

  isAllowed(key: string): boolean {
    const now = Date.now();
    const currentWindow = Math.floor(now / this.windowMs);
    const currentStart = currentWindow * this.windowMs;

    let record = this.windows.get(key);

    if (!record || record.currentStart < currentStart - this.windowMs) {
      record = { current: 0, previous: 0, currentStart };
    } else if (record.currentStart < currentStart) {
      record = { current: 0, previous: record.current, currentStart };
    }

    // Calculate weighted count
    const elapsedRatio = (now - currentStart) / this.windowMs;
    const weightedCount = record.current + record.previous * (1 - elapsedRatio);

    if (weightedCount >= this.maxRequests) {
      this.windows.set(key, record);
      return false;
    }

    record.current++;
    this.windows.set(key, record);
    return true;
  }
}
```
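The weighted count deserves a worked example. With hypothetical numbers: 25% of the way into the current window, the previous window still contributes 75% of its count:

```typescript
const windowMs = 60_000;
const previous = 100;   // requests counted in the previous window
const current = 20;     // requests counted so far in the current window
const elapsed = 15_000; // 25% of the way into the current window

const elapsedRatio = elapsed / windowMs;
const weighted = current + previous * (1 - elapsedRatio);
// 20 + 100 * 0.75 = 95
```

With a limit of 100, this client is still allowed, but barely: the fading weight of the previous window prevents the boundary burst that fixed windows permit, at the cost of approximating (rather than exactly counting) the trailing window.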

Token Bucket

```typescript
class TokenBucketLimiter {
  private buckets = new Map<string, { tokens: number; lastRefill: number }>();

  constructor(
    private bucketSize: number, // Max tokens
    private refillRate: number, // Tokens per second
  ) {}

  isAllowed(key: string, tokens: number = 1): boolean {
    const now = Date.now();
    let bucket = this.buckets.get(key);

    if (!bucket) {
      bucket = { tokens: this.bucketSize, lastRefill: now };
    }

    // Refill tokens
    const elapsed = (now - bucket.lastRefill) / 1000;
    bucket.tokens = Math.min(
      this.bucketSize,
      bucket.tokens + elapsed * this.refillRate,
    );
    bucket.lastRefill = now;

    if (bucket.tokens < tokens) {
      this.buckets.set(key, bucket);
      return false;
    }

    bucket.tokens -= tokens;
    this.buckets.set(key, bucket);
    return true;
  }
}

// 100 tokens max, refill 10 per second
const limiter = new TokenBucketLimiter(100, 10);
```
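To see the burst behavior, here is the same refill math as a deterministic sketch with an injected clock instead of `Date.now()` (names hypothetical): a 5-token bucket refilling 1 token per second absorbs a burst of 5, rejects the 6th request, and recovers as tokens trickle back.

```typescript
const bucketSize = 5;
const refillRate = 1; // tokens per second
let tokens = bucketSize;
let lastRefill = 0;

function isAllowed(now: number, cost = 1): boolean {
  // Refill proportionally to elapsed time, capped at bucket size
  tokens = Math.min(bucketSize, tokens + ((now - lastRefill) / 1000) * refillRate);
  lastRefill = now;
  if (tokens < cost) return false;
  tokens -= cost;
  return true;
}

// A burst of 6 requests at t=0: the first 5 drain the bucket, the 6th fails.
const burst = [0, 0, 0, 0, 0, 0].filter(t => isAllowed(t)).length; // 5

// Two seconds later, two tokens have refilled.
const recovered = isAllowed(2000); // true
```

This tolerance for short bursts, bounded by bucket size, is what makes token buckets the usual choice for user-facing APIs.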

Leaky Bucket

```typescript
class LeakyBucketLimiter {
  private queues = new Map<string, { queue: Array<() => void>; processing: boolean }>();

  constructor(
    private bucketSize: number,
    private leakRateMs: number, // Time between leaks
  ) {}

  async process<T>(key: string, task: () => Promise<T>): Promise<T> {
    return new Promise((resolve, reject) => {
      let bucket = this.queues.get(key);

      if (!bucket) {
        bucket = { queue: [], processing: false };
        this.queues.set(key, bucket);
      }

      if (bucket.queue.length >= this.bucketSize) {
        reject(new Error('Rate limit exceeded'));
        return;
      }

      bucket.queue.push(async () => {
        try {
          resolve(await task());
        } catch (e) {
          reject(e);
        }
      });

      this.startProcessing(key);
    });
  }

  private async startProcessing(key: string): Promise<void> {
    const bucket = this.queues.get(key)!;

    if (bucket.processing) return;
    bucket.processing = true;

    while (bucket.queue.length > 0) {
      const task = bucket.queue.shift()!;
      await task();
      await new Promise(r => setTimeout(r, this.leakRateMs));
    }

    bucket.processing = false;
  }
}
```

Distributed Rate Limiting

Redis Implementation

```typescript
import Redis from 'ioredis';

class RedisRateLimiter {
  constructor(
    private redis: Redis,
    private maxRequests: number,
    private windowMs: number,
  ) {}

  async isAllowed(key: string): Promise<{ allowed: boolean; remaining: number; resetAt: number }> {
    const now = Date.now();
    const windowKey = `ratelimit:${key}:${Math.floor(now / this.windowMs)}`;

    const multi = this.redis.multi();
    multi.incr(windowKey);
    multi.pttl(windowKey);

    const [[, count], [, ttl]] = await multi.exec() as [[null, number], [null, number]];

    // Note: INCR and PEXPIRE are not atomic here. If the process dies
    // between them, the key never expires. The Lua script below closes
    // this gap.
    if (ttl === -1) {
      await this.redis.pexpire(windowKey, this.windowMs);
    }

    const resetAt = Math.ceil(now / this.windowMs) * this.windowMs;
    const remaining = Math.max(0, this.maxRequests - count);

    return {
      allowed: count <= this.maxRequests,
      remaining,
      resetAt,
    };
  }
}
```

Lua Script for Atomicity

```typescript
interface RateLimitResult {
  allowed: boolean;
  remaining: number;
  resetAt: number;
}

class AtomicRedisLimiter {
  constructor(
    private redis: Redis,
    private maxRequests: number,
    private windowMs: number,
  ) {}

  private script = `
    local key = KEYS[1]
    local limit = tonumber(ARGV[1])
    local window = tonumber(ARGV[2])

    local current = redis.call('INCR', key)

    if current == 1 then
      redis.call('PEXPIRE', key, window)
    end

    if current > limit then
      return {0, 0, redis.call('PTTL', key)}
    end

    return {1, limit - current, redis.call('PTTL', key)}
  `;

  async isAllowed(key: string): Promise<RateLimitResult> {
    const [allowed, remaining, ttl] = await this.redis.eval(
      this.script,
      1,
      `ratelimit:${key}`,
      this.maxRequests,
      this.windowMs,
    ) as [number, number, number];

    return {
      allowed: allowed === 1,
      remaining,
      resetAt: Date.now() + ttl,
    };
  }
}
```

Express Middleware

```typescript
import rateLimit from 'express-rate-limit';
import RedisStore from 'rate-limit-redis';

// Basic rate limiting
const limiter = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 100,
  message: { error: 'Too many requests, please try again later' },
  standardHeaders: true,
  legacyHeaders: false,
});

app.use('/api', limiter);

// Redis store for distributed systems
const distributedLimiter = rateLimit({
  windowMs: 15 * 60 * 1000,
  max: 100,
  store: new RedisStore({
    sendCommand: (...args: string[]) => redis.call(...args),
  }),
});

// Different limits per route
const authLimiter = rateLimit({
  windowMs: 60 * 60 * 1000, // 1 hour
  max: 5,
  message: { error: 'Too many login attempts' },
});

app.use('/api/auth/login', authLimiter);
```

Response Headers

```typescript
import { NextFunction, Request, Response } from 'express';

function rateLimitMiddleware(req: Request, res: Response, next: NextFunction) {
  const key = req.ip;
  const result = limiter.check(key);

  // Standard headers
  res.setHeader('RateLimit-Limit', result.limit);
  res.setHeader('RateLimit-Remaining', result.remaining);
  res.setHeader('RateLimit-Reset', Math.ceil(result.resetAt / 1000));

  // Retry-After for 429 responses
  if (!result.allowed) {
    const retryAfter = Math.ceil((result.resetAt - Date.now()) / 1000);
    res.setHeader('Retry-After', retryAfter);
    return res.status(429).json({
      error: 'Too many requests',
      retryAfter,
    });
  }

  next();
}
```
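On the client side, those headers let callers back off intelligently. A hypothetical helper that honors the server's Retry-After when present and otherwise falls back to exponential backoff (this sketch handles only the delta-seconds form of Retry-After, not the HTTP-date form):

```typescript
// Decide how long to wait before retrying a 429 response.
function retryDelayMs(retryAfterHeader: string | null, attempt: number): number {
  if (retryAfterHeader !== null) {
    const seconds = Number(retryAfterHeader);
    if (!Number.isNaN(seconds)) return seconds * 1000; // honor the server
  }
  // Fall back to exponential backoff, capped at 30 seconds
  return Math.min(30_000, 1000 * 2 ** attempt);
}

retryDelayMs('7', 0);  // 7000 — server said wait 7 seconds
retryDelayMs(null, 3); // 8000 — 1000 * 2^3
```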

Advanced Patterns

Tiered Rate Limits

```typescript
const tierLimits = {
  free: { requests: 100, window: 3600000 },
  pro: { requests: 1000, window: 3600000 },
  enterprise: { requests: 10000, window: 3600000 },
};

async function checkRateLimit(userId: string): Promise<boolean> {
  const user = await getUser(userId);
  // Default unknown plans to the most restrictive tier
  const tier = tierLimits[user.plan] ?? tierLimits.free;

  return limiter.isAllowed(userId, tier.requests, tier.window);
}
```

Endpoint-Specific Limits

```typescript
const endpointLimits = {
  'POST /api/upload': { requests: 10, window: 60000 },
  'GET /api/search': { requests: 30, window: 60000 },
  'POST /api/messages': { requests: 60, window: 60000 },
  default: { requests: 100, window: 60000 },
};

function getLimit(method: string, path: string) {
  const key = `${method} ${path}`;
  return endpointLimits[key] || endpointLimits.default;
}
```
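One caveat: real request paths contain IDs (`/api/users/123`), so the lookup key should be built from the route pattern rather than the raw URL (inside an Express route handler, `req.route.path` already gives you the pattern). Where that isn't available, a hypothetical normalizer covers simple cases:

```typescript
// Collapse numeric path segments so "/api/users/123" and
// "/api/users/456" share one rate-limit key.
function normalizePath(path: string): string {
  return path.replace(/\/\d+(?=\/|$)/g, '/:id');
}

normalizePath('/api/users/123/posts'); // "/api/users/:id/posts"
```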

Cost-Based Limiting

```typescript
// Different operations have different costs
const operationCosts = {
  'GET /api/users': 1,
  'POST /api/users': 5,
  'GET /api/reports': 10,
  'POST /api/export': 50,
};

class CostBasedLimiter {
  private tokenBucket: TokenBucketLimiter;

  constructor(tokensPerHour: number) {
    this.tokenBucket = new TokenBucketLimiter(tokensPerHour, tokensPerHour / 3600);
  }

  isAllowed(key: string, operation: string): boolean {
    const cost = operationCosts[operation] || 1;
    return this.tokenBucket.isAllowed(key, cost);
  }
}
```
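With 100 tokens per hour, two export operations exhaust the entire budget. A standalone sketch of the accounting (hypothetical names, independent of the class above):

```typescript
const costs: Record<string, number> = {
  'GET /api/users': 1,
  'POST /api/export': 50,
};

let budget = 100; // tokens available this hour

const ops = ['POST /api/export', 'POST /api/export', 'GET /api/users'];
const served = ops.filter(op => {
  const cost = costs[op] ?? 1;
  if (budget < cost) return false;
  budget -= cost;
  return true;
});
// The two exports consume all 100 tokens; even the 1-token read is
// rejected until the bucket refills.
```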

Graceful Degradation

```typescript
class GracefulLimiter {
  async handleRequest(req: Request, res: Response): Promise<void> {
    const result = await this.limiter.check(req.ip);

    // Over limit: reject
    if (!result.allowed) {
      res.status(429).json({ error: 'Rate limit exceeded' });
      return;
    }

    // Near limit: prefer a cached/simplified response to reduce load
    if (result.remaining < 10) {
      return this.processDegradedRequest(req, res);
    }

    return this.processFullRequest(req, res);
  }

  private async processDegradedRequest(req: Request, res: Response): Promise<void> {
    // Serve cached data when available
    const cached = await this.cache.get(req.url);
    if (cached) {
      res.setHeader('X-Degraded-Response', 'true');
      res.json(cached);
      return;
    }

    // No cache entry: fall back to full processing
    return this.processFullRequest(req, res);
  }
}
```

Conclusion

Rate limiting is essential for API reliability and security. Choose the algorithm that fits your traffic: token bucket for bursty workloads, sliding window for smooth limiting. Always return proper headers so clients can adapt.

Remember: good rate limiting protects your service while giving users clear feedback and reasonable limits.
