Node.js · Streams · Performance · Backend

Node.js Streams: A Complete Guide

Master Node.js streams for efficient data processing. From readable and writable streams to transforms and backpressure handling.

Bootspring Team
Engineering
March 8, 2022
7 min read

Streams process data piece by piece, enabling efficient memory usage for large files and real-time data. Here's how to use them effectively.
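To see the difference, here is a quick sketch contrasting a fully buffered file copy with a streamed one (the file names are hypothetical). The buffered version holds the entire file in memory; the streamed version holds roughly one chunk at a time.

import fs from 'fs';
import { pipeline } from 'stream/promises';

// Buffered: the whole file sits in memory before any of it is written
const data = await fs.promises.readFile('big.log');
await fs.promises.writeFile('copy.log', data);

// Streamed: only one chunk (64KB by default) is in memory at a time
await pipeline(
  fs.createReadStream('big.log'),
  fs.createWriteStream('copy.log')
);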

Stream Types

import { Readable, Writable, Transform, Duplex } from 'stream';
import fs from 'fs';

// Four stream types:
// 1. Readable - source of data
// 2. Writable - destination for data
// 3. Duplex - both readable and writable
// 4. Transform - modify data as it passes through

// Reading a file as a stream
const readStream = fs.createReadStream('large-file.txt', {
  encoding: 'utf8',
  highWaterMark: 64 * 1024, // 64KB chunks
});

readStream.on('data', (chunk) => {
  console.log(`Received ${chunk.length} bytes`);
});

readStream.on('end', () => {
  console.log('Finished reading');
});

readStream.on('error', (err) => {
  console.error('Error:', err);
});
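The 'data' listener above switches the stream into flowing mode. Streams also have a paused mode where you pull chunks explicitly with read(); a minimal sketch of that style (attach one style or the other, not both, since 'readable' takes precedence):

readStream.on('readable', () => {
  let chunk;
  // read() returns null when the internal buffer is empty
  while ((chunk = readStream.read()) !== null) {
    console.log(`Read ${chunk.length} bytes`);
  }
});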

Creating Custom Streams

import fs from 'fs';
import { Readable, Writable, Transform } from 'stream';

// Custom Readable stream
class CounterStream extends Readable {
  private current = 0;
  private max: number;

  constructor(max: number) {
    super({ objectMode: true });
    this.max = max;
  }

  _read() {
    if (this.current < this.max) {
      this.push({ count: this.current++ });
    } else {
      this.push(null); // Signal end of stream
    }
  }
}

// Usage
const counter = new CounterStream(10);
counter.on('data', (data) => console.log(data));

// Custom Writable stream
class LoggerStream extends Writable {
  _write(
    chunk: Buffer,
    encoding: string,
    callback: (error?: Error | null) => void
  ) {
    console.log(`[LOG] ${chunk.toString()}`);
    callback(); // Signal completion
  }

  _writev(
    chunks: Array<{ chunk: Buffer; encoding: string }>,
    callback: (error?: Error | null) => void
  ) {
    // Handle multiple buffered chunks at once (optional optimization)
    chunks.forEach(({ chunk }) => {
      console.log(`[LOG] ${chunk.toString()}`);
    });
    callback();
  }
}

// Custom Transform stream
class UppercaseTransform extends Transform {
  _transform(
    chunk: Buffer,
    encoding: string,
    callback: (error?: Error | null, data?: any) => void
  ) {
    callback(null, chunk.toString().toUpperCase());
  }
}

// Chain them together
const readable = fs.createReadStream('input.txt');
const transform = new UppercaseTransform();
const writable = fs.createWriteStream('output.txt');

readable.pipe(transform).pipe(writable);
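Duplex streams appear in the type list but not in the examples above. As a sketch, here is a minimal Duplex that echoes everything written to it back out its readable side, roughly what the built-in PassThrough does (backpressure is ignored for brevity):

import { Duplex } from 'stream';

class EchoDuplex extends Duplex {
  _write(
    chunk: Buffer,
    encoding: string,
    callback: (error?: Error | null) => void
  ) {
    this.push(chunk); // Hand written data straight to the readable side
    callback();
  }

  _read() {
    // Nothing to do: data is pushed from _write
  }

  _final(callback: (error?: Error | null) => void) {
    this.push(null); // End the readable side when writing finishes
    callback();
  }
}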

Pipeline and Error Handling

import { pipeline } from 'stream/promises';
import { createReadStream, createWriteStream } from 'fs';
import { createGzip } from 'zlib';

// Using pipeline (recommended)
async function compressFile(source: string, destination: string) {
  await pipeline(
    createReadStream(source),
    createGzip(),
    createWriteStream(destination)
  );
  console.log('Compression complete');
}

// pipeline handles errors and cleanup automatically
async function processFile() {
  try {
    await pipeline(
      createReadStream('input.txt'),
      new UppercaseTransform(),
      createWriteStream('output.txt')
    );
  } catch (error) {
    console.error('Pipeline failed:', error);
  }
}

// Multiple transforms (custom object-mode transforms, defined separately)
async function processWithMultipleTransforms() {
  await pipeline(
    createReadStream('data.json'),
    new ParseJSONTransform(),
    new FilterTransform((item) => item.active),
    new MapTransform((item) => ({ ...item, processed: true })),
    new StringifyTransform(),
    createWriteStream('processed.json')
  );
}
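The last example relies on custom transforms (ParseJSONTransform, FilterTransform, MapTransform, StringifyTransform) that aren't defined in this post. As a sketch, the filter and map helpers might look like this, assuming object-mode items:

import { Transform } from 'stream';

class FilterTransform extends Transform {
  constructor(private predicate: (item: any) => boolean) {
    super({ objectMode: true });
  }

  _transform(
    item: any,
    encoding: string,
    callback: (error?: Error | null) => void
  ) {
    if (this.predicate(item)) this.push(item); // Drop items that fail the test
    callback();
  }
}

class MapTransform extends Transform {
  constructor(private fn: (item: any) => any) {
    super({ objectMode: true });
  }

  _transform(
    item: any,
    encoding: string,
    callback: (error?: Error | null, data?: any) => void
  ) {
    callback(null, this.fn(item)); // Emit the mapped item
  }
}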

Object Mode Streams

import { Transform, Readable } from 'stream';

// Object mode for non-buffer data: Buffers in, parsed objects out
class JSONParseTransform extends Transform {
  private buffer = '';

  constructor() {
    super({
      readableObjectMode: true, // push parsed objects
      writableObjectMode: false, // accept raw Buffers/strings
    });
  }

  _transform(chunk: Buffer, encoding: string, callback: Function) {
    try {
      // Buffer the remainder so a line split across chunks still parses
      this.buffer += chunk.toString();
      const lines = this.buffer.split('\n');
      this.buffer = lines.pop() || '';
      for (const line of lines) {
        if (line.trim()) this.push(JSON.parse(line));
      }
      callback();
    } catch (error) {
      callback(error);
    }
  }

  _flush(callback: Function) {
    try {
      if (this.buffer.trim()) this.push(JSON.parse(this.buffer));
      callback();
    } catch (error) {
      callback(error);
    }
  }
}

// Database cursor as stream
class DatabaseStream extends Readable {
  private cursor: any;
  private reading = false;

  constructor(cursor: any) {
    super({ objectMode: true });
    this.cursor = cursor;
  }

  async _read() {
    if (this.reading) return;
    this.reading = true;

    try {
      const doc = await this.cursor.next();
      if (doc) {
        this.push(doc);
      } else {
        this.push(null); // End of stream
      }
    } catch (error) {
      this.destroy(error as Error);
    } finally {
      this.reading = false;
    }
  }
}

// Usage with MongoDB (db is assumed to be a connected client)
async function streamFromDatabase() {
  const cursor = db.collection('users').find({});
  const stream = new DatabaseStream(cursor);

  for await (const user of stream) {
    console.log(user.name);
  }
}
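Putting JSONParseTransform to work, a sketch that streams a hypothetical newline-delimited JSON file (events.ndjson) into an object-mode consumer:

import { createReadStream } from 'fs';
import { Writable } from 'stream';
import { pipeline } from 'stream/promises';

await pipeline(
  createReadStream('events.ndjson'),
  new JSONParseTransform(),
  new Writable({
    objectMode: true,
    write(event, encoding, callback) {
      console.log(event); // Each chunk is now a parsed object
      callback();
    },
  })
);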

Backpressure Handling

import { Writable, Readable } from 'stream';
import { pipeline } from 'stream/promises';
import { createReadStream } from 'fs';

// Manual backpressure handling
function copyWithBackpressure(source: Readable, destination: Writable) {
  source.on('data', (chunk) => {
    // write() returns false if the internal buffer is full
    const canContinue = destination.write(chunk);

    if (!canContinue) {
      // Pause reading until 'drain'
      source.pause();
    }
  });

  destination.on('drain', () => {
    // Resume reading once the buffer has drained
    source.resume();
  });

  source.on('end', () => {
    destination.end();
  });
}

// Writable with a small buffer to trigger backpressure quickly
class SlowWriter extends Writable {
  constructor() {
    super({ highWaterMark: 1024 }); // Small buffer
  }

  _write(chunk: Buffer, encoding: string, callback: Function) {
    // Simulate a slow write
    setTimeout(() => {
      console.log(`Wrote ${chunk.length} bytes`);
      callback();
    }, 100);
  }
}

// pipeline handles backpressure automatically
await pipeline(
  createReadStream('large-file.txt'),
  new SlowWriter()
);
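When you are writing directly (rather than piping) and can use async/await, events.once() gives a tidier version of the same pause-until-drain dance:

import { once } from 'events';
import { Writable } from 'stream';

async function writeAll(
  destination: Writable,
  chunks: Iterable<Buffer | string>
) {
  for (const chunk of chunks) {
    if (!destination.write(chunk)) {
      // Buffer is full: wait for 'drain' before writing more
      await once(destination, 'drain');
    }
  }
  destination.end();
}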

Async Iterators with Streams

import readline from 'readline';
import { createReadStream } from 'fs';
import { Readable } from 'stream';

// Streams are async iterable
async function processStream(stream: Readable) {
  for await (const chunk of stream) {
    console.log(chunk.toString());
  }
}

// Create a readable from an async generator
async function* generateData() {
  for (let i = 0; i < 10; i++) {
    await new Promise((resolve) => setTimeout(resolve, 100));
    yield `Line ${i}\n`;
  }
}

const stream = Readable.from(generateData());

// Create a readable from an array
const arrayStream = Readable.from(['a', 'b', 'c']);

// Process a file line by line
async function processLines(filePath: string) {
  const fileStream = createReadStream(filePath);

  const rl = readline.createInterface({
    input: fileStream,
    crlfDelay: Infinity,
  });

  for await (const line of rl) {
    console.log(`Line: ${line}`);
  }
}
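On modern Node versions, async generators also slot directly into pipeline() as transform stages, which is often lighter than writing a Transform subclass for one-off logic. A sketch:

import { createReadStream, createWriteStream } from 'fs';
import { pipeline } from 'stream/promises';

await pipeline(
  createReadStream('input.txt'),
  async function* (source) {
    // Each chunk flows through this generator as a transform stage
    for await (const chunk of source) {
      yield chunk.toString().toUpperCase();
    }
  },
  createWriteStream('output.txt')
);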

Practical Examples

import { Transform } from 'stream';
import { createReadStream, createWriteStream } from 'fs';
import { pipeline } from 'stream/promises';
import { createServer, IncomingMessage } from 'http';

// CSV Parser Transform
class CSVParser extends Transform {
  private headers: string[] | null = null;
  private buffer = '';

  constructor() {
    super({ objectMode: true });
  }

  _transform(chunk: Buffer, encoding: string, callback: Function) {
    this.buffer += chunk.toString();
    const lines = this.buffer.split('\n');
    this.buffer = lines.pop() || '';

    for (const line of lines) {
      if (!line.trim()) continue;

      const values = line.split(',').map((v) => v.trim());

      if (!this.headers) {
        this.headers = values;
      } else {
        const obj: Record<string, string> = {};
        this.headers.forEach((header, i) => {
          obj[header] = values[i];
        });
        this.push(obj);
      }
    }

    callback();
  }

  _flush(callback: Function) {
    if (this.buffer.trim()) {
      const values = this.buffer.split(',').map((v) => v.trim());
      if (this.headers) {
        const obj: Record<string, string> = {};
        this.headers.forEach((header, i) => {
          obj[header] = values[i];
        });
        this.push(obj);
      }
    }
    callback();
  }
}

// HTTP streaming response
const server = createServer(async (req, res) => {
  if (req.url === '/download') {
    res.setHeader('Content-Type', 'application/octet-stream');
    res.setHeader('Content-Disposition', 'attachment; filename="large.zip"');

    try {
      await pipeline(createReadStream('large.zip'), res);
    } catch (error) {
      // Client disconnects surface here; pipeline destroys both streams
      console.error('Download aborted:', error);
    }
  }
});

// Upload with streams
async function handleUpload(req: IncomingMessage) {
  const writeStream = createWriteStream('uploaded-file.bin');

  await pipeline(req, writeStream);

  console.log('Upload complete');
}
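Wiring the CSVParser into a pipeline, assuming a hypothetical users.csv whose first row holds the headers:

import { createReadStream } from 'fs';
import { Writable } from 'stream';
import { pipeline } from 'stream/promises';

await pipeline(
  createReadStream('users.csv'),
  new CSVParser(),
  new Writable({
    objectMode: true,
    write(row, encoding, callback) {
      console.log(row); // e.g. { name: 'Ada', email: 'ada@example.com' }
      callback();
    },
  })
);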

Memory-Efficient Processing

import { createReadStream } from 'fs';
import { Transform, Writable } from 'stream';
import { pipeline } from 'stream/promises';
// stream-json is a third-party package: npm install stream-json
import { parser } from 'stream-json';
import { streamArray } from 'stream-json/streamers/StreamArray';

// Process a large JSON array file item by item
async function processLargeJSON(filePath: string) {
  const source = createReadStream(filePath)
    .pipe(parser())
    .pipe(streamArray());

  for await (const { value } of source) {
    // Process each item (processItem is application-specific)
    await processItem(value);
  }
}

// Batch processing
class BatchTransform extends Transform {
  private batch: any[] = [];
  private batchSize: number;

  constructor(batchSize: number) {
    super({ objectMode: true });
    this.batchSize = batchSize;
  }

  _transform(item: any, encoding: string, callback: Function) {
    this.batch.push(item);

    if (this.batch.length >= this.batchSize) {
      this.push(this.batch);
      this.batch = [];
    }

    callback();
  }

  _flush(callback: Function) {
    if (this.batch.length > 0) {
      this.push(this.batch);
    }
    callback();
  }
}

// Usage: batch NDJSON records into database inserts (db assumed from earlier)
await pipeline(
  createReadStream('data.ndjson'),
  new JSONParseTransform(),
  new BatchTransform(100),
  new Writable({
    objectMode: true,
    write(batch, encoding, callback) {
      db.insertMany(batch)
        .then(() => callback())
        .catch(callback);
    },
  })
);

Best Practices

Design:
✓ Use pipeline() for chaining
✓ Handle errors in all streams
✓ Set an appropriate highWaterMark
✓ Use objectMode for non-binary data

Performance:
✓ Process in chunks, not all at once
✓ Respect backpressure
✓ Use streams for large files
✓ Avoid loading entire files into memory

Error Handling:
✓ Always handle the 'error' event
✓ Use pipeline for automatic cleanup
✓ Implement _destroy() for cleanup (see the sketch below)
✓ Propagate errors correctly
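As a sketch of that _destroy() point: a hypothetical writable that owns a file descriptor and releases it whether the stream finishes normally or is destroyed mid-flight:

import fs from 'fs';
import { Writable } from 'stream';

class FdWriter extends Writable {
  constructor(private fd: number) {
    super();
  }

  _write(
    chunk: Buffer,
    encoding: string,
    callback: (error?: Error | null) => void
  ) {
    fs.write(this.fd, chunk, (err) => callback(err));
  }

  _destroy(
    error: Error | null,
    callback: (error?: Error | null) => void
  ) {
    // Runs on destroy() and after 'finish': always release the descriptor
    fs.close(this.fd, (closeErr) => callback(error ?? closeErr));
  }
}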

Conclusion

Node.js streams enable efficient processing of large data sets with constant memory usage. Use Readable for data sources, Writable for destinations, and Transform for processing. Always use pipeline() for proper error handling and backpressure management. Streams are essential for scalable Node.js applications.
