Node.js · Streams · Performance · I/O

# Node.js Stream API Guide

Master Node.js streams for efficient data processing with readable, writable, and transform streams.

Bootspring Team
Engineering
December 26, 2018
6 min read

Streams are Node.js's way of handling data piece by piece, enabling efficient processing of large files and real-time data. Here's how to use them.

## Stream Types

```javascript
import { Readable, Writable, Transform, Duplex } from 'node:stream';

// Readable  - source of data
// Writable  - destination for data
// Transform - modify data as it passes through
// Duplex    - both readable and writable
```
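Several familiar Node built-ins map directly onto these four types; a quick sketch (nothing here is specific to this guide):

```javascript
import { Readable, Writable, Transform, Duplex } from 'node:stream';
import { createGzip } from 'node:zlib';
import { Socket } from 'node:net';

const source = process.stdin;  // Readable: a source of data
const dest = process.stdout;   // Writable: a destination
const gzip = createGzip();     // Transform: modifies bytes in transit
const socket = new Socket();   // Duplex: readable and writable (e.g. TCP)

console.log(
  source instanceof Readable,
  dest instanceof Writable,
  gzip instanceof Transform,
  socket instanceof Duplex
); // all true
```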

## Reading Files with Streams

```javascript
import { createReadStream } from 'node:fs';

// Basic file reading
const readStream = createReadStream('large-file.txt', {
  encoding: 'utf8',
  highWaterMark: 64 * 1024, // read in 64KB chunks
});

readStream.on('data', (chunk) => {
  // With an encoding set, chunks are strings, so length is in characters
  console.log(`Received ${chunk.length} characters`);
});

readStream.on('end', () => {
  console.log('Finished reading');
});

readStream.on('error', (err) => {
  console.error('Error:', err);
});
```
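Besides flowing mode (`'data'` events), readable streams also support paused mode, where you pull chunks explicitly with `read()` after a `'readable'` event. A minimal sketch using an in-memory stream instead of a file, so it runs anywhere:

```javascript
import { Readable } from 'node:stream';

// Paused mode: wait for 'readable', then pull chunks with read()
const stream = Readable.from(['alpha', 'beta', 'gamma']);

const chunks = [];
stream.on('readable', () => {
  let chunk;
  while ((chunk = stream.read()) !== null) {
    chunks.push(chunk);
  }
});

// Wait for the stream to finish before inspecting the result
await new Promise((resolve) => stream.on('end', resolve));
console.log(chunks.join(',')); // alpha,beta,gamma
```

Paused mode is useful when the consumer, not the producer, should control the pace.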

## Writing Files with Streams

```javascript
import { createWriteStream } from 'node:fs';

const writeStream = createWriteStream('output.txt');

writeStream.write('Hello, ');
writeStream.write('World!');
writeStream.end('\n');

writeStream.on('finish', () => {
  console.log('Finished writing');
});

writeStream.on('error', (err) => {
  console.error('Error:', err);
});
```
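When you issue many tiny writes, `cork()` and `uncork()` let the stream batch them; if the writable implements `_writev`, the whole corked batch arrives in a single call. A small sketch with an in-memory sink (the `'+'` join is just to make the batching visible):

```javascript
import { Writable } from 'node:stream';

const batches = [];
const sink = new Writable({
  write(chunk, encoding, callback) {
    batches.push(chunk.toString());
    callback();
  },
  // writev receives all corked chunks as one batch
  writev(chunks, callback) {
    batches.push(chunks.map((c) => c.chunk.toString()).join('+'));
    callback();
  },
});

sink.cork();
sink.write('a');
sink.write('b');
sink.write('c');
sink.uncork(); // flushes the batch: writev sees all three chunks

console.log(batches); // [ 'a+b+c' ]
```

The docs suggest deferring `uncork()` with `process.nextTick()` when corking inside an event handler; calling it synchronously, as here, flushes immediately.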

## Piping Streams

```javascript
import { createReadStream, createWriteStream } from 'node:fs';
import { createGzip, createGunzip } from 'node:zlib';

// Copy file
createReadStream('source.txt')
  .pipe(createWriteStream('destination.txt'));

// Compress file
createReadStream('data.txt')
  .pipe(createGzip())
  .pipe(createWriteStream('data.txt.gz'));

// Decompress file
createReadStream('data.txt.gz')
  .pipe(createGunzip())
  .pipe(createWriteStream('data.txt'));
```

## Pipeline with Error Handling

```javascript
import { pipeline } from 'node:stream/promises';
import { createReadStream, createWriteStream } from 'node:fs';
import { createGzip } from 'node:zlib';

async function compress(input, output) {
  await pipeline(
    createReadStream(input),
    createGzip(),
    createWriteStream(output)
  );
  console.log('Compression complete');
}

compress('data.txt', 'data.txt.gz').catch(console.error);
```
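`pipeline` also accepts an `AbortSignal` in a trailing options object, which gives you cancellation for free: on abort it rejects with an `AbortError` and destroys every stream in the chain. A sketch with an in-memory source standing in for a slow file:

```javascript
import { pipeline } from 'node:stream/promises';
import { Readable, Writable } from 'node:stream';
import { setTimeout as sleep } from 'node:timers/promises';

const ac = new AbortController();

// A deliberately slow source
const slowSource = Readable.from((async function* () {
  for (let i = 0; i < 1000; i++) {
    yield `chunk ${i}\n`;
    await sleep(10);
  }
})());

const sink = new Writable({
  write(chunk, encoding, callback) { callback(); },
});

setTimeout(() => ac.abort(), 50); // cancel after 50ms

let caught;
try {
  await pipeline(slowSource, sink, { signal: ac.signal });
} catch (err) {
  caught = err;
}
console.log(caught.name); // AbortError
```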

## Custom Readable Stream

```javascript
import { Readable } from 'node:stream';

// Using a class
class CounterStream extends Readable {
  constructor(max) {
    super({ encoding: 'utf8' }); // emit strings rather than Buffers
    this.max = max;
    this.current = 0;
  }

  _read() {
    if (this.current < this.max) {
      this.push(String(this.current++));
    } else {
      this.push(null); // Signal end
    }
  }
}

const counter = new CounterStream(5);
counter.on('data', (num) => console.log(num));
// 0, 1, 2, 3, 4

// Using Readable.from
const arrayStream = Readable.from(['a', 'b', 'c']);

// Async generator
async function* generateData() {
  for (let i = 0; i < 5; i++) {
    yield `Item ${i}\n`;
    await new Promise((r) => setTimeout(r, 100));
  }
}

const asyncStream = Readable.from(generateData());
```

## Custom Writable Stream

```javascript
import { Writable } from 'node:stream';

class LogStream extends Writable {
  constructor(options) {
    super(options);
    this.logs = [];
  }

  _write(chunk, encoding, callback) {
    const log = {
      timestamp: new Date().toISOString(),
      message: chunk.toString(),
    };
    this.logs.push(log);
    console.log(`[${log.timestamp}] ${log.message}`);
    callback();
  }

  _final(callback) {
    console.log(`Total logs: ${this.logs.length}`);
    callback();
  }
}

const logger = new LogStream();
logger.write('First message');
logger.write('Second message');
logger.end();
```

## Transform Streams

```javascript
import { Transform } from 'node:stream';

// Uppercase transform
class UpperCaseTransform extends Transform {
  _transform(chunk, encoding, callback) {
    this.push(chunk.toString().toUpperCase());
    callback();
  }
}

// Newline-delimited JSON parser: Buffers in, parsed objects out
class JSONParser extends Transform {
  constructor() {
    super({ readableObjectMode: true });
    this.buffer = '';
  }

  _transform(chunk, encoding, callback) {
    this.buffer += chunk.toString();
    const lines = this.buffer.split('\n');
    this.buffer = lines.pop(); // Keep incomplete line

    for (const line of lines) {
      if (line.trim()) {
        try {
          this.push(JSON.parse(line));
        } catch (err) {
          callback(err);
          return;
        }
      }
    }
    callback();
  }

  _flush(callback) {
    if (this.buffer.trim()) {
      try {
        this.push(JSON.parse(this.buffer));
      } catch (err) {
        callback(err);
        return;
      }
    }
    callback();
  }
}
```

## Object Mode Streams

```javascript
import { Transform } from 'node:stream';

// Process objects instead of buffers
const objectTransform = new Transform({
  objectMode: true,
  transform(user, encoding, callback) {
    this.push({
      ...user,
      fullName: `${user.firstName} ${user.lastName}`,
      createdAt: new Date(),
    });
    callback();
  },
});

// Usage
objectTransform.write({ firstName: 'John', lastName: 'Doe' });
objectTransform.on('data', (user) => console.log(user));
```

## Duplex Streams

```javascript
import { Duplex } from 'node:stream';

// Echo written data straight back out the readable side
class EchoStream extends Duplex {
  _write(chunk, encoding, callback) {
    this.push(chunk); // forward to the readable side
    callback();
  }

  _final(callback) {
    this.push(null); // end the readable side when writing ends
    callback();
  }

  _read() {
    // Data is pushed from _write, so nothing to do here
  }
}
```

## HTTP Streaming

```javascript
import http from 'node:http';
import { createReadStream } from 'node:fs';

const server = http.createServer((req, res) => {
  if (req.url === '/video') {
    res.setHeader('Content-Type', 'video/mp4');
    createReadStream('video.mp4').pipe(res);
  } else if (req.url === '/large-file') {
    res.setHeader('Content-Type', 'application/octet-stream');
    res.setHeader('Content-Disposition', 'attachment; filename="data.csv"');
    createReadStream('large-data.csv').pipe(res);
  } else {
    res.statusCode = 404; // always respond, or the request hangs
    res.end('Not found');
  }
});

server.listen(3000);
```

## Stream Composition

```javascript
import { pipeline } from 'node:stream/promises';
import { createReadStream, createWriteStream } from 'node:fs';
import { Transform } from 'node:stream';

// Line splitter: Buffers in, one string per line out
// (simplified: a chunk boundary can split a line; see JSONParser above
// for a version that buffers incomplete lines)
const lineSplitter = new Transform({
  readableObjectMode: true,
  transform(chunk, encoding, callback) {
    const lines = chunk.toString().split('\n');
    lines.forEach((line) => {
      if (line.trim()) this.push(line + '\n');
    });
    callback();
  },
});

// Filter: keep only lines containing 'ERROR'
const filter = new Transform({
  objectMode: true,
  transform(line, encoding, callback) {
    if (line.includes('ERROR')) {
      this.push(line);
    }
    callback();
  },
});

// Process log file
await pipeline(
  createReadStream('app.log'),
  lineSplitter,
  filter,
  createWriteStream('errors.log')
);
```

## Async Iteration

```javascript
import { createReadStream } from 'node:fs';
import { createInterface } from 'node:readline';

// Read file line by line
async function processLines(filename) {
  const fileStream = createReadStream(filename);
  const rl = createInterface({
    input: fileStream,
    crlfDelay: Infinity,
  });

  for await (const line of rl) {
    console.log(`Line: ${line}`);
  }
}

// Iterate over a stream directly
async function processStream(stream) {
  for await (const chunk of stream) {
    console.log(`Chunk: ${chunk.length} bytes`);
  }
}
```

## Backpressure Handling

```javascript
import { createReadStream, createWriteStream } from 'node:fs';

const readable = createReadStream('large-file.txt');
const writable = createWriteStream('output.txt');

readable.on('data', (chunk) => {
  // Check if we can write more
  const canContinue = writable.write(chunk);

  if (!canContinue) {
    // Pause until drained
    readable.pause();
    writable.once('drain', () => {
      readable.resume();
    });
  }
});

readable.on('end', () => {
  writable.end();
});
```
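You can observe the backpressure signal itself with a tiny in-memory writable: `write()` returns `false` once buffered bytes reach `highWaterMark`, and `'drain'` fires when the buffer empties. The 8-byte mark below is artificially small, purely for demonstration:

```javascript
import { Writable } from 'node:stream';

const slow = new Writable({
  highWaterMark: 8, // tiny buffer to trigger backpressure quickly
  write(chunk, encoding, callback) {
    setTimeout(callback, 10); // simulate slow I/O
  },
});

const first = slow.write('abcd');  // 4 bytes buffered: under the mark
const second = slow.write('efgh'); // 8 bytes buffered: at the mark
console.log(first, second); // true false

slow.on('drain', () => console.log('drained'));
```

Note that `pipe` and `pipeline` perform exactly this pause/resume dance for you; the manual version above is mainly useful when you cannot pipe directly.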

## Memory-Efficient Processing

```javascript
import { createReadStream } from 'node:fs';
import { createHash } from 'node:crypto';

// Hash a large file without loading it all into memory
async function hashFile(filename) {
  return new Promise((resolve, reject) => {
    const hash = createHash('sha256');
    const stream = createReadStream(filename);

    stream.on('data', (chunk) => hash.update(chunk));
    stream.on('end', () => resolve(hash.digest('hex')));
    stream.on('error', reject);
  });
}

// Count lines in a large file
async function countLines(filename) {
  let count = 0;
  const stream = createReadStream(filename);

  for await (const chunk of stream) {
    // Iterating a Buffer yields byte values; 10 is '\n'
    for (const byte of chunk) {
      if (byte === 10) count++;
    }
  }

  return count;
}
```

## Best Practices

Performance:

- ✓ Use streams for large files
- ✓ Set an appropriate highWaterMark
- ✓ Handle backpressure
- ✓ Use pipeline for error handling

Error Handling:

- ✓ Always handle 'error' events
- ✓ Use pipeline/promises
- ✓ Clean up resources
- ✓ Handle stream destruction

Patterns:

- ✓ Pipe for simple cases
- ✓ Pipeline for complex chains
- ✓ Object mode for structured data
- ✓ Async iteration for simplicity

Avoid:

- ✗ Loading large files into memory
- ✗ Ignoring backpressure
- ✗ Missing error handlers
- ✗ Not ending streams properly
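Put together, the recommended shape for most jobs is a pipeline of small single-purpose transforms. A self-contained sketch using in-memory streams in place of files:

```javascript
import { pipeline } from 'node:stream/promises';
import { Readable, Transform, Writable } from 'node:stream';

// One small transform per concern; pipeline wires up backpressure
// and error propagation, and destroys every stream on failure.
const upper = new Transform({
  transform(chunk, encoding, callback) {
    callback(null, chunk.toString().toUpperCase());
  },
});

const out = [];
const sink = new Writable({
  write(chunk, encoding, callback) {
    out.push(chunk.toString());
    callback();
  },
});

await pipeline(Readable.from(['hello ', 'streams']), upper, sink);
console.log(out.join('')); // HELLO STREAMS
```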

## Conclusion

Node.js streams enable efficient processing of large data sets by handling data in chunks. Use readable streams for data sources, writable streams for destinations, and transform streams for data manipulation. Always use pipeline for proper error handling and cleanup, and be mindful of backpressure when processing data faster than it can be written.
