Good data models are the foundation of maintainable applications. They affect performance, scalability, and how easily you can evolve your system.
Start with the Domain#
1// Model your domain first, database second
2
3// Domain concepts
4interface Order {
5 id: string;
6 customer: Customer;
7 items: OrderItem[];
8 shippingAddress: Address;
9 billingAddress: Address;
10 status: OrderStatus;
11 placedAt: Date;
12 total: Money;
13}
14
15interface OrderItem {
16 product: Product;
17 quantity: number;
18 unitPrice: Money;
19 discount?: Discount;
20}
21
22interface Money {
23 amount: number;
24 currency: string;
25}
26
27// Then translate to database schemaNormalization Levels#
-- First Normal Form (1NF)
-- No repeating groups; every column holds an atomic value.
-- ❌ Bad: items packed into one delimited string
CREATE TABLE orders (
  id INT,
  items VARCHAR(1000) -- "item1,item2,item3"
);

-- ✅ Good: one row per item in a child table
CREATE TABLE orders (id INT PRIMARY KEY);
CREATE TABLE order_items (
  id INT PRIMARY KEY,
  order_id INT REFERENCES orders(id),
  product_id INT,
  quantity INT
);

-- Second Normal Form (2NF)
-- Every non-key attribute depends on the whole primary key.
-- ❌ Bad: product_name depends only on product_id, half the key
CREATE TABLE order_items (
  order_id INT,
  product_id INT,
  product_name VARCHAR(100),
  quantity INT,
  PRIMARY KEY (order_id, product_id)
);

-- ✅ Good: product attributes live with the product
CREATE TABLE products (
  id INT PRIMARY KEY,
  name VARCHAR(100)
);
CREATE TABLE order_items (
  order_id INT,
  product_id INT REFERENCES products(id),
  quantity INT,
  PRIMARY KEY (order_id, product_id)
);

-- Third Normal Form (3NF)
-- No transitive dependencies between non-key columns.
-- ❌ Bad: customer_name/email depend on customer_id, not on the order key
CREATE TABLE orders (
  id INT PRIMARY KEY,
  customer_id INT,
  customer_name VARCHAR(100),
  customer_email VARCHAR(100)
);

-- ✅ Good: customer attributes live with the customer
CREATE TABLE customers (
  id INT PRIMARY KEY,
  name VARCHAR(100),
  email VARCHAR(100)
);
CREATE TABLE orders (
  id INT PRIMARY KEY,
  customer_id INT REFERENCES customers(id)
);
Strategic Denormalization#
-- Denormalize for read performance

-- Copies of frequently joined / computed data, traded against write complexity.
CREATE TABLE orders (
  id INT PRIMARY KEY,
  customer_id INT REFERENCES customers(id),
  customer_name VARCHAR(100), -- copied from customers for join-free reads
  customer_email VARCHAR(100), -- copied from customers
  total_amount DECIMAL(10,2), -- derived from order_items; must be kept in sync
  item_count INT -- derived from order_items; must be kept in sync
);
12
-- Keep denormalized data in sync.
--
-- BUG FIX: this trigger also fires on DELETE, where NEW is not usable
-- (unassigned/NULL depending on PostgreSQL version), so the original
-- `NEW.order_id` either raised an error or silently skipped the
-- recalculation. Resolve the affected order id from TG_OP instead.
CREATE OR REPLACE FUNCTION update_order_totals()
RETURNS TRIGGER AS $$
DECLARE
  affected_order_id INT;
BEGIN
  IF TG_OP = 'DELETE' THEN
    affected_order_id := OLD.order_id;
  ELSE
    affected_order_id := NEW.order_id;
  END IF;

  UPDATE orders
  SET total_amount = COALESCE((
        SELECT SUM(quantity * unit_price)
        FROM order_items WHERE order_id = affected_order_id
      ), 0), -- an order whose last item was deleted totals 0, not NULL
      item_count = (
        SELECT COUNT(*) FROM order_items WHERE order_id = affected_order_id
      )
  WHERE id = affected_order_id;

  -- Return the row that is assigned for this operation.
  IF TG_OP = 'DELETE' THEN
    RETURN OLD;
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER order_items_change
AFTER INSERT OR UPDATE OR DELETE ON order_items
FOR EACH ROW EXECUTE FUNCTION update_order_totals();
Relationship Patterns#
-- One-to-Many: the "many" side carries the foreign key
CREATE TABLE users (id INT PRIMARY KEY);
CREATE TABLE posts (
  id INT PRIMARY KEY,
  user_id INT REFERENCES users(id),
  content TEXT
);

-- Many-to-Many: a join table, optionally carrying relationship attributes
CREATE TABLE users (id INT PRIMARY KEY);
CREATE TABLE roles (id INT PRIMARY KEY);
CREATE TABLE user_roles (
  user_id INT REFERENCES users(id),
  role_id INT REFERENCES roles(id),
  assigned_at TIMESTAMP DEFAULT NOW(), -- attribute of the relationship itself
  PRIMARY KEY (user_id, role_id)
);

-- Self-referential (hierarchies)
CREATE TABLE categories (
  id INT PRIMARY KEY,
  name VARCHAR(100),
  parent_id INT REFERENCES categories(id) -- NULL for root categories
);

-- Materialized-path variant for efficient ancestry/subtree queries
CREATE TABLE categories (
  id INT PRIMARY KEY,
  name VARCHAR(100),
  parent_id INT REFERENCES categories(id),
  path TEXT, -- '/1/5/12' lists ancestor ids from the root down
  depth INT -- level in the tree
);
Prisma Schema Example#
// schema.prisma

model User {
  id        String   @id @default(cuid())
  email     String   @unique // @unique already creates an index; a separate @@index([email]) would be redundant
  name      String?
  role      Role     @default(USER)
  posts     Post[]
  profile   Profile?
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt
}

model Profile {
  id     String  @id @default(cuid())
  bio    String?
  avatar String?
  user   User    @relation(fields: [userId], references: [id])
  userId String  @unique // one-to-one: each user has at most one profile
}

model Post {
  id         String     @id @default(cuid())
  title      String
  content    String?
  published  Boolean    @default(false)
  author     User       @relation(fields: [authorId], references: [id])
  authorId   String
  categories Category[] // implicit many-to-many join table managed by Prisma
  createdAt  DateTime   @default(now())
  updatedAt  DateTime   @updatedAt

  @@index([authorId]) // "posts by author" lookups
  @@index([published, createdAt]) // "published feed, newest first" queries
}

model Category {
  id    String @id @default(cuid())
  name  String @unique
  posts Post[]
}

enum Role {
  USER
  ADMIN
}
Schema Evolution#
-- Safe migrations

-- Adding a nullable column: a quick, metadata-only change
ALTER TABLE users ADD COLUMN phone VARCHAR(20);

-- Adding a column with a default
-- (metadata-only on PostgreSQL 11+; older versions rewrite the table — verify yours)
ALTER TABLE users ADD COLUMN status VARCHAR(20) DEFAULT 'active';

-- Renaming a column: deploy in phases instead of a single RENAME
-- Phase 1: add the new column and backfill it
ALTER TABLE users ADD COLUMN full_name VARCHAR(100);
UPDATE users SET full_name = name;

-- Phase 2: application writes to both columns
-- Phase 3: application reads from the new column
-- Phase 4: drop the old column
ALTER TABLE users DROP COLUMN name;

-- Removing a column (careful!)
-- First: stop writing to it
-- Then: stop reading from it
-- Finally: drop it
ALTER TABLE users DROP COLUMN deprecated_field;
Soft Deletes#
-- Soft delete pattern: flag rows as deleted instead of removing them.
-- BUG FIX: the original left a trailing comma before ");" (the
-- "-- other fields" comment was the last thing in the column list),
-- which is a syntax error; the placeholder comment now sits inside the list.
CREATE TABLE posts (
  id INT PRIMARY KEY,
  title VARCHAR(200),
  -- other fields ...
  deleted_at TIMESTAMP NULL -- NULL = active; set to the deletion time instead of DELETE
);

-- Query active records
SELECT * FROM posts WHERE deleted_at IS NULL;

-- Partial index keeps the common "active only" filter fast and small
CREATE INDEX idx_posts_active ON posts(id)
WHERE deleted_at IS NULL;

// Prisma with soft deletes
const posts = await prisma.post.findMany({
  where: { deletedAt: null }, // only rows that have not been soft-deleted
});

// "Delete" by stamping deletedAt rather than removing the row
await prisma.post.update({
  where: { id },
  data: { deletedAt: new Date() },
});
Audit Trails#
-- Audit table: one append-only row per change
CREATE TABLE audit_log (
  id BIGSERIAL PRIMARY KEY,
  table_name VARCHAR(100),
  record_id VARCHAR(100),
  action VARCHAR(10), -- 'INSERT', 'UPDATE', or 'DELETE' (TG_OP values)
  old_data JSONB, -- row before the change; NULL for INSERT
  new_data JSONB, -- row after the change; NULL for DELETE
  changed_by VARCHAR(100),
  changed_at TIMESTAMP DEFAULT NOW()
);
12
-- Generic audit trigger.
--
-- BUG FIX: `COALESCE(NEW.id::TEXT, OLD.id::TEXT)` evaluates NEW.id first,
-- which fails on DELETE where NEW is not usable (unassigned/NULL depending
-- on PostgreSQL version). Select the record id explicitly from TG_OP.
-- Also uses to_jsonb() to match the JSONB columns directly.
CREATE OR REPLACE FUNCTION audit_trigger()
RETURNS TRIGGER AS $$
BEGIN
  INSERT INTO audit_log (table_name, record_id, action, old_data, new_data, changed_by)
  VALUES (
    TG_TABLE_NAME,
    CASE WHEN TG_OP = 'DELETE' THEN OLD.id::TEXT ELSE NEW.id::TEXT END,
    TG_OP,
    CASE WHEN TG_OP IN ('UPDATE', 'DELETE') THEN to_jsonb(OLD) END,
    CASE WHEN TG_OP IN ('INSERT', 'UPDATE') THEN to_jsonb(NEW) END,
    current_user
  );
  -- Return the row that is assigned for this operation
  -- (AFTER row triggers ignore the value, but returning one is conventional).
  IF TG_OP = 'DELETE' THEN
    RETURN OLD;
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Apply to tables
CREATE TRIGGER audit_users
AFTER INSERT OR UPDATE OR DELETE ON users
FOR EACH ROW EXECUTE FUNCTION audit_trigger();
Best Practices#
DO:
✓ Start with normalized design
✓ Denormalize based on measured needs
✓ Use appropriate data types
✓ Add indexes for query patterns
✓ Plan for schema evolution
✓ Document relationships
DON'T:
✗ Over-normalize (too many joins)
✗ Under-normalize (update anomalies)
✗ Use generic columns (data1, data2)
✗ Store computed values without updates
✗ Ignore NULL semantics
Conclusion#
Data modeling is both art and science. Start normalized, understand your access patterns, and denormalize strategically. Plan for evolution—schemas change as requirements grow.
A well-designed data model makes the application code simpler and the database faster.