Good data models are the foundation of maintainable applications. They affect performance, scalability, and how easily you can evolve your system.
Start with the Domain#
1// Model your domain first, database second
2
3// Domain concepts
4interface Order {
5 id: string;
6 customer: Customer;
7 items: OrderItem[];
8 shippingAddress: Address;
9 billingAddress: Address;
10 status: OrderStatus;
11 placedAt: Date;
12 total: Money;
13}
14
15interface OrderItem {
16 product: Product;
17 quantity: number;
18 unitPrice: Money;
19 discount?: Discount;
20}
21
22interface Money {
23 amount: number;
24 currency: string;
25}
26
27// Then translate to database schemaNormalization Levels#
-- First Normal Form (1NF)
-- No repeating groups; every column holds an atomic value.
-- ❌ Bad: items packed into one delimited string
CREATE TABLE orders (
  id INT,
  items VARCHAR(1000) -- "item1,item2,item3"
);

-- ✅ Good: one row per item in a child table
CREATE TABLE orders (id INT PRIMARY KEY);
CREATE TABLE order_items (
  id INT PRIMARY KEY,
  order_id INT REFERENCES orders(id),
  product_id INT,
  quantity INT
);

-- Second Normal Form (2NF)
-- Every non-key attribute depends on the whole primary key.
-- ❌ Bad: product_name depends only on product_id, half the key
CREATE TABLE order_items (
  order_id INT,
  product_id INT,
  product_name VARCHAR(100),
  quantity INT,
  PRIMARY KEY (order_id, product_id)
);

-- ✅ Good: product attributes live with the product
CREATE TABLE products (
  id INT PRIMARY KEY,
  name VARCHAR(100)
);
CREATE TABLE order_items (
  order_id INT,
  product_id INT REFERENCES products(id),
  quantity INT,
  PRIMARY KEY (order_id, product_id)
);

-- Third Normal Form (3NF)
-- No transitive dependencies between non-key columns.
-- ❌ Bad: customer_name/email depend on customer_id, not on the order key
CREATE TABLE orders (
  id INT PRIMARY KEY,
  customer_id INT,
  customer_name VARCHAR(100),
  customer_email VARCHAR(100)
);

-- ✅ Good: customer attributes live with the customer
CREATE TABLE customers (
  id INT PRIMARY KEY,
  name VARCHAR(100),
  email VARCHAR(100)
);
CREATE TABLE orders (
  id INT PRIMARY KEY,
  customer_id INT REFERENCES customers(id)
);
Strategic Denormalization#
-- Denormalize for read performance

-- Copies of frequently joined / computed data, traded against write complexity.
CREATE TABLE orders (
  id INT PRIMARY KEY,
  customer_id INT REFERENCES customers(id),
  customer_name VARCHAR(100), -- copied from customers for join-free reads
  customer_email VARCHAR(100), -- copied from customers
  total_amount DECIMAL(10,2), -- derived from order_items; must be kept in sync
  item_count INT -- derived from order_items; must be kept in sync
);
12
-- Keep denormalized data in sync.
--
-- BUG FIX: this trigger also fires on DELETE, where NEW is not usable
-- (unassigned/NULL depending on PostgreSQL version), so the original
-- `NEW.order_id` either raised an error or silently skipped the
-- recalculation. Resolve the affected order id from TG_OP instead.
CREATE OR REPLACE FUNCTION update_order_totals()
RETURNS TRIGGER AS $$
DECLARE
  affected_order_id INT;
BEGIN
  IF TG_OP = 'DELETE' THEN
    affected_order_id := OLD.order_id;
  ELSE
    affected_order_id := NEW.order_id;
  END IF;

  UPDATE orders
  SET total_amount = COALESCE((
        SELECT SUM(quantity * unit_price)
        FROM order_items WHERE order_id = affected_order_id
      ), 0), -- an order whose last item was deleted totals 0, not NULL
      item_count = (
        SELECT COUNT(*) FROM order_items WHERE order_id = affected_order_id
      )
  WHERE id = affected_order_id;

  -- Return the row that is assigned for this operation.
  IF TG_OP = 'DELETE' THEN
    RETURN OLD;
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER order_items_change
AFTER INSERT OR UPDATE OR DELETE ON order_items
FOR EACH ROW EXECUTE FUNCTION update_order_totals();
Relationship Patterns#
-- One-to-Many: the "many" side carries the foreign key
CREATE TABLE users (id INT PRIMARY KEY);
CREATE TABLE posts (
  id INT PRIMARY KEY,
  user_id INT REFERENCES users(id),
  content TEXT
);

-- Many-to-Many: a join table, optionally carrying relationship attributes
CREATE TABLE users (id INT PRIMARY KEY);
CREATE TABLE roles (id INT PRIMARY KEY);
CREATE TABLE user_roles (
  user_id INT REFERENCES users(id),
  role_id INT REFERENCES roles(id),
  assigned_at TIMESTAMP DEFAULT NOW(), -- attribute of the relationship itself
  PRIMARY KEY (user_id, role_id)
);

-- Self-referential (hierarchies)
CREATE TABLE categories (
  id INT PRIMARY KEY,
  name VARCHAR(100),
  parent_id INT REFERENCES categories(id) -- NULL for root categories
);

-- Materialized-path variant for efficient ancestry/subtree queries
CREATE TABLE categories (
  id INT PRIMARY KEY,
  name VARCHAR(100),
  parent_id INT REFERENCES categories(id),
  path TEXT, -- '/1/5/12' lists ancestor ids from the root down
  depth INT -- level in the tree
);
Prisma Schema Example#
// schema.prisma

model User {
  id        String   @id @default(cuid())
  email     String   @unique // @unique already creates an index; a separate @@index([email]) would be redundant
  name      String?
  role      Role     @default(USER)
  posts     Post[]
  profile   Profile?
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt
}

model Profile {
  id     String  @id @default(cuid())
  bio    String?
  avatar String?
  user   User    @relation(fields: [userId], references: [id])
  userId String  @unique // one-to-one: each user has at most one profile
}

model Post {
  id         String     @id @default(cuid())
  title      String
  content    String?
  published  Boolean    @default(false)
  author     User       @relation(fields: [authorId], references: [id])
  authorId   String
  categories Category[] // implicit many-to-many join table managed by Prisma
  createdAt  DateTime   @default(now())
  updatedAt  DateTime   @updatedAt

  @@index([authorId]) // "posts by author" lookups
  @@index([published, createdAt]) // "published feed, newest first" queries
}

model Category {
  id    String @id @default(cuid())
  name  String @unique
  posts Post[]
}

enum Role {
  USER
  ADMIN
}
Schema Evolution#
-- Safe migrations

-- Adding a nullable column: a quick, metadata-only change
ALTER TABLE users ADD COLUMN phone VARCHAR(20);

-- Adding a column with a default
-- (metadata-only on PostgreSQL 11+; older versions rewrite the table — verify yours)
ALTER TABLE users ADD COLUMN status VARCHAR(20) DEFAULT 'active';

-- Renaming a column: deploy in phases instead of a single RENAME
-- Phase 1: add the new column and backfill it
ALTER TABLE users ADD COLUMN full_name VARCHAR(100);
UPDATE users SET full_name = name;

-- Phase 2: application writes to both columns
-- Phase 3: application reads from the new column
-- Phase 4: drop the old column
ALTER TABLE users DROP COLUMN name;

-- Removing a column (careful!)
-- First: stop writing to it
-- Then: stop reading from it
-- Finally: drop it
ALTER TABLE users DROP COLUMN deprecated_field;
Soft Deletes#
-- Soft delete pattern: flag rows as deleted instead of removing them.
-- BUG FIX: the original left a trailing comma before ");" (the
-- "-- other fields" comment was the last thing in the column list),
-- which is a syntax error; the placeholder comment now sits inside the list.
CREATE TABLE posts (
  id INT PRIMARY KEY,
  title VARCHAR(200),
  -- other fields ...
  deleted_at TIMESTAMP NULL -- NULL = active; set to the deletion time instead of DELETE
);

-- Query active records
SELECT * FROM posts WHERE deleted_at IS NULL;

-- Partial index keeps the common "active only" filter fast and small
CREATE INDEX idx_posts_active ON posts(id)
WHERE deleted_at IS NULL;

// Prisma with soft deletes
const posts = await prisma.post.findMany({
  where: { deletedAt: null }, // only rows that have not been soft-deleted
});

// "Delete" by stamping deletedAt rather than removing the row
await prisma.post.update({
  where: { id },
  data: { deletedAt: new Date() },
});
Audit Trails#
-- Audit table: one append-only row per change
CREATE TABLE audit_log (
  id BIGSERIAL PRIMARY KEY,
  table_name VARCHAR(100),
  record_id VARCHAR(100),
  action VARCHAR(10), -- 'INSERT', 'UPDATE', or 'DELETE' (TG_OP values)
  old_data JSONB, -- row before the change; NULL for INSERT
  new_data JSONB, -- row after the change; NULL for DELETE
  changed_by VARCHAR(100),
  changed_at TIMESTAMP DEFAULT NOW()
);
12
-- Generic audit trigger.
--
-- BUG FIX: `COALESCE(NEW.id::TEXT, OLD.id::TEXT)` evaluates NEW.id first,
-- which fails on DELETE where NEW is not usable (unassigned/NULL depending
-- on PostgreSQL version). Select the record id explicitly from TG_OP.
-- Also uses to_jsonb() to match the JSONB columns directly.
CREATE OR REPLACE FUNCTION audit_trigger()
RETURNS TRIGGER AS $$
BEGIN
  INSERT INTO audit_log (table_name, record_id, action, old_data, new_data, changed_by)
  VALUES (
    TG_TABLE_NAME,
    CASE WHEN TG_OP = 'DELETE' THEN OLD.id::TEXT ELSE NEW.id::TEXT END,
    TG_OP,
    CASE WHEN TG_OP IN ('UPDATE', 'DELETE') THEN to_jsonb(OLD) END,
    CASE WHEN TG_OP IN ('INSERT', 'UPDATE') THEN to_jsonb(NEW) END,
    current_user
  );
  -- Return the row that is assigned for this operation
  -- (AFTER row triggers ignore the value, but returning one is conventional).
  IF TG_OP = 'DELETE' THEN
    RETURN OLD;
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Apply to tables
CREATE TRIGGER audit_users
AFTER INSERT OR UPDATE OR DELETE ON users
FOR EACH ROW EXECUTE FUNCTION audit_trigger();
Best Practices#
DO:
✓ Start with normalized design
✓ Denormalize based on measured needs
✓ Use appropriate data types
✓ Add indexes for query patterns
✓ Plan for schema evolution
✓ Document relationships
DON'T:
✗ Over-normalize (too many joins)
✗ Under-normalize (update anomalies)
✗ Use generic columns (data1, data2)
✗ Store computed values without updates
✗ Ignore NULL semantics
Conclusion#
Data modeling is both art and science. Start normalized, understand your access patterns, and denormalize strategically. Plan for evolution—schemas change as requirements grow.
A well-designed data model makes the application code simpler and the database faster.