From 03c3878eb28d7c4eff404129ce9c8f462f8f8846 Mon Sep 17 00:00:00 2001
From: kclaka <33363343+kclaka@users.noreply.github.com>
Date: Wed, 25 Feb 2026 08:32:41 -0800
Subject: [PATCH] v1.2.1: Add criterion benchmarks and fix CI integration test
 parallelism

- Add criterion benchmark suite (engine, classify, output formatters)
  measuring rows/sec throughput across generation strategies
- Fix MySQL integration test failures caused by parallel test execution
  against a shared database (--test-threads=1)
- Add performance table to README with baseline benchmark results
- Bump version to 1.2.1
---
 .github/workflows/ci.yml                |   2 +-
 Cargo.lock                              | 236 +++++++++++++++++-
 Cargo.toml                              |   3 +-
 README.md                               |  21 +-
 crates/seedkit-core/Cargo.toml          |  13 +
 crates/seedkit-core/benches/classify.rs | 128 ++++++++++
 crates/seedkit-core/benches/engine.rs   | 310 ++++++++++++++++++++++++
 crates/seedkit-core/benches/output.rs   | 138 +++++++++++
 8 files changed, 843 insertions(+), 8 deletions(-)
 create mode 100644 crates/seedkit-core/benches/classify.rs
 create mode 100644 crates/seedkit-core/benches/engine.rs
 create mode 100644 crates/seedkit-core/benches/output.rs

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 83ee00e..4411561 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -69,4 +69,4 @@ jobs:
         env:
           TEST_POSTGRES_URL: postgres://seedkit:seedkit@localhost:5432/seedkit_test
           TEST_MYSQL_URL: mysql://seedkit:seedkit@localhost:3306/seedkit_test
-        run: cargo test --test '*'
+        run: cargo test --test '*' -- --test-threads=1
diff --git a/Cargo.lock b/Cargo.lock
index d1867cc..415e340 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -26,6 +26,12 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
 [[package]]
 name = "anstream"
 version = "0.6.21"
@@ -166,6 +172,12 @@ version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
 
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
 [[package]]
 name = "cc"
 version = "1.2.56"
@@ -196,6 +208,33 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "ciborium"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
+dependencies = [
+ "ciborium-io",
+ "half",
+]
+
 [[package]]
 name = "clap"
 version = "4.5.60"
@@ -331,6 +370,61 @@ version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
 
+[[package]]
+name = "criterion"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
+dependencies = [
+ "anes",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "is-terminal",
+ "itertools",
+ "num-traits",
+ "once_cell",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
+dependencies = [
+ "cast",
+ "itertools",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "crossbeam-queue"
 version = "0.3.12"
@@ -369,6 +463,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
 [[package]]
 name = "crypto-common"
 version = "0.1.7"
@@ -710,6 +810,17 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "half"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+ "zerocopy",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.15.5"
@@ -742,6 +853,12 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
 [[package]]
 name = "hex"
 version = "0.4.3"
@@ -1066,12 +1183,32 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "is-terminal"
+version = "0.4.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "is_terminal_polyfill"
 version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
 
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.17"
@@ -1302,6 +1439,12 @@ version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
 
+[[package]]
+name = "oorandom"
+version = "11.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
+
 [[package]]
 name = "openssl"
 version = "0.10.75"
@@ -1439,6 +1582,34 @@ version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
 
+[[package]]
+name = "plotters"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
+dependencies = [
+ "plotters-backend",
+]
+
 [[package]]
 name = "portable-atomic"
 version = "1.13.1"
@@ -1556,6 +1727,26 @@ dependencies = [
  "getrandom 0.3.4",
 ]
 
+[[package]]
+name = "rayon"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "redox_syscall"
 version = "0.5.18"
@@ -1736,6 +1927,15 @@ version = "1.0.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
 
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
 [[package]]
 name = "schannel"
 version = "0.1.28"
@@ -1776,7 +1976,7 @@ dependencies = [
 
 [[package]]
 name = "seedkit-cli"
-version = "1.2.0"
+version = "1.2.1"
 dependencies = [
  "anyhow",
  "clap",
@@ -1794,12 +1994,13 @@ dependencies = [
 
 [[package]]
 name = "seedkit-core"
-version = "1.2.0"
+version = "1.2.1"
 dependencies = [
  "anyhow",
  "base64",
  "chrono",
  "comfy-table",
+ "criterion",
  "dotenvy",
  "fake",
  "indexmap",
@@ -1823,7 +2024,7 @@ dependencies = [
 
 [[package]]
 name = "seedkit-testutil"
-version = "1.2.0"
+version = "1.2.1"
 dependencies = [
  "indexmap",
  "seedkit-core",
@@ -2339,6 +2540,16 @@ dependencies = [
  "zerovec",
 ]
 
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "tinyvec"
 version = "1.10.0"
@@ -2694,6 +2905,16 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
 [[package]]
 name = "want"
 version = "0.3.1"
@@ -2890,6 +3111,15 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "winapi-x86_64-pc-windows-gnu"
 version = "0.4.0"
diff --git a/Cargo.toml b/Cargo.toml
index fd9aefb..6cabcce 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,7 @@ members = ["crates/seedkit-core", "crates/seedkit-cli", "crates/seedkit-testutil
 resolver = "2"
 
 [workspace.package]
-version = "1.2.0"
+version = "1.2.1"
 edition = "2021"
 authors = ["SeedKit Contributors"]
 license = "MIT"
@@ -69,6 +69,7 @@ dotenvy = "0.15"
 
 # Testing
 tempfile = "3"
+criterion = { version = "0.5", features = ["html_reports"] }
 
 # Logging
 tracing = "0.1"
diff --git a/README.md b/README.md
index f26e1a4..d7d9c8d 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
   <p align="center">
     <a href="https://github.com/kclaka/seedkit/actions/workflows/ci.yml"><img src="https://github.com/kclaka/seedkit/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
     <img src="https://img.shields.io/badge/tests-221_passing-brightgreen" alt="Tests">
-    <img src="https://img.shields.io/badge/version-1.2.0-blue" alt="Version">
+    <img src="https://img.shields.io/badge/version-1.2.1-blue" alt="Version">
     <img src="https://img.shields.io/badge/rust-1.75%2B-orange?logo=rust" alt="Rust">
     <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="License: MIT"></a>
     <img src="https://img.shields.io/badge/databases-PostgreSQL%20%7C%20MySQL%20%7C%20SQLite-blueviolet" alt="Databases">
@@ -81,7 +81,7 @@ cargo install --path crates/seedkit-cli
 
 ```bash
 seedkit --version
-# seedkit 1.2.0
+# seedkit 1.2.1
 ```
 
 ### Zero-Config Database Detection
@@ -271,6 +271,21 @@ git checkout --ours seedkit.lock
 seedkit generate --force
 ```
 
+## Performance
+
+Benchmarked with [criterion](https://github.com/bheisler/criterion.rs) on Apple Silicon (M-series). Run `cargo bench` to reproduce.
+
+| Operation | Throughput |
+|---|---|
+| Generation (10 cols, semantic providers) | ~480K rows/sec |
+| Generation (FK references only) | ~3.7M rows/sec |
+| Generation (weighted value lists) | ~6.9M rows/sec |
+| Generation (distribution sampling) | ~8.6M rows/sec |
+| Classification (100 tables x 20 cols) | ~2.1M cols/sec |
+| SQL output formatting | ~1.5M rows/sec |
+| JSON output formatting | ~1.1M rows/sec |
+| CSV output formatting | ~1.5M rows/sec |
+
 ## Comparison
 
 | Feature | SeedKit | Faker/factory_bot | Snaplet |
@@ -314,7 +329,7 @@ cargo test
 docker compose -f docker/docker-compose.test.yml up -d
 TEST_POSTGRES_URL=postgres://seedkit:seedkit@localhost:5432/seedkit_test \
 TEST_MYSQL_URL=mysql://seedkit:seedkit@localhost:3307/seedkit_test \
-  cargo test --test '*'
+  cargo test --test '*' -- --test-threads=1
 ```
 
 ## License
diff --git a/crates/seedkit-core/Cargo.toml b/crates/seedkit-core/Cargo.toml
index f017e3e..6a7460c 100644
--- a/crates/seedkit-core/Cargo.toml
+++ b/crates/seedkit-core/Cargo.toml
@@ -33,3 +33,16 @@ url.workspace = true
 [dev-dependencies]
 tokio = { workspace = true, features = ["test-util"] }
 tempfile.workspace = true
+criterion.workspace = true
+
+[[bench]]
+name = "engine"
+harness = false
+
+[[bench]]
+name = "classify"
+harness = false
+
+[[bench]]
+name = "output"
+harness = false
diff --git a/crates/seedkit-core/benches/classify.rs b/crates/seedkit-core/benches/classify.rs
new file mode 100644
index 0000000..ad4997c
--- /dev/null
+++ b/crates/seedkit-core/benches/classify.rs
@@ -0,0 +1,128 @@
+//! Benchmarks for column classification — regex matching and schema-wide classification.
+//!
+//! Classification runs once per introspection, but regex compilation and matching
+//! are worth measuring to catch regressions from rule changes.
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+
+use seedkit_core::classify::rules::{classify_column, classify_schema};
+use seedkit_core::schema::types::*;
+
+/// Returns 20 `(name, type)` pairs: the realistic column mix each schema-benchmark table is built with.
+fn realistic_columns() -> Vec<(&'static str, DataType)> {
+    vec![
+        ("id", DataType::Serial),
+        ("email", DataType::VarChar),
+        ("first_name", DataType::VarChar),
+        ("last_name", DataType::VarChar),
+        ("password_hash", DataType::VarChar),
+        ("created_at", DataType::TimestampTz),
+        ("updated_at", DataType::TimestampTz),
+        ("is_active", DataType::Boolean),
+        ("age", DataType::Integer),
+        ("price", DataType::Numeric),
+        ("description", DataType::Text),
+        ("avatar_url", DataType::VarChar),
+        ("phone", DataType::VarChar),
+        ("city", DataType::VarChar),
+        ("zip_code", DataType::VarChar),
+        ("country", DataType::VarChar),
+        ("status", DataType::VarChar),
+        ("metadata", DataType::Jsonb),
+        ("slug", DataType::VarChar),
+        ("quantity", DataType::Integer),
+    ]
+}
+
+/// Benchmarks `classify_column` on individual columns, one benchmark per labelled case.
+fn bench_classify_single_column(c: &mut Criterion) {
+    let mut group = c.benchmark_group("classify/single_column");
+
+    // Cases are named for what they target: early/mid/late position in the rule list, a name
+    // that misses every rule ("zzz_unknown"), camelCase input, and a type-constrained rule.
+    let cases = vec![
+        ("early_match", "email", DataType::VarChar),
+        ("mid_match", "status", DataType::VarChar),
+        ("late_match", "tenant_id", DataType::Integer),
+        ("no_match", "zzz_unknown", DataType::VarChar),
+        ("camel_case", "firstName", DataType::VarChar),
+        ("type_constrained", "age", DataType::Integer),
+    ];
+
+    for (label, col_name, data_type) in &cases {
+        group.bench_with_input(
+            BenchmarkId::new("type", label),
+            &(col_name, data_type),
+            |b, &(name, dt)| {
+                // Return the result so criterion black-boxes it and the call cannot be elided.
+                b.iter(|| classify_column(name, dt, "users", false, false, None));
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Benchmarks `classify_schema` on schemas of 10, 50 and 100 tables, with throughput in columns.
+fn bench_classify_schema(c: &mut Criterion) {
+    let mut group = c.benchmark_group("classify/schema");
+    let columns = realistic_columns();
+
+    for table_count in [10, 50, 100] {
+        let schema = build_schema(table_count, &columns);
+        let total_columns = table_count * columns.len();
+
+        group.throughput(Throughput::Elements(total_columns as u64));
+        group.bench_with_input(
+            BenchmarkId::new("tables", table_count),
+            &schema,
+            |b, schema| {
+                // Return the result so criterion black-boxes it and the call cannot be elided.
+                b.iter(|| classify_schema(schema));
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Builds a schema named "bench" with `table_count` tables that all share `columns`.
+///
+/// The first 20 tables use readable names; any further table is named `table_<index>`.
+fn build_schema(table_count: usize, columns: &[(&str, DataType)]) -> DatabaseSchema {
+    let named_tables = [
+        "users", "orders", "products", "reviews", "categories",
+        "tags", "comments", "posts", "sessions", "notifications",
+        "invoices", "payments", "addresses", "companies", "departments",
+        "employees", "projects", "tasks", "events", "logs",
+    ];
+    let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+
+    for index in 0..table_count {
+        let table_name = match named_tables.get(index) {
+            Some(known) => (*known).to_string(),
+            None => format!("table_{}", index),
+        };
+        let mut table = Table::new(table_name.clone());
+
+        for (col_name, data_type) in columns {
+            let key = col_name.to_string();
+            let mut column = Column::new(key.clone(), data_type.clone(), data_type.to_string());
+            // Only the `id` column is flagged as auto-incrementing.
+            if *col_name == "id" {
+                column.is_auto_increment = true;
+            }
+            table.columns.insert(key, column);
+        }
+        // Each table's primary key is the single `id` column.
+        let primary_key = PrimaryKey {
+            columns: vec!["id".to_string()],
+            name: None,
+        };
+        table.primary_key = Some(primary_key);
+        schema.tables.insert(table_name, table);
+    }
+
+    schema
+}
+
+criterion_group!(benches, bench_classify_single_column, bench_classify_schema);
+criterion_main!(benches);
diff --git a/crates/seedkit-core/benches/engine.rs b/crates/seedkit-core/benches/engine.rs
new file mode 100644
index 0000000..5dd1d82
--- /dev/null
+++ b/crates/seedkit-core/benches/engine.rs
@@ -0,0 +1,310 @@
+//! Benchmarks for the generation engine — the core hot path.
+//!
+//! Measures rows-per-second throughput for `execute_plan` across
+//! different table sizes, column counts, and strategy mixes.
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use std::collections::{BTreeMap, HashMap};
+
+use seedkit_core::classify::semantic::SemanticType;
+use seedkit_core::generate::engine::execute_plan;
+use seedkit_core::generate::plan::*;
+use seedkit_core::sample::stats::ColumnDistribution;
+use seedkit_core::schema::types::*;
+
+/// Builds a single `items` table with `num_columns` classified columns and no foreign keys.
+///
+/// Column templates repeat in order; repeats after the first pass get a numeric suffix.
+fn single_table_schema(num_columns: usize) -> (DatabaseSchema, HashMap<(String, String), SemanticType>) {
+    let types = [
+        ("email", DataType::VarChar, SemanticType::Email),
+        ("first_name", DataType::VarChar, SemanticType::FirstName),
+        ("last_name", DataType::VarChar, SemanticType::LastName),
+        ("price", DataType::Numeric, SemanticType::Price),
+        ("created_at", DataType::TimestampTz, SemanticType::CreatedAt),
+        ("is_active", DataType::Boolean, SemanticType::BooleanFlag),
+        ("description", DataType::Text, SemanticType::Description),
+        ("status", DataType::VarChar, SemanticType::Status),
+        ("quantity", DataType::Integer, SemanticType::Quantity),
+        ("url", DataType::VarChar, SemanticType::Url),
+    ];
+
+    let mut table = Table::new("items".to_string());
+    let mut classifications = HashMap::new();
+    for (i, (name, dt, st)) in types.iter().cycle().take(num_columns).enumerate() {
+        // Pass 0 keeps the bare template name; later passes append the pass number.
+        let col_name = match i / types.len() {
+            0 => name.to_string(),
+            pass => format!("{}_{}", name, pass),
+        };
+        let col = Column::new(col_name.clone(), dt.clone(), dt.to_string());
+        table.columns.insert(col_name.clone(), col);
+        classifications.insert(("items".to_string(), col_name), *st);
+    }
+
+    let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+    schema.tables.insert("items".to_string(), table);
+    (schema, classifications)
+}
+
+/// Builds a `users` -> `orders` schema (`orders.user_id` references `users.id`) with classifications.
+fn fk_schema() -> (DatabaseSchema, HashMap<(String, String), SemanticType>) {
+    // Small local constructors so every column below is declared on one line.
+    let column = |name: &str, dt: DataType, raw: &str| {
+        Column::new(name.to_string(), dt, raw.to_string())
+    };
+    let serial_id = || {
+        let mut id = column("id", DataType::Serial, "serial");
+        id.is_auto_increment = true;
+        id
+    };
+    let id_primary_key = || PrimaryKey {
+        columns: vec!["id".to_string()],
+        name: None,
+    };
+
+    // Parent: users
+    let mut users = Table::new("users".to_string());
+    users.columns.insert("id".to_string(), serial_id());
+    users.columns.insert("email".to_string(), column("email", DataType::VarChar, "varchar"));
+    users.primary_key = Some(id_primary_key());
+
+    // Child: orders
+    let mut orders = Table::new("orders".to_string());
+    orders.columns.insert("id".to_string(), serial_id());
+    orders.columns.insert("user_id".to_string(), column("user_id", DataType::Integer, "integer"));
+    orders.columns.insert("amount".to_string(), column("amount", DataType::Numeric, "numeric"));
+    orders.primary_key = Some(id_primary_key());
+    orders.foreign_keys.push(ForeignKey {
+        name: Some("orders_user_id_fkey".to_string()),
+        source_columns: vec!["user_id".to_string()],
+        referenced_table: "users".to_string(),
+        referenced_columns: vec!["id".to_string()],
+        on_delete: ForeignKeyAction::Cascade,
+        on_update: ForeignKeyAction::NoAction,
+        is_deferrable: false,
+    });
+
+    let classifications = HashMap::from([
+        (("users".to_string(), "id".to_string()), SemanticType::AutoIncrement),
+        (("users".to_string(), "email".to_string()), SemanticType::Email),
+        (("orders".to_string(), "id".to_string()), SemanticType::AutoIncrement),
+        (("orders".to_string(), "user_id".to_string()), SemanticType::ExternalId),
+        (("orders".to_string(), "amount".to_string()), SemanticType::Price),
+    ]);
+    let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+    schema.tables.insert("users".to_string(), users);
+    schema.tables.insert("orders".to_string(), orders);
+    (schema, classifications)
+}
+
+/// Benchmarks `execute_plan` on the 10-column `items` table at 100, 1000 and 10,000 rows.
+fn bench_single_table_generation(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/single_table");
+
+    let (schema, classifications) = single_table_schema(10);
+    let insertion_order = vec!["items".to_string()];
+    let empty_overrides = BTreeMap::new();
+    let empty_col_overrides = BTreeMap::new();
+
+    for row_count in [100, 1000, 10_000] {
+        group.throughput(Throughput::Elements(row_count as u64));
+        group.bench_with_input(
+            BenchmarkId::new("rows", row_count),
+            &row_count,
+            |b, &rows| {
+                let plan = GenerationPlan::build(
+                    &schema,
+                    &classifications,
+                    &insertion_order,
+                    Vec::new(),
+                    rows,
+                    &empty_overrides,
+                    42,
+                    None,
+                    &empty_col_overrides,
+                    None,
+                );
+                // Return the generated data so criterion black-boxes it and keeps the work live.
+                b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Benchmarks `execute_plan` for 1000 rows while the `items` table has 5, 10 or 20 columns.
+fn bench_column_count(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/column_count");
+    let row_count = 1000;
+    let empty_overrides = BTreeMap::new();
+    let empty_col_overrides = BTreeMap::new();
+
+    for col_count in [5, 10, 20] {
+        let (schema, classifications) = single_table_schema(col_count);
+        let insertion_order = vec!["items".to_string()];
+
+        group.throughput(Throughput::Elements(row_count as u64));
+        group.bench_with_input(
+            BenchmarkId::new("cols", col_count),
+            &col_count,
+            |b, _| {
+                let plan = GenerationPlan::build(
+                    &schema,
+                    &classifications,
+                    &insertion_order,
+                    Vec::new(),
+                    row_count,
+                    &empty_overrides,
+                    42,
+                    None,
+                    &empty_col_overrides,
+                    None,
+                );
+                // Return the generated data so criterion black-boxes it and keeps the work live.
+                b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Benchmarks `execute_plan` on the users/orders schema: 100 users with 500, 2000 or 10,000 orders.
+fn bench_fk_generation(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/foreign_keys");
+    let (schema, classifications) = fk_schema();
+    let insertion_order = vec!["users".to_string(), "orders".to_string()];
+    let empty_col_overrides = BTreeMap::new();
+
+    // Parent:child ratios — 100 users + varying order counts
+    for order_count in [500, 2000, 10_000] {
+        let mut overrides = BTreeMap::new();
+        overrides.insert("users".to_string(), 100);
+        overrides.insert("orders".to_string(), order_count);
+        let total = 100 + order_count;
+
+        group.throughput(Throughput::Elements(total as u64));
+        group.bench_with_input(
+            BenchmarkId::new("orders", order_count),
+            &order_count,
+            |b, _| {
+                let plan = GenerationPlan::build(
+                    &schema,
+                    &classifications,
+                    &insertion_order,
+                    Vec::new(),
+                    100,
+                    &overrides,
+                    42,
+                    None,
+                    &empty_col_overrides,
+                    None,
+                );
+                // Return the generated data so criterion black-boxes it and keeps the work live.
+                b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Benchmarks `execute_plan` for 10,000 rows of one column drawn from a weighted value list.
+fn bench_value_list_strategy(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/value_list");
+
+    let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+    let mut table = Table::new("items".to_string());
+    let col = Column::new("color".to_string(), DataType::VarChar, "varchar".to_string());
+    table.columns.insert("color".to_string(), col);
+    schema.tables.insert("items".to_string(), table);
+
+    let plan = GenerationPlan {
+        table_plans: vec![TableGenerationPlan {
+            table_name: "items".to_string(),
+            row_count: 10_000,
+            column_plans: vec![ColumnGenerationPlan {
+                column_name: "color".to_string(),
+                semantic_type: SemanticType::Unknown,
+                strategy: GenerationStrategy::ValueList {
+                    values: vec![
+                        "red".into(), "blue".into(), "green".into(),
+                        "black".into(), "white".into(),
+                    ],
+                    weights: Some(vec![0.25, 0.20, 0.20, 0.20, 0.15]),
+                },
+                nullable: false,
+                null_probability: 0.0,
+                check_constraints: Vec::new(),
+            }],
+            correlation_groups: Vec::new(),
+        }],
+        deferred_edges: Vec::new(),
+        seed: 42,
+        default_row_count: 10_000,
+        base_time: chrono::Utc::now().naive_utc(),
+        sequence_offset: 0,
+    };
+
+    group.throughput(Throughput::Elements(10_000));
+    group.bench_function("weighted_10k", |b| {
+        // Return the generated data so criterion black-boxes it and keeps the work live.
+        b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+    });
+    group.finish();
+}
+
+/// Benchmarks `execute_plan` for 10,000 rows of one column sampled from a numeric distribution.
+fn bench_distribution_strategy(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/distribution");
+
+    let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+    let mut table = Table::new("items".to_string());
+    let col = Column::new("price".to_string(), DataType::Numeric, "numeric".to_string());
+    table.columns.insert("price".to_string(), col);
+    schema.tables.insert("items".to_string(), table);
+
+    let plan = GenerationPlan {
+        table_plans: vec![TableGenerationPlan {
+            table_name: "items".to_string(),
+            row_count: 10_000,
+            column_plans: vec![ColumnGenerationPlan {
+                column_name: "price".to_string(),
+                semantic_type: SemanticType::Unknown,
+                strategy: GenerationStrategy::Distribution {
+                    distribution: ColumnDistribution::Numeric {
+                        min: 0.0,
+                        max: 1000.0,
+                        mean: 49.99,
+                        stddev: 25.0,
+                    },
+                },
+                nullable: false,
+                null_probability: 0.0,
+                check_constraints: Vec::new(),
+            }],
+            correlation_groups: Vec::new(),
+        }],
+        deferred_edges: Vec::new(),
+        seed: 42,
+        default_row_count: 10_000,
+        base_time: chrono::Utc::now().naive_utc(),
+        sequence_offset: 0,
+    };
+
+    group.throughput(Throughput::Elements(10_000));
+    group.bench_function("numeric_normal_10k", |b| {
+        // Return the generated data so criterion black-boxes it and keeps the work live.
+        b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+    });
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_single_table_generation,
+    bench_column_count,
+    bench_fk_generation,
+    bench_value_list_strategy,
+    bench_distribution_strategy,
+);
+criterion_main!(benches);
diff --git a/crates/seedkit-core/benches/output.rs b/crates/seedkit-core/benches/output.rs
new file mode 100644
index 0000000..5ba3ac4
--- /dev/null
+++ b/crates/seedkit-core/benches/output.rs
@@ -0,0 +1,138 @@
+//! Benchmarks for output formatters — SQL, JSON, and CSV serialization.
+//!
+//! Measures throughput of formatting pre-generated data into various output
+//! formats. Uses a black-hole writer to isolate formatter cost from I/O.
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use indexmap::IndexMap;
+use std::borrow::Cow;
+use std::io::Write;
+
+use seedkit_core::generate::engine::GeneratedData;
+use seedkit_core::generate::value::Value;
+use seedkit_core::output::{csv, json, sql};
+use seedkit_core::schema::types::*;
+
+/// A writer that discards all output — isolates formatter cost from I/O.
+struct NullWriter;
+impl Write for NullWriter {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        // Observe the bytes so the optimizer cannot elide the work that produced them.
+        Ok(std::hint::black_box(buf).len())
+    }
+    fn flush(&mut self) -> std::io::Result<()> {
+        Ok(())
+    }
+}
+
+/// Builds a single `users` table containing `row_count` deterministic rows.
+///
+/// Columns, in insertion order: `name`, `email`, `age`, `price`, `active`,
+/// `created_at`, and `bio`.
+fn make_generated_data(row_count: usize) -> GeneratedData {
+    // Row `n` is stamped with this instant plus `n` seconds.
+    let first_created_at = chrono::NaiveDateTime::new(
+        chrono::NaiveDate::from_ymd_opt(2025, 1, 1).unwrap(),
+        chrono::NaiveTime::from_hms_opt(12, 0, 0).unwrap(),
+    );
+    let text = |s: String| Value::String(Cow::Owned(s));
+
+    let user_rows: Vec<_> = (0..row_count)
+        .map(|n| {
+            // Every tenth row (starting at row 0) has a NULL bio; the rest carry
+            // text with commas and quotes.
+            let bio = if n % 10 == 0 {
+                Value::Null
+            } else {
+                text(format!(
+                    "A longer description field that contains commas, \"quotes\", and other special characters for row {}.",
+                    n
+                ))
+            };
+            let email = text(format!("user{}@example.com", n));
+            let created_at = first_created_at + chrono::Duration::seconds(n as i64);
+
+            let mut record = IndexMap::new();
+            record.insert("name".to_string(), text(format!("User {}", n)));
+            record.insert("email".to_string(), email);
+            record.insert("age".to_string(), Value::Int(20 + (n as i64 % 60)));
+            record.insert("price".to_string(), Value::Float(9.99 + n as f64 * 0.01));
+            record.insert("active".to_string(), Value::Bool(n % 3 != 0));
+            record.insert("created_at".to_string(), Value::Timestamp(created_at));
+            record.insert("bio".to_string(), bio);
+            record
+        })
+        .collect();
+
+    let mut tables = IndexMap::new();
+    tables.insert("users".to_string(), user_rows);
+
+    GeneratedData {
+        tables,
+        deferred_updates: Vec::new(),
+    }
+}
+
+/// Benchmarks `sql::write_sql` over 100, 1000, and 10_000 pre-generated rows.
+fn bench_sql_output(c: &mut Criterion) {
+    let schema = DatabaseSchema::new(DatabaseType::PostgreSQL, String::from("bench"));
+    let mut sql_group = c.benchmark_group("output/sql");
+    for n_rows in [100_usize, 1000, 10_000] {
+        let generated = make_generated_data(n_rows);
+        sql_group.throughput(Throughput::Elements(n_rows as u64));
+        sql_group.bench_with_input(
+            BenchmarkId::new("rows", n_rows),
+            &generated,
+            |bencher, generated| {
+                bencher.iter(|| {
+                    // Write into a throwaway sink: the output is discarded, not stored.
+                    sql::write_sql(&mut NullWriter, generated, &schema).unwrap();
+                });
+            },
+        );
+    }
+    sql_group.finish();
+}
+
+/// Benchmarks `json::write_json` over 100, 1000, and 10_000 pre-generated rows.
+fn bench_json_output(c: &mut Criterion) {
+    let mut json_group = c.benchmark_group("output/json");
+    for n_rows in [100_usize, 1000, 10_000] {
+        let generated = make_generated_data(n_rows);
+        json_group.throughput(Throughput::Elements(n_rows as u64));
+        json_group.bench_with_input(
+            BenchmarkId::new("rows", n_rows),
+            &generated,
+            |bencher, generated| {
+                bencher.iter(|| {
+                    // Write into a throwaway sink: the output is discarded, not stored.
+                    json::write_json(&mut NullWriter, generated).unwrap();
+                });
+            },
+        );
+    }
+    json_group.finish();
+}
+
+/// Benchmarks `csv::write_csv` over 100, 1000, and 10_000 pre-generated rows.
+fn bench_csv_output(c: &mut Criterion) {
+    let mut csv_group = c.benchmark_group("output/csv");
+    for n_rows in [100_usize, 1000, 10_000] {
+        let generated = make_generated_data(n_rows);
+        csv_group.throughput(Throughput::Elements(n_rows as u64));
+        csv_group.bench_with_input(
+            BenchmarkId::new("rows", n_rows),
+            &generated,
+            |bencher, generated| {
+                bencher.iter(|| {
+                    // Write into a throwaway sink: the output is discarded, not stored.
+                    csv::write_csv(&mut NullWriter, generated).unwrap();
+                });
+            },
+        );
+    }
+    csv_group.finish();
+}
+
+criterion_group!(benches, bench_sql_output, bench_json_output, bench_csv_output);
+criterion_main!(benches); // generates this bench binary's `main`, which runs the `benches` group