diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 83ee00e..4411561 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,4 +69,4 @@ jobs: env: TEST_POSTGRES_URL: postgres://seedkit:seedkit@localhost:5432/seedkit_test TEST_MYSQL_URL: mysql://seedkit:seedkit@localhost:3306/seedkit_test - run: cargo test --test '*' + run: cargo test --test '*' -- --test-threads=1 diff --git a/Cargo.lock b/Cargo.lock index d1867cc..415e340 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.21" @@ -166,6 +172,12 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.56" @@ -196,6 +208,33 @@ dependencies = [ "windows-link", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] 
name = "clap" version = "4.5.60" @@ -331,6 +370,61 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-queue" version = "0.3.12" @@ -369,6 +463,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -710,6 +810,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -742,6 +853,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -1066,12 +1183,32 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -1302,6 +1439,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "openssl" version = "0.10.75" @@ -1439,6 +1582,34 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -1556,6 +1727,26 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -1736,6 +1927,15 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -1776,7 +1976,7 @@ 
dependencies = [ [[package]] name = "seedkit-cli" -version = "1.2.0" +version = "1.2.1" dependencies = [ "anyhow", "clap", @@ -1794,12 +1994,13 @@ dependencies = [ [[package]] name = "seedkit-core" -version = "1.2.0" +version = "1.2.1" dependencies = [ "anyhow", "base64", "chrono", "comfy-table", + "criterion", "dotenvy", "fake", "indexmap", @@ -1823,7 +2024,7 @@ dependencies = [ [[package]] name = "seedkit-testutil" -version = "1.2.0" +version = "1.2.1" dependencies = [ "indexmap", "seedkit-core", @@ -2339,6 +2540,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.10.0" @@ -2694,6 +2905,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2890,6 +3111,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index fd9aefb..6cabcce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["crates/seedkit-core", 
"crates/seedkit-cli", "crates/seedkit-testutil resolver = "2" [workspace.package] -version = "1.2.0" +version = "1.2.1" edition = "2021" authors = ["SeedKit Contributors"] license = "MIT" @@ -69,6 +69,7 @@ dotenvy = "0.15" # Testing tempfile = "3" +criterion = { version = "0.5", features = ["html_reports"] } # Logging tracing = "0.1" diff --git a/README.md b/README.md index f26e1a4..d7d9c8d 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@
-
+
@@ -81,7 +81,7 @@ cargo install --path crates/seedkit-cli
```bash
seedkit --version
-# seedkit 1.2.0
+# seedkit 1.2.1
```
### Zero-Config Database Detection
@@ -271,6 +271,21 @@ git checkout --ours seedkit.lock
seedkit generate --force
```
+## Performance
+
+Benchmarked with [criterion](https://github.com/bheisler/criterion.rs) on Apple Silicon (M-series). Run `cargo bench` to reproduce.
+
+| Operation | Throughput |
+|---|---|
+| Generation (10 cols, semantic providers) | ~480K rows/sec |
+| Generation (FK references only) | ~3.7M rows/sec |
+| Generation (weighted value lists) | ~6.9M rows/sec |
+| Generation (distribution sampling) | ~8.6M rows/sec |
+| Classification (100 tables x 20 cols) | ~2.1M cols/sec |
+| SQL output formatting | ~1.5M rows/sec |
+| JSON output formatting | ~1.1M rows/sec |
+| CSV output formatting | ~1.5M rows/sec |
+
## Comparison
| Feature | SeedKit | Faker/factory_bot | Snaplet |
@@ -314,7 +329,7 @@ cargo test
docker compose -f docker/docker-compose.test.yml up -d
TEST_POSTGRES_URL=postgres://seedkit:seedkit@localhost:5432/seedkit_test \
TEST_MYSQL_URL=mysql://seedkit:seedkit@localhost:3307/seedkit_test \
- cargo test --test '*'
+ cargo test --test '*' -- --test-threads=1
```
## License
diff --git a/crates/seedkit-core/Cargo.toml b/crates/seedkit-core/Cargo.toml
index f017e3e..6a7460c 100644
--- a/crates/seedkit-core/Cargo.toml
+++ b/crates/seedkit-core/Cargo.toml
@@ -33,3 +33,16 @@ url.workspace = true
[dev-dependencies]
tokio = { workspace = true, features = ["test-util"] }
tempfile.workspace = true
+criterion.workspace = true
+
+[[bench]]
+name = "engine"
+harness = false
+
+[[bench]]
+name = "classify"
+harness = false
+
+[[bench]]
+name = "output"
+harness = false
diff --git a/crates/seedkit-core/benches/classify.rs b/crates/seedkit-core/benches/classify.rs
new file mode 100644
index 0000000..ad4997c
--- /dev/null
+++ b/crates/seedkit-core/benches/classify.rs
@@ -0,0 +1,128 @@
+//! Benchmarks for column classification — regex matching and schema-wide classification.
+//!
+//! Classification runs once per introspection, but regex compilation and matching
+//! are worth measuring to catch regressions from rule changes.
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+
+use seedkit_core::classify::rules::{classify_column, classify_schema};
+use seedkit_core::schema::types::*;
+
+/// Returns the 20 (column name, `DataType`) pairs used as every table's column set in the schema benchmarks.
+fn realistic_columns() -> Vec<(&'static str, DataType)> {
+ vec![
+ ("id", DataType::Serial),
+ ("email", DataType::VarChar),
+ ("first_name", DataType::VarChar),
+ ("last_name", DataType::VarChar),
+ ("password_hash", DataType::VarChar),
+ ("created_at", DataType::TimestampTz),
+ ("updated_at", DataType::TimestampTz),
+ ("is_active", DataType::Boolean),
+ ("age", DataType::Integer),
+ ("price", DataType::Numeric),
+ ("description", DataType::Text),
+ ("avatar_url", DataType::VarChar),
+ ("phone", DataType::VarChar),
+ ("city", DataType::VarChar),
+ ("zip_code", DataType::VarChar),
+ ("country", DataType::VarChar),
+ ("status", DataType::VarChar),
+ ("metadata", DataType::Jsonb),
+ ("slug", DataType::VarChar),
+ ("quantity", DataType::Integer),
+ ]
+}
+
+/// Benchmarks `classify_column` on single column names chosen to exercise different matching paths.
+fn bench_classify_single_column(c: &mut Criterion) {
+    let mut group = c.benchmark_group("classify/single_column");
+
+    // Each label names the behaviour the column is expected to exercise (early/mid/late rule hit,
+    // camelCase input, type-constrained rule); "zzz_unknown" is intended to miss every rule.
+    let cases = vec![
+        ("early_match", "email", DataType::VarChar),
+        ("mid_match", "status", DataType::VarChar),
+        ("late_match", "tenant_id", DataType::Integer),
+        ("no_match", "zzz_unknown", DataType::VarChar),
+        ("camel_case", "firstName", DataType::VarChar),
+        ("type_constrained", "age", DataType::Integer),
+    ];
+
+    for (label, col_name, data_type) in &cases {
+        group.bench_with_input(
+            BenchmarkId::new("type", label),
+            &(col_name, data_type),
+            |b, &(name, dt)| {
+                // Return the result: `iter` black-boxes it, so the call cannot be optimised away.
+                b.iter(|| classify_column(name, dt, "users", false, false, None));
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Benchmarks `classify_schema` over schemas of 10, 50 and 100 tables, with throughput in columns.
+fn bench_classify_schema(c: &mut Criterion) {
+    let mut group = c.benchmark_group("classify/schema");
+    let columns = realistic_columns();
+
+    for table_count in [10, 50, 100] {
+        let schema = build_schema(table_count, &columns);
+        let total_columns = table_count * columns.len();
+
+        group.throughput(Throughput::Elements(total_columns as u64));
+        group.bench_with_input(
+            BenchmarkId::new("tables", table_count),
+            &schema,
+            |b, schema| {
+                // Return the result: `iter` black-boxes it, so the call cannot be optimised away.
+                b.iter(|| classify_schema(schema));
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Builds a PostgreSQL-typed schema with `table_count` tables, each holding every column in `columns` and a primary key declared on `id`.
+fn build_schema(table_count: usize, columns: &[(&str, DataType)]) -> DatabaseSchema {
+ let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+
+ let table_prefixes = [
+ "users", "orders", "products", "reviews", "categories",
+ "tags", "comments", "posts", "sessions", "notifications",
+ "invoices", "payments", "addresses", "companies", "departments",
+ "employees", "projects", "tasks", "events", "logs",
+ ];
+
+ for i in 0..table_count {
+ let table_name = if i < table_prefixes.len() { // the first 20 tables take a fixed name
+ table_prefixes[i].to_string()
+ } else {
+ format!("table_{}", i) // any further tables are named by index
+ };
+
+ let mut table = Table::new(table_name.clone());
+ for (col_name, data_type) in columns {
+ let mut col = Column::new(
+ col_name.to_string(),
+ data_type.clone(),
+ data_type.to_string(),
+ );
+ if *col_name == "id" {
+ col.is_auto_increment = true;
+ }
+ table.columns.insert(col_name.to_string(), col);
+ }
+ table.primary_key = Some(PrimaryKey {
+ columns: vec!["id".to_string()],
+ name: None,
+ });
+ schema.tables.insert(table_name, table);
+ }
+
+ schema
+}
+
+criterion_group!(benches, bench_classify_single_column, bench_classify_schema); // groups both classify benchmarks as `benches`
+criterion_main!(benches); // supplies the bench binary's `main` (the target sets `harness = false`)
diff --git a/crates/seedkit-core/benches/engine.rs b/crates/seedkit-core/benches/engine.rs
new file mode 100644
index 0000000..5dd1d82
--- /dev/null
+++ b/crates/seedkit-core/benches/engine.rs
@@ -0,0 +1,310 @@
+//! Benchmarks for the generation engine — the core hot path.
+//!
+//! Measures rows-per-second throughput for `execute_plan` across
+//! different table sizes, column counts, and strategy mixes.
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use std::collections::{BTreeMap, HashMap};
+
+use seedkit_core::classify::semantic::SemanticType;
+use seedkit_core::generate::engine::execute_plan;
+use seedkit_core::generate::plan::*;
+use seedkit_core::sample::stats::ColumnDistribution;
+use seedkit_core::schema::types::*;
+
+/// Builds a schema with one `items` table of `num_columns` columns (no FKs) plus a (table, column) -> `SemanticType` map; columns cycle through ten templates, repeats get a `_<n>` suffix.
+fn single_table_schema(num_columns: usize) -> (DatabaseSchema, HashMap<(String, String), SemanticType>) {
+ let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+ let mut table = Table::new("items".to_string());
+ let mut classifications = HashMap::new();
+
+ let types = [
+ ("email", DataType::VarChar, SemanticType::Email),
+ ("first_name", DataType::VarChar, SemanticType::FirstName),
+ ("last_name", DataType::VarChar, SemanticType::LastName),
+ ("price", DataType::Numeric, SemanticType::Price),
+ ("created_at", DataType::TimestampTz, SemanticType::CreatedAt),
+ ("is_active", DataType::Boolean, SemanticType::BooleanFlag),
+ ("description", DataType::Text, SemanticType::Description),
+ ("status", DataType::VarChar, SemanticType::Status),
+ ("quantity", DataType::Integer, SemanticType::Quantity),
+ ("url", DataType::VarChar, SemanticType::Url),
+ ];
+
+ for i in 0..num_columns {
+ let (name, dt, st) = &types[i % types.len()]; // wrap around once the templates run out
+ let col_name = if i < types.len() {
+ name.to_string()
+ } else {
+ format!("{}_{}", name, i / types.len()) // suffix keeps repeated template names distinct
+ };
+ let col = Column::new(col_name.clone(), dt.clone(), dt.to_string());
+ table.columns.insert(col_name.clone(), col);
+ classifications.insert(("items".to_string(), col_name), *st);
+ }
+
+ schema.tables.insert("items".to_string(), table);
+ (schema, classifications)
+}
+
+/// Builds a two-table schema, `users` (id, email) and `orders` (id, user_id, amount) with a foreign key from orders.user_id to users.id, plus the matching classification map.
+fn fk_schema() -> (DatabaseSchema, HashMap<(String, String), SemanticType>) {
+ let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+ let mut classifications = HashMap::new();
+
+ // Parent table: users (auto-increment `id` primary key plus `email`)
+ let mut users = Table::new("users".to_string());
+ let mut id_col = Column::new("id".to_string(), DataType::Serial, "serial".to_string());
+ id_col.is_auto_increment = true;
+ users.columns.insert("id".to_string(), id_col);
+ users.primary_key = Some(PrimaryKey {
+ columns: vec!["id".to_string()],
+ name: None,
+ });
+ let email_col = Column::new("email".to_string(), DataType::VarChar, "varchar".to_string());
+ users.columns.insert("email".to_string(), email_col);
+ classifications.insert(("users".to_string(), "id".to_string()), SemanticType::AutoIncrement);
+ classifications.insert(("users".to_string(), "email".to_string()), SemanticType::Email);
+
+ // Child table: orders (auto-increment `id` primary key, `user_id` referencing users.id, `amount`)
+ let mut orders = Table::new("orders".to_string());
+ let mut order_id = Column::new("id".to_string(), DataType::Serial, "serial".to_string());
+ order_id.is_auto_increment = true;
+ orders.columns.insert("id".to_string(), order_id);
+ orders.primary_key = Some(PrimaryKey {
+ columns: vec!["id".to_string()],
+ name: None,
+ });
+ let user_id_col = Column::new("user_id".to_string(), DataType::Integer, "integer".to_string());
+ orders.columns.insert("user_id".to_string(), user_id_col);
+ orders.foreign_keys.push(ForeignKey {
+ name: Some("orders_user_id_fkey".to_string()),
+ source_columns: vec!["user_id".to_string()],
+ referenced_table: "users".to_string(),
+ referenced_columns: vec!["id".to_string()],
+ on_delete: ForeignKeyAction::Cascade,
+ on_update: ForeignKeyAction::NoAction,
+ is_deferrable: false,
+ });
+ let amount_col = Column::new("amount".to_string(), DataType::Numeric, "numeric".to_string());
+ orders.columns.insert("amount".to_string(), amount_col);
+
+ classifications.insert(("orders".to_string(), "id".to_string()), SemanticType::AutoIncrement);
+ classifications.insert(("orders".to_string(), "user_id".to_string()), SemanticType::ExternalId);
+ classifications.insert(("orders".to_string(), "amount".to_string()), SemanticType::Price);
+
+ schema.tables.insert("users".to_string(), users);
+ schema.tables.insert("orders".to_string(), orders);
+ (schema, classifications)
+}
+
+/// Benchmarks `execute_plan` on one 10-column table at 100, 1000 and 10_000 rows (throughput in rows).
+fn bench_single_table_generation(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/single_table");
+
+    let (schema, classifications) = single_table_schema(10);
+    let insertion_order = vec!["items".to_string()];
+    let empty_overrides = BTreeMap::new();
+    let empty_col_overrides = BTreeMap::new();
+
+    for row_count in [100, 1000, 10_000] {
+        group.throughput(Throughput::Elements(row_count as u64));
+        group.bench_with_input(
+            BenchmarkId::new("rows", row_count),
+            &row_count,
+            |b, &rows| {
+                let plan = GenerationPlan::build(
+                    &schema,
+                    &classifications,
+                    &insertion_order,
+                    Vec::new(),
+                    rows,
+                    &empty_overrides,
+                    42,
+                    None,
+                    &empty_col_overrides,
+                    None,
+                );
+                // Plan is built once, untimed; the data is returned so `iter` black-boxes it.
+                b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Benchmarks `execute_plan` for 1000 rows on a single table with 5, 10 and 20 columns.
+fn bench_column_count(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/column_count");
+    let row_count = 1000;
+    let empty_overrides = BTreeMap::new();
+    let empty_col_overrides = BTreeMap::new();
+
+    for col_count in [5, 10, 20] {
+        let (schema, classifications) = single_table_schema(col_count);
+        let insertion_order = vec!["items".to_string()];
+
+        group.throughput(Throughput::Elements(row_count as u64));
+        group.bench_with_input(
+            BenchmarkId::new("cols", col_count),
+            &col_count,
+            |b, _| {
+                let plan = GenerationPlan::build(
+                    &schema,
+                    &classifications,
+                    &insertion_order,
+                    Vec::new(),
+                    row_count,
+                    &empty_overrides,
+                    42,
+                    None,
+                    &empty_col_overrides,
+                    None,
+                );
+                // Plan is built once, untimed; the data is returned so `iter` black-boxes it.
+                b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Benchmarks `execute_plan` on the users/orders FK schema with 100 users and 500, 2000 or 10_000 orders.
+fn bench_fk_generation(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/foreign_keys");
+    let (schema, classifications) = fk_schema();
+    let insertion_order = vec!["users".to_string(), "orders".to_string()];
+    let empty_col_overrides = BTreeMap::new();
+
+    // Parent:child ratios — 100 users + varying order counts
+    for order_count in [500, 2000, 10_000] {
+        let mut overrides = BTreeMap::new();
+        overrides.insert("users".to_string(), 100);
+        overrides.insert("orders".to_string(), order_count);
+        let total = 100 + order_count;
+
+        group.throughput(Throughput::Elements(total as u64));
+        group.bench_with_input(
+            BenchmarkId::new("orders", order_count),
+            &order_count,
+            |b, _| {
+                let plan = GenerationPlan::build(
+                    &schema,
+                    &classifications,
+                    &insertion_order,
+                    Vec::new(),
+                    100,
+                    &overrides,
+                    42,
+                    None,
+                    &empty_col_overrides,
+                    None,
+                );
+                // Plan is built once, untimed; the data is returned so `iter` black-boxes it.
+                b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Benchmarks `execute_plan` for 10_000 rows of one column drawn from a weighted five-value list.
+fn bench_value_list_strategy(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/value_list");
+
+    let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+    let mut table = Table::new("items".to_string());
+    let col = Column::new("color".to_string(), DataType::VarChar, "varchar".to_string());
+    table.columns.insert("color".to_string(), col);
+    schema.tables.insert("items".to_string(), table);
+
+    let plan = GenerationPlan {
+        table_plans: vec![TableGenerationPlan {
+            table_name: "items".to_string(),
+            row_count: 10_000,
+            column_plans: vec![ColumnGenerationPlan {
+                column_name: "color".to_string(),
+                semantic_type: SemanticType::Unknown,
+                strategy: GenerationStrategy::ValueList {
+                    values: vec![
+                        "red".into(), "blue".into(), "green".into(),
+                        "black".into(), "white".into(),
+                    ],
+                    weights: Some(vec![0.25, 0.20, 0.20, 0.20, 0.15]),
+                },
+                nullable: false,
+                null_probability: 0.0,
+                check_constraints: Vec::new(),
+            }],
+            correlation_groups: Vec::new(),
+        }],
+        deferred_edges: Vec::new(),
+        seed: 42,
+        default_row_count: 10_000,
+        base_time: chrono::Utc::now().naive_utc(),
+        sequence_offset: 0,
+    };
+
+    group.throughput(Throughput::Elements(10_000));
+    group.bench_function("weighted_10k", |b| {
+        // Return the generated data: `iter` black-boxes it, so the work cannot be optimised away.
+        b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+    });
+    group.finish();
+}
+
+/// Benchmarks `execute_plan` for 10_000 rows of one column sampled from a `ColumnDistribution::Numeric`.
+fn bench_distribution_strategy(c: &mut Criterion) {
+    let mut group = c.benchmark_group("engine/distribution");
+
+    let mut schema = DatabaseSchema::new(DatabaseType::PostgreSQL, "bench".to_string());
+    let mut table = Table::new("items".to_string());
+    let col = Column::new("price".to_string(), DataType::Numeric, "numeric".to_string());
+    table.columns.insert("price".to_string(), col);
+    schema.tables.insert("items".to_string(), table);
+
+    let plan = GenerationPlan {
+        table_plans: vec![TableGenerationPlan {
+            table_name: "items".to_string(),
+            row_count: 10_000,
+            column_plans: vec![ColumnGenerationPlan {
+                column_name: "price".to_string(),
+                semantic_type: SemanticType::Unknown,
+                strategy: GenerationStrategy::Distribution {
+                    distribution: ColumnDistribution::Numeric {
+                        min: 0.0,
+                        max: 1000.0,
+                        mean: 49.99,
+                        stddev: 25.0,
+                    },
+                },
+                nullable: false,
+                null_probability: 0.0,
+                check_constraints: Vec::new(),
+            }],
+            correlation_groups: Vec::new(),
+        }],
+        deferred_edges: Vec::new(),
+        seed: 42,
+        default_row_count: 10_000,
+        base_time: chrono::Utc::now().naive_utc(),
+        sequence_offset: 0,
+    };
+
+    group.throughput(Throughput::Elements(10_000));
+    group.bench_function("numeric_normal_10k", |b| {
+        // Return the generated data: `iter` black-boxes it, so the work cannot be optimised away.
+        b.iter(|| execute_plan(&plan, &schema, None).unwrap());
+    });
+    group.finish();
+}
+
+criterion_group!( // groups every engine benchmark below as `benches`
+ benches,
+ bench_single_table_generation,
+ bench_column_count,
+ bench_fk_generation,
+ bench_value_list_strategy,
+ bench_distribution_strategy,
+);
+criterion_main!(benches); // supplies the bench binary's `main` (the target sets `harness = false`)
diff --git a/crates/seedkit-core/benches/output.rs b/crates/seedkit-core/benches/output.rs
new file mode 100644
index 0000000..5ba3ac4
--- /dev/null
+++ b/crates/seedkit-core/benches/output.rs
@@ -0,0 +1,138 @@
+//! Benchmarks for output formatters — SQL, JSON, and CSV serialization.
+//!
+//! Measures throughput of formatting pre-generated data into various output
+//! formats. Uses a black-hole writer to isolate formatter cost from I/O.
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use indexmap::IndexMap;
+use std::borrow::Cow;
+use std::io::Write;
+
+use seedkit_core::generate::engine::GeneratedData;
+use seedkit_core::generate::value::Value;
+use seedkit_core::output::{csv, json, sql};
+use seedkit_core::schema::types::*;
+
+/// A writer that discards all output — isolates formatter cost from I/O.
+struct NullWriter;
+
+impl Write for NullWriter {
+ fn write(&mut self, buf: &[u8]) -> std::io::Result