From 8dd490b47c6211dee387d5b355b58fb15c7788a0 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Thu, 19 Mar 2026 17:21:48 -0500 Subject: [PATCH 1/3] fix(store): keep WAL journal mode during bulk write to prevent DB corruption on crash cbm_store_begin_bulk() was switching the SQLite journal mode from WAL to MEMORY for write throughput. If the process crashed mid-bulk-write the in-memory rollback journal was lost, leaving the database file in a partially-written, unrecoverable state. WAL mode is inherently crash-safe: uncommitted WAL entries are discarded on the next open. The performance benefit of bulk mode is preserved via synchronous=OFF and an enlarged cache_size, both of which are safe under WAL. Remove the PRAGMA journal_mode = MEMORY from cbm_store_begin_bulk and the matching PRAGMA journal_mode = WAL from cbm_store_end_bulk. Update the header comments to reflect the new invariant. Add tests/test_store_bulk.c with three tests: - bulk_pragma_wal_invariant: asserts journal_mode remains "wal" after cbm_store_begin_bulk via an independent read-only connection - bulk_pragma_end_wal_invariant: asserts journal_mode remains "wal" after cbm_store_end_bulk - bulk_crash_recovery: forks a child that enters bulk mode, opens an explicit transaction, writes data, then calls _exit() without committing; the parent verifies the database opens cleanly and baseline data survives Co-Authored-By: Claude Sonnet 4.6 --- Makefile.cbm | 3 +- src/store/store.c | 18 ++--- src/store/store.h | 5 +- tests/test_main.c | 2 + tests/test_store_bulk.c | 162 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 177 insertions(+), 13 deletions(-) create mode 100644 tests/test_store_bulk.c diff --git a/Makefile.cbm b/Makefile.cbm index 666a9455..5bf2193a 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -247,7 +247,8 @@ TEST_STORE_SRCS = \ tests/test_store_nodes.c \ tests/test_store_edges.c \ tests/test_store_search.c \ - tests/test_store_arch.c + tests/test_store_arch.c \ + tests/test_store_bulk.c TEST_CYPHER_SRCS = \ tests/test_cypher.c diff --git a/src/store/store.c b/src/store/store.c index 28e91ed8..51ebf7f7 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -427,11 +427,13 @@ int cbm_store_rollback(cbm_store_t *s) { /* ── Bulk write ─────────────────────────────────────────────────── */ int cbm_store_begin_bulk(cbm_store_t *s) { - int rc = exec_sql(s, "PRAGMA journal_mode = MEMORY;"); - if (rc != CBM_STORE_OK) { - return rc; - } - rc = exec_sql(s, "PRAGMA synchronous = OFF;"); + /* Stay in WAL mode throughout. Switching to MEMORY journal mode would + * make the database unrecoverable if the process crashes mid-write, + * because the in-memory rollback journal is lost on crash. + * WAL mode is crash-safe: uncommitted WAL entries are simply discarded + * on the next open. Performance is preserved via synchronous=OFF and a + * larger cache, which are safe with WAL. */ + int rc = exec_sql(s, "PRAGMA synchronous = OFF;"); if (rc != CBM_STORE_OK) { return rc; } @@ -439,11 +441,7 @@ int cbm_store_begin_bulk(cbm_store_t *s) { } int cbm_store_end_bulk(cbm_store_t *s) { - int rc = exec_sql(s, "PRAGMA journal_mode = WAL;"); - if (rc != CBM_STORE_OK) { - return rc; - } - rc = exec_sql(s, "PRAGMA synchronous = NORMAL;"); + int rc = exec_sql(s, "PRAGMA synchronous = NORMAL;"); if (rc != CBM_STORE_OK) { return rc; } diff --git a/src/store/store.h b/src/store/store.h index 9864ac5f..e3e8fc8e 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -212,10 +212,11 @@ int cbm_store_rollback(cbm_store_t *s); /* ── Bulk write optimization ────────────────────────────────────── */ -/* Switch to MEMORY journal for maximum write throughput. */ +/* Tune pragmas for bulk write throughput (synchronous=OFF, large cache). + * WAL journal mode is preserved throughout for crash safety. */ int cbm_store_begin_bulk(cbm_store_t *s); -/* Restore WAL journal mode after bulk writes. */ +/* Restore normal pragmas (synchronous=NORMAL, default cache) after bulk writes. */ int cbm_store_end_bulk(cbm_store_t *s); /* Drop user indexes for faster bulk inserts. */ diff --git a/tests/test_main.c b/tests/test_main.c index 47c5c542..0fec7f47 100644 --- a/tests/test_main.c +++ b/tests/test_main.c @@ -37,6 +37,7 @@ extern void suite_sqlite_writer(void); extern void suite_go_lsp(void); extern void suite_c_lsp(void); extern void suite_store_arch(void); +extern void suite_store_bulk(void); extern void suite_httplink(void); extern void suite_traces(void); extern void suite_configlink(void); @@ -69,6 +70,7 @@ int main(void) { RUN_SUITE(store_nodes); RUN_SUITE(store_edges); RUN_SUITE(store_search); + RUN_SUITE(store_bulk); /* Cypher (M6) */ RUN_SUITE(cypher); diff --git a/tests/test_store_bulk.c b/tests/test_store_bulk.c new file mode 100644 index 00000000..5b4ed270 --- /dev/null +++ b/tests/test_store_bulk.c @@ -0,0 +1,162 @@ +/* + * test_store_bulk.c — Crash-safety tests for bulk write mode. + * + * Verifies that cbm_store_begin_bulk / cbm_store_end_bulk never switch away + * from WAL journal mode. Switching to MEMORY journal mode during bulk writes + * makes the database unrecoverable on a crash because the in-memory rollback + * journal is lost. WAL mode is inherently crash-safe: uncommitted WAL entries + * are discarded on the next open. + * + * Tests: + * bulk_pragma_wal_invariant — journal_mode stays "wal" after begin_bulk + * bulk_pragma_end_wal_invariant — journal_mode stays "wal" after end_bulk + * bulk_crash_recovery — DB is readable after simulated crash mid-bulk + */ +#include "test_framework.h" +#include +#include +#include +#include +#include +#include +#include + +/* ── Helpers ──────────────────────────────────────────────────── */ + +/* Query journal_mode via a separate read-only connection so the result is + * independent of any state held inside the cbm_store_t under test. */ +static char *get_journal_mode(const char *db_path) { + sqlite3 *db; + if (sqlite3_open_v2(db_path, &db, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) + return NULL; + sqlite3_stmt *stmt; + char *mode = NULL; + if (sqlite3_prepare_v2(db, "PRAGMA journal_mode;", -1, &stmt, NULL) == SQLITE_OK) { + if (sqlite3_step(stmt) == SQLITE_ROW) + mode = strdup((const char *)sqlite3_column_text(stmt, 0)); + sqlite3_finalize(stmt); + } + sqlite3_close(db); + return mode; +} + +static void make_temp_path(char *buf, size_t n) { + snprintf(buf, n, "/tmp/cmm_bulk_test_%d.db", (int)getpid()); +} + +static void cleanup_db(const char *path) { + remove(path); + char aux[512]; + snprintf(aux, sizeof(aux), "%s-wal", path); + remove(aux); + snprintf(aux, sizeof(aux), "%s-shm", path); + remove(aux); +} + +/* ── Tests ──────────────────────────────────────────────────────── */ + +/* begin_bulk must NOT switch journal_mode away from WAL. */ +TEST(bulk_pragma_wal_invariant) { + char db_path[256]; + make_temp_path(db_path, sizeof(db_path)); + cleanup_db(db_path); + + cbm_store_t *s = cbm_store_open_path(db_path); + ASSERT_NOT_NULL(s); + + char *before = get_journal_mode(db_path); + ASSERT_NOT_NULL(before); + ASSERT_STR_EQ(before, "wal"); + free(before); + + int rc = cbm_store_begin_bulk(s); + ASSERT_EQ(rc, CBM_STORE_OK); + + char *after = get_journal_mode(db_path); + ASSERT_NOT_NULL(after); + ASSERT_STR_EQ(after, "wal"); /* FAILS with bug, PASSES with fix */ + free(after); + + cbm_store_end_bulk(s); + cbm_store_close(s); + cleanup_db(db_path); + PASS(); +} + +/* end_bulk must also leave journal_mode as WAL. */ +TEST(bulk_pragma_end_wal_invariant) { + char db_path[256]; + make_temp_path(db_path, sizeof(db_path)); + cleanup_db(db_path); + + cbm_store_t *s = cbm_store_open_path(db_path); + ASSERT_NOT_NULL(s); + + cbm_store_begin_bulk(s); + cbm_store_end_bulk(s); + + char *mode = get_journal_mode(db_path); + ASSERT_NOT_NULL(mode); + ASSERT_STR_EQ(mode, "wal"); + free(mode); + + cbm_store_close(s); + cleanup_db(db_path); + PASS(); +} + +/* Simulate a crash mid-bulk-write: fork a child that calls begin_bulk, opens + * an explicit transaction, and then calls _exit() without committing or calling + * end_bulk. The parent verifies the database is still openable and that + * committed baseline data is intact. */ +TEST(bulk_crash_recovery) { + char db_path[256]; + make_temp_path(db_path, sizeof(db_path)); + cleanup_db(db_path); + + /* Write committed baseline data. */ + cbm_store_t *s = cbm_store_open_path(db_path); + ASSERT_NOT_NULL(s); + int rc = cbm_store_upsert_project(s, "baseline", "/tmp/baseline"); + ASSERT_EQ(rc, CBM_STORE_OK); + cbm_store_close(s); + + /* Child: enter bulk mode, start a transaction, write, then crash. */ + pid_t pid = fork(); + if (pid == 0) { + cbm_store_t *cs = cbm_store_open_path(db_path); + if (!cs) + _exit(1); + cbm_store_begin_bulk(cs); + cbm_store_begin(cs); /* explicit open transaction */ + cbm_store_upsert_project(cs, "crashed", "/tmp/crashed"); + /* Crash: no COMMIT, no end_bulk, no close. */ + _exit(0); + } + ASSERT_GT(pid, 0); + int status; + waitpid(pid, &status, 0); + + /* Recovery: database must open cleanly. */ + cbm_store_t *recovered = cbm_store_open_path(db_path); + ASSERT_NOT_NULL(recovered); /* NULL would indicate corruption */ + + /* Baseline commit must survive. */ + cbm_project_t p = {0}; + rc = cbm_store_get_project(recovered, "baseline", &p); + ASSERT_EQ(rc, CBM_STORE_OK); + ASSERT_STR_EQ(p.name, "baseline"); + cbm_project_free_fields(&p); + + cbm_store_close(recovered); + cleanup_db(db_path); + PASS(); +} + +/* ── Suite ──────────────────────────────────────────────────────── */ + +SUITE(store_bulk) { + RUN_TEST(bulk_pragma_wal_invariant); + RUN_TEST(bulk_pragma_end_wal_invariant); + RUN_TEST(bulk_crash_recovery); +} From 675fdf341e6fa3500cb9a66d14cc655bc43d4875 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Thu, 19 Mar 2026 18:07:17 -0500 Subject: [PATCH 2/3] fix(store): address QA round 1 findings - Wrap bulk_crash_recovery test and its POSIX includes with #ifndef _WIN32 guards to fix compilation failure on Windows (fork/waitpid unavailable) - Add negative assertion that the uncommitted "crashed" row is absent after crash recovery, completing the test's correctness verification Co-Authored-By: Claude Sonnet 4.6 --- tests/test_store_bulk.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_store_bulk.c b/tests/test_store_bulk.c index 5b4ed270..3cd06323 100644 --- a/tests/test_store_bulk.c +++ b/tests/test_store_bulk.c @@ -18,8 +18,10 @@ #include #include #include +#ifndef _WIN32 #include #include +#endif /* ── Helpers ──────────────────────────────────────────────────── */ @@ -108,7 +110,10 @@ TEST(bulk_pragma_end_wal_invariant) { /* Simulate a crash mid-bulk-write: fork a child that calls begin_bulk, opens * an explicit transaction, and then calls _exit() without committing or calling * end_bulk. The parent verifies the database is still openable and that - * committed baseline data is intact. */ + * committed baseline data is intact and uncommitted data is absent. + * + * This test uses fork()/waitpid() and is therefore POSIX-only. */ +#ifndef _WIN32 TEST(bulk_crash_recovery) { char db_path[256]; make_temp_path(db_path, sizeof(db_path)); @@ -148,15 +153,23 @@ TEST(bulk_crash_recovery) { ASSERT_STR_EQ(p.name, "baseline"); cbm_project_free_fields(&p); + /* Uncommitted "crashed" write must NOT appear after recovery. */ + cbm_project_t p2 = {0}; + int rc2 = cbm_store_get_project(recovered, "crashed", &p2); + ASSERT_NEQ(rc2, CBM_STORE_OK); /* row must be absent */ + cbm_store_close(recovered); cleanup_db(db_path); PASS(); } +#endif /* _WIN32 */ /* ── Suite ──────────────────────────────────────────────────────── */ SUITE(store_bulk) { RUN_TEST(bulk_pragma_wal_invariant); RUN_TEST(bulk_pragma_end_wal_invariant); +#ifndef _WIN32 RUN_TEST(bulk_crash_recovery); +#endif } From a0e809d425be89fe15878f6c6a9a5c7fd537de9b Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Thu, 19 Mar 2026 18:17:55 -0500 Subject: [PATCH 3/3] fix(store): address QA round 2 findings - Validate child exit status (WIFEXITED + WEXITSTATUS) after waitpid so the test fails fast if the child couldn't open the store, rather than passing vacuously due to the "crashed" row never being written Co-Authored-By: Claude Sonnet 4.6 --- tests/test_store_bulk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_store_bulk.c b/tests/test_store_bulk.c index 3cd06323..c7b89c55 100644 --- a/tests/test_store_bulk.c +++ b/tests/test_store_bulk.c @@ -141,6 +141,8 @@ TEST(bulk_crash_recovery) { ASSERT_GT(pid, 0); int status; waitpid(pid, &status, 0); + /* Confirm child exited normally so the write actually occurred. */ + ASSERT(WIFEXITED(status) && WEXITSTATUS(status) == 0); /* Recovery: database must open cleanly. */ cbm_store_t *recovered = cbm_store_open_path(db_path);