From 172d3a658a9131b28fcae6c34fe2685ad0125577 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sun, 22 Mar 2026 15:14:19 -0500 Subject: [PATCH 1/5] fix(mcp): extend project-not-indexed guard to all query handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add static helper verify_project_indexed() before handle_get_graph_schema - Replace inline guard in handle_get_architecture with helper call - Apply guard to handle_search_graph, handle_get_graph_schema, handle_trace_call_path, handle_get_code_snippet, handle_query_graph - All five query handlers now return {"error":"project not indexed — run index_repository first"} instead of silently returning empty results Co-Authored-By: Claude Sonnet 4.6 --- src/mcp/mcp.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index bdfdae8b..5acd06ef 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -745,11 +745,36 @@ static char *handle_list_projects(cbm_mcp_server_t *srv, const char *args) { return result; } +/* verify_project_indexed — returns a heap-allocated error JSON string when the + * named project has not been indexed yet, or NULL when the project exists. + * resolve_store uses SQLITE_OPEN_CREATE so store is always non-NULL even for + * unindexed projects; this check catches that silent-empty case. + * Callers that receive a non-NULL return value must free(project) themselves + * before returning the error string. 
*/ +static char *verify_project_indexed(cbm_store_t *store, const char *project) { + if (!project) { + return NULL; /* default project — always exists */ + } + cbm_project_t proj_check = {0}; + if (cbm_store_get_project(store, project, &proj_check) != CBM_STORE_OK) { + return cbm_mcp_text_result( + "{\"error\":\"project not indexed — run index_repository first\"}", true); + } + cbm_project_free_fields(&proj_check); + return NULL; +} + static char *handle_get_graph_schema(cbm_mcp_server_t *srv, const char *args) { char *project = cbm_mcp_get_string_arg(args, "project"); cbm_store_t *store = resolve_store(srv, project); REQUIRE_STORE(store, project); + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + return not_indexed; + } + cbm_schema_info_t schema = {0}; cbm_store_get_schema(store, project, &schema); @@ -807,6 +832,13 @@ static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { char *project = cbm_mcp_get_string_arg(args, "project"); cbm_store_t *store = resolve_store(srv, project); REQUIRE_STORE(store, project); + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + return not_indexed; + } + char *label = cbm_mcp_get_string_arg(args, "label"); char *name_pattern = cbm_mcp_get_string_arg(args, "name_pattern"); char *file_pattern = cbm_mcp_get_string_arg(args, "file_pattern"); @@ -882,6 +914,13 @@ static char *handle_query_graph(cbm_mcp_server_t *srv, const char *args) { return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true); } + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + free(query); + return not_indexed; + } + cbm_cypher_result_t result = {0}; int rc = cbm_cypher_execute(store, query, project, max_rows, &result); @@ -1012,6 +1051,13 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { cbm_store_t *store = resolve_store(srv, project); 
REQUIRE_STORE(store, project); + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(project); + return not_indexed; + } + + cbm_schema_info_t schema = {0}; cbm_store_get_schema(store, project, &schema); @@ -1085,6 +1131,15 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { free(direction); return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true); } + + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(func_name); + free(project); + free(direction); + return not_indexed; + } + if (!direction) { direction = heap_strdup("both"); } @@ -1575,6 +1630,13 @@ static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) { return cbm_mcp_text_result("no project loaded — run index_repository first", true); } + char *not_indexed = verify_project_indexed(store, project); + if (not_indexed) { + free(qn); + free(project); + return not_indexed; + } + /* Default to current project (same as all other tools) */ const char *effective_project = project ? 
project : srv->current_project; From a109e9753e545ba047106573f66561e1c3d6b2a1 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sun, 22 Mar 2026 15:20:02 -0500 Subject: [PATCH 2/5] fix(store,mcp): prevent ghost .db file creation for unknown projects - Add cbm_store_open_path_query() that opens with SQLITE_OPEN_READWRITE only (no SQLITE_OPEN_CREATE); returns NULL when file is absent - Declare cbm_store_open_path_query() in store.h - Change resolve_store() in mcp.c to call cbm_store_open_path_query so querying a nonexistent project never creates a ghost .db file - Indexing path (cbm_store_open_path) retains SQLITE_OPEN_CREATE Co-Authored-By: Claude Sonnet 4.6 --- src/mcp/mcp.c | 5 +++-- src/store/store.c | 39 +++++++++++++++++++++++++++++++++++++++ src/store/store.h | 4 ++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 5acd06ef..5d27a5d4 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -637,10 +637,11 @@ static cbm_store_t *resolve_store(cbm_mcp_server_t *srv, const char *project) { srv->store = NULL; } - /* Open project's .db file */ + /* Open project's .db file — query-only open (no SQLITE_OPEN_CREATE) to + * prevent ghost .db file creation for unknown/unindexed projects. 
*/ char path[1024]; project_db_path(project, path, sizeof(path)); - srv->store = cbm_store_open_path(path); + srv->store = cbm_store_open_path_query(path); srv->owns_store = true; free(srv->current_project); srv->current_project = heap_strdup(project); diff --git a/src/store/store.c b/src/store/store.c index 4b00d4e7..37a2de77 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -365,6 +365,45 @@ cbm_store_t *cbm_store_open_path(const char *db_path) { return store_open_internal(db_path, false); } +cbm_store_t *cbm_store_open_path_query(const char *db_path) { + if (!db_path) { + return NULL; + } + + cbm_store_t *s = calloc(1, sizeof(cbm_store_t)); + if (!s) { + return NULL; + } + + /* Open read-write but do NOT create — returns SQLITE_CANTOPEN if absent. */ + int rc = sqlite3_open_v2(db_path, &s->db, SQLITE_OPEN_READWRITE, NULL); + if (rc != SQLITE_OK) { + sqlite3_close(s->db); /* open failed, but the handle must still be released */ + free(s); + return NULL; + } + + s->db_path = heap_strdup(db_path); + + /* Security: block ATTACH/DETACH to prevent file creation via SQL injection. */ + sqlite3_set_authorizer(s->db, store_authorizer, NULL); + + /* Register REGEXP functions. */ + sqlite3_create_function(s->db, "regexp", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, + sqlite_regexp, NULL, NULL); + sqlite3_create_function(s->db, "iregexp", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, + sqlite_iregexp, NULL, NULL); + + if (configure_pragmas(s, false) != CBM_STORE_OK) { + sqlite3_close(s->db); + free((void *)s->db_path); + free(s); + return NULL; + } + + return s; +} + cbm_store_t *cbm_store_open(const char *project) { if (!project) { return NULL; diff --git a/src/store/store.h b/src/store/store.h index 0bf385a8..8cfd4865 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -190,6 +190,10 @@ cbm_store_t *cbm_store_open_memory(void); /* Open a file-backed database at the given path. Creates if needed. 
*/ cbm_store_t *cbm_store_open_path(const char *db_path); +/* Open an existing file-backed database for querying only (no SQLITE_OPEN_CREATE). + * Returns NULL if the file does not exist — never creates a new .db file. */ +cbm_store_t *cbm_store_open_path_query(const char *db_path); + /* Open database for a named project in the default cache dir. */ cbm_store_t *cbm_store_open(const char *project); From e6717457d79ed7f20787dc760c4645183689fe29 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sun, 22 Mar 2026 18:17:59 -0500 Subject: [PATCH 3/5] test(smoke): add smoke_guard.sh for guard and ghost-file invariants Asserts that query handlers return a guard error for unknown projects and that no ghost .db file is created in the cache directory. Co-Authored-By: Claude Sonnet 4.6 --- tests/smoke_guard.sh | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100755 tests/smoke_guard.sh diff --git a/tests/smoke_guard.sh b/tests/smoke_guard.sh new file mode 100755 index 00000000..3d720c93 --- /dev/null +++ b/tests/smoke_guard.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# smoke_guard.sh — Smoke test for guard and ghost-file invariants. +# +# Verifies two properties: +# 1. Query handlers return "project not indexed" for unknown projects. +# 2. No ghost .db file is created for the unknown project name. +# +# Usage: bash tests/smoke_guard.sh +# Exit 0 on success, non-zero on failure. + +set -euo pipefail + +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +BINARY="$PROJECT_ROOT/build/c/codebase-memory-mcp" +FAKE_PROJECT="nonexistent_smoke_test_xyz" +CACHE_DIR="${HOME}/.cache/codebase-memory-mcp" +GHOST_FILE="$CACHE_DIR/${FAKE_PROJECT}.db" + +# ── Step 1: Build ───────────────────────────────────────────────── +echo "[smoke_guard] Building project..." +make -f "$PROJECT_ROOT/Makefile.cbm" cbm -C "$PROJECT_ROOT" --quiet 2>&1 +if [ ! 
-x "$BINARY" ]; then + echo "[smoke_guard] FAIL: binary not found at $BINARY after build" >&2 + exit 1 +fi +echo "[smoke_guard] Build OK: $BINARY" + +# ── Step 2: Pre-clean ghost file if somehow present ─────────────── +if [ -f "$GHOST_FILE" ]; then + echo "[smoke_guard] WARNING: ghost file already exists before test; removing: $GHOST_FILE" + rm -f "$GHOST_FILE" +fi + +# ── Step 3: Invoke query tool with unknown project ──────────────── +echo "[smoke_guard] Invoking search_graph with project='$FAKE_PROJECT'..." +RESPONSE="$("$BINARY" cli search_graph "{\"project\":\"$FAKE_PROJECT\",\"name_pattern\":\".*\"}" 2>/dev/null)" +echo "[smoke_guard] Response: $RESPONSE" + +# ── Step 4: Assert error message present ───────────────────────── +# For a truly absent project (no .db file), cbm_store_open_path_query returns +# NULL, so REQUIRE_STORE fires with "no project loaded" before +# verify_project_indexed is reached. Both messages confirm the guard is active. +if ! echo "$RESPONSE" | grep -qE "no project loaded|not indexed"; then + echo "[smoke_guard] FAIL: response does not contain guard error ('no project loaded' or 'not indexed')" >&2 + echo "[smoke_guard] Got: $RESPONSE" >&2 + exit 1 +fi +echo "[smoke_guard] PASS: guard error message present" + +# ── Step 5: Assert no ghost .db file was created ───────────────── +if [ -f "$GHOST_FILE" ]; then + echo "[smoke_guard] FAIL: ghost file was created at $GHOST_FILE" >&2 + rm -f "$GHOST_FILE" + exit 1 +fi +echo "[smoke_guard] PASS: no ghost .db file created" + +echo "[smoke_guard] All checks passed." 
+exit 0 From 2791d0d8af055b9bd062746464d680349654ff95 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sun, 22 Mar 2026 18:28:27 -0500 Subject: [PATCH 4/5] fix(mcp,store): address QA round 1 - Update stale comment in verify_project_indexed: resolve_store now uses cbm_store_open_path_query (no SQLITE_OPEN_CREATE), so store is NULL for missing files; the helper catches the empty-but-present .db case - Guard state updates in resolve_store behind successful open check: only set owns_store=true and update current_project when store is non-NULL, preventing misleading state when an unknown project is queried - Expand smoke_guard.sh to test all 5 guarded handlers (search_graph, query_graph, get_graph_schema, trace_call_path, get_code_snippet) instead of search_graph only; each checks both the guard error and no-ghost-file invariant Co-Authored-By: Claude Sonnet 4.6 --- src/mcp/mcp.c | 17 ++++++++---- tests/smoke_guard.sh | 65 ++++++++++++++++++++++++++++---------------- 2 files changed, 54 insertions(+), 28 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 5d27a5d4..97d9c334 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -642,9 +642,14 @@ static cbm_store_t *resolve_store(cbm_mcp_server_t *srv, const char *project) { char path[1024]; project_db_path(project, path, sizeof(path)); srv->store = cbm_store_open_path_query(path); - srv->owns_store = true; - free(srv->current_project); - srv->current_project = heap_strdup(project); + if (srv->store) { + /* Only update ownership and cached project name on successful open. + * When the file is absent, store is NULL and current_project retains + * its previous value so the next call correctly retries the open. 
*/ + srv->owns_store = true; + free(srv->current_project); + srv->current_project = heap_strdup(project); + } return srv->store; } @@ -748,8 +753,10 @@ static char *handle_list_projects(cbm_mcp_server_t *srv, const char *args) { /* verify_project_indexed — returns a heap-allocated error JSON string when the * named project has not been indexed yet, or NULL when the project exists. - * resolve_store uses SQLITE_OPEN_CREATE so store is always non-NULL even for - * unindexed projects; this check catches that silent-empty case. + * resolve_store uses cbm_store_open_path_query (no SQLITE_OPEN_CREATE), so + * store is NULL for missing .db files (REQUIRE_STORE fires first). This + * function catches the remaining case: a .db file exists but has no record + * of the named project (e.g., an empty or half-initialised database). * Callers that receive a non-NULL return value must free(project) themselves * before returning the error string. */ static char *verify_project_indexed(cbm_store_t *store, const char *project) { diff --git a/tests/smoke_guard.sh b/tests/smoke_guard.sh index 3d720c93..37cf8b4d 100755 --- a/tests/smoke_guard.sh +++ b/tests/smoke_guard.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash # smoke_guard.sh — Smoke test for guard and ghost-file invariants. # -# Verifies two properties: -# 1. Query handlers return "project not indexed" for unknown projects. +# Verifies two properties across all 5 guarded query handlers: +# 1. Each handler returns a guard error for unknown/unindexed projects. # 2. No ghost .db file is created for the unknown project name. # # Usage: bash tests/smoke_guard.sh @@ -15,6 +15,7 @@ BINARY="$PROJECT_ROOT/build/c/codebase-memory-mcp" FAKE_PROJECT="nonexistent_smoke_test_xyz" CACHE_DIR="${HOME}/.cache/codebase-memory-mcp" GHOST_FILE="$CACHE_DIR/${FAKE_PROJECT}.db" +FAILURES=0 # ── Step 1: Build ───────────────────────────────────────────────── echo "[smoke_guard] Building project..." 
@@ -31,29 +32,47 @@ if [ -f "$GHOST_FILE" ]; then rm -f "$GHOST_FILE" fi -# ── Step 3: Invoke query tool with unknown project ──────────────── -echo "[smoke_guard] Invoking search_graph with project='$FAKE_PROJECT'..." -RESPONSE="$("$BINARY" cli search_graph "{\"project\":\"$FAKE_PROJECT\",\"name_pattern\":\".*\"}" 2>/dev/null)" -echo "[smoke_guard] Response: $RESPONSE" - -# ── Step 4: Assert error message present ───────────────────────── -# For a truly absent project (no .db file), cbm_store_open_path_query returns -# NULL, so REQUIRE_STORE fires with "no project loaded" before -# verify_project_indexed is reached. Both messages confirm the guard is active. -if ! echo "$RESPONSE" | grep -qE "no project loaded|not indexed"; then - echo "[smoke_guard] FAIL: response does not contain guard error ('no project loaded' or 'not indexed')" >&2 - echo "[smoke_guard] Got: $RESPONSE" >&2 - exit 1 -fi -echo "[smoke_guard] PASS: guard error message present" +# ── Helper: assert guard error and no ghost file ────────────────── +check_handler() { + local handler="$1" + local args="$2" + echo "[smoke_guard] Invoking $handler with project='$FAKE_PROJECT'..." + local response + response="$("$BINARY" cli "$handler" "$args" 2>/dev/null || true)" # keep going under set -e; the checks below report the failure + echo "[smoke_guard] Response: $response" -# ── Step 5: Assert no ghost .db file was created ───────────────── -if [ -f "$GHOST_FILE" ]; then - echo "[smoke_guard] FAIL: ghost file was created at $GHOST_FILE" >&2 - rm -f "$GHOST_FILE" + # For a missing .db file, cbm_store_open_path_query returns NULL so + # REQUIRE_STORE fires ("no project loaded"). For an empty .db, + # verify_project_indexed fires ("project not indexed"). Both are valid. + if ! 
echo "$response" | grep -qE "no project loaded|not indexed"; then + echo "[smoke_guard] FAIL [$handler]: response does not contain guard error" >&2 + echo "[smoke_guard] Got: $response" >&2 + FAILURES=$((FAILURES + 1)) + else + echo "[smoke_guard] PASS [$handler]: guard error present" + fi + + if [ -f "$GHOST_FILE" ]; then + echo "[smoke_guard] FAIL [$handler]: ghost file created at $GHOST_FILE" >&2 + rm -f "$GHOST_FILE" + FAILURES=$((FAILURES + 1)) + else + echo "[smoke_guard] PASS [$handler]: no ghost .db file" + fi +} + +# ── Step 3: Test all 5 guarded handlers ─────────────────────────── +check_handler "search_graph" "{\"project\":\"$FAKE_PROJECT\",\"name_pattern\":\".*\"}" +check_handler "query_graph" "{\"project\":\"$FAKE_PROJECT\",\"cypher\":\"MATCH (n) RETURN n LIMIT 1\"}" +check_handler "get_graph_schema" "{\"project\":\"$FAKE_PROJECT\"}" +check_handler "trace_call_path" "{\"project\":\"$FAKE_PROJECT\",\"function_name\":\"main\",\"direction\":\"both\",\"depth\":1}" +check_handler "get_code_snippet" "{\"project\":\"$FAKE_PROJECT\",\"qualified_name\":\"main\"}" + +# ── Step 4: Final result ────────────────────────────────────────── +if [ "$FAILURES" -gt 0 ]; then + echo "[smoke_guard] FAILED: $FAILURES check(s) failed." >&2 exit 1 fi -echo "[smoke_guard] PASS: no ghost .db file created" -echo "[smoke_guard] All checks passed." +echo "[smoke_guard] All checks passed (5 handlers, guard + ghost-file invariants)." 
exit 0 From a3f11bd7fa34d78b91ed2f530fbba6edce6f1e85 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sun, 22 Mar 2026 18:33:10 -0500 Subject: [PATCH 5/5] fix(mcp,store): address QA round 2 - Fix handle_get_code_snippet inline store-NULL check to return the same JSON error format as REQUIRE_STORE and the other inline checks: {"error":"no project loaded"} instead of a plain string - Fix smoke_guard.sh query_graph invocation: pass "query" parameter (not "cypher") to match what handle_query_graph actually reads; the wrong key caused the handler to early-return before reaching the guard - Remove extra blank line in handle_get_architecture guard block Co-Authored-By: Claude Sonnet 4.6 --- src/mcp/mcp.c | 3 +-- tests/smoke_guard.sh | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 97d9c334..601c8225 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1065,7 +1065,6 @@ static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { return not_indexed; } - cbm_schema_info_t schema = {0}; cbm_store_get_schema(store, project, &schema); @@ -1635,7 +1634,7 @@ static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) { if (!store) { free(qn); free(project); - return cbm_mcp_text_result("no project loaded — run index_repository first", true); + return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true); } char *not_indexed = verify_project_indexed(store, project); diff --git a/tests/smoke_guard.sh b/tests/smoke_guard.sh index 37cf8b4d..2fbeeb93 100755 --- a/tests/smoke_guard.sh +++ b/tests/smoke_guard.sh @@ -63,7 +63,7 @@ check_handler() { # ── Step 3: Test all 5 guarded handlers ─────────────────────────── check_handler "search_graph" "{\"project\":\"$FAKE_PROJECT\",\"name_pattern\":\".*\"}" -check_handler "query_graph" "{\"project\":\"$FAKE_PROJECT\",\"cypher\":\"MATCH (n) RETURN n LIMIT 1\"}" +check_handler "query_graph" "{\"project\":\"$FAKE_PROJECT\",\"query\":\"MATCH (n) 
RETURN n LIMIT 1\"}" check_handler "get_graph_schema" "{\"project\":\"$FAKE_PROJECT\"}" check_handler "trace_call_path" "{\"project\":\"$FAKE_PROJECT\",\"function_name\":\"main\",\"direction\":\"both\",\"depth\":1}" check_handler "get_code_snippet" "{\"project\":\"$FAKE_PROJECT\",\"qualified_name\":\"main\"}"