Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 88 additions & 22 deletions src/cypher/cypher.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,25 +92,30 @@ static void lex_string_literal(const char *input, int len, int *pos, char quote,
int start = *pos;
char buf[CBM_SZ_4K];
int blen = 0;
const int max_blen = CBM_SZ_4K - 1;
while (*pos < len && input[*pos] != quote) {
if (input[*pos] == '\\' && *pos + SKIP_ONE < len) {
(*pos)++;
Comment on lines +95 to 98
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Escaped quote truncated wrong 🐞 Bug ≡ Correctness

lex_string_literal() now skips escape handling once the buffer is full, so a backslash at/after the
truncation boundary can be ignored and an escaped quote (\") can incorrectly terminate the string
literal early. This can produce wrong tokens / parse failures for long strings with escapes near
byte 4095.
Agent Prompt
## Issue description
`lex_string_literal()` truncates output once `blen` reaches `CBM_SZ_4K - 1`, but the new truncation path skips escape handling. If a long literal contains an escaped quote (e.g. `\"`) after the truncation boundary, the lexer can incorrectly treat that quote as the terminator.

## Issue Context
The lexer must keep *parsing* the literal correctly even when it stops *buffering* characters.

## Fix Focus Areas
- src/cypher/cypher.c[88-124]

## Suggested fix
Refactor the loop so position advancement always respects escapes, and buffering is conditional:
- Always detect `\\` and advance `*pos` by 2 (when possible) regardless of `blen`.
- Only write into `buf` / increment `blen` when `blen < max_blen`.
- Keep the current `buf[blen] = '\0'` termination behavior.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

switch (input[*pos]) {
case 'n':
buf[blen++] = '\n';
break;
case 't':
buf[blen++] = '\t';
break;
case '\\':
buf[blen++] = '\\';
break;
default:
buf[blen++] = input[*pos];
break;
if (blen < max_blen) {
switch (input[*pos]) {
case 'n':
buf[blen++] = '\n';
break;
case 't':
buf[blen++] = '\t';
break;
case '\\':
buf[blen++] = '\\';
break;
default:
buf[blen++] = input[*pos];
break;
}
}
} else {
buf[blen++] = input[*pos];
if (blen < max_blen) {
buf[blen++] = input[*pos];
}
}
(*pos)++;
}
Expand Down Expand Up @@ -469,6 +474,9 @@ static int parse_props(parser_t *p, cbm_prop_filter_t **out, int *count) {
int cap = CYP_INIT_CAP4;
int n = 0;
cbm_prop_filter_t *arr = malloc(cap * sizeof(cbm_prop_filter_t));
if (!arr) {
return CBM_NOT_FOUND;
}

while (!check(p, TOK_RBRACE) && !check(p, TOK_EOF)) {
const cbm_token_t *key = expect(p, TOK_IDENT);
Expand All @@ -487,8 +495,18 @@ static int parse_props(parser_t *p, cbm_prop_filter_t **out, int *count) {
}

if (n >= cap) {
cap *= PAIR_LEN;
arr = safe_realloc(arr, cap * sizeof(cbm_prop_filter_t));
int new_cap = cap * PAIR_LEN;
void *tmp = realloc(arr, new_cap * sizeof(cbm_prop_filter_t));
if (!tmp) {
for (int i = 0; i < n; i++) {
free((void *)arr[i].key);
free((void *)arr[i].value);
}
free(arr);
return CBM_NOT_FOUND;
}
arr = tmp;
cap = new_cap;
}
arr[n].key = heap_strdup(key->text);
arr[n].value = heap_strdup(val->text);
Expand Down Expand Up @@ -569,6 +587,9 @@ static int parse_rel_types(parser_t *p, cbm_rel_pattern_t *out) {
int cap = CYP_INIT_CAP4;
int n = 0;
const char **types = malloc(cap * sizeof(const char *));
if (!types) {
return CBM_NOT_FOUND;
}

const cbm_token_t *t = expect(p, TOK_IDENT);
if (!t) {
Expand All @@ -587,8 +608,17 @@ static int parse_rel_types(parser_t *p, cbm_rel_pattern_t *out) {
return CBM_NOT_FOUND;
}
if (n >= cap) {
cap *= PAIR_LEN;
types = safe_realloc(types, cap * sizeof(const char *));
int new_cap = cap * PAIR_LEN;
void *tmp = realloc(types, new_cap * sizeof(const char *));
if (!tmp) {
for (int i = 0; i < n; i++) {
free((void *)types[i]);
}
free(types);
return CBM_NOT_FOUND;
}
types = (const char **)tmp;
cap = new_cap;
}
types[n++] = heap_strdup(t->text);
}
Expand Down Expand Up @@ -762,14 +792,32 @@ static cbm_expr_t *parse_in_list(parser_t *p, cbm_condition_t *c) {
int vcap = CYP_INIT_CAP8;
int vn = 0;
const char **vals = malloc(vcap * sizeof(const char *));
if (!vals) {
free((void *)c->variable);
free((void *)c->property);
free((void *)c->op);
return NULL;
}
while (!check(p, TOK_RBRACKET) && !check(p, TOK_EOF)) {
if (vn > 0) {
match(p, TOK_COMMA);
}
if (check(p, TOK_STRING) || check(p, TOK_NUMBER)) {
if (vn >= vcap) {
vcap *= PAIR_LEN;
vals = safe_realloc(vals, vcap * sizeof(const char *));
int new_vcap = vcap * PAIR_LEN;
void *tmp = realloc((void *)vals, new_vcap * sizeof(const char *));
if (!tmp) {
for (int i = 0; i < vn; i++) {
free((void *)vals[i]);
}
free((void *)vals);
free((void *)c->variable);
free((void *)c->property);
free((void *)c->op);
return NULL;
}
vals = (const char **)tmp;
vcap = new_vcap;
}
vals[vn++] = heap_strdup(advance(p)->text);
} else {
Expand Down Expand Up @@ -1061,8 +1109,15 @@ static const char *parse_value_literal(parser_t *p) {
static cbm_case_expr_t *parse_case_expr(parser_t *p) {
/* CASE already consumed */
cbm_case_expr_t *kase = calloc(CBM_ALLOC_ONE, sizeof(cbm_case_expr_t));
if (!kase) {
return NULL;
}
int bcap = CYP_INIT_CAP4;
kase->branches = malloc(bcap * sizeof(cbm_case_branch_t));
if (!kase->branches) {
free(kase);
return NULL;
}
Comment on lines 1111 to +1120
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parse_case_expr can now return NULL on allocation failure, but callers need a reliable way to detect that as a parse error. Today parse_return_item assigns item->variable = "CASE" unconditionally after calling parse_case_expr, and does not check for NULL, which can allow an OOM to be treated as a successful parse and lead to incorrect execution or downstream NULL handling issues. Consider setting a parser error / returning an error code when parse_case_expr fails, and ensure the caller propagates that failure.

Copilot uses AI. Check for mistakes.
Comment on lines 1111 to +1120
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

2. Oom still crashes on realloc 🐞 Bug ☼ Reliability

The new NULL checks cover initial malloc/calloc, but
parse_case_expr/parse_props/parse_rel_types/parse_in_list still call safe_realloc() without checking
for NULL. On realloc OOM, safe_realloc() frees the old buffer and returns NULL, and the next write
dereferences NULL causing a crash.
Agent Prompt
## Issue description
Several parser functions still crash on OOM during array growth because they use `safe_realloc()` without checking for NULL. `safe_realloc()` frees the old pointer on failure, so a failed growth loses the only reference and the next dereference crashes.

## Issue Context
This PR aims to prevent OOM crashes, so growth paths must be hardened too (not just initial allocations).

## Fix Focus Areas
- src/cypher/cypher.c[463-509]
- src/cypher/cypher.c[572-607]
- src/cypher/cypher.c[760-797]
- src/cypher/cypher.c[1074-1109]
- src/foundation/platform.h[18-32]

## Suggested fix
For each growth site:
1. Use a temporary pointer:
   - `void *tmp = realloc(ptr, new_size);`
   - if `!tmp`: perform local cleanup (free any elements already duplicated), set `p->error` if available, and return an error.
   - else assign `ptr = tmp`.
2. Alternatively, change `safe_realloc()` semantics to *not free* on failure (so callers can handle errors without losing the old buffer), then audit call sites accordingly.
3. Ensure callers propagate an error code distinct enough that higher layers can abort cleanly.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools


while (check(p, TOK_WHEN)) {
advance(p);
Expand All @@ -1073,8 +1128,19 @@ static cbm_case_expr_t *parse_case_expr(parser_t *p) {
}
const char *then_val = parse_value_literal(p);
if (kase->branch_count >= bcap) {
bcap *= PAIR_LEN;
kase->branches = safe_realloc(kase->branches, bcap * sizeof(cbm_case_branch_t));
int new_bcap = bcap * PAIR_LEN;
void *tmp = realloc(kase->branches, new_bcap * sizeof(cbm_case_branch_t));
if (!tmp) {
expr_free(when);
for (int i = 0; i < kase->branch_count; i++) {
expr_free(kase->branches[i].when_expr);
}
free(kase->branches);
free(kase);
return NULL;
}
kase->branches = tmp;
bcap = new_bcap;
}
kase->branches[kase->branch_count++] =
(cbm_case_branch_t){.when_expr = when, .then_val = then_val};
Expand Down
22 changes: 19 additions & 3 deletions src/store/store.c
Original file line number Diff line number Diff line change
Expand Up @@ -2552,7 +2552,12 @@ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t
const char *sql = "SELECT label, COUNT(*) FROM nodes WHERE project = ?1 GROUP BY label "
"ORDER BY COUNT(*) DESC;";
sqlite3_stmt *stmt = NULL;
sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL);
if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) {
if (stmt) {
sqlite3_finalize(stmt);
}
return CBM_NOT_FOUND;
}
bind_text(stmt, SKIP_ONE, project);

int cap = ST_INIT_CAP_8;
Expand All @@ -2577,7 +2582,13 @@ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t
const char *sql = "SELECT type, COUNT(*) FROM edges WHERE project = ?1 GROUP BY type ORDER "
"BY COUNT(*) DESC;";
sqlite3_stmt *stmt = NULL;
sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL);
if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) {
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the first (node labels) query succeeds and allocates out->node_labels, then the edge-types prepare failure returns immediately without freeing the already-populated fields in *out. Since callers typically only call cbm_store_schema_free() on CBM_STORE_OK, this leaks memory on prepare failures. Before returning here, free any partially populated schema (e.g., call cbm_store_schema_free(out) or manually clean up fields already set).

Suggested change
if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) {
if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) {
cbm_store_schema_free(out);

Copilot uses AI. Check for mistakes.
if (stmt) {
sqlite3_finalize(stmt);
}
cbm_store_schema_free(out);
return CBM_NOT_FOUND;
}
Comment on lines +2585 to +2591
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

3. Schema error path leaks 🐞 Bug ☼ Reliability

cbm_store_get_schema() now returns early if the second sqlite3_prepare_v2 fails, but it may already
have allocated out->node_labels from the first query. Callers like ui/layout3d.c only free schema
when cbm_store_get_schema returns OK, so this introduces a leak on that failure path.
Agent Prompt
## Issue description
`cbm_store_get_schema()` can now return early after partially filling `out` (e.g., after allocating `out->node_labels`). Some callers only free schema on success, so this introduces a leak.

## Issue Context
The function currently has two independent query blocks and returns directly inside the second block.

## Fix Focus Areas
- src/store/store.c[2544-2605]
- src/ui/layout3d.c[441-492]

## Suggested fix
Option A (preferred): make `cbm_store_get_schema()` self-cleaning on failure:
- Replace early `return CBM_NOT_FOUND;` with `goto cleanup;`.
- In `cleanup:`, call `sqlite3_finalize(stmt)` if non-NULL, and call `cbm_store_schema_free(out)` (or manual frees) before returning an error.

Option B: update callers to always call `cbm_store_schema_free(&schema)` regardless of return code (safe because it zeros and checks fields), but still consider Option A for robustness.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

bind_text(stmt, SKIP_ONE, project);

int cap = ST_INIT_CAP_8;
Expand Down Expand Up @@ -3283,7 +3294,12 @@ static bool pkg_in_list(const char *pkg, char **list, int count) {
static int collect_pkg_names(cbm_store_t *s, const char *sql, const char *project, char **pkgs,
int max_pkgs) {
sqlite3_stmt *stmt = NULL;
sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL);
if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) {
if (stmt) {
sqlite3_finalize(stmt);
}
return CBM_NOT_FOUND;
}
Comment on lines 3296 to +3302
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On sqlite3_prepare_v2 failure this now returns 0, which is indistinguishable from “query succeeded but returned 0 rows”. That masks database/SQL errors and can cause silent misclassification in arch_layers. Consider returning an error code (or a negative sentinel) and letting the caller decide whether to treat it as fatal vs. optional, and set the store error buffer for diagnostics.

Copilot uses AI. Check for mistakes.
bind_text(stmt, SKIP_ONE, project);
int count = 0;
while (sqlite3_step(stmt) == SQLITE_ROW && count < max_pkgs) {
Expand Down
27 changes: 27 additions & 0 deletions tests/test_cypher.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,32 @@ TEST(cypher_lex_single_quote_string) {
PASS();
}

TEST(cypher_lex_string_overflow) {
/* Build a string literal longer than 4096 bytes to verify we don't
* overflow the stack buffer in lex_string_literal. */
const int big = 5000;
/* query: "AAAA...A" (quotes included) */
char *query = malloc(big + 3); /* quote + big chars + quote + NUL */
ASSERT_NOT_NULL(query);
query[0] = '"';
memset(query + 1, 'A', big);
query[big + 1] = '"';
query[big + 2] = '\0';

cbm_lex_result_t r = {0};
int rc = cbm_lex(query, &r);
ASSERT_EQ(rc, 0);
ASSERT_NULL(r.error);
ASSERT_GTE(r.count, 1);
ASSERT_EQ(r.tokens[0].type, TOK_STRING);
/* The string should be truncated to CBM_SZ_4K - 1 (4095) characters. */
ASSERT_EQ((int)strlen(r.tokens[0].text), 4095);

cbm_lex_free(&r);
free(query);
PASS();
}

TEST(cypher_lex_number) {
cbm_lex_result_t r = {0};
int rc = cbm_lex("42 3.14", &r);
Expand Down Expand Up @@ -2064,6 +2090,7 @@ SUITE(cypher) {
RUN_TEST(cypher_lex_relationship);
RUN_TEST(cypher_lex_string_literal);
RUN_TEST(cypher_lex_single_quote_string);
RUN_TEST(cypher_lex_string_overflow);
RUN_TEST(cypher_lex_number);
RUN_TEST(cypher_lex_operators);
RUN_TEST(cypher_lex_keywords_case_insensitive);
Expand Down