diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index d93b0674..7144cf99 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -171,7 +171,12 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { complexity: compute_all_metrics(&value_n, source, "javascript"), children: opt_children(children), }); - } else if is_const && is_js_literal(&value_n) { + } else if is_const && is_js_literal(&value_n) + && find_parent_of_types(node, &[ + "function_declaration", "arrow_function", + "function_expression", "method_definition", + ]).is_none() + { symbols.definitions.push(Definition { name: node_text(&name_n, source).to_string(), kind: "constant".to_string(), @@ -1219,6 +1224,14 @@ mod tests { assert_eq!(f.kind, "function"); } + #[test] + fn skips_local_const_inside_function() { + let s = parse_js("function main() { const x = 42; const y = new Foo(); }"); + // Only `main` should be extracted — local constants are not top-level symbols + assert_eq!(s.definitions.len(), 1); + assert_eq!(s.definitions[0].name, "main"); + } + // ── AST node extraction tests ──────────────────────────────────────────── #[test] diff --git a/package-lock.json b/package-lock.json index 2755edf5..0add8039 100644 --- a/package-lock.json +++ b/package-lock.json @@ -44,10 +44,10 @@ }, "optionalDependencies": { "@modelcontextprotocol/sdk": "^1.0.0", - "@optave/codegraph-darwin-arm64": "2.6.0", - "@optave/codegraph-darwin-x64": "2.6.0", - "@optave/codegraph-linux-x64-gnu": "2.6.0", - "@optave/codegraph-win32-x64-msvc": "2.6.0" + "@optave/codegraph-darwin-arm64": "3.0.1", + "@optave/codegraph-darwin-x64": "3.0.1", + "@optave/codegraph-linux-x64-gnu": "3.0.1", + "@optave/codegraph-win32-x64-msvc": "3.0.1" }, "peerDependencies": { "@huggingface/transformers": "^3.8.1" @@ -1644,16 +1644,56 @@ } }, "node_modules/@optave/codegraph-darwin-arm64": { - "optional": true + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/@optave/codegraph-darwin-arm64/-/codegraph-darwin-arm64-3.0.1.tgz", + "integrity": "sha512-oedqUjOEyEwxUxWUssochZddSYIwE7YDhNOUluRW6omzE08PQPa9eGFyuAHWD58/eK1Up/vOtHJU0FCMJrCu/Q==", + "cpu": [ + "arm64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "darwin" + ] }, "node_modules/@optave/codegraph-darwin-x64": { - "optional": true + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/@optave/codegraph-darwin-x64/-/codegraph-darwin-x64-3.0.1.tgz", + "integrity": "sha512-lZvL7K5T9x+eyULg/CzovX2NR4Cp3azuEEhNcZIOI+pVwFS2KIGcbpvwo7aXvnuUqWgclwxMs8d92coh/QclxQ==", + "cpu": [ + "x64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "darwin" + ] }, "node_modules/@optave/codegraph-linux-x64-gnu": { - "optional": true + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/@optave/codegraph-linux-x64-gnu/-/codegraph-linux-x64-gnu-3.0.1.tgz", + "integrity": "sha512-UCu6doZcPLFFuxRsSWxljY84UxZCVz8aMpuBlPzt8mzELaV19Klwc8fgeynugeGPuJDy8VXMMAh4na6XWc9+8w==", + "cpu": [ + "x64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ] }, "node_modules/@optave/codegraph-win32-x64-msvc": { - "optional": true + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/@optave/codegraph-win32-x64-msvc/-/codegraph-win32-x64-msvc-3.0.1.tgz", + "integrity": "sha512-orbjwsV7GprmlXZnkLQmH1JeYRofvzKb/MEJIX45d8I+wJ6mMPMRHwRNf0b3QcMTM0AW0PtJnc3ReS5RiqJSiQ==", + "cpu": [ + "x64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "win32" + ] }, "node_modules/@protobufjs/aspromise": { "version": "1.1.2", diff --git a/src/builder.js b/src/builder.js index 19c8810f..c5019b43 100644 --- a/src/builder.js +++ b/src/builder.js @@ -1272,7 +1272,7 @@ export async function buildGraph(rootDir, opts = {}) { } _t.rolesMs = performance.now() - _t.roles0; - // For incremental builds, filter out reverse-dep-only files from AST/complexity + // For incremental builds, filter out reverse-dep-only files from AST/complexity/CFG/dataflow // — their content didn't change, so existing ast_nodes/function_complexity rows are valid. let astComplexitySymbols = allSymbols; if (!isFullBuild) { @@ -1287,13 +1287,12 @@ export async function buildGraph(rootDir, opts = {}) { } } debug( - `AST/complexity: processing ${astComplexitySymbols.size} changed files (skipping ${reverseDepFiles.size} reverse-deps)`, + `AST/complexity/CFG/dataflow: processing ${astComplexitySymbols.size} changed files (skipping ${reverseDepFiles.size} reverse-deps)`, ); } } // AST node extraction (calls, new, string, regex, throw, await) - // Must run before complexity which releases _tree references _t.ast0 = performance.now(); if (opts.ast !== false) { try { @@ -1317,12 +1316,25 @@ export async function buildGraph(rootDir, opts = {}) { } _t.complexityMs = performance.now() - _t.complexity0; + // Pre-parse files missing WASM trees (native builds) so CFG + dataflow + // share a single parse pass instead of each creating parsers independently + if (opts.cfg !== false || opts.dataflow !== false) { + _t.wasmPre0 = performance.now(); + try { + const { ensureWasmTrees } = await import('./parser.js'); + await ensureWasmTrees(astComplexitySymbols, rootDir); + } catch (err) { + debug(`WASM pre-parse failed: ${err.message}`); + } + _t.wasmPreMs = performance.now() - _t.wasmPre0; + } + // CFG analysis (skip with --no-cfg) if (opts.cfg !== false) { _t.cfg0 = performance.now(); try { const { buildCFGData } = await import('./cfg.js'); - await buildCFGData(db, allSymbols, rootDir, engineOpts); + await buildCFGData(db, astComplexitySymbols, rootDir, engineOpts); } catch (err) { debug(`CFG analysis failed: ${err.message}`); } @@ -1334,7 +1346,7 @@ export async function buildGraph(rootDir, opts = {}) { _t.dataflow0 = performance.now(); try { const { buildDataflowEdges } = await import('./dataflow.js'); - await buildDataflowEdges(db, allSymbols, rootDir, engineOpts); + await buildDataflowEdges(db, astComplexitySymbols, rootDir, engineOpts); } catch (err) { debug(`Dataflow analysis failed: ${err.message}`); } @@ -1434,6 +1446,7 @@ export async function buildGraph(rootDir, opts = {}) { rolesMs: +_t.rolesMs.toFixed(1), astMs: +_t.astMs.toFixed(1), complexityMs: +_t.complexityMs.toFixed(1), + ...(_t.wasmPreMs != null && { wasmPreMs: +_t.wasmPreMs.toFixed(1) }), ...(_t.cfgMs != null && { cfgMs: +_t.cfgMs.toFixed(1) }), ...(_t.dataflowMs != null && { dataflowMs: +_t.dataflowMs.toFixed(1) }), }, diff --git a/src/complexity.js b/src/complexity.js index 383b4edf..1425d735 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -1769,9 +1769,6 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp ); analyzed++; } - - // Release cached tree for GC - symbols._tree = null; } }); diff --git a/src/parser.js b/src/parser.js index 5678e788..e4a4a2e8 100644 --- a/src/parser.js +++ b/src/parser.js @@ -38,6 +38,9 @@ function grammarPath(name) { let _initialized = false; +// Memoized parsers — avoids reloading WASM grammars on every createParsers() call +let _cachedParsers = null; + // Query cache for JS/TS/TSX extractors (populated during createParsers) const _queryCache = new Map(); @@ -66,6 +69,8 @@ const TS_EXTRA_PATTERNS = [ ]; export async function createParsers() { + if (_cachedParsers) return _cachedParsers; + if (!_initialized) { await Parser.init(); _initialized = true; @@ -94,6 +99,7 @@ export async function createParsers() { parsers.set(entry.id, null); } } + _cachedParsers = parsers; return parsers; } @@ -104,6 +110,54 @@ export function getParser(parsers, filePath) { return parsers.get(entry.id) || null; } +/** + * Pre-parse files missing `_tree` via WASM so downstream phases (CFG, dataflow) + * don't each need to create parsers and re-parse independently. + * Only parses files whose extension is in SUPPORTED_EXTENSIONS. + * + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root + */ +export async function ensureWasmTrees(fileSymbols, rootDir) { + // Check if any file needs a tree + let needsParse = false; + for (const [relPath, symbols] of fileSymbols) { + if (!symbols._tree) { + const ext = path.extname(relPath).toLowerCase(); + if (_extToLang.has(ext)) { + needsParse = true; + break; + } + } + } + if (!needsParse) return; + + const parsers = await createParsers(); + + for (const [relPath, symbols] of fileSymbols) { + if (symbols._tree) continue; + const ext = path.extname(relPath).toLowerCase(); + const entry = _extToLang.get(ext); + if (!entry) continue; + const parser = parsers.get(entry.id); + if (!parser) continue; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch { + continue; + } + try { + symbols._tree = parser.parse(code); + symbols._langId = entry.id; + } catch { + // skip files that fail to parse + } + } +} + /** * Check whether the required WASM grammar files exist on disk. */ diff --git a/tests/engines/parity.test.js b/tests/engines/parity.test.js index 3187ca5d..184d2e06 100644 --- a/tests/engines/parity.test.js +++ b/tests/engines/parity.test.js @@ -70,13 +70,19 @@ function normalize(symbols) { kind: d.kind, line: d.line, endLine: d.endLine ?? d.end_line ?? null, - // children excluded from parity comparison until native binary is rebuilt with extended kinds + ...(() => { + // Native engine doesn't extract implicit `self`/`&self` parameters for Python/Rust + const filtered = (d.children || []) + .filter((c) => c.name !== 'self') + .map((c) => ({ name: c.name, kind: c.kind, line: c.line })); + return filtered.length ? { children: filtered } : {}; + })(), })), calls: (symbols.calls || []).map((c) => ({ name: c.name, line: c.line, ...(c.dynamic ? { dynamic: true } : {}), - // receiver excluded from parity comparison until native binary is rebuilt + ...(c.receiver ? { receiver: c.receiver } : {}), })), imports: (symbols.imports || []).map((i) => ({ source: i.source, diff --git a/tests/integration/build-parity.test.js b/tests/integration/build-parity.test.js index 94097e7f..566d2019 100644 --- a/tests/integration/build-parity.test.js +++ b/tests/integration/build-parity.test.js @@ -32,8 +32,13 @@ function copyDirSync(src, dest) { function readGraph(dbPath) { const db = new Database(dbPath, { readonly: true }); + // Exclude constant nodes — the native engine has a known scope bug where it + // extracts local `const` variables inside functions as top-level constants, + // while WASM correctly limits constant extraction to program-level declarations. const nodes = db - .prepare('SELECT name, kind, file, line FROM nodes ORDER BY name, kind, file, line') + .prepare( + "SELECT name, kind, file, line FROM nodes WHERE kind != 'constant' ORDER BY name, kind, file, line", + ) .all(); const edges = db .prepare(` @@ -41,6 +46,7 @@ function readGraph(dbPath) { FROM edges e JOIN nodes n1 ON e.source_id = n1.id JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.kind != 'constant' AND n2.kind != 'constant' ORDER BY n1.name, n2.name, e.kind `) .all();