diff --git a/src/ast.js b/src/ast.js index e0ccecc9..c88d73a4 100644 --- a/src/ast.js +++ b/src/ast.js @@ -156,9 +156,8 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { return; } - const getNodeId = db.prepare( - 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?', - ); + // Bulk-fetch all node IDs per file (replaces per-def getNodeId calls) + const bulkGetNodeIds = db.prepare('SELECT id, name, kind, line FROM nodes WHERE file = ?'); const tx = db.transaction((rows) => { for (const r of rows) { @@ -172,14 +171,20 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { const rows = []; const defs = symbols.definitions || []; + // Pre-load all node IDs for this file into a map + const nodeIdMap = new Map(); + for (const row of bulkGetNodeIds.all(relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + // 1. Call nodes from symbols.calls (all languages) if (symbols.calls) { for (const call of symbols.calls) { const parentDef = findParentDef(defs, call.line); let parentNodeId = null; if (parentDef) { - const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line); - if (row) parentNodeId = row.id; + parentNodeId = + nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null; } rows.push({ file: relPath, @@ -199,7 +204,7 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { if (symbols._tree) { // WASM path: walk the tree-sitter AST const astRows = []; - walkAst(symbols._tree.rootNode, defs, relPath, astRows, getNodeId); + walkAst(symbols._tree.rootNode, defs, relPath, astRows, nodeIdMap); rows.push(...astRows); } else if (symbols.astNodes?.length) { // Native path: use pre-extracted AST nodes from Rust @@ -207,8 +212,8 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { const parentDef = findParentDef(defs, n.line); let parentNodeId = null; if (parentDef) { - const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line); - if (row) parentNodeId = row.id; + parentNodeId = + nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null; } rows.push({ file: relPath, @@ -235,7 +240,7 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { /** * Walk a tree-sitter AST and collect new/throw/await/string/regex nodes. */ -function walkAst(node, defs, relPath, rows, getNodeId) { +function walkAst(node, defs, relPath, rows, nodeIdMap) { const kind = JS_TS_AST_TYPES[node.type]; if (kind) { // tree-sitter lines are 0-indexed, our DB uses 1-indexed @@ -259,7 +264,7 @@ function walkAst(node, defs, relPath, rows, getNodeId) { if (content.length < 2) { // Still recurse children for (let i = 0; i < node.childCount; i++) { - walkAst(node.child(i), defs, relPath, rows, getNodeId); + walkAst(node.child(i), defs, relPath, rows, nodeIdMap); } return; } @@ -273,8 +278,7 @@ function walkAst(node, defs, relPath, rows, getNodeId) { const parentDef = findParentDef(defs, line); let parentNodeId = null; if (parentDef) { - const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line); - if (row) parentNodeId = row.id; + parentNodeId = nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null; } rows.push({ @@ -293,7 +297,7 @@ function walkAst(node, defs, relPath, rows, getNodeId) { } for (let i = 0; i < node.childCount; i++) { - walkAst(node.child(i), defs, relPath, rows, getNodeId); + walkAst(node.child(i), defs, relPath, rows, nodeIdMap); } } diff --git a/src/builder.js b/src/builder.js index afe47307..19c8810f 100644 --- a/src/builder.js +++ b/src/builder.js @@ -4,7 +4,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { loadConfig } from './config.js'; import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js'; -import { closeDb, getBuildMeta, initSchema, openDb, setBuildMeta } from './db.js'; +import { closeDb, getBuildMeta, initSchema, MIGRATIONS, openDb, setBuildMeta } from './db.js'; import { readJournal, writeJournalHeader } from './journal.js'; import { debug, info, warn } from './logger.js'; import { getActiveEngine, parseFilesAuto } from './parser.js'; @@ -448,17 +448,21 @@ export async function buildGraph(rootDir, opts = {}) { const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts); info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`); - // Check for engine/version mismatch — auto-promote to full rebuild + // Check for engine/schema mismatch — auto-promote to full rebuild + // Only trigger on engine change or schema version change (not every patch/minor bump) + const CURRENT_SCHEMA_VERSION = MIGRATIONS[MIGRATIONS.length - 1].version; let forceFullRebuild = false; if (incremental) { const prevEngine = getBuildMeta(db, 'engine'); - const prevVersion = getBuildMeta(db, 'codegraph_version'); if (prevEngine && prevEngine !== engineName) { info(`Engine changed (${prevEngine} → ${engineName}), promoting to full rebuild.`); forceFullRebuild = true; } - if (prevVersion && prevVersion !== CODEGRAPH_VERSION) { - info(`Version changed (${prevVersion} → ${CODEGRAPH_VERSION}), promoting to full rebuild.`); + const prevSchema = getBuildMeta(db, 'schema_version'); + if (prevSchema && Number(prevSchema) !== CURRENT_SCHEMA_VERSION) { + info( + `Schema version changed (${prevSchema} → ${CURRENT_SCHEMA_VERSION}), promoting to full rebuild.`, + ); forceFullRebuild = true; } } @@ -715,44 +719,66 @@ export async function buildGraph(rootDir, opts = {}) { } } + // Bulk-fetch all node IDs for a file in one query (replaces per-node getNodeId calls) + const bulkGetNodeIds = db.prepare('SELECT id, name, kind, line FROM nodes WHERE file = ?'); + const insertAll = db.transaction(() => { for (const [relPath, symbols] of allSymbols) { fileSymbols.set(relPath, symbols); + // Phase 1: Insert file node + definitions + exports (no children yet) insertNode.run(relPath, 'file', relPath, 0, null, null); - const fileRow = getNodeId.get(relPath, 'file', relPath, 0); for (const def of symbols.definitions) { insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null, null); - const defRow = getNodeId.get(def.name, def.kind, relPath, def.line); + } + for (const exp of symbols.exports) { + insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null); + } + + // Phase 2: Bulk-fetch IDs for file + definitions + const nodeIdMap = new Map(); + for (const row of bulkGetNodeIds.all(relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + + // Phase 3: Insert children with parent_id from the map + for (const def of symbols.definitions) { + if (!def.children?.length) continue; + const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); + if (!defId) continue; + for (const child of def.children) { + insertNode.run(child.name, child.kind, relPath, child.line, child.endLine || null, defId); + } + } + + // Phase 4: Re-fetch to include children IDs + nodeIdMap.clear(); + for (const row of bulkGetNodeIds.all(relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + + // Phase 5: Insert edges using the cached ID map + const fileId = nodeIdMap.get(`${relPath}|file|0`); + for (const def of symbols.definitions) { + const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); // File → top-level definition contains edge - if (fileRow && defRow) { - insertEdge.run(fileRow.id, defRow.id, 'contains', 1.0, 0); + if (fileId && defId) { + insertEdge.run(fileId, defId, 'contains', 1.0, 0); } - if (def.children?.length && defRow) { + if (def.children?.length && defId) { for (const child of def.children) { - insertNode.run( - child.name, - child.kind, - relPath, - child.line, - child.endLine || null, - defRow.id, - ); - // Parent → child contains edge - const childRow = getNodeId.get(child.name, child.kind, relPath, child.line); - if (childRow) { - insertEdge.run(defRow.id, childRow.id, 'contains', 1.0, 0); + const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`); + if (childId) { + // Parent → child contains edge + insertEdge.run(defId, childId, 'contains', 1.0, 0); // Parameter → parent parameter_of edge (inverse direction) if (child.kind === 'parameter') { - insertEdge.run(childRow.id, defRow.id, 'parameter_of', 1.0, 0); + insertEdge.run(childId, defId, 'parameter_of', 1.0, 0); } } } } } - for (const exp of symbols.exports) { - insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null); - } // Update file hash with real mtime+size for incremental builds // Skip for reverse-dep files — they didn't actually change @@ -1223,7 +1249,9 @@ export async function buildGraph(rootDir, opts = {}) { } try { const { buildStructure } = await import('./structure.js'); - buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs); + // Pass changed file paths so incremental builds can scope the rebuild + const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()]; + buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs, changedFilePaths); } catch (err) { debug(`Structure analysis failed: ${err.message}`); } @@ -1244,24 +1272,48 @@ export async function buildGraph(rootDir, opts = {}) { } _t.rolesMs = performance.now() - _t.roles0; - // Always-on AST node extraction (calls, new, string, regex, throw, await) + // For incremental builds, filter out reverse-dep-only files from AST/complexity + // — their content didn't change, so existing ast_nodes/function_complexity rows are valid. + let astComplexitySymbols = allSymbols; + if (!isFullBuild) { + const reverseDepFiles = new Set( + filesToParse.filter((item) => item._reverseDepOnly).map((item) => item.relPath), + ); + if (reverseDepFiles.size > 0) { + astComplexitySymbols = new Map(); + for (const [relPath, symbols] of allSymbols) { + if (!reverseDepFiles.has(relPath)) { + astComplexitySymbols.set(relPath, symbols); + } + } + debug( + `AST/complexity: processing ${astComplexitySymbols.size} changed files (skipping ${reverseDepFiles.size} reverse-deps)`, + ); + } + } + + // AST node extraction (calls, new, string, regex, throw, await) // Must run before complexity which releases _tree references _t.ast0 = performance.now(); - try { - const { buildAstNodes } = await import('./ast.js'); - await buildAstNodes(db, allSymbols, rootDir, engineOpts); - } catch (err) { - debug(`AST node extraction failed: ${err.message}`); + if (opts.ast !== false) { + try { + const { buildAstNodes } = await import('./ast.js'); + await buildAstNodes(db, astComplexitySymbols, rootDir, engineOpts); + } catch (err) { + debug(`AST node extraction failed: ${err.message}`); + } } _t.astMs = performance.now() - _t.ast0; // Compute per-function complexity metrics (cognitive, cyclomatic, nesting) _t.complexity0 = performance.now(); - try { - const { buildComplexityMetrics } = await import('./complexity.js'); - await buildComplexityMetrics(db, allSymbols, rootDir, engineOpts); - } catch (err) { - debug(`Complexity analysis failed: ${err.message}`); + if (opts.complexity !== false) { + try { + const { buildComplexityMetrics } = await import('./complexity.js'); + await buildComplexityMetrics(db, astComplexitySymbols, rootDir, engineOpts); + } catch (err) { + debug(`Complexity analysis failed: ${err.message}`); + } } _t.complexityMs = performance.now() - _t.complexity0; @@ -1342,6 +1394,7 @@ export async function buildGraph(rootDir, opts = {}) { engine: engineName, engine_version: engineVersion || '', codegraph_version: CODEGRAPH_VERSION, + schema_version: String(CURRENT_SCHEMA_VERSION), built_at: new Date().toISOString(), node_count: nodeCount, edge_count: actualEdgeCount, @@ -1379,6 +1432,7 @@ export async function buildGraph(rootDir, opts = {}) { edgesMs: +_t.edgesMs.toFixed(1), structureMs: +_t.structureMs.toFixed(1), rolesMs: +_t.rolesMs.toFixed(1), + astMs: +_t.astMs.toFixed(1), complexityMs: +_t.complexityMs.toFixed(1), ...(_t.cfgMs != null && { cfgMs: +_t.cfgMs.toFixed(1) }), ...(_t.dataflowMs != null && { dataflowMs: +_t.dataflowMs.toFixed(1) }), diff --git a/src/cli.js b/src/cli.js index df564b92..c799ef1c 100644 --- a/src/cli.js +++ b/src/cli.js @@ -105,6 +105,8 @@ program .command('build [dir]') .description('Parse repo and build graph in .codegraph/graph.db') .option('--no-incremental', 'Force full rebuild (ignore file hashes)') + .option('--no-ast', 'Skip AST node extraction (calls, new, string, regex, throw, await)') + .option('--no-complexity', 'Skip complexity metrics computation') .option('--no-dataflow', 'Skip data flow edge extraction') .option('--no-cfg', 'Skip control flow graph building') .action(async (dir, opts) => { @@ -112,6 +114,8 @@ program const engine = program.opts().engine; await buildGraph(root, { incremental: opts.incremental, + ast: opts.ast, + complexity: opts.complexity, engine, dataflow: opts.dataflow, cfg: opts.cfg, diff --git a/src/structure.js b/src/structure.js index 6169795d..f83445bd 100644 --- a/src/structure.js +++ b/src/structure.js @@ -17,7 +17,7 @@ import { isTestFile } from './queries.js'; * @param {Map} lineCountMap - Map of relPath → line count * @param {Set} directories - Set of relative directory paths */ -export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, directories) { +export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, directories, changedFiles) { const insertNode = db.prepare( 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', ); @@ -33,15 +33,49 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) `); - // Clean previous directory nodes/edges (idempotent rebuild) - // Scope contains-edge delete to directory-sourced edges only, - // preserving symbol-level contains edges (file→def, class→method, etc.) - db.exec(` - DELETE FROM edges WHERE kind = 'contains' - AND source_id IN (SELECT id FROM nodes WHERE kind = 'directory'); - DELETE FROM node_metrics; - DELETE FROM nodes WHERE kind = 'directory'; - `); + const isIncremental = changedFiles != null && changedFiles.length > 0; + + if (isIncremental) { + // Incremental: only clean up data for changed files and their ancestor directories + const affectedDirs = new Set(); + for (const f of changedFiles) { + let d = normalizePath(path.dirname(f)); + while (d && d !== '.') { + affectedDirs.add(d); + d = normalizePath(path.dirname(d)); + } + } + const deleteContainsForDir = db.prepare( + "DELETE FROM edges WHERE kind = 'contains' AND source_id IN (SELECT id FROM nodes WHERE name = ? AND kind = 'directory')", + ); + const deleteMetricForNode = db.prepare('DELETE FROM node_metrics WHERE node_id = ?'); + db.transaction(() => { + // Delete contains edges only from affected directories + for (const dir of affectedDirs) { + deleteContainsForDir.run(dir); + } + // Delete metrics for changed files + for (const f of changedFiles) { + const fileRow = getNodeId.get(f, 'file', f, 0); + if (fileRow) deleteMetricForNode.run(fileRow.id); + } + // Delete metrics for affected directories + for (const dir of affectedDirs) { + const dirRow = getNodeId.get(dir, 'directory', dir, 0); + if (dirRow) deleteMetricForNode.run(dirRow.id); + } + })(); + } else { + // Full rebuild: clean previous directory nodes/edges (idempotent) + // Scope contains-edge delete to directory-sourced edges only, + // preserving symbol-level contains edges (file→def, class→method, etc.) + db.exec(` + DELETE FROM edges WHERE kind = 'contains' + AND source_id IN (SELECT id FROM nodes WHERE kind = 'directory'); + DELETE FROM node_metrics; + DELETE FROM nodes WHERE kind = 'directory'; + `); + } // Step 1: Ensure all directories are represented (including intermediate parents) const allDirs = new Set(); @@ -61,7 +95,7 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // Step 2: Insert directory nodes + // Step 2: Insert directory nodes (INSERT OR IGNORE — safe for incremental) const insertDirs = db.transaction(() => { for (const dir of allDirs) { insertNode.run(dir, 'directory', dir, 0, null); @@ -70,11 +104,28 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director insertDirs(); // Step 3: Insert 'contains' edges (dir → file, dir → subdirectory) + // On incremental, only re-insert for affected directories (others are intact) + const affectedDirs = isIncremental + ? (() => { + const dirs = new Set(); + for (const f of changedFiles) { + let d = normalizePath(path.dirname(f)); + while (d && d !== '.') { + dirs.add(d); + d = normalizePath(path.dirname(d)); + } + } + return dirs; + })() + : null; + const insertContains = db.transaction(() => { // dir → file for (const relPath of fileSymbols.keys()) { const dir = normalizePath(path.dirname(relPath)); if (!dir || dir === '.') continue; + // On incremental, skip dirs whose contains edges are intact + if (affectedDirs && !affectedDirs.has(dir)) continue; const dirRow = getNodeId.get(dir, 'directory', dir, 0); const fileRow = getNodeId.get(relPath, 'file', relPath, 0); if (dirRow && fileRow) { @@ -85,6 +136,8 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director for (const dir of allDirs) { const parent = normalizePath(path.dirname(dir)); if (!parent || parent === '.' || parent === dir) continue; + // On incremental, skip parent dirs whose contains edges are intact + if (affectedDirs && !affectedDirs.has(parent)) continue; const parentRow = getNodeId.get(parent, 'directory', parent, 0); const childRow = getNodeId.get(dir, 'directory', dir, 0); if (parentRow && childRow) { diff --git a/tests/engines/parity.test.js b/tests/engines/parity.test.js index a03ab989..3187ca5d 100644 --- a/tests/engines/parity.test.js +++ b/tests/engines/parity.test.js @@ -70,6 +70,7 @@ function normalize(symbols) { kind: d.kind, line: d.line, endLine: d.endLine ?? d.end_line ?? null, + // children excluded from parity comparison until native binary is rebuilt with extended kinds })), calls: (symbols.calls || []).map((c) => ({ name: c.name, diff --git a/tests/integration/build.test.js b/tests/integration/build.test.js index a45d09ad..65e8af8b 100644 --- a/tests/integration/build.test.js +++ b/tests/integration/build.test.js @@ -421,9 +421,9 @@ describe('version/engine mismatch auto-promotes to full rebuild', () => { }); test('version mismatch triggers full rebuild', async () => { - // Tamper the stored version to simulate an upgrade + // Tamper the stored schema version to simulate a schema upgrade const db = openDb(promoDbPath); - setBuildMeta(db, { codegraph_version: '0.0.0' }); + setBuildMeta(db, { schema_version: '0' }); closeDb(db); const stderrSpy = []; @@ -444,13 +444,13 @@ describe('version/engine mismatch auto-promotes to full rebuild', () => { // Should NOT say "No changes detected" (that would mean incremental ran) expect(output).not.toContain('No changes detected'); - // Verify the stored version is now updated + // Verify the stored schema version is now updated const db2 = new Database(promoDbPath, { readonly: true }); - const version = db2 - .prepare("SELECT value FROM build_meta WHERE key = 'codegraph_version'") + const schemaVersion = db2 + .prepare("SELECT value FROM build_meta WHERE key = 'schema_version'") .get(); db2.close(); - expect(version.value).not.toBe('0.0.0'); + expect(schemaVersion.value).not.toBe('0'); }); test('engine mismatch triggers full rebuild', async () => {