diff --git a/docs/guides/ai-agent-guide.md b/docs/guides/ai-agent-guide.md index 575ff12a..a378278a 100644 --- a/docs/guides/ai-agent-guide.md +++ b/docs/guides/ai-agent-guide.md @@ -166,7 +166,7 @@ codegraph fn resolve --file resolve.js --depth 5 | | | |---|---| -| **MCP tool** | `fn_deps` | +| **MCP tool** | `query` | | **Key flags** | `--depth ` (default: 3), `-f, --file` (scope to file), `-k, --kind` (filter kind), `-T` (no tests), `-j` (JSON) | | **When to use** | Tracing a call chain — "who calls this and what does it call?" | | **Output** | Direct callees, direct callers, transitive callers up to depth N | @@ -242,7 +242,7 @@ codegraph path parseConfig loadFile --max-depth 5 | | | |---|---| -| **MCP tool** | `symbol_path` | +| **MCP tool** | `query` (with `--path`) | | **Key flags** | `--max-depth ` (default: 10), `--kinds ` (default: calls), `--reverse`, `--from-file`, `--to-file`, `-k, --kind`, `-T` (no tests), `-j` (JSON) | | **When to use** | Understanding how two functions are connected through the call chain | | **Output** | Ordered path with edge kinds, hop count, alternate path count | @@ -493,7 +493,7 @@ codegraph query buildGraph | | | |---|---| -| **MCP tool** | `query_function` | +| **MCP tool** | `query` | | **Key flags** | `-T` (no tests), `-j` (JSON) | | **When to use** | Quick one-off lookup (prefer `fn` or `context` for richer data) | @@ -578,15 +578,14 @@ codegraph mcp --repos "myapp,lib" # Restricted repo list | MCP Tool | CLI Equivalent | Description | |----------|---------------|-------------| -| `query_function` | `query ` | Find callers and callees | +| `query` | `query ` | Find callers/callees, or shortest path between two symbols | | `file_deps` | `deps ` | File imports and importers | | `impact_analysis` | `impact ` | Transitive file-level impact | | `find_cycles` | `cycles` | Circular dependency detection | | `module_map` | `map` | Most-connected files overview | -| `fn_deps` | `fn ` | Function-level call chain | | `fn_impact` | 
`fn-impact ` | Function-level blast radius | -| `symbol_path` | `path ` | Shortest path between two symbols | | `context` | `context ` | Full function context | +| `symbol_children` | `children ` | Sub-declaration children (parameters, properties, constants) | | `explain` | `explain ` | Structural summary | | `where` | `where ` | Symbol definition and usage | | `diff_impact` | `diff-impact [ref]` | Git diff impact analysis | @@ -597,8 +596,7 @@ codegraph mcp --repos "myapp,lib" # Restricted repo list | `hotspots` | `hotspots` | Structural hotspot detection | | `node_roles` | `roles` | Node role classification | | `co_changes` | `co-change` | Git co-change analysis | -| `execution_flow` | `flow` | Execution flow tracing | -| `list_entry_points` | `flow --entry-points` | Framework entry point detection | +| `execution_flow` | `flow` | Execution flow tracing and entry point detection | | `complexity` | `complexity` | Per-function complexity metrics | | `communities` | `communities` | Community detection & drift | | `manifesto` | `manifesto` | Rule engine pass/fail | @@ -608,6 +606,9 @@ codegraph mcp --repos "myapp,lib" # Restricted repo list | `triage` | `triage` | Risk-ranked audit queue | | `check` | `check` | CI validation predicates | | `branch_compare` | `branch-compare` | Structural diff between refs | +| `ast_query` | *(MCP only)* | Search stored AST nodes (calls, literals, new, throw, await) | +| `cfg` | *(MCP only)* | Intraprocedural control flow graph for a function | +| `dataflow` | *(MCP only)* | Data flow edges or data-dependent blast radius | | `list_repos` | `registry list` | List registered repos (multi-repo only) | ### Server Modes diff --git a/docs/guides/recommended-practices.md b/docs/guides/recommended-practices.md index 43deee62..c349191f 100644 --- a/docs/guides/recommended-practices.md +++ b/docs/guides/recommended-practices.md @@ -193,7 +193,7 @@ claude mcp list Enable `--multi-repo` to let the agent query any registered repository, or use 
`--repos` to restrict access to a specific set of repos. -The server exposes 30 tools (31 in multi-repo mode): `query_function`, `file_deps`, `impact_analysis`, `find_cycles`, `module_map`, `fn_deps`, `fn_impact`, `symbol_path`, `context`, `explain`, `where`, `diff_impact`, `semantic_search`, `export_graph`, `list_functions`, `structure`, `hotspots`, `node_roles`, `co_changes`, `execution_flow`, `list_entry_points`, `complexity`, `communities`, `manifesto`, `code_owners`, `audit`, `batch_query`, `triage`, `check`, `branch_compare`, and `list_repos` (multi-repo only). See the [AI Agent Guide MCP reference](./ai-agent-guide.md#mcp-server-reference) for the full tool-to-CLI mapping table. +The server exposes 31 tools (32 in multi-repo mode): `query`, `file_deps`, `impact_analysis`, `find_cycles`, `module_map`, `fn_impact`, `context`, `explain`, `where`, `diff_impact`, `semantic_search`, `export_graph`, `list_functions`, `structure`, `hotspots`, `node_roles`, `co_changes`, `execution_flow`, `complexity`, `communities`, `manifesto`, `code_owners`, `audit`, `batch_query`, `triage`, `check`, `branch_compare`, `ast_query`, `cfg`, `dataflow`, `symbol_children`, and `list_repos` (multi-repo only). See the [AI Agent Guide MCP reference](./ai-agent-guide.md#mcp-server-reference) for the full tool-to-CLI mapping table. ### CLAUDE.md for your project diff --git a/src/ast.js b/src/ast.js new file mode 100644 index 00000000..8c349667 --- /dev/null +++ b/src/ast.js @@ -0,0 +1,392 @@ +/** + * Stored queryable AST nodes — build-time extraction + query functions. + * + * Persists selected AST nodes (calls, new, string, regex, throw, await) in the + * `ast_nodes` table during build. Queryable via CLI (`codegraph ast`), MCP + * (`ast_query`), and programmatic API. 
+ */
+
+import path from 'node:path';
+import { openReadonlyOrFail } from './db.js';
+import { debug } from './logger.js';
+import { paginateResult, printNdjson } from './paginate.js';
+import { LANGUAGE_REGISTRY } from './parser.js';
+
+// ─── Constants ────────────────────────────────────────────────────────
+
+export const AST_NODE_KINDS = ['call', 'new', 'string', 'regex', 'throw', 'await'];
+
+const KIND_ICONS = {
+  call: '\u0192', // ƒ
+  new: '\u2295', // ⊕
+  string: '"',
+  regex: '/',
+  throw: '\u2191', // ↑
+  await: '\u22B3', // ⊳
+};
+
+/** Max length for the `text` column. */
+const TEXT_MAX = 200;
+
+/** tree-sitter node types that map to our AST node kinds (JS/TS/TSX). */
+const JS_TS_AST_TYPES = {
+  new_expression: 'new',
+  throw_statement: 'throw',
+  await_expression: 'await',
+  string: 'string',
+  template_string: 'string',
+  regex: 'regex',
+};
+
+/** Extensions that support full AST walk (new/throw/await/string/regex). */
+const WALK_EXTENSIONS = new Set();
+for (const lang of Object.values(LANGUAGE_REGISTRY)) {
+  if (['javascript', 'typescript', 'tsx'].includes(lang.id)) {
+    for (const ext of lang.extensions) WALK_EXTENSIONS.add(ext);
+  }
+}
+
+// ─── Helpers ──────────────────────────────────────────────────────────
+
+/**
+ * Shorten a string to at most `max` characters.
+ *
+ * @param {string} [s] - text to shorten
+ * @param {number} [max] - maximum length of the result (defaults to TEXT_MAX)
+ * @returns {string|null} null for empty/missing input; `s` unchanged when it
+ *   already fits; otherwise the first `max - 1` characters plus an ellipsis (…)
+ */
+function truncate(s, max = TEXT_MAX) {
+  if (!s) return null;
+  return s.length <= max ? s : `${s.slice(0, max - 1)}\u2026`;
+}
+
+/**
+ * Extract the constructor name from a `new_expression` node.
+ * Handles `new Foo()`, `new a.Foo()`, `new Foo.Bar()`.
+ *
+ * @param {object} node - tree-sitter `new_expression` node
+ * @returns {string} text of the first identifier/member_expression child;
+ *   otherwise the node text before the first `(` with `new ` removed, or '?'
+ */
+function extractNewName(node) {
+  for (let i = 0; i < node.childCount; i++) {
+    const child = node.child(i);
+    if (child.type === 'identifier') return child.text;
+    if (child.type === 'member_expression') {
+      // e.g. new a.Foo() → "a.Foo"
+      return child.text;
+    }
+  }
+  return node.text?.split('(')[0]?.replace('new ', '').trim() || '?';
+}
+
+/**
+ * Extract the expression text from a throw/await node.
+ *
+ * @param {object} node - tree-sitter `throw_statement` or `await_expression` node
+ * @returns {string|null} truncated text of the first child that is not the
+ *   `throw`/`await` keyword token, or the truncated node text when there is none
+ */
+function extractExpressionText(node) {
+  // Skip keyword child, take the rest
+  for (let i = 0; i < node.childCount; i++) {
+    const child = node.child(i);
+    if (child.type !== 'throw' && child.type !== 'await') {
+      return truncate(child.text);
+    }
+  }
+  return truncate(node.text);
+}
+
+/**
+ * Extract a meaningful name from throw/await nodes.
+ * For throw: the constructor or expression type.
+ * For await: the called function name.
+ *
+ * @param {string} kind - 'throw' or 'await' (any other kind yields the truncated node text)
+ * @param {object} node - tree-sitter node of that kind
+ * @returns {string|null} the extracted name, or the truncated node text as a fallback
+ */
+function extractName(kind, node) {
+  if (kind === 'throw') {
+    // throw new Error(...) → "Error"; throw x → "x"
+    for (let i = 0; i < node.childCount; i++) {
+      const child = node.child(i);
+      if (child.type === 'new_expression') return extractNewName(child);
+      if (child.type === 'call_expression') {
+        const fn = child.childForFieldName('function');
+        return fn ? fn.text : child.text?.split('(')[0] || '?';
+      }
+      if (child.type === 'identifier') return child.text;
+    }
+    return truncate(node.text);
+  }
+  if (kind === 'await') {
+    // await fetch(...) → "fetch"; await this.foo() → "this.foo"
+    for (let i = 0; i < node.childCount; i++) {
+      const child = node.child(i);
+      if (child.type === 'call_expression') {
+        const fn = child.childForFieldName('function');
+        return fn ? fn.text : child.text?.split('(')[0] || '?';
+      }
+      if (child.type === 'identifier' || child.type === 'member_expression') {
+        return child.text;
+      }
+    }
+    return truncate(node.text);
+  }
+  return truncate(node.text);
+}
+
+/**
+ * Find the narrowest enclosing definition for a given line.
+ *
+ * A definition encloses `line` when it starts at or before `line` and its
+ * `endLine` is either missing or at/after `line`. A definition without an
+ * `endLine` has no known extent, so it is ranked as infinitely wide: any
+ * enclosing definition with a known range wins over it. Subtracting from a
+ * missing `endLine` would instead produce a negative or NaN width and make
+ * the open-ended definition win (or become impossible to replace).
+ *
+ * Ties: among ranged definitions of equal width the first one in `defs` is
+ * kept; among open-ended definitions the one starting closest to `line` wins.
+ *
+ * @param {Array<object>} defs - definitions with `line` and optional `endLine`
+ * @param {number} line - line to locate (same indexing as `def.line`)
+ * @returns {object|null} the narrowest enclosing definition, or null if none
+ */
+function findParentDef(defs, line) {
+  const width = (def) => (def.endLine == null ? Infinity : def.endLine - def.line);
+  let best = null;
+  for (const def of defs) {
+    if (def.line > line) continue;
+    if (def.endLine != null && def.endLine < line) continue;
+    if (!best) {
+      best = def;
+      continue;
+    }
+    const defWidth = width(def);
+    const bestWidth = width(best);
+    if (defWidth < bestWidth) {
+      best = def;
+    } else if (defWidth === Infinity && bestWidth === Infinity && def.line > best.line) {
+      // Both open-ended: prefer the nearest preceding definition
+      best = def;
+    }
+  }
+  return best;
+}
+
+// ─── Build ────────────────────────────────────────────────────────────
+
+/**
+ * Extract AST nodes from parsed files and persist to the ast_nodes table.
+ *
+ * For every file, one `call` row is stored per entry in `symbols.calls`
+ * (all languages). Files whose extension is in WALK_EXTENSIONS and that still
+ * carry a parsed tree (`symbols._tree`) are additionally walked for
+ * new/throw/await/string/regex nodes. Rows are inserted one transaction per
+ * file. If the `ast_nodes` table does not exist the function logs and returns
+ * without inserting anything.
+ *
+ * @param {object} db - open better-sqlite3 database (read-write)
+ * @param {Map<string, object>} fileSymbols - relative file path → extracted
+ *   symbols; reads `definitions`, `calls` and `_tree`
+ * @param {string} _rootDir - absolute project root path (unused)
+ * @param {object} [_engineOpts] - engine options (unused)
+ * @returns {Promise<void>}
+ */
+export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
+  // Ensure table exists (migration may not have run on older DBs)
+  let insertStmt;
+  try {
+    insertStmt = db.prepare(
+      'INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) VALUES (?, ?, ?, ?, ?, ?, ?)',
+    );
+  } catch {
+    debug('ast_nodes table not found — skipping AST extraction');
+    return;
+  }
+
+  const getNodeId = db.prepare(
+    'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?',
+  );
+
+  const tx = db.transaction((rows) => {
+    for (const r of rows) {
+      insertStmt.run(r.file, r.line, r.kind, r.name, r.text, r.receiver, r.parentNodeId);
+    }
+  });
+
+  let totalInserted = 0;
+
+  for (const [relPath, symbols] of fileSymbols) {
+    const rows = [];
+    const defs = symbols.definitions || [];
+
+    // 1. Call nodes from symbols.calls (all languages)
+    if (symbols.calls) {
+      for (const call of symbols.calls) {
+        rows.push({
+          file: relPath,
+          line: call.line,
+          kind: 'call',
+          name: call.name,
+          text: call.dynamic ? `[dynamic] ${call.name}` : null,
+          receiver: call.receiver || null,
+          parentNodeId: resolveParentNodeId(defs, call.line, relPath, getNodeId),
+        });
+      }
+    }
+
+    // 2. AST walk for JS/TS/TSX — extract new, throw, await, string, regex
+    const ext = path.extname(relPath).toLowerCase();
+    if (WALK_EXTENSIONS.has(ext) && symbols._tree) {
+      // Append straight into `rows`: spreading a very large intermediate array
+      // into push() can exceed the engine's argument limit and throw.
+      walkAst(symbols._tree.rootNode, defs, relPath, rows, getNodeId);
+    }
+
+    if (rows.length > 0) {
+      tx(rows);
+      totalInserted += rows.length;
+    }
+  }
+
+  debug(`AST extraction: ${totalInserted} nodes stored`);
+}
+
+/**
+ * Resolve the `nodes.id` of the narrowest definition enclosing `line`.
+ *
+ * @param {Array<object>} defs - definitions of the file being processed
+ * @param {number} line - 1-indexed line of the AST node
+ * @param {string} relPath - file path as stored in `nodes.file`
+ * @param {object} getNodeId - prepared statement looking up a node by name, kind, file and line
+ * @returns {number|null} the node id, or null when no definition encloses the
+ *   line or the definition has no row in `nodes`
+ */
+function resolveParentNodeId(defs, line, relPath, getNodeId) {
+  const parentDef = findParentDef(defs, line);
+  if (!parentDef) return null;
+  const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line);
+  return row ? row.id : null;
+}
+
+/**
+ * Walk a tree-sitter AST and collect new/throw/await/string/regex nodes.
+ *
+ * Matching nodes are appended to `rows` in the shape expected by the insert
+ * transaction in buildAstNodes. Recursion stops at new/throw/await nodes but
+ * continues below string/regex nodes and below every non-matching node.
+ *
+ * @param {object} node - tree-sitter node to visit
+ * @param {Array<object>} defs - definitions of the file, used to find the parent
+ * @param {string} relPath - file path stored with each row
+ * @param {Array<object>} rows - output array, mutated in place
+ * @param {object} getNodeId - prepared statement used to resolve the parent node id
+ */
+function walkAst(node, defs, relPath, rows, getNodeId) {
+  const kind = JS_TS_AST_TYPES[node.type];
+  if (kind) {
+    // tree-sitter lines are 0-indexed, our DB uses 1-indexed
+    const line = node.startPosition.row + 1;
+
+    let name;
+    let text = null;
+
+    if (kind === 'new') {
+      name = extractNewName(node);
+      text = truncate(node.text);
+    } else if (kind === 'throw') {
+      name = extractName('throw', node);
+      text = extractExpressionText(node);
+    } else if (kind === 'await') {
+      name = extractName('await', node);
+      text = extractExpressionText(node);
+    } else if (kind === 'string') {
+      // Skip trivial strings (length < 2 after removing quotes)
+      const content = node.text?.replace(/^['"`]|['"`]$/g, '') || '';
+      if (content.length < 2) {
+        // Still recurse children
+        for (let i = 0; i < node.childCount; i++) {
+          walkAst(node.child(i), defs, relPath, rows, getNodeId);
+        }
+        return;
+      }
+      name = truncate(content, 100);
+      text = truncate(node.text);
+    } else if (kind === 'regex') {
+      name = node.text || '?';
+      text = truncate(node.text);
+    }
+
+    rows.push({
+      file: relPath,
+      line,
+      kind,
+      name,
+      text,
+      receiver: null,
+      parentNodeId: resolveParentNodeId(defs, line, relPath, getNodeId),
+    });
+
+    // Don't recurse into the children of matched nodes for new/throw/await
+    // (we already extracted what we need, and nested strings inside them are noise)
+    if (kind !== 'string' && kind !== 'regex') return;
+  }
+
+  for (let i = 0; i < node.childCount; i++) {
+    walkAst(node.child(i), defs, relPath, rows, getNodeId);
+  }
+}
+
+// ─── Query ────────────────────────────────────────────────────────────
+
+/**
+ * Query AST nodes — data-returning function.
+ *
+ * Opens the graph database read-only, runs one SELECT over `ast_nodes`
+ * (left-joined to `nodes` for the parent definition) and always closes the
+ * handle again, including when the query throws.
+ *
+ * @param {string} [pattern] - GLOB pattern for node name (auto-wrapped in *..*
+ *   when it contains no `*`); omitted or '*' matches every name
+ * @param {string} [customDbPath] - path to graph.db
+ * @param {object} [opts]
+ * @param {string} [opts.kind] - restrict to one AST node kind
+ * @param {string} [opts.file] - substring the file path must contain
+ * @param {boolean} [opts.noTests] - exclude test/spec/stories files
+ * @param {number} [opts.limit] - pagination limit, passed to paginateResult
+ * @param {number} [opts.offset] - pagination offset, passed to paginateResult
+ * @returns {{ pattern, kind, count, results, _pagination? }}
+ */
+export function astQueryData(pattern, customDbPath, opts = {}) {
+  const db = openReadonlyOrFail(customDbPath);
+  const { kind, file, noTests, limit, offset } = opts;
+
+  let where = 'WHERE 1=1';
+  const params = [];
+
+  // Pattern matching
+  if (pattern && pattern !== '*') {
+    // If user already uses wildcards, use as-is; otherwise wrap in *..* for substring
+    const globPattern = pattern.includes('*') ? pattern : `*${pattern}*`;
+    where += ' AND a.name GLOB ?';
+    params.push(globPattern);
+  }
+
+  if (kind) {
+    where += ' AND a.kind = ?';
+    params.push(kind);
+  }
+
+  if (file) {
+    where += ' AND a.file LIKE ?';
+    params.push(`%${file}%`);
+  }
+
+  if (noTests) {
+    where += ` AND a.file NOT LIKE '%.test.%'
+       AND a.file NOT LIKE '%.spec.%'
+       AND a.file NOT LIKE '%__test__%'
+       AND a.file NOT LIKE '%__tests__%'
+       AND a.file NOT LIKE '%.stories.%'`;
+  }
+
+  const sql = `
+    SELECT a.kind, a.name, a.file, a.line, a.text, a.receiver, a.parent_node_id,
+           p.name AS parent_name, p.kind AS parent_kind, p.file AS parent_file
+    FROM ast_nodes a
+    LEFT JOIN nodes p ON a.parent_node_id = p.id
+    ${where}
+    ORDER BY a.file, a.line
+  `;
+
+  let rows;
+  try {
+    rows = db.prepare(sql).all(...params);
+  } finally {
+    // Release the handle even when prepare()/all() throws
+    db.close();
+  }
+
+  const results = rows.map((r) => ({
+    kind: r.kind,
+    name: r.name,
+    file: r.file,
+    line: r.line,
+    text: r.text,
+    receiver: r.receiver,
+    parent: r.parent_node_id
+      ? { name: r.parent_name, kind: r.parent_kind, file: r.parent_file }
+      : null,
+  }));
+
+  const data = {
+    pattern: pattern || '*',
+    kind: kind || null,
+    count: results.length,
+    results,
+  };
+
+  return paginateResult(data, 'results', { limit, offset });
+}
+
+/**
+ * Query AST nodes — display function (human/json/ndjson output).
+ *
+ * Runs astQueryData with the same arguments and prints the result:
+ * NDJSON when `opts.ndjson` is set, pretty-printed JSON when `opts.json` is
+ * set, otherwise one line per node followed by a pagination hint when more
+ * results are available.
+ *
+ * @param {string} [pattern] - name pattern, forwarded to astQueryData
+ * @param {string} [customDbPath] - path to graph.db
+ * @param {object} [opts] - astQueryData options plus `json` / `ndjson`
+ */
+export function astQuery(pattern, customDbPath, opts = {}) {
+  const data = astQueryData(pattern, customDbPath, opts);
+
+  if (opts.ndjson) {
+    printNdjson(data, 'results');
+    return;
+  }
+
+  if (opts.json) {
+    console.log(JSON.stringify(data, null, 2));
+    return;
+  }
+
+  // Human-readable output
+  if (data.results.length === 0) {
+    console.log(`No AST nodes found${pattern ? ` matching "${pattern}"` : ''}.`);
+    return;
+  }
+
+  const kindLabel = opts.kind ? ` (kind: ${opts.kind})` : '';
+  console.log(`\n${data.count} AST nodes${pattern ? ` matching "${pattern}"` : ''}${kindLabel}:\n`);
+
+  for (const r of data.results) {
+    const icon = KIND_ICONS[r.kind] || '?';
+    const parentInfo = r.parent ? ` (in ${r.parent.name})` : '';
+    console.log(` ${icon} ${r.name} -- ${r.file}:${r.line}${parentInfo}`);
+  }
+
+  if (data._pagination?.hasMore) {
+    console.log(
+      `\n ... ${data._pagination.total - data._pagination.offset - data._pagination.returned} more (use --offset ${data._pagination.offset + data._pagination.limit})`,
+    );
+  }
+  console.log();
+}
diff --git a/src/builder.js b/src/builder.js
index 00d67186..8b51e300 100644
--- a/src/builder.js
+++ b/src/builder.js
@@ -396,6 +396,28 @@ export function purgeFilesFromGraph(db, files, options = {}) {
       deleteHashForFile = null;
     }
   }
+  let deleteAstNodesForFile;
+  try {
+    deleteAstNodesForFile = db.prepare('DELETE FROM ast_nodes WHERE file = ?');
+  } catch {
+    deleteAstNodesForFile = null;
+  }
+  let deleteCfgForFile;
+  try {
+    deleteCfgForFile = db.prepare(
+      'DELETE FROM cfg_edges WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
+    );
+  } catch {
+    deleteCfgForFile = null;
+  }
+  let deleteCfgBlocksForFile;
+  try {
+    deleteCfgBlocksForFile = db.prepare(
+      'DELETE FROM cfg_blocks WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
+    );
+  } catch {
+    deleteCfgBlocksForFile = null;
+  }
 
   for (const relPath of files) {
     deleteEmbeddingsForFile?.run(relPath);
@@ -403,6 +425,9 @@ export function purgeFilesFromGraph(db, files, options = {}) {
     deleteMetricsForFile.run(relPath);
     deleteComplexityForFile?.run(relPath);
     deleteDataflowForFile?.run(relPath, relPath);
+    deleteAstNodesForFile?.run(relPath);
+    deleteCfgForFile?.run(relPath);
+    deleteCfgBlocksForFile?.run(relPath);
     deleteNodesForFile.run(relPath);
     if (purgeHashes) deleteHashForFile?.run(relPath);
   }
@@ -532,7 +557,7 @@ export async function buildGraph(rootDir, opts = {}) {
 
   if (isFullBuild) {
     const deletions =
-      'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM nodes; PRAGMA foreign_keys = ON;';
+      'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;';
     db.exec(
       hasEmbeddings
         ? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;`
@@ -1189,6 +1214,17 @@ export async function buildGraph(rootDir, opts = {}) {
   }
   _t.rolesMs = performance.now() - _t.roles0;
 
+  // Always-on AST node extraction (calls, new, string, regex, throw, await)
+  // Must run before complexity which releases _tree references
+  _t.ast0 = performance.now();
+  try {
+    const { buildAstNodes } = await import('./ast.js');
+    await buildAstNodes(db, allSymbols, rootDir, engineOpts);
+  } catch (err) {
+    debug(`AST node extraction failed: ${err.message}`);
+  }
+  _t.astMs = performance.now() - _t.ast0;
+
   // Compute per-function complexity metrics (cognitive, cyclomatic, nesting)
   _t.complexity0 = performance.now();
   try {
@@ -1199,6 +1235,18 @@ export async function buildGraph(rootDir, opts = {}) {
   }
   _t.complexityMs = performance.now() - _t.complexity0;
 
+  // Opt-in CFG analysis (--cfg)
+  if (opts.cfg) {
+    _t.cfg0 = performance.now();
+    try {
+      const { buildCFGData } = await import('./cfg.js');
+      await buildCFGData(db, allSymbols, rootDir, engineOpts);
+    } catch (err) {
+      debug(`CFG analysis failed: ${err.message}`);
+    }
+    _t.cfgMs = performance.now() - _t.cfg0;
+  }
+
   // Opt-in dataflow analysis (--dataflow)
   if (opts.dataflow) {
     _t.dataflow0 = performance.now();
@@ -1301,6 +1349,7 @@ export async function buildGraph(rootDir, opts = {}) {
       structureMs: +_t.structureMs.toFixed(1),
       rolesMs: +_t.rolesMs.toFixed(1),
       complexityMs: +_t.complexityMs.toFixed(1),
+      ...(_t.cfgMs != null && { cfgMs: +_t.cfgMs.toFixed(1) }),
     },
   };
 }
diff --git a/src/cfg.js b/src/cfg.js
new file mode 100644
index 00000000..c9f7dd0f
--- /dev/null
+++ b/src/cfg.js
@@ -0,0 +1,1035 @@
+/**
+ * Intraprocedural Control Flow Graph (CFG) construction from tree-sitter AST.
+ *
+ * Builds basic-block CFGs for individual functions, stored in cfg_blocks + cfg_edges tables.
+ * Opt-in via `build --cfg`. JS/TS/TSX only for Phase 1.
+ */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { COMPLEXITY_RULES } from './complexity.js'; +import { openReadonlyOrFail } from './db.js'; +import { info } from './logger.js'; +import { paginateResult, printNdjson } from './paginate.js'; +import { LANGUAGE_REGISTRY } from './parser.js'; +import { isTestFile } from './queries.js'; + +// ─── CFG Node Type Rules (extends COMPLEXITY_RULES) ────────────────────── + +const JS_TS_CFG = { + ifNode: 'if_statement', + elseClause: 'else_clause', + forNodes: new Set(['for_statement', 'for_in_statement']), + whileNode: 'while_statement', + doNode: 'do_statement', + switchNode: 'switch_statement', + caseNode: 'switch_case', + defaultNode: 'switch_default', + tryNode: 'try_statement', + catchNode: 'catch_clause', + finallyNode: 'finally_clause', + returnNode: 'return_statement', + throwNode: 'throw_statement', + breakNode: 'break_statement', + continueNode: 'continue_statement', + blockNode: 'statement_block', + labeledNode: 'labeled_statement', + functionNodes: new Set([ + 'function_declaration', + 'function_expression', + 'arrow_function', + 'method_definition', + 'generator_function', + 'generator_function_declaration', + ]), +}; + +export const CFG_RULES = new Map([ + ['javascript', JS_TS_CFG], + ['typescript', JS_TS_CFG], + ['tsx', JS_TS_CFG], +]); + +// Language IDs that support CFG (Phase 1: JS/TS/TSX only) +const CFG_LANG_IDS = new Set(['javascript', 'typescript', 'tsx']); + +// JS/TS extensions +const CFG_EXTENSIONS = new Set(); +for (const entry of LANGUAGE_REGISTRY) { + if (CFG_LANG_IDS.has(entry.id)) { + for (const ext of entry.extensions) CFG_EXTENSIONS.add(ext); + } +} + +// ─── Core Algorithm: AST → CFG ────────────────────────────────────────── + +/** + * Build a control flow graph for a single function AST node. 
+ * + * @param {object} functionNode - tree-sitter function AST node + * @param {string} langId - language identifier (javascript, typescript, tsx) + * @returns {{ blocks: object[], edges: object[] }} - CFG blocks and edges + */ +export function buildFunctionCFG(functionNode, langId) { + const rules = CFG_RULES.get(langId); + if (!rules) return { blocks: [], edges: [] }; + + const blocks = []; + const edges = []; + let nextIndex = 0; + + function makeBlock(type, startLine = null, endLine = null, label = null) { + const block = { + index: nextIndex++, + type, + startLine, + endLine, + label, + }; + blocks.push(block); + return block; + } + + function addEdge(source, target, kind) { + edges.push({ + sourceIndex: source.index, + targetIndex: target.index, + kind, + }); + } + + const entryBlock = makeBlock('entry'); + const exitBlock = makeBlock('exit'); + + // Loop context stack for break/continue resolution + const loopStack = []; + + // Label map for labeled break/continue + const labelMap = new Map(); + + /** + * Get the body node of a function (handles arrow functions with expression bodies). + */ + function getFunctionBody(fnNode) { + const body = fnNode.childForFieldName('body'); + if (!body) return null; + return body; + } + + /** + * Get statement children from a block or statement list. + */ + function getStatements(node) { + if (!node) return []; + // statement_block: get named children + if (node.type === rules.blockNode) { + const stmts = []; + for (let i = 0; i < node.namedChildCount; i++) { + stmts.push(node.namedChild(i)); + } + return stmts; + } + // Single statement (e.g., arrow fn with expression body, or unbraced if body) + return [node]; + } + + /** + * Process a list of statements, creating blocks and edges. + * Returns the last "current" block after processing, or null if all paths terminated. 
+ */ + function processStatements(stmts, currentBlock) { + let cur = currentBlock; + + for (const stmt of stmts) { + if (!cur) { + // Dead code after return/break/continue/throw — skip remaining + break; + } + cur = processStatement(stmt, cur); + } + + return cur; + } + + /** + * Process a single statement, returns the new current block or null if terminated. + */ + function processStatement(stmt, currentBlock) { + if (!stmt || !currentBlock) return currentBlock; + + const type = stmt.type; + + // Labeled statement: register label then process inner statement + if (type === rules.labeledNode) { + const labelNode = stmt.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + const body = stmt.childForFieldName('body'); + if (body && labelName) { + // Will be filled when we encounter the loop + const labelCtx = { headerBlock: null, exitBlock: null }; + labelMap.set(labelName, labelCtx); + const result = processStatement(body, currentBlock); + labelMap.delete(labelName); + return result; + } + return currentBlock; + } + + // If statement + if (type === rules.ifNode) { + return processIf(stmt, currentBlock); + } + + // For / for-in loops + if (rules.forNodes.has(type)) { + return processForLoop(stmt, currentBlock); + } + + // While loop + if (type === rules.whileNode) { + return processWhileLoop(stmt, currentBlock); + } + + // Do-while loop + if (type === rules.doNode) { + return processDoWhileLoop(stmt, currentBlock); + } + + // Switch statement + if (type === rules.switchNode) { + return processSwitch(stmt, currentBlock); + } + + // Try/catch/finally + if (type === rules.tryNode) { + return processTryCatch(stmt, currentBlock); + } + + // Return statement + if (type === rules.returnNode) { + currentBlock.endLine = stmt.startPosition.row + 1; + addEdge(currentBlock, exitBlock, 'return'); + return null; // path terminated + } + + // Throw statement + if (type === rules.throwNode) { + currentBlock.endLine = stmt.startPosition.row + 1; + 
addEdge(currentBlock, exitBlock, 'exception'); + return null; // path terminated + } + + // Break statement + if (type === rules.breakNode) { + const labelNode = stmt.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + + let target = null; + if (labelName && labelMap.has(labelName)) { + target = labelMap.get(labelName).exitBlock; + } else if (loopStack.length > 0) { + target = loopStack[loopStack.length - 1].exitBlock; + } + + if (target) { + currentBlock.endLine = stmt.startPosition.row + 1; + addEdge(currentBlock, target, 'break'); + return null; // path terminated + } + // break with no enclosing loop/switch — treat as no-op + return currentBlock; + } + + // Continue statement + if (type === rules.continueNode) { + const labelNode = stmt.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + + let target = null; + if (labelName && labelMap.has(labelName)) { + target = labelMap.get(labelName).headerBlock; + } else if (loopStack.length > 0) { + target = loopStack[loopStack.length - 1].headerBlock; + } + + if (target) { + currentBlock.endLine = stmt.startPosition.row + 1; + addEdge(currentBlock, target, 'continue'); + return null; // path terminated + } + return currentBlock; + } + + // Regular statement — extend current block + if (!currentBlock.startLine) { + currentBlock.startLine = stmt.startPosition.row + 1; + } + currentBlock.endLine = stmt.endPosition.row + 1; + return currentBlock; + } + + /** + * Process an if/else-if/else chain. 
+ */ + function processIf(ifStmt, currentBlock) { + // Terminate current block at condition + currentBlock.endLine = ifStmt.startPosition.row + 1; + + const condBlock = makeBlock( + 'condition', + ifStmt.startPosition.row + 1, + ifStmt.startPosition.row + 1, + 'if', + ); + addEdge(currentBlock, condBlock, 'fallthrough'); + + const joinBlock = makeBlock('body'); + + // True branch (consequent) + const consequent = ifStmt.childForFieldName('consequence'); + const trueBlock = makeBlock('branch_true', null, null, 'then'); + addEdge(condBlock, trueBlock, 'branch_true'); + const trueStmts = getStatements(consequent); + const trueEnd = processStatements(trueStmts, trueBlock); + if (trueEnd) { + addEdge(trueEnd, joinBlock, 'fallthrough'); + } + + // False branch (alternative / else / else-if) + const alternative = ifStmt.childForFieldName('alternative'); + if (alternative) { + if (alternative.type === rules.elseClause) { + // else clause — may contain another if (else-if) or a block + const elseChildren = []; + for (let i = 0; i < alternative.namedChildCount; i++) { + elseChildren.push(alternative.namedChild(i)); + } + if (elseChildren.length === 1 && elseChildren[0].type === rules.ifNode) { + // else-if: recurse + const falseBlock = makeBlock('branch_false', null, null, 'else-if'); + addEdge(condBlock, falseBlock, 'branch_false'); + const elseIfEnd = processIf(elseChildren[0], falseBlock); + if (elseIfEnd) { + addEdge(elseIfEnd, joinBlock, 'fallthrough'); + } + } else { + // else block + const falseBlock = makeBlock('branch_false', null, null, 'else'); + addEdge(condBlock, falseBlock, 'branch_false'); + const falseEnd = processStatements(elseChildren, falseBlock); + if (falseEnd) { + addEdge(falseEnd, joinBlock, 'fallthrough'); + } + } + } + } else { + // No else: condition-false goes directly to join + addEdge(condBlock, joinBlock, 'branch_false'); + } + + return joinBlock; + } + + /** + * Process a for/for-in loop. 
+ */ + function processForLoop(forStmt, currentBlock) { + const headerBlock = makeBlock( + 'loop_header', + forStmt.startPosition.row + 1, + forStmt.startPosition.row + 1, + 'for', + ); + addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = makeBlock('body'); + + // Register loop context + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + loopStack.push(loopCtx); + + // Update label map if this is inside a labeled statement + for (const [, ctx] of labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = headerBlock; + ctx.exitBlock = loopExitBlock; + } + } + + // Loop body + const body = forStmt.childForFieldName('body'); + const bodyBlock = makeBlock('loop_body'); + addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getStatements(body); + const bodyEnd = processStatements(bodyStmts, bodyBlock); + + if (bodyEnd) { + addEdge(bodyEnd, headerBlock, 'loop_back'); + } + + // Loop exit + addEdge(headerBlock, loopExitBlock, 'loop_exit'); + + loopStack.pop(); + return loopExitBlock; + } + + /** + * Process a while loop. 
+ */ + function processWhileLoop(whileStmt, currentBlock) { + const headerBlock = makeBlock( + 'loop_header', + whileStmt.startPosition.row + 1, + whileStmt.startPosition.row + 1, + 'while', + ); + addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = makeBlock('body'); + + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + loopStack.push(loopCtx); + + for (const [, ctx] of labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = headerBlock; + ctx.exitBlock = loopExitBlock; + } + } + + const body = whileStmt.childForFieldName('body'); + const bodyBlock = makeBlock('loop_body'); + addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getStatements(body); + const bodyEnd = processStatements(bodyStmts, bodyBlock); + + if (bodyEnd) { + addEdge(bodyEnd, headerBlock, 'loop_back'); + } + + addEdge(headerBlock, loopExitBlock, 'loop_exit'); + + loopStack.pop(); + return loopExitBlock; + } + + /** + * Process a do-while loop. + */ + function processDoWhileLoop(doStmt, currentBlock) { + const bodyBlock = makeBlock('loop_body', doStmt.startPosition.row + 1, null, 'do'); + addEdge(currentBlock, bodyBlock, 'fallthrough'); + + const condBlock = makeBlock('loop_header', null, null, 'do-while'); + const loopExitBlock = makeBlock('body'); + + const loopCtx = { headerBlock: condBlock, exitBlock: loopExitBlock }; + loopStack.push(loopCtx); + + for (const [, ctx] of labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = condBlock; + ctx.exitBlock = loopExitBlock; + } + } + + const body = doStmt.childForFieldName('body'); + const bodyStmts = getStatements(body); + const bodyEnd = processStatements(bodyStmts, bodyBlock); + + if (bodyEnd) { + addEdge(bodyEnd, condBlock, 'fallthrough'); + } + + // Condition: loop_back or exit + addEdge(condBlock, bodyBlock, 'loop_back'); + addEdge(condBlock, loopExitBlock, 'loop_exit'); + + loopStack.pop(); + return loopExitBlock; + } + + /** + * Process a switch statement. 
+ */ + function processSwitch(switchStmt, currentBlock) { + currentBlock.endLine = switchStmt.startPosition.row + 1; + + const switchHeader = makeBlock( + 'condition', + switchStmt.startPosition.row + 1, + switchStmt.startPosition.row + 1, + 'switch', + ); + addEdge(currentBlock, switchHeader, 'fallthrough'); + + const joinBlock = makeBlock('body'); + + // Switch acts like a break target for contained break statements + const switchCtx = { headerBlock: switchHeader, exitBlock: joinBlock }; + loopStack.push(switchCtx); + + // Collect case clauses from the switch body + const switchBody = switchStmt.childForFieldName('body'); + if (switchBody) { + let hasDefault = false; + for (let i = 0; i < switchBody.namedChildCount; i++) { + const caseClause = switchBody.namedChild(i); + const isDefault = + caseClause.type === rules.defaultNode || + (caseClause.type === rules.caseNode && !caseClause.childForFieldName('value')); + + const caseLabel = isDefault ? 'default' : 'case'; + const caseBlock = makeBlock( + isDefault ? 'case' : 'case', + caseClause.startPosition.row + 1, + null, + caseLabel, + ); + addEdge(switchHeader, caseBlock, isDefault ? 'branch_false' : 'branch_true'); + if (isDefault) hasDefault = true; + + // Process case body statements + const caseStmts = []; + for (let j = 0; j < caseClause.namedChildCount; j++) { + const child = caseClause.namedChild(j); + // Skip the case value expression + if (child.type !== 'identifier' && child.type !== 'string' && child.type !== 'number') { + caseStmts.push(child); + } + } + + const caseEnd = processStatements(caseStmts, caseBlock); + if (caseEnd) { + // Fall-through to join (or next case, but we simplify to join) + addEdge(caseEnd, joinBlock, 'fallthrough'); + } + } + + // If no default case, switch header can skip to join + if (!hasDefault) { + addEdge(switchHeader, joinBlock, 'branch_false'); + } + } + + loopStack.pop(); + return joinBlock; + } + + /** + * Process try/catch/finally. 
+ */ + function processTryCatch(tryStmt, currentBlock) { + currentBlock.endLine = tryStmt.startPosition.row + 1; + + const joinBlock = makeBlock('body'); + + // Try body + const tryBody = tryStmt.childForFieldName('body'); + const tryBlock = makeBlock('body', tryBody ? tryBody.startPosition.row + 1 : null, null, 'try'); + addEdge(currentBlock, tryBlock, 'fallthrough'); + + const tryStmts = getStatements(tryBody); + const tryEnd = processStatements(tryStmts, tryBlock); + + // Catch handler + let catchHandler = null; + let finallyHandler = null; + for (let i = 0; i < tryStmt.namedChildCount; i++) { + const child = tryStmt.namedChild(i); + if (child.type === rules.catchNode) catchHandler = child; + if (child.type === rules.finallyNode) finallyHandler = child; + } + + if (catchHandler) { + const catchBlock = makeBlock('catch', catchHandler.startPosition.row + 1, null, 'catch'); + // Exception edge from try to catch + addEdge(tryBlock, catchBlock, 'exception'); + + const catchBody = catchHandler.childForFieldName('body'); + const catchStmts = getStatements(catchBody); + const catchEnd = processStatements(catchStmts, catchBlock); + + if (finallyHandler) { + const finallyBlock = makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) addEdge(tryEnd, finallyBlock, 'fallthrough'); + if (catchEnd) addEdge(catchEnd, finallyBlock, 'fallthrough'); + + const finallyBody = finallyHandler.childForFieldName('body'); + const finallyStmts = getStatements(finallyBody); + const finallyEnd = processStatements(finallyStmts, finallyBlock); + if (finallyEnd) addEdge(finallyEnd, joinBlock, 'fallthrough'); + } else { + if (tryEnd) addEdge(tryEnd, joinBlock, 'fallthrough'); + if (catchEnd) addEdge(catchEnd, joinBlock, 'fallthrough'); + } + } else if (finallyHandler) { + const finallyBlock = makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) addEdge(tryEnd, finallyBlock, 'fallthrough'); + + 
const finallyBody = finallyHandler.childForFieldName('body'); + const finallyStmts = getStatements(finallyBody); + const finallyEnd = processStatements(finallyStmts, finallyBlock); + if (finallyEnd) addEdge(finallyEnd, joinBlock, 'fallthrough'); + } else { + if (tryEnd) addEdge(tryEnd, joinBlock, 'fallthrough'); + } + + return joinBlock; + } + + // ── Main entry point ────────────────────────────────────────────────── + + const body = getFunctionBody(functionNode); + if (!body) { + // Empty function or expression body + addEdge(entryBlock, exitBlock, 'fallthrough'); + return { blocks, edges }; + } + + const stmts = getStatements(body); + if (stmts.length === 0) { + addEdge(entryBlock, exitBlock, 'fallthrough'); + return { blocks, edges }; + } + + const firstBlock = makeBlock('body'); + addEdge(entryBlock, firstBlock, 'fallthrough'); + + const lastBlock = processStatements(stmts, firstBlock); + if (lastBlock) { + addEdge(lastBlock, exitBlock, 'fallthrough'); + } + + return { blocks, edges }; +} + +// ─── Build-Time: Compute CFG for Changed Files ───────────────────────── + +/** + * Build CFG data for all function/method definitions and persist to DB. 
+ * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [_engineOpts] - engine options (unused; always uses WASM for AST) + */ +export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { + // Lazily init WASM parsers if needed + let parsers = null; + let extToLang = null; + let needsFallback = false; + + for (const [relPath, symbols] of fileSymbols) { + if (!symbols._tree) { + const ext = path.extname(relPath).toLowerCase(); + if (CFG_EXTENSIONS.has(ext)) { + needsFallback = true; + break; + } + } + } + + if (needsFallback) { + const { createParsers } = await import('./parser.js'); + parsers = await createParsers(); + extToLang = new Map(); + for (const entry of LANGUAGE_REGISTRY) { + for (const ext of entry.extensions) { + extToLang.set(ext, entry.id); + } + } + } + + let getParserFn = null; + if (parsers) { + const mod = await import('./parser.js'); + getParserFn = mod.getParser; + } + + const { findFunctionNode } = await import('./complexity.js'); + + const insertBlock = db.prepare( + `INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) + VALUES (?, ?, ?, ?, ?, ?)`, + ); + const insertEdge = db.prepare( + `INSERT INTO cfg_edges (function_node_id, source_block_id, target_block_id, kind) + VALUES (?, ?, ?, ?)`, + ); + const deleteBlocks = db.prepare('DELETE FROM cfg_blocks WHERE function_node_id = ?'); + const deleteEdges = db.prepare('DELETE FROM cfg_edges WHERE function_node_id = ?'); + const getNodeId = db.prepare( + "SELECT id FROM nodes WHERE name = ? AND kind IN ('function','method') AND file = ? 
AND line = ?", + ); + + let analyzed = 0; + + const tx = db.transaction(() => { + for (const [relPath, symbols] of fileSymbols) { + const ext = path.extname(relPath).toLowerCase(); + if (!CFG_EXTENSIONS.has(ext)) continue; + + let tree = symbols._tree; + let langId = symbols._langId; + + // WASM fallback if no cached tree + if (!tree) { + if (!extToLang || !getParserFn) continue; + langId = extToLang.get(ext); + if (!langId || !CFG_LANG_IDS.has(langId)) continue; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch { + continue; + } + + const parser = getParserFn(parsers, absPath); + if (!parser) continue; + + try { + tree = parser.parse(code); + } catch { + continue; + } + } + + if (!langId) { + langId = extToLang ? extToLang.get(ext) : null; + if (!langId) continue; + } + + const cfgRules = CFG_RULES.get(langId); + if (!cfgRules) continue; + + const complexityRules = COMPLEXITY_RULES.get(langId); + if (!complexityRules) continue; + + for (const def of symbols.definitions) { + if (def.kind !== 'function' && def.kind !== 'method') continue; + if (!def.line) continue; + + const row = getNodeId.get(def.name, relPath, def.line); + if (!row) continue; + + const funcNode = findFunctionNode(tree.rootNode, def.line, def.endLine, complexityRules); + if (!funcNode) continue; + + const cfg = buildFunctionCFG(funcNode, langId); + if (cfg.blocks.length === 0) continue; + + // Clear old CFG data for this function + deleteEdges.run(row.id); + deleteBlocks.run(row.id); + + // Insert blocks and build index→dbId mapping + const blockDbIds = new Map(); + for (const block of cfg.blocks) { + const result = insertBlock.run( + row.id, + block.index, + block.type, + block.startLine, + block.endLine, + block.label, + ); + blockDbIds.set(block.index, result.lastInsertRowid); + } + + // Insert edges + for (const edge of cfg.edges) { + const sourceDbId = blockDbIds.get(edge.sourceIndex); + const targetDbId = 
blockDbIds.get(edge.targetIndex); + if (sourceDbId && targetDbId) { + insertEdge.run(row.id, sourceDbId, targetDbId, edge.kind); + } + } + + analyzed++; + } + + // Don't release _tree here — complexity/dataflow may still need it + } + }); + + tx(); + + if (analyzed > 0) { + info(`CFG: ${analyzed} functions analyzed`); + } +} + +// ─── Query-Time Functions ─────────────────────────────────────────────── + +function hasCfgTables(db) { + try { + db.prepare('SELECT 1 FROM cfg_blocks LIMIT 0').get(); + return true; + } catch { + return false; + } +} + +function findNodes(db, name, opts = {}) { + const kinds = opts.kind ? [opts.kind] : ['function', 'method']; + const placeholders = kinds.map(() => '?').join(', '); + const params = [`%${name}%`, ...kinds]; + + let fileCondition = ''; + if (opts.file) { + fileCondition = ' AND n.file LIKE ?'; + params.push(`%${opts.file}%`); + } + + const rows = db + .prepare( + `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line + FROM nodes n + WHERE n.name LIKE ? AND n.kind IN (${placeholders})${fileCondition}`, + ) + .all(...params); + + return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; +} + +/** + * Load CFG data for a function from the database. + * + * @param {string} name - Function name (partial match) + * @param {string} [customDbPath] - Path to graph.db + * @param {object} [opts] - Options + * @returns {{ function: object, blocks: object[], edges: object[], summary: object }} + */ +export function cfgData(name, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + + if (!hasCfgTables(db)) { + db.close(); + return { + name, + results: [], + warning: 'No CFG data found. 
Run `codegraph build --cfg` first.', + }; + } + + const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + if (nodes.length === 0) { + db.close(); + return { name, results: [] }; + } + + const blockStmt = db.prepare( + `SELECT id, block_index, block_type, start_line, end_line, label + FROM cfg_blocks WHERE function_node_id = ? + ORDER BY block_index`, + ); + const edgeStmt = db.prepare( + `SELECT e.kind, + sb.block_index AS source_index, sb.block_type AS source_type, + tb.block_index AS target_index, tb.block_type AS target_type + FROM cfg_edges e + JOIN cfg_blocks sb ON e.source_block_id = sb.id + JOIN cfg_blocks tb ON e.target_block_id = tb.id + WHERE e.function_node_id = ? + ORDER BY sb.block_index, tb.block_index`, + ); + + const results = nodes.map((node) => { + const cfgBlocks = blockStmt.all(node.id); + const cfgEdges = edgeStmt.all(node.id); + + return { + name: node.name, + kind: node.kind, + file: node.file, + line: node.line, + blocks: cfgBlocks.map((b) => ({ + index: b.block_index, + type: b.block_type, + startLine: b.start_line, + endLine: b.end_line, + label: b.label, + })), + edges: cfgEdges.map((e) => ({ + source: e.source_index, + sourceType: e.source_type, + target: e.target_index, + targetType: e.target_type, + kind: e.kind, + })), + summary: { + blockCount: cfgBlocks.length, + edgeCount: cfgEdges.length, + }, + }; + }); + + db.close(); + return paginateResult({ name, results }, 'results', opts); +} + +// ─── Export Formats ───────────────────────────────────────────────────── + +/** + * Convert CFG data to DOT format for Graphviz rendering. + */ +export function cfgToDOT(cfgResult) { + const lines = []; + + for (const r of cfgResult.results) { + lines.push(`digraph "${r.name}" {`); + lines.push(' rankdir=TB;'); + lines.push(' node [shape=box, fontname="monospace", fontsize=10];'); + + for (const block of r.blocks) { + const label = blockLabel(block); + const shape = block.type === 'entry' || block.type === 'exit' ? 
'ellipse' : 'box'; + const style = + block.type === 'condition' || block.type === 'loop_header' + ? ', style=filled, fillcolor="#ffffcc"' + : ''; + lines.push(` B${block.index} [label="${label}", shape=${shape}${style}];`); + } + + for (const edge of r.edges) { + const style = edgeStyle(edge.kind); + lines.push(` B${edge.source} -> B${edge.target} [label="${edge.kind}"${style}];`); + } + + lines.push('}'); + } + + return lines.join('\n'); +} + +/** + * Convert CFG data to Mermaid format. + */ +export function cfgToMermaid(cfgResult) { + const lines = []; + + for (const r of cfgResult.results) { + lines.push(`graph TD`); + lines.push(` subgraph "${r.name}"`); + + for (const block of r.blocks) { + const label = blockLabel(block); + if (block.type === 'entry' || block.type === 'exit') { + lines.push(` B${block.index}(["${label}"])`); + } else if (block.type === 'condition' || block.type === 'loop_header') { + lines.push(` B${block.index}{"${label}"}`); + } else { + lines.push(` B${block.index}["${label}"]`); + } + } + + for (const edge of r.edges) { + const label = edge.kind; + lines.push(` B${edge.source} -->|${label}| B${edge.target}`); + } + + lines.push(' end'); + } + + return lines.join('\n'); +} + +function blockLabel(block) { + const loc = + block.startLine && block.endLine + ? ` L${block.startLine}${block.endLine !== block.startLine ? `-${block.endLine}` : ''}` + : ''; + const label = block.label ? 
` (${block.label})` : ''; + return `${block.type}${label}${loc}`; +} + +function edgeStyle(kind) { + if (kind === 'exception') return ', color=red, fontcolor=red'; + if (kind === 'branch_true') return ', color=green, fontcolor=green'; + if (kind === 'branch_false') return ', color=red, fontcolor=red'; + if (kind === 'loop_back') return ', style=dashed, color=blue'; + if (kind === 'loop_exit') return ', color=orange'; + if (kind === 'return') return ', color=purple'; + if (kind === 'break') return ', color=orange, style=dashed'; + if (kind === 'continue') return ', color=blue, style=dashed'; + return ''; +} + +// ─── CLI Printer ──────────────────────────────────────────────────────── + +/** + * CLI display for cfg command. + */ +export function cfg(name, customDbPath, opts = {}) { + const data = cfgData(name, customDbPath, opts); + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + if (opts.ndjson) { + printNdjson(data.results); + return; + } + + if (data.warning) { + console.log(`\u26A0 ${data.warning}`); + return; + } + if (data.results.length === 0) { + console.log(`No symbols matching "${name}".`); + return; + } + + const format = opts.format || 'text'; + if (format === 'dot') { + console.log(cfgToDOT(data)); + return; + } + if (format === 'mermaid') { + console.log(cfgToMermaid(data)); + return; + } + + // Text format + for (const r of data.results) { + console.log(`\n${r.kind} ${r.name} (${r.file}:${r.line})`); + console.log('\u2500'.repeat(60)); + console.log(` Blocks: ${r.summary.blockCount} Edges: ${r.summary.edgeCount}`); + + if (r.blocks.length > 0) { + console.log('\n Blocks:'); + for (const b of r.blocks) { + const loc = b.startLine + ? ` L${b.startLine}${b.endLine && b.endLine !== b.startLine ? `-${b.endLine}` : ''}` + : ''; + const label = b.label ? 
` (${b.label})` : ''; + console.log(` [${b.index}] ${b.type}${label}${loc}`); + } + } + + if (r.edges.length > 0) { + console.log('\n Edges:'); + for (const e of r.edges) { + console.log(` B${e.source} \u2192 B${e.target} [${e.kind}]`); + } + } + } +} diff --git a/src/cli.js b/src/cli.js index c3081664..500031f7 100644 --- a/src/cli.js +++ b/src/cli.js @@ -106,10 +106,16 @@ program .description('Parse repo and build graph in .codegraph/graph.db') .option('--no-incremental', 'Force full rebuild (ignore file hashes)') .option('--dataflow', 'Extract data flow edges (flows_to, returns, mutates)') + .option('--cfg', 'Build intraprocedural control flow graphs') .action(async (dir, opts) => { const root = path.resolve(dir || '.'); const engine = program.opts().engine; - await buildGraph(root, { incremental: opts.incremental, engine, dataflow: opts.dataflow }); + await buildGraph(root, { + incremental: opts.incremental, + engine, + dataflow: opts.dataflow, + cfg: opts.cfg, + }); }); program @@ -1120,6 +1126,37 @@ program }); }); +program + .command('cfg ') + .description('Show control flow graph for a function') + .option('-d, --db ', 'Path to graph.db') + .option('--format ', 'Output format: text, dot, mermaid', 'text') + .option('-f, --file ', 'Scope to file (partial match)') + .option('-k, --kind ', 'Filter by symbol kind') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .option('--ndjson', 'Newline-delimited JSON output') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .action(async (name, opts) => { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + process.exit(1); + } + const { cfg } = await import('./cfg.js'); + cfg(name, opts.db, { + format: opts.format, + file: opts.file, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + ndjson: opts.ndjson, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + }); + }); + program .command('complexity [target]') .description('Show per-function complexity metrics (cognitive, cyclomatic, nesting depth, MI)') @@ -1160,6 +1197,35 @@ program }); }); +program + .command('ast [pattern]') + .description('Search stored AST nodes (calls, new, string, regex, throw, await) by pattern') + .option('-d, --db ', 'Path to graph.db') + .option('-k, --kind ', 'Filter by AST node kind (call, new, string, regex, throw, await)') + .option('-f, --file ', 'Scope to file (partial match)') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .option('--ndjson', 'Newline-delimited JSON output') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .action(async (pattern, opts) => { + const { AST_NODE_KINDS, astQuery } = await import('./ast.js'); + if (opts.kind && !AST_NODE_KINDS.includes(opts.kind)) { + console.error(`Invalid AST kind "${opts.kind}". Valid: ${AST_NODE_KINDS.join(', ')}`); + process.exit(1); + } + astQuery(pattern, opts.db, { + kind: opts.kind, + file: opts.file, + noTests: resolveNoTests(opts), + json: opts.json, + ndjson: opts.ndjson, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + }); + }); + program .command('manifesto') .description('Evaluate manifesto rules (pass/fail verdicts for code health)') diff --git a/src/complexity.js b/src/complexity.js index f97cb616..132ccb25 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -1574,7 +1574,7 @@ export function computeAllMetrics(functionNode, langId) { /** * Find the function body node in a parse tree that matches a given line range. */ -function findFunctionNode(rootNode, startLine, _endLine, rules) { +export function findFunctionNode(rootNode, startLine, _endLine, rules) { // tree-sitter lines are 0-indexed const targetStart = startLine - 1; diff --git a/src/db.js b/src/db.js index 9f40d7cc..3e17327e 100644 --- a/src/db.js +++ b/src/db.js @@ -173,6 +173,58 @@ export const MIGRATIONS = [ CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); `, }, + { + version: 12, + up: ` + CREATE TABLE IF NOT EXISTS cfg_blocks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + block_index INTEGER NOT NULL, + block_type TEXT NOT NULL, + start_line INTEGER, + end_line INTEGER, + label TEXT, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + UNIQUE(function_node_id, block_index) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_blocks_fn ON cfg_blocks(function_node_id); + + CREATE TABLE IF NOT EXISTS cfg_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + source_block_id INTEGER NOT NULL, + target_block_id INTEGER NOT NULL, + kind TEXT NOT NULL, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + FOREIGN KEY(source_block_id) REFERENCES cfg_blocks(id), + FOREIGN KEY(target_block_id) REFERENCES cfg_blocks(id) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_fn ON cfg_edges(function_node_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_src ON cfg_edges(source_block_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); + `, + }, + { + version: 13, + up: ` 
+ CREATE TABLE IF NOT EXISTS ast_nodes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file TEXT NOT NULL, + line INTEGER NOT NULL, + kind TEXT NOT NULL, + name TEXT NOT NULL, + text TEXT, + receiver TEXT, + parent_node_id INTEGER, + FOREIGN KEY(parent_node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_ast_kind ON ast_nodes(kind); + CREATE INDEX IF NOT EXISTS idx_ast_name ON ast_nodes(name); + CREATE INDEX IF NOT EXISTS idx_ast_file ON ast_nodes(file); + CREATE INDEX IF NOT EXISTS idx_ast_parent ON ast_nodes(parent_node_id); + CREATE INDEX IF NOT EXISTS idx_ast_kind_name ON ast_nodes(kind, name); + `, + }, ]; export function getBuildMeta(db, key) { diff --git a/src/index.js b/src/index.js index 27c88762..e58cc643 100644 --- a/src/index.js +++ b/src/index.js @@ -5,6 +5,8 @@ * import { buildGraph, queryNameData, findCycles, exportDOT } from 'codegraph'; */ +// AST node queries +export { AST_NODE_KINDS, astQuery, astQueryData } from './ast.js'; // Audit (composite report) export { audit, auditData } from './audit.js'; // Batch querying @@ -22,6 +24,16 @@ export { evaluateBoundaries, PRESETS, validateBoundaryConfig } from './boundarie export { branchCompareData, branchCompareMermaid } from './branch-compare.js'; // Graph building export { buildGraph, collectFiles, loadPathAliases, resolveImportPath } from './builder.js'; +// Control flow graph (intraprocedural) +export { + buildCFGData, + buildFunctionCFG, + CFG_RULES, + cfg, + cfgData, + cfgToDOT, + cfgToMermaid, +} from './cfg.js'; // Check (CI validation predicates) export { check, checkData } from './check.js'; // Co-change analysis @@ -44,6 +56,7 @@ export { computeHalsteadMetrics, computeLOCMetrics, computeMaintainabilityIndex, + findFunctionNode, HALSTEAD_RULES, iterComplexity, } from './complexity.js'; diff --git a/src/mcp.js b/src/mcp.js index d48aefec..b9b897be 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -6,6 +6,7 @@ */ import { createRequire } from 'node:module'; +import { AST_NODE_KINDS } from 
'./ast.js'; import { findCycles } from './cycles.js'; import { findDbPath } from './db.js'; import { MCP_DEFAULTS, MCP_MAX_LIMIT } from './paginate.js'; @@ -656,6 +657,26 @@ const BASE_TOOLS = [ required: ['base', 'target'], }, }, + { + name: 'cfg', + description: 'Show intraprocedural control flow graph for a function. Requires build --cfg.', + inputSchema: { + type: 'object', + properties: { + name: { type: 'string', description: 'Function/method name (partial match)' }, + format: { + type: 'string', + enum: ['json', 'dot', 'mermaid'], + description: 'Output format (default: json)', + }, + file: { type: 'string', description: 'Scope to file (partial match)' }, + kind: { type: 'string', enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + required: ['name'], + }, + }, { name: 'dataflow', description: 'Show data flow edges or data-dependent blast radius. Requires build --dataflow.', @@ -698,6 +719,28 @@ const BASE_TOOLS = [ }, }, }, + { + name: 'ast_query', + description: + 'Search stored AST nodes (calls, literals, new, throw, await) by pattern. Requires a prior build.', + inputSchema: { + type: 'object', + properties: { + pattern: { + type: 'string', + description: 'GLOB pattern for node name (auto-wrapped in *..* for substring match)', + }, + kind: { + type: 'string', + enum: AST_NODE_KINDS, + description: 'Filter by AST node kind', + }, + file: { type: 'string', description: 'Scope to file (partial match)' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + }, + }, ]; const LIST_REPOS_TOOL = { @@ -1235,6 +1278,24 @@ export async function startMCPServer(customDbPath, options = {}) { result = args.format === 'mermaid' ? 
branchCompareMermaid(bcData) : bcData; break; } + case 'cfg': { + const { cfgData, cfgToDOT, cfgToMermaid } = await import('./cfg.js'); + const cfgResult = cfgData(args.name, dbPath, { + file: args.file, + kind: args.kind, + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.query, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); + if (args.format === 'dot') { + result = { text: cfgToDOT(cfgResult) }; + } else if (args.format === 'mermaid') { + result = { text: cfgToMermaid(cfgResult) }; + } else { + result = cfgResult; + } + break; + } case 'dataflow': { const dfMode = args.mode || 'edges'; if (dfMode === 'impact') { @@ -1273,6 +1334,17 @@ export async function startMCPServer(customDbPath, options = {}) { }); break; } + case 'ast_query': { + const { astQueryData } = await import('./ast.js'); + result = astQueryData(args.pattern, dbPath, { + kind: args.kind, + file: args.file, + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.ast_query, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); + break; + } case 'list_repos': { const { listRepos, pruneRegistry } = await import('./registry.js'); pruneRegistry(); diff --git a/src/paginate.js b/src/paginate.js index 79bfaa27..09cc03b7 100644 --- a/src/paginate.js +++ b/src/paginate.js @@ -30,6 +30,7 @@ export const MCP_DEFAULTS = { communities: 20, structure: 30, triage: 20, + ast_query: 50, }; /** Hard cap to prevent abuse via MCP. 
*/ diff --git a/src/queries.js b/src/queries.js index 98632618..7f99501f 100644 --- a/src/queries.js +++ b/src/queries.js @@ -2897,6 +2897,7 @@ export function normalizeSymbol(row, db, hashCache) { fileHash, }; } + function whereSymbolImpl(db, target, noTests) { const placeholders = ALL_SYMBOL_KINDS.map(() => '?').join(', '); let nodes = db diff --git a/tests/integration/ast.test.js b/tests/integration/ast.test.js new file mode 100644 index 00000000..60cee696 --- /dev/null +++ b/tests/integration/ast.test.js @@ -0,0 +1,234 @@ +/** + * Integration tests for AST node queries. + * + * Uses a hand-crafted in-memory DB with known AST nodes. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { AST_NODE_KINDS, astQueryData } from '../../src/ast.js'; +import { initSchema } from '../../src/db.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertAstNode(db, file, line, kind, name, text, receiver, parentNodeId) { + return db + .prepare( + 'INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) VALUES (?, ?, ?, ?, ?, ?, ?)', + ) + .run(file, line, kind, name, text, receiver, parentNodeId).lastInsertRowid; +} + +// ─── Fixture DB ──────────────────────────────────────────────────────── + +let tmpDir, dbPath; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-ast-')); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // Insert function nodes + const processId = insertNode(db, 
'processInput', 'function', 'src/utils.js', 10); + const loaderId = insertNode(db, 'loadModule', 'function', 'src/loader.js', 5); + const handlerId = insertNode(db, 'handleRequest', 'function', 'src/handler.js', 20); + const defaultsId = insertNode(db, 'defaults', 'function', 'src/config.js', 1); + const testFnId = insertNode(db, 'testUtils', 'function', 'tests/utils.test.js', 1); + + // Calls: the five below are in non-test files; two of them are console.* calls + insertAstNode(db, 'src/utils.js', 42, 'call', 'eval', null, null, processId); + insertAstNode(db, 'src/loader.js', 8, 'call', 'require', null, null, loaderId); + insertAstNode(db, 'src/handler.js', 25, 'call', 'console.log', null, 'console', handlerId); + insertAstNode(db, 'src/handler.js', 30, 'call', 'console.error', null, 'console', handlerId); + insertAstNode(db, 'src/utils.js', 50, 'call', 'fetch', null, null, processId); + + // new expressions: two + insertAstNode(db, 'src/handler.js', 30, 'new', 'Error', 'new Error("bad")', null, handlerId); + insertAstNode(db, 'src/loader.js', 12, 'new', 'Map', 'new Map()', null, loaderId); + + // strings: two, both in src/config.js + insertAstNode( + db, + 'src/config.js', + 18, + 'string', + 'password123', + '"password123"', + null, + defaultsId, + ); + insertAstNode( + db, + 'src/config.js', + 19, + 'string', + 'localhost:3000', + '"localhost:3000"', + null, + defaultsId, + ); + + // throw: one + insertAstNode( + db, + 'src/handler.js', + 35, + 'throw', + 'Error', + 'new Error("not found")', + null, + handlerId, + ); + + // await: one + insertAstNode(db, 'src/utils.js', 55, 'await', 'fetch', 'fetch(url)', null, processId); + + // regex: one + insertAstNode(db, 'src/utils.js', 60, 'regex', '/\\d+/g', '/\\d+/g', null, processId); + + // Test file nodes (should be excluded by noTests): a second 'eval' call, located in tests/utils.test.js + insertAstNode(db, 'tests/utils.test.js', 5, 'call', 'eval', null, null, testFnId); + + db.close(); +}); + +afterAll(() => { // recursively delete tmpDir; force: true ignores a path that no longer exists + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Tests ───────────────────────────────────────────────────────────── + +describe('AST_NODE_KINDS', () => 
{ + test('exports all expected kinds', () => { + expect(AST_NODE_KINDS).toEqual(['call', 'new', 'string', 'regex', 'throw', 'await']); + }); +}); + +describe('astQueryData', () => { + test('returns all nodes when no pattern given', () => { + const data = astQueryData(undefined, dbPath); + expect(data.count).toBeGreaterThan(0); + expect(data.pattern).toBe('*'); + }); + + test('substring pattern match', () => { + const data = astQueryData('eval', dbPath); + // Should match 'eval' in src/utils.js and tests/utils.test.js + expect(data.results.length).toBeGreaterThanOrEqual(2); + expect(data.results.every((r) => r.name.includes('eval'))).toBe(true); + }); + + test('glob wildcard pattern', () => { // the fixture holds exactly two console.* calls: console.log and console.error + const data = astQueryData('console.*', dbPath); + expect(data.results.length).toBe(2); + expect(data.results.every((r) => r.name.startsWith('console.'))).toBe(true); + }); + + test('exact pattern with star', () => { + const data = astQueryData('*', dbPath); + expect(data.count).toBeGreaterThan(0); + }); + + test('kind filter — call', () => { // five calls in source files plus the one in the test file + const data = astQueryData(undefined, dbPath, { kind: 'call' }); + expect(data.results.every((r) => r.kind === 'call')).toBe(true); + expect(data.results.length).toBeGreaterThanOrEqual(5); + }); + + test('kind filter — string', () => { + const data = astQueryData(undefined, dbPath, { kind: 'string' }); + expect(data.results.every((r) => r.kind === 'string')).toBe(true); + expect(data.results.length).toBe(2); + }); + + test('kind filter — new', () => { + const data = astQueryData(undefined, dbPath, { kind: 'new' }); + expect(data.results.every((r) => r.kind === 'new')).toBe(true); + expect(data.results.length).toBe(2); + }); + + test('kind filter — throw', () => { + const data = astQueryData(undefined, dbPath, { kind: 'throw' }); + expect(data.results.every((r) => r.kind === 'throw')).toBe(true); + expect(data.results.length).toBe(1); + }); + + test('kind filter — await', () => { + const data = astQueryData(undefined, dbPath, { kind: 
'await' }); + expect(data.results.every((r) => r.kind === 'await')).toBe(true); + expect(data.results.length).toBe(1); + }); + + test('kind filter — regex', () => { + const data = astQueryData(undefined, dbPath, { kind: 'regex' }); + expect(data.results.every((r) => r.kind === 'regex')).toBe(true); + expect(data.results.length).toBe(1); + }); + + test('file filter', () => {/* the only fixture AST nodes in a file matching 'config' are the two strings in src/config.js */ + const data = astQueryData(undefined, dbPath, { file: 'config' }); + expect(data.results.every((r) => r.file.includes('config'))).toBe(true); + expect(data.results.length).toBe(2); + }); + + test('noTests excludes test files', () => { + const withTests = astQueryData('eval', dbPath); + const noTests = astQueryData('eval', dbPath, { noTests: true }); + expect(noTests.results.length).toBeLessThan(withTests.results.length); + expect(noTests.results.every((r) => !r.file.includes('.test.'))).toBe(true); + }); + + test('pagination — limit', () => { + const data = astQueryData(undefined, dbPath, { limit: 3 }); + expect(data.results.length).toBe(3); + expect(data._pagination).toBeDefined(); + expect(data._pagination.total).toBeGreaterThan(3); + expect(data._pagination.hasMore).toBe(true); + }); + + test('pagination — offset', () => {/* NOTE(review): only the first result's name is compared across the two pages, so this relies on the result ordering putting different names at offsets 0 and 3 - confirm */ + const page1 = astQueryData(undefined, dbPath, { limit: 3, offset: 0 }); + const page2 = astQueryData(undefined, dbPath, { limit: 3, offset: 3 }); + expect(page1.results[0].name).not.toBe(page2.results[0].name); + }); + + test('parent node resolution', () => {/* with test files excluded, the only 'eval' node left is the one inserted with processId as its parent */ + const data = astQueryData('eval', dbPath, { noTests: true }); + expect(data.results.length).toBe(1); + const r = data.results[0]; + expect(r.parent).toBeDefined(); + expect(r.parent.name).toBe('processInput'); + expect(r.parent.kind).toBe('function'); + }); + + test('receiver field for calls', () => { + const data = astQueryData('console.log', dbPath); + expect(data.results.length).toBe(1); + expect(data.results[0].receiver).toBe('console'); + }); + + test('empty results for non-matching pattern', () => { + const 
data = astQueryData('nonexistent_xyz', dbPath); + expect(data.results.length).toBe(0); + expect(data.count).toBe(0); + }); + + test('combined kind + file filter', () => { + const data = astQueryData(undefined, dbPath, { kind: 'call', file: 'handler' }); + expect(data.results.every((r) => r.kind === 'call' && r.file.includes('handler'))).toBe(true); + expect(data.results.length).toBe(2); + }); +}); diff --git a/tests/integration/cfg.test.js b/tests/integration/cfg.test.js new file mode 100644 index 00000000..3fdbeab0 --- /dev/null +++ b/tests/integration/cfg.test.js @@ -0,0 +1,199 @@ +/** + * Integration tests for CFG queries. + * + * Uses a hand-crafted SQLite DB file in a temp directory with known CFG topology. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { cfgData, cfgToDOT, cfgToMermaid } from '../../src/cfg.js'; +import { initSchema } from '../../src/db.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { // insert one row into nodes and return its lastInsertRowid + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertBlock(db, fnNodeId, blockIndex, blockType, startLine, endLine, label) { // insert one cfg_blocks row owned by fnNodeId and return its lastInsertRowid + return db + .prepare( + 'INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) VALUES (?, ?, ?, ?, ?, ?)', + ) + .run(fnNodeId, blockIndex, blockType, startLine, endLine, label).lastInsertRowid; +} + +function insertEdge(db, fnNodeId, sourceBlockId, targetBlockId, kind) { // insert one cfg_edges row from sourceBlockId to targetBlockId; returns nothing + db.prepare( + 'INSERT INTO cfg_edges (function_node_id, source_block_id, target_block_id, kind) VALUES (?, ?, ?, ?)', + ).run(fnNodeId, sourceBlockId, targetBlockId, kind); +} + +// ─── Fixture DB ──────────────────────────────────────────────────────── + +let tmpDir, dbPath; + +beforeAll(() => { + 
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-cfg-')); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // Insert function nodes; testFn (in a tests/ file) gets no CFG blocks or edges below + const processId = insertNode(db, 'processItems', 'function', 'src/process.js', 10); + const helperId = insertNode(db, 'helper', 'function', 'src/helper.js', 5); + insertNode(db, 'testFn', 'function', 'tests/process.test.js', 1); + + // CFG for processItems: entry → body → condition → [true, false] → join → exit (7 blocks, 7 edges; block_index 0 is entry, 1 is exit) + const b0 = insertBlock(db, processId, 0, 'entry', null, null, null); + const b1 = insertBlock(db, processId, 1, 'exit', null, null, null); + const b2 = insertBlock(db, processId, 2, 'body', 10, 12, null); + const b3 = insertBlock(db, processId, 3, 'condition', 13, 13, 'if'); + const b4 = insertBlock(db, processId, 4, 'branch_true', 14, 15, 'then'); + const b5 = insertBlock(db, processId, 5, 'branch_false', 16, 17, 'else'); + const b6 = insertBlock(db, processId, 6, 'body', 18, 19, null); + + insertEdge(db, processId, b0, b2, 'fallthrough'); + insertEdge(db, processId, b2, b3, 'fallthrough'); + insertEdge(db, processId, b3, b4, 'branch_true'); + insertEdge(db, processId, b3, b5, 'branch_false'); + insertEdge(db, processId, b4, b6, 'fallthrough'); + insertEdge(db, processId, b5, b6, 'fallthrough'); + insertEdge(db, processId, b6, b1, 'fallthrough'); + + // CFG for helper: entry → body → exit (simple: 3 blocks, 2 edges; body reaches exit through a 'return' edge) + const h0 = insertBlock(db, helperId, 0, 'entry', null, null, null); + const h1 = insertBlock(db, helperId, 1, 'exit', null, null, null); + const h2 = insertBlock(db, helperId, 2, 'body', 5, 8, null); + + insertEdge(db, helperId, h0, h2, 'fallthrough'); + insertEdge(db, helperId, h2, h1, 'return'); + + db.close(); +}); + +afterAll(() => { // recursively delete the temp directory that holds the fixture DB + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Tests ───────────────────────────────────────────────────────────── 
+ +describe('cfgData', () => { + test('returns CFG blocks and edges for a known function', () => {/* fixture: processItems has 7 blocks and 7 edges; block_index 0 is the entry block and 1 the exit block */ + const data = cfgData('processItems', dbPath); + expect(data.results.length).toBe(1); + + const r = data.results[0]; + expect(r.name).toBe('processItems'); + expect(r.file).toBe('src/process.js'); + expect(r.summary.blockCount).toBe(7); + expect(r.summary.edgeCount).toBe(7); + expect(r.blocks[0].type).toBe('entry'); + expect(r.blocks[1].type).toBe('exit'); + }); + + test('returns edges with correct kinds', () => { + const data = cfgData('processItems', dbPath); + const r = data.results[0]; + const edgeKinds = r.edges.map((e) => e.kind); + expect(edgeKinds).toContain('branch_true'); + expect(edgeKinds).toContain('branch_false'); + expect(edgeKinds).toContain('fallthrough'); + }); + + test('simple function has return edge', () => { + const data = cfgData('helper', dbPath); + expect(data.results.length).toBe(1); + const r = data.results[0]; + expect(r.summary.blockCount).toBe(3); + expect(r.edges.some((e) => e.kind === 'return')).toBe(true); + }); + + test('returns empty results for non-existent function', () => { + const data = cfgData('nonexistent', dbPath); + expect(data.results.length).toBe(0); + }); + + test('noTests option excludes test file functions', () => {/* NOTE(review): the fixture gives testFn no CFG rows, so whether this passes because of noTests or because there is nothing to return depends on cfgData - confirm */ + const data = cfgData('testFn', dbPath, { noTests: true }); + expect(data.results.length).toBe(0); + }); + + test('file filter scopes results', () => { + const data = cfgData('processItems', dbPath, { file: 'helper.js' }); + expect(data.results.length).toBe(0); + + const data2 = cfgData('processItems', dbPath, { file: 'process.js' }); + expect(data2.results.length).toBe(1); + }); +}); + +describe('cfgToDOT', () => { + test('produces valid DOT output', () => { + const data = cfgData('processItems', dbPath); + const dot = cfgToDOT(data); + expect(dot).toContain('digraph'); + expect(dot).toContain('B0'); + expect(dot).toContain('->'); + expect(dot).toContain('branch_true'); + expect(dot).toContain('}'); + }); 
+ + test('entry/exit nodes use ellipse shape', () => { + const data = cfgData('processItems', dbPath); + const dot = cfgToDOT(data); + expect(dot).toMatch(/B0.*shape=ellipse/); + expect(dot).toMatch(/B1.*shape=ellipse/); + }); +}); + +describe('cfgToMermaid', () => { + test('produces valid Mermaid output', () => { + const data = cfgData('processItems', dbPath); + const mermaid = cfgToMermaid(data); + expect(mermaid).toContain('graph TD'); + expect(mermaid).toContain('B0'); + expect(mermaid).toContain('-->'); + expect(mermaid).toContain('branch_true'); + }); + + test('entry/exit use stadium shape', () => { + const data = cfgData('processItems', dbPath); + const mermaid = cfgToMermaid(data); + // Stadium shapes use (["..."]) + expect(mermaid).toMatch(/B0\(\[/); + expect(mermaid).toMatch(/B1\(\[/); + }); +}); + +describe('warning when no CFG tables', () => { + test('returns warning when DB has no CFG data', () => { + // Create a bare DB without cfg tables + const bareDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-cfg-bare-')); + fs.mkdirSync(path.join(bareDir, '.codegraph')); + const bareDbPath = path.join(bareDir, '.codegraph', 'graph.db'); + + const db = new Database(bareDbPath); + db.pragma('journal_mode = WAL'); + // Only create schema_version (pinned at 8) and nodes by hand instead of calling initSchema, so no cfg_* tables exist + db.exec(` + CREATE TABLE schema_version (version INTEGER NOT NULL DEFAULT 0); + INSERT INTO schema_version VALUES (8); + CREATE TABLE nodes (id INTEGER PRIMARY KEY, name TEXT, kind TEXT, file TEXT, line INTEGER); + `); + db.close(); + + const data = cfgData('anything', bareDbPath); + expect(data.warning).toMatch(/No CFG data/); + + fs.rmSync(bareDir, { recursive: true, force: true }); // NOTE(review): skipped if an expectation above throws, which would leave bareDir behind + }); +}); diff --git a/tests/parsers/ast-nodes.test.js b/tests/parsers/ast-nodes.test.js new file mode 100644 index 00000000..d9ca53f7 --- /dev/null +++ b/tests/parsers/ast-nodes.test.js @@ -0,0 +1,185 @@ +/** + * Tests for AST node extraction from parsed source code. 
+ * + * Parses JS fixtures through tree-sitter, runs AST extraction via buildAstNodes, + * and verifies the correct nodes are captured in the DB. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { buildAstNodes } from '../../src/ast.js'; +import { initSchema } from '../../src/db.js'; +import { parseFilesAuto } from '../../src/parser.js'; + +// ─── Fixture ────────────────────────────────────────────────────────── + +const FIXTURE_CODE = ` +export function processData(input) { + const result = new Map(); + const pattern = /^[a-z]+$/i; + const greeting = "hello world"; + + if (typeof input === 'string') { + eval(input); + } + + try { + const data = await fetch('/api/data'); + result.set('data', data); + } catch (err) { + throw new Error('fetch failed'); + } + + console.log(result); + return result; +} + +function helper() { + const re = /\\d{3}-\\d{4}/; + const msg = \`template string value\`; + return msg; +} +`; + +// ─── Setup ──────────────────────────────────────────────────────────── + +let tmpDir, dbPath, db; + +beforeAll(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-ast-extract-')); + const srcDir = path.join(tmpDir, 'src'); + fs.mkdirSync(srcDir, { recursive: true }); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + + // Write fixture file + const fixturePath = path.join(srcDir, 'fixture.js'); + fs.writeFileSync(fixturePath, FIXTURE_CODE); + + // Parse fixture using parseFilesAuto (preserves _tree for AST walk) + const allSymbols = await parseFilesAuto([fixturePath], tmpDir, { engine: 'wasm' }); + const symbols = allSymbols.get('src/fixture.js'); + if (!symbols) throw new Error('Failed to parse fixture file'); + + // Create DB and schema + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + 
initSchema(db); + + // Insert nodes for definitions so parent resolution works + const insertNode = db.prepare( + 'INSERT INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + ); + for (const def of symbols.definitions) { + insertNode.run(def.name, def.kind, 'src/fixture.js', def.line, def.endLine); + } + + // Build AST nodes (NOTE(review): the fixture's processData uses `await` without being declared async, so extraction depends on the parser accepting that - confirm this is intended) + await buildAstNodes(db, allSymbols, tmpDir); +}); + +afterAll(() => { // close the DB only if beforeAll got as far as opening it, then delete tmpDir + if (db) db.close(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Helpers ────────────────────────────────────────────────────────── + +function queryAstNodes(kind) { // all ast_nodes rows of the given kind, ordered by line + return db.prepare('SELECT * FROM ast_nodes WHERE kind = ? ORDER BY line').all(kind); +} + +function queryAllAstNodes() { // every ast_nodes row, ordered by line + return db.prepare('SELECT * FROM ast_nodes ORDER BY line').all(); +} + +// ─── Tests ──────────────────────────────────────────────────────────── + +describe('buildAstNodes — JS extraction', () => { + test('captures call nodes from symbols.calls', () => { + const calls = queryAstNodes('call'); + expect(calls.length).toBeGreaterThanOrEqual(1); + const callNames = calls.map((c) => c.name); + // At least one of eval, fetch, console.log must be among calls (which ones depends on parser extraction) + expect(callNames.some((n) => n === 'eval' || n === 'fetch' || n === 'console.log')).toBe(true); + }); + + test('captures new_expression as kind:new', () => { + const nodes = queryAstNodes('new'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + const names = nodes.map((n) => n.name); + expect(names).toContain('Map'); + // Note: `throw new Error(...)` is captured as kind:throw, not kind:new + // The new_expression inside throw is not separately emitted + }); + + test('captures string literals as kind:string', () => { + const nodes = queryAstNodes('string'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + const names = nodes.map((n) => n.name); + // "hello world" should be captured, short strings like 'string' might vary + expect(names.some((n) => n.includes('hello 
world'))).toBe(true); + }); + + test('skips trivial strings shorter than 2 chars', () => { + const nodes = queryAstNodes('string'); + // No single-char or empty strings should be present + for (const node of nodes) { + expect(node.name.length).toBeGreaterThanOrEqual(2); + } + }); + + test('captures regex as kind:regex', () => { + const nodes = queryAstNodes('regex'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + // At least one regex pattern should be present + expect(nodes.some((n) => n.name.includes('[a-z]') || n.name.includes('\\d'))).toBe(true); + }); + + test('captures throw as kind:throw', () => { + const nodes = queryAstNodes('throw'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + // throw new Error('fetch failed') → name should be "Error" + expect(nodes.some((n) => n.name === 'Error')).toBe(true); + }); + + test('captures await as kind:await', () => { + const nodes = queryAstNodes('await'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + // await fetch('/api/data') → name should include "fetch" + expect(nodes.some((n) => n.name.includes('fetch'))).toBe(true); + }); + + test('parent_node_id is resolved for nodes inside functions', () => { + const all = queryAllAstNodes(); + const withParent = all.filter((n) => n.parent_node_id != null); + expect(withParent.length).toBeGreaterThan(0); + + // Verify the parent exists in the nodes table + for (const node of withParent) { + const parent = db.prepare('SELECT * FROM nodes WHERE id = ?').get(node.parent_node_id); + expect(parent).toBeDefined(); + expect(['function', 'method', 'class']).toContain(parent.kind); + } + }); + + test('all inserted nodes have valid kinds', () => { + const all = queryAllAstNodes(); + const validKinds = new Set(['call', 'new', 'string', 'regex', 'throw', 'await']); + for (const node of all) { + expect(validKinds.has(node.kind)).toBe(true); + } + }); + + test('text column is truncated to max length', () => { + const all = queryAllAstNodes(); + for (const node of all) { + if 
(node.text) { + expect(node.text.length).toBeLessThanOrEqual(201); // 200 + possible ellipsis char + } + } + }); +}); diff --git a/tests/unit/cfg.test.js b/tests/unit/cfg.test.js new file mode 100644 index 00000000..99a52471 --- /dev/null +++ b/tests/unit/cfg.test.js @@ -0,0 +1,457 @@ +/** + * Unit tests for src/cfg.js — buildFunctionCFG + * + * Hand-crafted code snippets parsed with tree-sitter to verify + * correct CFG block/edge construction. + */ + +import { beforeAll, describe, expect, it } from 'vitest'; +import { buildFunctionCFG } from '../../src/cfg.js'; +import { COMPLEXITY_RULES } from '../../src/complexity.js'; +import { createParsers } from '../../src/parser.js'; + +let jsParser; + +beforeAll(async () => { // create the parsers once and keep the one registered under 'javascript' + const parsers = await createParsers(); + jsParser = parsers.get('javascript'); +}); + +function parse(code) { // parse a snippet with jsParser and return the tree's root node + const tree = jsParser.parse(code); + return tree.rootNode; +} + +function getFunctionNode(root) { // depth-first, pre-order search for the first node whose type is in the javascript functionNodes set; null when there is none + const rules = COMPLEXITY_RULES.get('javascript'); + function find(node) { + if (rules.functionNodes.has(node.type)) return node; + for (let i = 0; i < node.childCount; i++) { + const result = find(node.child(i)); + if (result) return result; + } + return null; + } + return find(root); +} + +function buildCFG(code) { // parse a snippet and build the CFG of the first function found in it; throws when the snippet has no function + const root = parse(code); + const funcNode = getFunctionNode(root); + if (!funcNode) throw new Error('No function found in code snippet'); + return buildFunctionCFG(funcNode, 'javascript'); +} + +function hasEdge(cfg, sourceIndex, targetIndex, kind) { // true when cfg.edges contains an edge with exactly this source index, target index and kind + return cfg.edges.some( + (e) => e.sourceIndex === sourceIndex && e.targetIndex === targetIndex && e.kind === kind, + ); +} + +function blockByType(cfg, type) { // every block of cfg whose type equals the given type (possibly an empty array) + return cfg.blocks.filter((b) => b.type === type); +} + +// ─── Tests ────────────────────────────────────────────────────────────── + +describe('buildFunctionCFG', () => { + describe('empty / simple functions', () => { + it('empty function: ENTRY → EXIT', () => { + const cfg = buildCFG('function empty() {}'); + 
expect(cfg.blocks.length).toBeGreaterThanOrEqual(2); + const entry = cfg.blocks.find((b) => b.type === 'entry'); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(entry).toBeDefined(); + expect(exit).toBeDefined(); + expect(hasEdge(cfg, entry.index, exit.index, 'fallthrough')).toBe(true); + }); + + it('simple function with no branching: ENTRY → body → EXIT', () => { + const cfg = buildCFG(` + function simple() { + const a = 1; + const b = 2; + return a + b; + } + `); + const entry = cfg.blocks.find((b) => b.type === 'entry'); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(entry).toBeDefined(); + expect(exit).toBeDefined(); + // Should have return edge to exit + expect(cfg.edges.some((e) => e.targetIndex === exit.index && e.kind === 'return')).toBe(true); + }); + + it('function with only statements (no return): body falls through to EXIT', () => { + const cfg = buildCFG(` + function noReturn() { + const x = 1; + console.log(x); + } + `); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(cfg.edges.some((e) => e.targetIndex === exit.index && e.kind === 'fallthrough')).toBe( + true, + ); + }); + }); + + describe('if statements', () => { + it('single if (no else): condition → [true branch, join]', () => { + const cfg = buildCFG(` + function singleIf(x) { + if (x > 0) { + console.log('positive'); + } + return x; + } + `); + const conditions = blockByType(cfg, 'condition'); + expect(conditions.length).toBe(1); + const trueBlocks = blockByType(cfg, 'branch_true'); + expect(trueBlocks.length).toBe(1); + // Condition has branch_true and branch_false edges (only their existence is asserted; the branch_false target, presumably the join block, is not checked) + const condIdx = conditions[0].index; + expect(cfg.edges.some((e) => e.sourceIndex === condIdx && e.kind === 'branch_true')).toBe( + true, + ); + expect(cfg.edges.some((e) => e.sourceIndex === condIdx && e.kind === 'branch_false')).toBe( + true, + ); + }); + + it('if/else: condition → [true, false] → join', () => { + const cfg = buildCFG(` + function ifElse(x) { + if (x 
> 0) { + return 'positive'; + } else { + return 'non-positive'; + } + } + `); + const conditions = blockByType(cfg, 'condition'); + expect(conditions.length).toBe(1); + const trueBlocks = blockByType(cfg, 'branch_true'); + const falseBlocks = blockByType(cfg, 'branch_false'); + expect(trueBlocks.length).toBe(1); + expect(falseBlocks.length).toBe(1); + }); + + it('if/else-if/else chain', () => { + const cfg = buildCFG(` + function chain(x) { + if (x > 10) { + return 'big'; + } else if (x > 0) { + return 'small'; + } else { + return 'negative'; + } + } + `); + // Should have at least 2 conditions (if + else-if) + const conditions = blockByType(cfg, 'condition'); + expect(conditions.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('loops', () => { + it('while loop: header → [body → loop_back, exit]', () => { + const cfg = buildCFG(` + function whileLoop(n) { + let i = 0; + while (i < n) { + i++; + } + return i; + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + const bodyBlocks = blockByType(cfg, 'loop_body'); + expect(bodyBlocks.length).toBe(1); + // Header has branch_true to body and loop_exit + const hIdx = headers[0].index; + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'branch_true')).toBe(true); + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'loop_exit')).toBe(true); + // Body has loop_back to header + expect(cfg.edges.some((e) => e.kind === 'loop_back' && e.targetIndex === hIdx)).toBe(true); + }); + + it('for loop: header → [body → loop_back, exit]', () => { + const cfg = buildCFG(` + function forLoop() { + for (let i = 0; i < 10; i++) { + console.log(i); + } + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + expect(headers[0].label).toBe('for'); + expect(cfg.edges.some((e) => e.kind === 'loop_back')).toBe(true); + expect(cfg.edges.some((e) => e.kind === 'loop_exit')).toBe(true); + }); + + it('for-in loop', () => { + 
const cfg = buildCFG(` + function forIn(obj) { + for (const key in obj) { + console.log(key); + } + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + expect(cfg.edges.some((e) => e.kind === 'loop_back')).toBe(true); + }); + + it('do-while loop: body → condition → [loop_back, exit]', () => { + const cfg = buildCFG(` + function doWhile() { + let i = 0; + do { + i++; + } while (i < 10); + return i; + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + expect(headers[0].label).toBe('do-while'); + const bodyBlocks = blockByType(cfg, 'loop_body'); + expect(bodyBlocks.length).toBe(1); + // Condition has loop_back to body and loop_exit + const hIdx = headers[0].index; + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'loop_back')).toBe(true); + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'loop_exit')).toBe(true); + }); + }); + + describe('break and continue', () => { // NOTE(review): both tests only assert that an edge of the kind exists; the edge's target (loop exit or loop header) is not checked + it('break in loop: terminates → loop exit', () => { + const cfg = buildCFG(` + function withBreak() { + for (let i = 0; i < 10; i++) { + if (i === 5) break; + console.log(i); + } + } + `); + expect(cfg.edges.some((e) => e.kind === 'break')).toBe(true); + }); + + it('continue in loop: terminates → loop header', () => { + const cfg = buildCFG(` + function withContinue() { + for (let i = 0; i < 10; i++) { + if (i % 2 === 0) continue; + console.log(i); + } + } + `); + expect(cfg.edges.some((e) => e.kind === 'continue')).toBe(true); + }); + }); + + describe('switch statement', () => { + it('switch/case: header → each case → join', () => { + const cfg = buildCFG(` + function switchCase(x) { + switch (x) { + case 1: + return 'one'; + case 2: + return 'two'; + default: + return 'other'; + } + } + `); + const conditions = cfg.blocks.filter((b) => b.type === 'condition' && b.label === 'switch'); + expect(conditions.length).toBe(1); + const caseBlocks = blockByType(cfg, 'case'); + 
expect(caseBlocks.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('try/catch/finally', () => { + it('try/catch: try body → [catch via exception, join]', () => { + const cfg = buildCFG(` + function tryCatch() { + try { + riskyCall(); + } catch (e) { + console.error(e); + } + } + `); + const catchBlocks = blockByType(cfg, 'catch'); + expect(catchBlocks.length).toBe(1); + expect(cfg.edges.some((e) => e.kind === 'exception')).toBe(true); + }); + + it('try/catch/finally: try → [catch, finally] → exit', () => { + const cfg = buildCFG(` + function tryCatchFinally() { + try { + riskyCall(); + } catch (e) { + console.error(e); + } finally { + cleanup(); + } + } + `); + const catchBlocks = blockByType(cfg, 'catch'); + const finallyBlocks = blockByType(cfg, 'finally'); + expect(catchBlocks.length).toBe(1); + expect(finallyBlocks.length).toBe(1); + }); + + it('try/finally (no catch)', () => { + const cfg = buildCFG(` + function tryFinally() { + try { + riskyCall(); + } finally { + cleanup(); + } + } + `); + const finallyBlocks = blockByType(cfg, 'finally'); + expect(finallyBlocks.length).toBe(1); + }); + }); + + describe('early return and throw', () => { + it('early return terminates path → EXIT', () => { + const cfg = buildCFG(` + function earlyReturn(x) { + if (x < 0) { + return -1; + } + return x * 2; + } + `); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + const returnEdges = cfg.edges.filter( + (e) => e.targetIndex === exit.index && e.kind === 'return', + ); + // Two returns: the early return and the final return + expect(returnEdges.length).toBe(2); + }); + + it('throw terminates path → EXIT via exception', () => { + const cfg = buildCFG(` + function throwError(x) { + if (x < 0) { + throw new Error('negative'); + } + return x; + } + `); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(cfg.edges.some((e) => e.targetIndex === exit.index && e.kind === 'exception')).toBe( + true, + ); + }); + }); + + describe('nested structures', () 
=> { + it('nested loops with break resolves to correct enclosing loop', () => {/* NOTE(review): asserts two loop headers and that some 'break' edge exists; it does not check which loop the break edge targets */ + const cfg = buildCFG(` + function nested() { + for (let i = 0; i < 10; i++) { + for (let j = 0; j < 10; j++) { + if (j === 5) break; + } + } + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(2); + expect(cfg.edges.some((e) => e.kind === 'break')).toBe(true); + }); + + it('if inside loop', () => { + const cfg = buildCFG(` + function ifInLoop() { + for (let i = 0; i < 10; i++) { + if (i > 5) { + console.log('big'); + } else { + console.log('small'); + } + } + } + `); + expect(blockByType(cfg, 'loop_header').length).toBe(1); + expect(blockByType(cfg, 'condition').length).toBe(1); + expect(blockByType(cfg, 'branch_true').length).toBe(1); + expect(blockByType(cfg, 'branch_false').length).toBe(1); + }); + }); + + describe('arrow functions and methods', () => { + it('arrow function with block body', () => { + const cfg = buildCFG(` + const fn = (x) => { + if (x) return 1; + return 0; + }; + `); + expect(cfg.blocks.find((b) => b.type === 'entry')).toBeDefined(); + expect(cfg.blocks.find((b) => b.type === 'exit')).toBeDefined(); + }); + + it('arrow function with expression body: ENTRY → EXIT', () => { + const cfg = buildCFG(` + const fn = (x) => x + 1; + `); + const entry = cfg.blocks.find((b) => b.type === 'entry'); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(entry).toBeDefined(); + expect(exit).toBeDefined(); + // Expression body: entry → body → exit + expect(cfg.blocks.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('block and edge counts', () => { + it('complex function has reasonable block/edge counts', () => { + const cfg = buildCFG(` + function complex(arr) { + if (!arr) return null; + const result = []; + for (const item of arr) { + if (item.skip) continue; + try { + result.push(transform(item)); + } catch (e) { + console.error(e); + } + } + return result; + } + `); + // Should have meaningful structure + 
expect(cfg.blocks.length).toBeGreaterThan(5); + expect(cfg.edges.length).toBeGreaterThan(5); + // Must have entry and exit + expect(cfg.blocks.find((b) => b.type === 'entry')).toBeDefined(); + expect(cfg.blocks.find((b) => b.type === 'exit')).toBeDefined(); + }); + }); + + describe('unsupported language', () => { + it('returns empty CFG for unsupported language', () => { // the node is still parsed as JavaScript; only the language id handed to buildFunctionCFG is unsupported + const root = parse('function foo() { return 1; }'); + const funcNode = getFunctionNode(root); + const cfg = buildFunctionCFG(funcNode, 'haskell'); + expect(cfg.blocks).toEqual([]); + expect(cfg.edges).toEqual([]); + }); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 4dc2c43a..68ab0718 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -37,8 +37,10 @@ const ALL_TOOL_NAMES = [ 'batch_query', 'triage', 'branch_compare', + 'cfg', 'dataflow', 'check', + 'ast_query', 'list_repos', ];