From 1f1cff037600d8af9a3cdd6cd83fd2b10724bb43 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 17:29:02 -0700 Subject: [PATCH 01/12] feat: add normalizeSymbol utility for stable JSON schema Add normalizeSymbol(row, db, hashCache) that returns a consistent 7-field symbol shape (name, kind, file, line, endLine, role, fileHash) across all query and search commands. Update queryNameData, fnDepsData, fnImpactData, explainFunctionImpl, listFunctionsData, rolesData, whereSymbolImpl in queries.js and searchData, multiSearchData, ftsSearchData, hybridSearchData in embedder.js to use normalizeSymbol. Update SQL in listFunctionsData, rolesData, iterListFunctions, iterRoles, _prepareSearch, and ftsSearchData to include end_line and role columns. Export normalizeSymbol from index.js. Add docs/json-schema.md documenting the stable schema. Add 8 unit tests and 7 integration schema conformance tests. Impact: 14 functions changed, 42 affected --- docs/json-schema.md | 228 ++++++++++++++++++++++++++++ src/embedder.js | 32 ++-- src/index.js | 1 + src/queries.js | 103 +++++++++---- tests/integration/queries.test.js | 85 +++++++++++ tests/unit/normalize-symbol.test.js | 114 ++++++++++++++ 6 files changed, 515 insertions(+), 48 deletions(-) create mode 100644 docs/json-schema.md create mode 100644 tests/unit/normalize-symbol.test.js diff --git a/docs/json-schema.md b/docs/json-schema.md new file mode 100644 index 00000000..e45c925f --- /dev/null +++ b/docs/json-schema.md @@ -0,0 +1,228 @@ +# JSON Schema — Stable Symbol Metadata + +Every codegraph command that returns symbol data includes a **stable base shape** of 7 fields. Commands may add extra fields (e.g. `similarity`, `callees`), but these 7 are always present. + +## Base Symbol Shape + +| Field | Type | Description | +|------------|-------------------|-------------| +| `name` | `string` | Symbol identifier (e.g. 
`"buildGraph"`, `"MyClass.method"`) | +| `kind` | `string` | Symbol kind — see [Valid Kinds](#valid-kinds) | +| `file` | `string` | Repo-relative file path (forward slashes) | +| `line` | `number` | 1-based start line | +| `endLine` | `number \| null` | 1-based end line, or `null` if unavailable | +| `role` | `string \| null` | Architectural role classification, or `null` if unclassified — see [Valid Roles](#valid-roles) | +| `fileHash` | `string \| null` | SHA-256 hash of the file at build time, or `null` if unavailable | + +### Valid Kinds + +``` +function method class interface type struct enum trait record module +``` + +Language-specific types use their native kind (e.g. Go structs use `struct`, Rust traits use `trait`, Ruby modules use `module`). + +### Valid Roles + +``` +entry core utility adapter dead leaf +``` + +Roles are assigned during `codegraph build` based on call-graph topology. Symbols without enough signal remain `null`. + +## Command Envelopes + +### `where` (symbol mode) + +```jsonc +{ + "target": "buildGraph", + "mode": "symbol", + "results": [ + { + "name": "buildGraph", // ← base 7 + "kind": "function", + "file": "src/builder.js", + "line": 42, + "endLine": 180, + "role": "core", + "fileHash": "abc123...", + "exported": true, // ← command-specific + "uses": [ // lightweight refs (4 fields) + { "name": "parseFile", "file": "src/parser.js", "line": 10 } + ] + } + ] +} +``` + +### `query` + +```jsonc +{ + "query": "buildGraph", + "results": [ + { + "name": "buildGraph", // ← base 7 + "kind": "function", + "file": "src/builder.js", + "line": 42, + "endLine": 180, + "role": "core", + "fileHash": "abc123...", + "callees": [ // lightweight refs + { "name": "parseFile", "kind": "function", "file": "src/parser.js", "line": 10, "edgeKind": "calls" } + ], + "callers": [ + { "name": "main", "kind": "function", "file": "src/cli.js", "line": 5, "edgeKind": "calls" } + ] + } + ] +} +``` + +### `fn` (fnDeps) + +```jsonc +{ + "name": "buildGraph", + 
"results": [ + { + "name": "buildGraph", // ← base 7 + "kind": "function", + "file": "src/builder.js", + "line": 42, + "endLine": 180, + "role": "core", + "fileHash": "abc123...", + "callees": [/* lightweight */], + "callers": [/* lightweight */], + "transitiveCallers": { "2": [/* lightweight */] } + } + ] +} +``` + +### `fn-impact` + +```jsonc +{ + "name": "buildGraph", + "results": [ + { + "name": "buildGraph", // ← base 7 + "kind": "function", + "file": "src/builder.js", + "line": 42, + "endLine": 180, + "role": "core", + "fileHash": "abc123...", + "levels": { "1": [/* lightweight */], "2": [/* lightweight */] }, + "totalDependents": 5 + } + ] +} +``` + +### `explain` (function mode) + +```jsonc +{ + "kind": "function", + "results": [ + { + "name": "buildGraph", // ← base 7 + "kind": "function", + "file": "src/builder.js", + "line": 42, + "endLine": 180, + "role": "core", + "fileHash": "abc123...", + "lineCount": 138, // ← command-specific + "summary": "...", + "signature": "...", + "complexity": { ... }, + "callees": [/* lightweight */], + "callers": [/* lightweight */], + "relatedTests": [/* { file } */] + } + ] +} +``` + +### `search` / `multi-search` / `fts` / `hybrid` + +```jsonc +{ + "results": [ + { + "name": "buildGraph", // ← base 7 + "kind": "function", + "file": "src/builder.js", + "line": 42, + "endLine": 180, + "role": "core", + "fileHash": "abc123...", + "similarity": 0.85 // ← search-specific (varies by mode) + } + ] +} +``` + +### `list-functions` + +```jsonc +{ + "count": 42, + "functions": [ + { + "name": "buildGraph", // ← base 7 + "kind": "function", + "file": "src/builder.js", + "line": 42, + "endLine": 180, + "role": "core", + "fileHash": "abc123..." 
+ } + ] +} +``` + +### `roles` + +```jsonc +{ + "count": 42, + "summary": { "core": 10, "utility": 20, "entry": 5, "leaf": 7 }, + "symbols": [ + { + "name": "buildGraph", // ← base 7 + "kind": "function", + "file": "src/builder.js", + "line": 42, + "endLine": 180, + "role": "core", + "fileHash": "abc123..." + } + ] +} +``` + +## Lightweight Inner References + +Nested/secondary references (callees, callers, transitive hops, path nodes) use a lightweight 4-field shape: + +| Field | Type | +|--------|----------| +| `name` | `string` | +| `kind` | `string` | +| `file` | `string` | +| `line` | `number` | + +Some contexts add extra fields like `edgeKind` or `viaHierarchy`. + +## Notes + +- `variable` is not a tracked kind — codegraph tracks function/type-level symbols only. +- Iterator functions (`iterListFunctions`, `iterRoles`) yield `endLine` and `role` but not `fileHash` (streaming avoids holding DB open for per-row hash lookups). +- The `normalizeSymbol(row, db, hashCache)` utility is exported from both `src/queries.js` and `src/index.js` for programmatic consumers. diff --git a/src/embedder.js b/src/embedder.js index 265f12a6..c715109e 100644 --- a/src/embedder.js +++ b/src/embedder.js @@ -4,6 +4,7 @@ import path from 'node:path'; import { createInterface } from 'node:readline'; import { closeDb, findDbPath, openDb, openReadonlyOrFail } from './db.js'; import { info, warn } from './logger.js'; +import { normalizeSymbol } from './queries.js'; /** * Split an identifier into readable words. 
@@ -582,7 +583,7 @@ function _prepareSearch(customDbPath, opts = {}) { const noTests = opts.noTests || false; const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./; let sql = ` - SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line + SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line, n.end_line, n.role FROM embeddings e JOIN nodes n ON e.node_id = n.id `; @@ -638,6 +639,7 @@ export async function searchData(query, customDbPath, opts = {}) { return null; } + const hc = new Map(); const results = []; for (const row of rows) { const vec = new Float32Array(new Uint8Array(row.vector).buffer); @@ -645,10 +647,7 @@ export async function searchData(query, customDbPath, opts = {}) { if (sim >= minScore) { results.push({ - name: row.name, - kind: row.kind, - file: row.file, - line: row.line, + ...normalizeSymbol(row, db, hc), similarity: sim, }); } @@ -734,14 +733,12 @@ export async function multiSearchData(queries, customDbPath, opts = {}) { } // Build results sorted by RRF score + const hc = new Map(); const results = []; for (const [rowIndex, entry] of fusionMap) { const row = rows[rowIndex]; results.push({ - name: row.name, - kind: row.kind, - file: row.file, - line: row.line, + ...normalizeSymbol(row, db, hc), rrf: entry.rrfScore, queryScores: entry.queryScores, }); @@ -804,7 +801,7 @@ export function ftsSearchData(query, customDbPath, opts = {}) { let sql = ` SELECT f.rowid AS node_id, rank AS bm25_score, - n.name, n.kind, n.file, n.line + n.name, n.kind, n.file, n.line, n.end_line, n.role FROM fts_index f JOIN nodes n ON f.rowid = n.id WHERE fts_index MATCH ? 
@@ -841,16 +838,13 @@ export function ftsSearchData(query, customDbPath, opts = {}) { rows = rows.filter((row) => !TEST_PATTERN.test(row.file)); } - db.close(); - + const hc = new Map(); const results = rows.slice(0, limit).map((row) => ({ - name: row.name, - kind: row.kind, - file: row.file, - line: row.line, + ...normalizeSymbol(row, db, hc), bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display })); + db.close(); return { results }; } @@ -924,6 +918,9 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { kind: item.kind, file: item.file, line: item.line, + endLine: item.endLine ?? null, + role: item.role ?? null, + fileHash: item.fileHash ?? null, rrfScore: 0, bm25Score: null, bm25Rank: null, @@ -955,6 +952,9 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { kind: e.kind, file: e.file, line: e.line, + endLine: e.endLine, + role: e.role, + fileHash: e.fileHash, rrf: e.rrfScore, bm25Score: e.bm25Score, bm25Rank: e.bm25Rank, diff --git a/src/index.js b/src/index.js index ae8f3f43..62089ada 100644 --- a/src/index.js +++ b/src/index.js @@ -106,6 +106,7 @@ export { iterWhere, kindIcon, moduleMapData, + normalizeSymbol, pathData, queryNameData, rolesData, diff --git a/src/queries.js b/src/queries.js index c490744f..8a0ef0f1 100644 --- a/src/queries.js +++ b/src/queries.js @@ -207,6 +207,7 @@ export function queryNameData(name, customDbPath, opts = {}) { return { query: name, results: [] }; } + const hc = new Map(); const results = nodes.map((node) => { let callees = db .prepare(` @@ -230,10 +231,7 @@ export function queryNameData(name, customDbPath, opts = {}) { } return { - name: node.name, - kind: node.kind, - file: node.file, - line: node.line, + ...normalizeSymbol(node, db, hc), callees: callees.map((c) => ({ name: c.name, kind: c.kind, @@ -402,6 +400,7 @@ export function fnDepsData(name, customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); const depth = opts.depth || 3; const 
noTests = opts.noTests || false; + const hc = new Map(); const nodes = findMatchingNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); if (nodes.length === 0) { @@ -493,10 +492,7 @@ export function fnDepsData(name, customDbPath, opts = {}) { } return { - name: node.name, - kind: node.kind, - file: node.file, - line: node.line, + ...normalizeSymbol(node, db, hc), callees: filteredCallees.map((c) => ({ name: c.name, kind: c.kind, @@ -523,6 +519,7 @@ export function fnImpactData(name, customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); const maxDepth = opts.depth || 5; const noTests = opts.noTests || false; + const hc = new Map(); const nodes = findMatchingNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); if (nodes.length === 0) { @@ -559,10 +556,7 @@ export function fnImpactData(name, customDbPath, opts = {}) { } return { - name: node.name, - kind: node.kind, - file: node.file, - line: node.line, + ...normalizeSymbol(node, db, hc), levels, totalDependents: visited.size - 1, }; @@ -1194,14 +1188,16 @@ export function listFunctionsData(customDbPath, opts = {}) { let rows = db .prepare( - `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, + `SELECT name, kind, file, line, end_line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, ) .all(...params); if (noTests) rows = rows.filter((r) => !isTestFile(r.file)); + const hc = new Map(); + const functions = rows.map((r) => normalizeSymbol(r, db, hc)); db.close(); - const base = { count: rows.length, functions: rows }; + const base = { count: functions.length, functions }; return paginateResult(base, 'functions', { limit: opts.limit, offset: opts.offset }); } @@ -1234,11 +1230,18 @@ export function* iterListFunctions(customDbPath, opts = {}) { } const stmt = db.prepare( - `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, + `SELECT name, kind, file, line, 
end_line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, ); for (const row of stmt.iterate(...params)) { if (noTests && isTestFile(row.file)) continue; - yield { name: row.name, kind: row.kind, file: row.file, line: row.line, role: row.role }; + yield { + name: row.name, + kind: row.kind, + file: row.file, + line: row.line, + endLine: row.end_line ?? null, + role: row.role ?? null, + }; } } finally { db.close(); @@ -1252,7 +1255,7 @@ export function* iterListFunctions(customDbPath, opts = {}) { * @param {boolean} [opts.noTests] * @param {string} [opts.role] * @param {string} [opts.file] - * @yields {{ name: string, kind: string, file: string, line: number, role: string }} + * @yields {{ name: string, kind: string, file: string, line: number, endLine: number|null, role: string }} */ export function* iterRoles(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); @@ -1271,11 +1274,18 @@ export function* iterRoles(customDbPath, opts = {}) { } const stmt = db.prepare( - `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY role, file, line`, + `SELECT name, kind, file, line, end_line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY role, file, line`, ); for (const row of stmt.iterate(...params)) { if (noTests && isTestFile(row.file)) continue; - yield { name: row.name, kind: row.kind, file: row.file, line: row.line, role: row.role }; + yield { + name: row.name, + kind: row.kind, + file: row.file, + line: row.line, + endLine: row.end_line ?? null, + role: row.role ?? null, + }; } } finally { db.close(); @@ -2457,6 +2467,7 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); if (nodes.length === 0) return []; + const hc = new Map(); return nodes.slice(0, 10).map((node) => { const fileLines = getFileLines(node.file); const lineCount = node.end_line ? 
node.end_line - node.line + 1 : null; @@ -2514,12 +2525,7 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { } return { - name: node.name, - kind: node.kind, - file: node.file, - line: node.line, - role: node.role || null, - endLine: node.end_line || null, + ...normalizeSymbol(node, db, hc), lineCount, summary, signature, @@ -2732,6 +2738,40 @@ export function explain(target, customDbPath, opts = {}) { // ─── whereData ────────────────────────────────────────────────────────── +function getFileHash(db, file) { + const row = db.prepare('SELECT hash FROM file_hashes WHERE file = ?').get(file); + return row ? row.hash : null; +} + +/** + * Normalize a raw DB/query row into the stable 7-field symbol shape. + * @param {object} row - Raw row (from SELECT * or explicit columns) + * @param {object} [db] - Open DB handle; when null, fileHash will be null + * @param {Map} [hashCache] - Optional per-file cache to avoid repeated getFileHash calls + * @returns {{ name: string, kind: string, file: string, line: number, endLine: number|null, role: string|null, fileHash: string|null }} + */ +export function normalizeSymbol(row, db, hashCache) { + let fileHash = null; + if (db) { + if (hashCache) { + if (!hashCache.has(row.file)) { + hashCache.set(row.file, getFileHash(db, row.file)); + } + fileHash = hashCache.get(row.file); + } else { + fileHash = getFileHash(db, row.file); + } + } + return { + name: row.name, + kind: row.kind, + file: row.file, + line: row.line, + endLine: row.end_line ?? row.endLine ?? null, + role: row.role ?? 
null, + fileHash, + }; +} function whereSymbolImpl(db, target, noTests) { const placeholders = ALL_SYMBOL_KINDS.map(() => '?').join(', '); let nodes = db @@ -2741,6 +2781,7 @@ function whereSymbolImpl(db, target, noTests) { .all(`%${target}%`, ...ALL_SYMBOL_KINDS); if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); + const hc = new Map(); return nodes.map((node) => { const crossFileCallers = db .prepare( @@ -2759,11 +2800,7 @@ function whereSymbolImpl(db, target, noTests) { if (noTests) uses = uses.filter((u) => !isTestFile(u.file)); return { - name: node.name, - kind: node.kind, - file: node.file, - line: node.line, - role: node.role || null, + ...normalizeSymbol(node, db, hc), exported, uses: uses.map((u) => ({ name: u.name, file: u.file, line: u.line })), }; @@ -2908,7 +2945,7 @@ export function rolesData(customDbPath, opts = {}) { let rows = db .prepare( - `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY role, file, line`, + `SELECT name, kind, file, line, end_line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY role, file, line`, ) .all(...params); @@ -2919,8 +2956,10 @@ export function rolesData(customDbPath, opts = {}) { summary[r.role] = (summary[r.role] || 0) + 1; } + const hc = new Map(); + const symbols = rows.map((r) => normalizeSymbol(r, db, hc)); db.close(); - const base = { count: rows.length, summary, symbols: rows }; + const base = { count: symbols.length, summary, symbols }; return paginateResult(base, 'symbols', { limit: opts.limit, offset: opts.offset }); } diff --git a/tests/integration/queries.test.js b/tests/integration/queries.test.js index 69cf916b..1bbbdfa6 100644 --- a/tests/integration/queries.test.js +++ b/tests/integration/queries.test.js @@ -32,6 +32,7 @@ import { fnDepsData, fnImpactData, impactAnalysisData, + listFunctionsData, moduleMapData, pathData, queryNameData, @@ -101,6 +102,16 @@ beforeAll(() => { // Low-confidence call edge for quality tests insertEdge(db, 
formatResponse, validateToken, 'calls', 0.3); + // File hashes (for fileHash exposure) + for (const f of ['auth.js', 'middleware.js', 'routes.js', 'utils.js', 'auth.test.js']) { + db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run( + f, + `hash_${f.replace('.', '_')}`, + Date.now(), + 100, + ); + } + db.close(); }); @@ -645,3 +656,77 @@ describe('noTests filtering', () => { expect(filteredFiles).not.toContain('auth.test.js'); }); }); + +// ─── Stable symbol schema conformance ────────────────────────────────── + +const STABLE_FIELDS = ['name', 'kind', 'file', 'line', 'endLine', 'role', 'fileHash']; + +function expectStableSymbol(sym) { + for (const field of STABLE_FIELDS) { + expect(sym).toHaveProperty(field); + } + expect(typeof sym.name).toBe('string'); + expect(typeof sym.kind).toBe('string'); + expect(typeof sym.file).toBe('string'); + expect(typeof sym.line).toBe('number'); + // endLine, role, fileHash may be null + expect(sym.endLine === null || typeof sym.endLine === 'number').toBe(true); + expect(sym.role === null || typeof sym.role === 'string').toBe(true); + expect(sym.fileHash === null || typeof sym.fileHash === 'string').toBe(true); +} + +describe('stable symbol schema', () => { + test('queryNameData results have all 7 stable fields', () => { + const data = queryNameData('authenticate', dbPath); + expect(data.results.length).toBeGreaterThan(0); + for (const r of data.results) { + expectStableSymbol(r); + } + }); + + test('fnDepsData results have all 7 stable fields', () => { + const data = fnDepsData('handleRoute', dbPath); + expect(data.results.length).toBeGreaterThan(0); + for (const r of data.results) { + expectStableSymbol(r); + } + }); + + test('fnImpactData results have all 7 stable fields', () => { + const data = fnImpactData('authenticate', dbPath); + expect(data.results.length).toBeGreaterThan(0); + for (const r of data.results) { + expectStableSymbol(r); + } + }); + + test('whereData (symbol) results have 
all 7 stable fields', () => { + const data = whereData('authMiddleware', dbPath); + expect(data.results.length).toBeGreaterThan(0); + for (const r of data.results) { + expectStableSymbol(r); + } + }); + + test('explainData (function) results have all 7 stable fields', () => { + const data = explainData('authMiddleware', dbPath); + expect(data.results.length).toBeGreaterThan(0); + for (const r of data.results) { + expectStableSymbol(r); + } + }); + + test('listFunctionsData results have all 7 stable fields', () => { + const data = listFunctionsData(dbPath); + expect(data.functions.length).toBeGreaterThan(0); + for (const r of data.functions) { + expectStableSymbol(r); + } + }); + + test('fileHash values match expected hashes', () => { + const data = queryNameData('authenticate', dbPath); + const fn = data.results.find((r) => r.name === 'authenticate' && r.kind === 'function'); + expect(fn.fileHash).toBe('hash_auth_js'); + }); +}); diff --git a/tests/unit/normalize-symbol.test.js b/tests/unit/normalize-symbol.test.js new file mode 100644 index 00000000..8a27b344 --- /dev/null +++ b/tests/unit/normalize-symbol.test.js @@ -0,0 +1,114 @@ +import { describe, expect, test, vi } from 'vitest'; +import { normalizeSymbol } from '../../src/queries.js'; + +describe('normalizeSymbol', () => { + test('full row with all fields', () => { + const row = { + name: 'foo', + kind: 'function', + file: 'src/bar.js', + line: 10, + end_line: 20, + role: 'core', + }; + const db = { + prepare: vi.fn().mockReturnValue({ + get: vi.fn().mockReturnValue({ hash: 'abc123' }), + }), + }; + const result = normalizeSymbol(row, db); + expect(result).toEqual({ + name: 'foo', + kind: 'function', + file: 'src/bar.js', + line: 10, + endLine: 20, + role: 'core', + fileHash: 'abc123', + }); + }); + + test('minimal row defaults endLine, role, fileHash to null', () => { + const row = { name: 'bar', kind: 'method', file: 'a.js', line: 1 }; + const result = normalizeSymbol(row, null); + expect(result).toEqual({ 
+ name: 'bar', + kind: 'method', + file: 'a.js', + line: 1, + endLine: null, + role: null, + fileHash: null, + }); + }); + + test('prefers end_line over endLine (raw SQLite column)', () => { + const row = { + name: 'baz', + kind: 'class', + file: 'b.js', + line: 5, + end_line: 50, + endLine: 99, + }; + const result = normalizeSymbol(row, null); + expect(result.endLine).toBe(50); + }); + + test('falls back to endLine when end_line is undefined', () => { + const row = { + name: 'baz', + kind: 'class', + file: 'b.js', + line: 5, + endLine: 99, + }; + const result = normalizeSymbol(row, null); + expect(result.endLine).toBe(99); + }); + + test('db = null yields fileHash = null', () => { + const row = { name: 'x', kind: 'function', file: 'c.js', line: 1, end_line: 10, role: 'leaf' }; + const result = normalizeSymbol(row, null); + expect(result.fileHash).toBeNull(); + }); + + test('hashCache reuses result for same file', () => { + const getSpy = vi.fn().mockReturnValue({ hash: 'h1' }); + const db = { prepare: vi.fn().mockReturnValue({ get: getSpy }) }; + const hc = new Map(); + + const row1 = { name: 'a', kind: 'function', file: 'x.js', line: 1 }; + const row2 = { name: 'b', kind: 'function', file: 'x.js', line: 10 }; + + normalizeSymbol(row1, db, hc); + normalizeSymbol(row2, db, hc); + + // DB was queried only once for x.js + expect(getSpy).toHaveBeenCalledTimes(1); + expect(hc.get('x.js')).toBe('h1'); + }); + + test('hashCache queries once per unique file', () => { + const getSpy = vi.fn((file) => (file === 'a.js' ? 
{ hash: 'ha' } : { hash: 'hb' })); + const db = { prepare: vi.fn().mockReturnValue({ get: getSpy }) }; + const hc = new Map(); + + normalizeSymbol({ name: 'x', kind: 'function', file: 'a.js', line: 1 }, db, hc); + normalizeSymbol({ name: 'y', kind: 'function', file: 'b.js', line: 1 }, db, hc); + normalizeSymbol({ name: 'z', kind: 'function', file: 'a.js', line: 5 }, db, hc); + + expect(getSpy).toHaveBeenCalledTimes(2); + }); + + test('file with no hash returns fileHash null', () => { + const db = { + prepare: vi.fn().mockReturnValue({ + get: vi.fn().mockReturnValue(undefined), + }), + }; + const row = { name: 'x', kind: 'function', file: 'missing.js', line: 1 }; + const result = normalizeSymbol(row, db); + expect(result.fileHash).toBeNull(); + }); +}); From bea8597df62338fd7150361c0ca960d3994727ae Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:22:32 -0700 Subject: [PATCH 02/12] feat: expand node types with parameter, property, constant kinds (Phase 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add sub-declaration node extraction to all 9 WASM language extractors, enabling structural queries like "which functions take a Request param?" or "which classes have a userId field?" without reading source code. Schema: migration v11 adds nullable parent_id column with indexes. Builder: insertNode links children to parent via parent_id FK. Extractors: JS/TS, Python, Go, Rust, Java, C#, Ruby, PHP, HCL now emit children arrays for parameters, properties, and constants. Queries: new childrenData() function, children in contextData output. CLI: new `children` command, EVERY_SYMBOL_KIND validation on --kind. MCP: new `symbol_children` tool, extended kind enum on all kind fields. Constants: CORE_SYMBOL_KINDS (10), EXTENDED_SYMBOL_KINDS (3), EVERY_SYMBOL_KIND (13). ALL_SYMBOL_KINDS preserved for backward compat. 
Native Rust engine: Definition struct gains children field but actual extraction is deferred to Phase 2 — WASM fallback handles new kinds. Impact: 63 functions changed, 62 affected --- crates/codegraph-core/src/types.rs | 2 + src/builder.js | 23 +- src/cli.js | 72 ++-- src/db.js | 23 ++ src/extractors/csharp.js | 65 +++- src/extractors/go.js | 67 +++- src/extractors/hcl.js | 22 ++ src/extractors/java.js | 62 ++- src/extractors/javascript.js | 142 +++++++ src/extractors/php.js | 79 ++++ src/extractors/python.js | 134 +++++++ src/extractors/ruby.js | 89 +++++ src/extractors/rust.js | 72 +++- src/index.js | 4 + src/mcp.js | 40 +- src/parser.js | 8 + src/queries.js | 109 +++++- tests/integration/build-parity.test.js | 7 +- tests/parsers/csharp.test.js | 2 +- tests/parsers/extended-kinds.test.js | 504 +++++++++++++++++++++++++ tests/unit/mcp.test.js | 16 + 21 files changed, 1501 insertions(+), 41 deletions(-) create mode 100644 tests/parsers/extended-kinds.test.js diff --git a/crates/codegraph-core/src/types.rs b/crates/codegraph-core/src/types.rs index f6593ebc..ed299f0c 100644 --- a/crates/codegraph-core/src/types.rs +++ b/crates/codegraph-core/src/types.rs @@ -65,6 +65,8 @@ pub struct Definition { #[napi(ts_type = "string[] | undefined")] pub decorators: Option>, pub complexity: Option, + #[napi(ts_type = "Definition[] | undefined")] + pub children: Option>, } #[napi(object)] diff --git a/src/builder.js b/src/builder.js index a9ae11d4..7a916647 100644 --- a/src/builder.js +++ b/src/builder.js @@ -543,7 +543,7 @@ export async function buildGraph(rootDir, opts = {}) { } const insertNode = db.prepare( - 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line, parent_id) VALUES (?, ?, ?, ?, ?, ?)', ); const getNodeId = db.prepare( 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? 
AND line = ?', @@ -597,12 +597,27 @@ export async function buildGraph(rootDir, opts = {}) { for (const [relPath, symbols] of allSymbols) { fileSymbols.set(relPath, symbols); - insertNode.run(relPath, 'file', relPath, 0, null); + insertNode.run(relPath, 'file', relPath, 0, null, null); for (const def of symbols.definitions) { - insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); + insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null, null); + if (def.children?.length) { + const parentRow = getNodeId.get(def.name, def.kind, relPath, def.line); + if (parentRow) { + for (const child of def.children) { + insertNode.run( + child.name, + child.kind, + relPath, + child.line, + child.endLine || null, + parentRow.id, + ); + } + } + } } for (const exp of symbols.exports) { - insertNode.run(exp.name, exp.kind, relPath, exp.line, null); + insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null); } // Update file hash with real mtime+size for incremental builds diff --git a/src/cli.js b/src/cli.js index ddd853aa..391d2274 100644 --- a/src/cli.js +++ b/src/cli.js @@ -20,9 +20,10 @@ import { exportDOT, exportJSON, exportMermaid } from './export.js'; import { setVerbose } from './logger.js'; import { printNdjson } from './paginate.js'; import { - ALL_SYMBOL_KINDS, + children, context, diffImpact, + EVERY_SYMBOL_KIND, explain, fileDeps, fnDeps, @@ -122,8 +123,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } if (opts.path) { @@ -231,8 +232,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } fnImpact(name, opts.db, { @@ -263,8 +264,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } context(name, opts.db, { @@ -281,6 +282,31 @@ program }); }); +program + .command('children ') + .description('List parameters, properties, and constants of a symbol') + .option('-d, --db ', 'Path to graph.db') + .option('-f, --file ', 'Scope search to symbols in this file (partial match)') + .option('-k, --kind ', 'Filter to a specific symbol kind') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .action((name, opts) => { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + process.exit(1); + } + children(name, opts.db, { + file: opts.file, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? 
parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + }); + }); + program .command('explain ') .description('Structural summary of a file or function (no LLM needed)') @@ -314,8 +340,8 @@ program .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') .action((target, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } audit(target, opts.db, { @@ -917,8 +943,8 @@ program console.error('Provide a function/entry point name or use --list to see all entry points.'); process.exit(1); } - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { flow } = await import('./flow.js'); @@ -950,8 +976,8 @@ program .option('--impact', 'Show data-dependent blast radius') .option('--depth ', 'Max traversal depth', '5') .action(async (name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { dataflow } = await import('./dataflow.js'); @@ -988,8 +1014,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action(async (target, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { complexity } = await import('./complexity.js'); @@ -1021,8 +1047,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action(async (opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { manifesto } = await import('./manifesto.js'); @@ -1083,8 +1109,8 @@ program .option('--ndjson', 'Newline-delimited JSON output') .option('--weights ', 'Custom weights JSON (e.g. \'{"fanIn":1,"complexity":0}\')') .action(async (opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } if (opts.role && !VALID_ROLES.includes(opts.role)) { @@ -1246,8 +1272,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .action(async (command, positionalTargets, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } @@ -1310,8 +1336,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .action(async (positionalTargets, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } diff --git a/src/db.js b/src/db.js index f3f55fa4..9f40d7cc 100644 --- a/src/db.js +++ b/src/db.js @@ -165,6 +165,14 @@ export const MIGRATIONS = [ CREATE INDEX IF NOT EXISTS idx_dataflow_source_kind ON dataflow(source_id, kind); `, }, + { + version: 11, + up: ` + ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id); + CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id); + CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); + `, + }, ]; export function getBuildMeta(db, key) { @@ -286,6 +294,21 @@ export function initSchema(db) { } catch { /* already exists */ } + try { + db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); + } catch { + /* already exists */ + } } export function findDbPath(customPath) { diff --git a/src/extractors/csharp.js b/src/extractors/csharp.js index 5af523f3..43231d1e 100644 --- a/src/extractors/csharp.js +++ b/src/extractors/csharp.js @@ -33,11 +33,13 @@ export function extractCSharpSymbols(tree, _filePath) { case 'class_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractCSharpClassFields(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, }); extractCSharpBaseTypes(node, nameNode.text, classes); } @@ -47,11 +49,13 @@ export function extractCSharpSymbols(tree, _filePath) { case 'struct_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const structChildren = extractCSharpClassFields(node); definitions.push({ name: nameNode.text, kind: 'struct', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: structChildren.length > 0 ? structChildren : undefined, }); extractCSharpBaseTypes(node, nameNode.text, classes); } @@ -105,11 +109,13 @@ export function extractCSharpSymbols(tree, _filePath) { case 'enum_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const enumChildren = extractCSharpEnumMembers(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, }); } break; @@ -120,11 +126,13 @@ export function extractCSharpSymbols(tree, _filePath) { if (nameNode) { const parentType = findCSharpParentType(node); const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -135,11 +143,13 @@ export function extractCSharpSymbols(tree, _filePath) { if (nameNode) { const parentType = findCSharpParentType(node); const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; @@ -152,7 +162,7 @@ export function extractCSharpSymbols(tree, _filePath) { const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; definitions.push({ name: fullName, - kind: 'method', + kind: 'property', line: node.startPosition.row + 1, endLine: nodeEndLine(node), }); @@ -220,6 +230,59 @@ export function extractCSharpSymbols(tree, _filePath) { return { definitions, calls, imports, classes, exports }; } +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractCSharpParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param || param.type !== 'parameter') continue; + const nameNode = param.childForFieldName('name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + return params; +} + +function extractCSharpClassFields(classNode) { + const fields = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'declaration_list'); + if (!body) return fields; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'field_declaration') continue; + const varDecl = findChild(member, 'variable_declaration'); + if (!varDecl) continue; + for (let j = 0; j < varDecl.childCount; j++) { + const child = varDecl.child(j); + if (!child || child.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: member.startPosition.row + 1 }); + } + } + } + return fields; +} + +function extractCSharpEnumMembers(enumNode) { + const constants = []; + const body = + enumNode.childForFieldName('body') || findChild(enumNode, 'enum_member_declaration_list'); + if (!body) return constants; + for (let i = 0; i < 
body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_member_declaration') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return constants; +} + function extractCSharpBaseTypes(node, className, classes) { const baseList = node.childForFieldName('bases'); if (!baseList) return; diff --git a/src/extractors/go.js b/src/extractors/go.js index 8b943012..a3a50158 100644 --- a/src/extractors/go.js +++ b/src/extractors/go.js @@ -1,4 +1,4 @@ -import { nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Go files. @@ -15,11 +15,13 @@ export function extractGoSymbols(tree, _filePath) { case 'function_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const params = extractGoParameters(node.childForFieldName('parameters')); definitions.push({ name: nameNode.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -46,11 +48,13 @@ export function extractGoSymbols(tree, _filePath) { } } const fullName = receiverType ? `${receiverType}.${nameNode.text}` : nameNode.text; + const params = extractGoParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -64,11 +68,13 @@ export function extractGoSymbols(tree, _filePath) { const typeNode = spec.childForFieldName('type'); if (nameNode && typeNode) { if (typeNode.type === 'struct_type') { + const fields = extractStructFields(typeNode); definitions.push({ name: nameNode.text, kind: 'struct', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: fields.length > 0 ? 
fields : undefined, }); } else if (typeNode.type === 'interface_type') { definitions.push({ @@ -145,6 +151,23 @@ export function extractGoSymbols(tree, _filePath) { break; } + case 'const_declaration': { + for (let i = 0; i < node.childCount; i++) { + const spec = node.child(i); + if (!spec || spec.type !== 'const_spec') continue; + const constName = spec.childForFieldName('name'); + if (constName) { + definitions.push({ + name: constName.text, + kind: 'constant', + line: spec.startPosition.row + 1, + endLine: spec.endPosition.row + 1, + }); + } + } + break; + } + case 'call_expression': { const fn = node.childForFieldName('function'); if (fn) { @@ -170,3 +193,45 @@ export function extractGoSymbols(tree, _filePath) { walkGoNode(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractGoParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param || param.type !== 'parameter_declaration') continue; + // A parameter_declaration may have multiple identifiers (e.g., `a, b int`) + for (let j = 0; j < param.childCount; j++) { + const child = param.child(j); + if (child && child.type === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractStructFields(structTypeNode) { + const fields = []; + const fieldList = findChild(structTypeNode, 'field_declaration_list'); + if (!fieldList) return fields; + for (let i = 0; i < fieldList.childCount; i++) { + const field = fieldList.child(i); + if (!field || field.type !== 'field_declaration') continue; + const nameNode = field.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: field.startPosition.row + 1 }); + } else { + // 
Struct fields may have multiple names or use first identifier child + for (let j = 0; j < field.childCount; j++) { + const child = field.child(j); + if (child && child.type === 'field_identifier') { + fields.push({ name: child.text, kind: 'property', line: field.startPosition.row + 1 }); + } + } + } + } + return fields; +} diff --git a/src/extractors/hcl.js b/src/extractors/hcl.js index 4df5af4d..aba022a5 100644 --- a/src/extractors/hcl.js +++ b/src/extractors/hcl.js @@ -36,11 +36,33 @@ export function extractHCLSymbols(tree, _filePath) { } if (name) { + // Extract attributes as property children for variable/output blocks + let blockChildren; + if (blockType === 'variable' || blockType === 'output') { + blockChildren = []; + const body = children.find((c) => c.type === 'body'); + if (body) { + for (let j = 0; j < body.childCount; j++) { + const attr = body.child(j); + if (attr && attr.type === 'attribute') { + const key = attr.childForFieldName('key') || attr.child(0); + if (key) { + blockChildren.push({ + name: key.text, + kind: 'property', + line: attr.startPosition.row + 1, + }); + } + } + } + } + } definitions.push({ name, kind: blockType, line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: blockChildren?.length > 0 ? blockChildren : undefined, }); } diff --git a/src/extractors/java.js b/src/extractors/java.js index 87f10d39..bfa24571 100644 --- a/src/extractors/java.js +++ b/src/extractors/java.js @@ -1,4 +1,4 @@ -import { nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Java files. @@ -31,11 +31,13 @@ export function extractJavaSymbols(tree, _filePath) { case 'class_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractClassFields(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, }); const superclass = node.childForFieldName('superclass'); @@ -139,11 +141,13 @@ export function extractJavaSymbols(tree, _filePath) { case 'enum_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const enumChildren = extractEnumConstants(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, }); } break; @@ -154,11 +158,13 @@ export function extractJavaSymbols(tree, _filePath) { if (nameNode) { const parentClass = findJavaParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -169,11 +175,13 @@ export function extractJavaSymbols(tree, _filePath) { if (nameNode) { const parentClass = findJavaParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; @@ -228,3 +236,55 @@ export function extractJavaSymbols(tree, _filePath) { walkJavaNode(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractJavaParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param) continue; + if (param.type === 'formal_parameter' || param.type === 'spread_parameter') { + const nameNode = param.childForFieldName('name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractClassFields(classNode) { + const fields = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'class_body'); + if (!body) return fields; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'field_declaration') continue; + for (let j = 0; j < member.childCount; j++) { + const child = member.child(j); + if (!child || child.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: member.startPosition.row + 1 }); + } + } + } + return fields; +} + +function extractEnumConstants(enumNode) { + const constants = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_body'); + if (!body) return constants; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_constant') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return constants; +} diff --git 
a/src/extractors/javascript.js b/src/extractors/javascript.js index 57ba0392..c4a0d3bf 100644 --- a/src/extractors/javascript.js +++ b/src/extractors/javascript.js @@ -28,31 +28,37 @@ function extractSymbolsQuery(tree, query) { if (c.fn_node) { // function_declaration + const fnChildren = extractParameters(c.fn_node); definitions.push({ name: c.fn_name.text, kind: 'function', line: c.fn_node.startPosition.row + 1, endLine: nodeEndLine(c.fn_node), + children: fnChildren.length > 0 ? fnChildren : undefined, }); } else if (c.varfn_name) { // variable_declarator with arrow_function / function_expression const declNode = c.varfn_name.parent?.parent; const line = declNode ? declNode.startPosition.row + 1 : c.varfn_name.startPosition.row + 1; + const varFnChildren = extractParameters(c.varfn_value); definitions.push({ name: c.varfn_name.text, kind: 'function', line, endLine: nodeEndLine(c.varfn_value), + children: varFnChildren.length > 0 ? varFnChildren : undefined, }); } else if (c.cls_node) { // class_declaration const className = c.cls_name.text; const startLine = c.cls_node.startPosition.row + 1; + const clsChildren = extractClassProperties(c.cls_node); definitions.push({ name: className, kind: 'class', line: startLine, endLine: nodeEndLine(c.cls_node), + children: clsChildren.length > 0 ? clsChildren : undefined, }); const heritage = c.cls_node.childForFieldName('heritage') || findChild(c.cls_node, 'class_heritage'); @@ -69,11 +75,13 @@ function extractSymbolsQuery(tree, query) { const methName = c.meth_name.text; const parentClass = findParentClass(c.meth_node); const fullName = parentClass ? `${parentClass}.${methName}` : methName; + const methChildren = extractParameters(c.meth_node); definitions.push({ name: fullName, kind: 'method', line: c.meth_node.startPosition.row + 1, endLine: nodeEndLine(c.meth_node), + children: methChildren.length > 0 ? 
methChildren : undefined, }); } else if (c.iface_node) { // interface_declaration (TS/TSX only) @@ -231,11 +239,13 @@ function extractSymbolsWalk(tree) { case 'function_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const fnChildren = extractParameters(node); definitions.push({ name: nameNode.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: fnChildren.length > 0 ? fnChildren : undefined, }); } break; @@ -246,11 +256,13 @@ function extractSymbolsWalk(tree) { if (nameNode) { const className = nameNode.text; const startLine = node.startPosition.row + 1; + const clsChildren = extractClassProperties(node); definitions.push({ name: className, kind: 'class', line: startLine, endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? clsChildren : undefined, }); const heritage = node.childForFieldName('heritage') || findChild(node, 'class_heritage'); if (heritage) { @@ -272,11 +284,13 @@ function extractSymbolsWalk(tree) { if (nameNode) { const parentClass = findParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const methChildren = extractParameters(node); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: methChildren.length > 0 ? 
methChildren : undefined, }); } break; @@ -317,6 +331,7 @@ function extractSymbolsWalk(tree) { case 'lexical_declaration': case 'variable_declaration': { + const isConst = node.text.startsWith('const '); for (let i = 0; i < node.childCount; i++) { const declarator = node.child(i); if (declarator && declarator.type === 'variable_declarator') { @@ -329,15 +344,59 @@ function extractSymbolsWalk(tree) { valType === 'function_expression' || valType === 'function' ) { + const varFnChildren = extractParameters(valueN); definitions.push({ name: nameN.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(valueN), + children: varFnChildren.length > 0 ? varFnChildren : undefined, }); + } else if (isConst && nameN.type === 'identifier' && isConstantValue(valueN)) { + definitions.push({ + name: nameN.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } else if (isConst && nameN && nameN.type === 'identifier' && !valueN) { + // const with no value (shouldn't happen but be safe) + } + } + } + break; + } + + case 'enum_declaration': { + // TypeScript enum + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const enumChildren = []; + const body = node.childForFieldName('body') || findChild(node, 'enum_body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member) continue; + if (member.type === 'enum_assignment' || member.type === 'property_identifier') { + const mName = member.childForFieldName('name') || member.child(0); + if (mName) { + enumChildren.push({ + name: mName.text, + kind: 'constant', + line: member.startPosition.row + 1, + }); + } } } } + definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? 
enumChildren : undefined, + }); } break; } @@ -471,6 +530,89 @@ function extractSymbolsWalk(tree) { return { definitions, calls, imports, classes, exports }; } +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractParameters(node) { + const params = []; + const paramsNode = node.childForFieldName('parameters') || findChild(node, 'formal_parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const child = paramsNode.child(i); + if (!child) continue; + const t = child.type; + if (t === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } else if ( + t === 'required_parameter' || + t === 'optional_parameter' || + t === 'assignment_pattern' + ) { + const nameNode = + child.childForFieldName('pattern') || child.childForFieldName('left') || child.child(0); + if ( + nameNode && + (nameNode.type === 'identifier' || + nameNode.type === 'shorthand_property_identifier_pattern') + ) { + params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } else if (t === 'rest_pattern' || t === 'rest_element') { + const nameNode = child.child(1) || child.childForFieldName('name'); + if (nameNode && nameNode.type === 'identifier') { + params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractClassProperties(classNode) { + const props = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'class_body'); + if (!body) return props; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child) continue; + if ( + child.type === 'field_definition' || + child.type === 'public_field_definition' || + child.type === 'property_definition' + ) { + const nameNode = + child.childForFieldName('name') || child.childForFieldName('property') || child.child(0); + if ( + nameNode && 
+ (nameNode.type === 'property_identifier' || + nameNode.type === 'identifier' || + nameNode.type === 'private_property_identifier') + ) { + props.push({ name: nameNode.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + } + return props; +} + +function isConstantValue(valueNode) { + if (!valueNode) return false; + const t = valueNode.type; + return ( + t === 'number' || + t === 'string' || + t === 'template_string' || + t === 'true' || + t === 'false' || + t === 'null' || + t === 'undefined' || + t === 'array' || + t === 'object' || + t === 'regex' || + t === 'unary_expression' || + t === 'binary_expression' || + t === 'new_expression' + ); +} + // ── Shared helpers ────────────────────────────────────────────────────────── function extractInterfaceMethods(bodyNode, interfaceName, definitions) { diff --git a/src/extractors/php.js b/src/extractors/php.js index 95b44570..d2b4f09d 100644 --- a/src/extractors/php.js +++ b/src/extractors/php.js @@ -1,5 +1,76 @@ import { findChild, nodeEndLine } from './helpers.js'; +function extractPhpParameters(fnNode) { + const params = []; + const paramsNode = + fnNode.childForFieldName('parameters') || findChild(fnNode, 'formal_parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const param = paramsNode.child(i); + if (!param) continue; + if (param.type === 'simple_parameter' || param.type === 'variadic_parameter') { + const nameNode = param.childForFieldName('name') || findChild(param, 'variable_name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractPhpClassChildren(classNode) { + const children = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'declaration_list'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member) continue; + if (member.type === 
'property_declaration') { + for (let j = 0; j < member.childCount; j++) { + const el = member.child(j); + if (!el || el.type !== 'property_element') continue; + const varNode = findChild(el, 'variable_name'); + if (varNode) { + children.push({ + name: varNode.text, + kind: 'property', + line: member.startPosition.row + 1, + }); + } + } + } else if (member.type === 'const_declaration') { + for (let j = 0; j < member.childCount; j++) { + const el = member.child(j); + if (!el || el.type !== 'const_element') continue; + const nameNode = el.childForFieldName('name') || findChild(el, 'name'); + if (nameNode) { + children.push({ + name: nameNode.text, + kind: 'constant', + line: member.startPosition.row + 1, + }); + } + } + } + } + return children; +} + +function extractPhpEnumCases(enumNode) { + const children = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_declaration_list'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_case') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + children.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return children; +} + /** * Extract symbols from PHP files. */ @@ -31,11 +102,13 @@ export function extractPHPSymbols(tree, _filePath) { case 'function_definition': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const params = extractPhpParameters(node); definitions.push({ name: nameNode.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; @@ -44,11 +117,13 @@ export function extractPHPSymbols(tree, _filePath) { case 'class_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractPhpClassChildren(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? classChildren : undefined, }); // Check base clause (extends) @@ -132,11 +207,13 @@ export function extractPHPSymbols(tree, _filePath) { case 'enum_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const enumChildren = extractPhpEnumCases(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, }); } break; @@ -147,11 +224,13 @@ export function extractPHPSymbols(tree, _filePath) { if (nameNode) { const parentClass = findPHPParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractPhpParameters(node); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; diff --git a/src/extractors/python.js b/src/extractors/python.js index 832232f0..6542aab7 100644 --- a/src/extractors/python.js +++ b/src/extractors/python.js @@ -22,12 +22,14 @@ export function extractPythonSymbols(tree, _filePath) { const parentClass = findPythonParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; const kind = parentClass ? 'method' : 'function'; + const fnChildren = extractPythonParameters(node); definitions.push({ name: fullName, kind, line: node.startPosition.row + 1, endLine: nodeEndLine(node), decorators, + children: fnChildren.length > 0 ? 
fnChildren : undefined, }); } break; @@ -36,11 +38,13 @@ export function extractPythonSymbols(tree, _filePath) { case 'class_definition': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const clsChildren = extractPythonClassProperties(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? clsChildren : undefined, }); const superclasses = node.childForFieldName('superclasses') || findChild(node, 'argument_list'); @@ -108,6 +112,24 @@ export function extractPythonSymbols(tree, _filePath) { break; } + case 'expression_statement': { + // Module-level UPPER_CASE assignments → constants + if (node.parent && node.parent.type === 'module') { + const assignment = findChild(node, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && /^[A-Z_][A-Z0-9_]*$/.test(left.text)) { + definitions.push({ + name: left.text, + kind: 'constant', + line: node.startPosition.row + 1, + }); + } + } + } + break; + } + case 'import_from_statement': { let source = ''; const names = []; @@ -133,6 +155,118 @@ export function extractPythonSymbols(tree, _filePath) { for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i)); } + function extractPythonParameters(fnNode) { + const params = []; + const paramsNode = fnNode.childForFieldName('parameters') || findChild(fnNode, 'parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const child = paramsNode.child(i); + if (!child) continue; + const t = child.type; + if (t === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } else if ( + t === 'typed_parameter' || + t === 'default_parameter' || + t === 'typed_default_parameter' + ) { + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode && nameNode.type === 
'identifier') { + params.push({ + name: nameNode.text, + kind: 'parameter', + line: child.startPosition.row + 1, + }); + } + } else if (t === 'list_splat_pattern' || t === 'dictionary_splat_pattern') { + // *args, **kwargs + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'identifier') { + params.push({ name: inner.text, kind: 'parameter', line: child.startPosition.row + 1 }); + break; + } + } + } + } + return params; + } + + function extractPythonClassProperties(classNode) { + const props = []; + const seen = new Set(); + const body = classNode.childForFieldName('body') || findChild(classNode, 'block'); + if (!body) return props; + + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child) continue; + + // Direct class attribute assignments: x = 5 + if (child.type === 'expression_statement') { + const assignment = findChild(child, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && !seen.has(left.text)) { + seen.add(left.text); + props.push({ name: left.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + } + + // __init__ method: self.x = ... 
assignments + if (child.type === 'function_definition') { + const fnName = child.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = child.childForFieldName('body') || findChild(child, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); + } + } + } + + // decorated __init__ + if (child.type === 'decorated_definition') { + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'function_definition') { + const fnName = inner.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = inner.childForFieldName('body') || findChild(inner, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); + } + } + } + } + } + } + return props; + } + + function walkInitBody(bodyNode, seen, props) { + for (let i = 0; i < bodyNode.childCount; i++) { + const stmt = bodyNode.child(i); + if (!stmt || stmt.type !== 'expression_statement') continue; + const assignment = findChild(stmt, 'assignment'); + if (!assignment) continue; + const left = assignment.childForFieldName('left'); + if (!left || left.type !== 'attribute') continue; + const obj = left.childForFieldName('object'); + const attr = left.childForFieldName('attribute'); + if ( + obj && + obj.text === 'self' && + attr && + attr.type === 'identifier' && + !seen.has(attr.text) + ) { + seen.add(attr.text); + props.push({ name: attr.text, kind: 'property', line: stmt.startPosition.row + 1 }); + } + } + } + function findPythonParentClass(node) { let current = node.parent; while (current) { diff --git a/src/extractors/ruby.js b/src/extractors/ruby.js index 73b3f0d4..400d410d 100644 --- a/src/extractors/ruby.js +++ b/src/extractors/ruby.js @@ -31,11 +31,13 @@ export function extractRubySymbols(tree, _filePath) { case 'class': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractRubyClassChildren(node); definitions.push({ name: nameNode.text, 
kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? classChildren : undefined, }); const superclass = node.childForFieldName('superclass'); if (superclass) { @@ -73,11 +75,13 @@ export function extractRubySymbols(tree, _filePath) { case 'module': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const moduleChildren = extractRubyBodyConstants(node); definitions.push({ name: nameNode.text, kind: 'module', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: moduleChildren.length > 0 ? moduleChildren : undefined, }); } break; @@ -88,11 +92,13 @@ export function extractRubySymbols(tree, _filePath) { if (nameNode) { const parentClass = findRubyParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -103,16 +109,34 @@ export function extractRubySymbols(tree, _filePath) { if (nameNode) { const parentClass = findRubyParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); definitions.push({ name: fullName, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; } + case 'assignment': { + // Top-level constant assignments (parent is program) + if (node.parent && node.parent.type === 'program') { + const left = node.childForFieldName('left'); + if (left && left.type === 'constant') { + definitions.push({ + name: left.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } + break; + } + case 'call': { const methodNode = node.childForFieldName('method'); if (methodNode) { @@ -186,3 +210,68 @@ export function extractRubySymbols(tree, _filePath) { walkRubyNode(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Child extraction helpers ──────────────────────────────────────────────── + +const RUBY_PARAM_TYPES = new Set([ + 'identifier', + 'optional_parameter', + 'splat_parameter', + 'hash_splat_parameter', + 'block_parameter', + 'keyword_parameter', +]); + +function extractRubyParameters(methodNode) { + const params = []; + const paramList = + methodNode.childForFieldName('parameters') || findChild(methodNode, 'method_parameters'); + if (!paramList) return params; + for (let i = 0; i < paramList.childCount; i++) { + const param = paramList.child(i); + if (!param || !RUBY_PARAM_TYPES.has(param.type)) continue; + let name; + if (param.type === 'identifier') { + name = param.text; + } else { + // Compound parameter types have an identifier child for the name + const id = findChild(param, 'identifier'); + name = id ? 
id.text : param.text; + } + params.push({ name, kind: 'parameter', line: param.startPosition.row + 1 }); + } + return params; +} + +function extractRubyBodyConstants(containerNode) { + const children = []; + const body = containerNode.childForFieldName('body') || findChild(containerNode, 'body'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child || child.type !== 'assignment') continue; + const left = child.childForFieldName('left'); + if (left && left.type === 'constant') { + children.push({ name: left.text, kind: 'constant', line: child.startPosition.row + 1 }); + } + } + return children; +} + +function extractRubyClassChildren(classNode) { + const children = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'body'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child || child.type !== 'assignment') continue; + const left = child.childForFieldName('left'); + if (!left) continue; + if (left.type === 'instance_variable') { + children.push({ name: left.text, kind: 'property', line: child.startPosition.row + 1 }); + } else if (left.type === 'constant') { + children.push({ name: left.text, kind: 'constant', line: child.startPosition.row + 1 }); + } + } + return children; +} diff --git a/src/extractors/rust.js b/src/extractors/rust.js index 5a8d6225..2a013481 100644 --- a/src/extractors/rust.js +++ b/src/extractors/rust.js @@ -1,4 +1,4 @@ -import { nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Rust files. @@ -30,11 +30,13 @@ export function extractRustSymbols(tree, _filePath) { const implType = findCurrentImpl(node); const fullName = implType ? `${implType}.${nameNode.text}` : nameNode.text; const kind = implType ? 
'method' : 'function'; + const params = extractRustParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind, line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -43,11 +45,13 @@ export function extractRustSymbols(tree, _filePath) { case 'struct_item': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const fields = extractStructFields(node); definitions.push({ name: nameNode.text, kind: 'struct', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: fields.length > 0 ? fields : undefined, }); } break; @@ -56,11 +60,26 @@ export function extractRustSymbols(tree, _filePath) { case 'enum_item': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const variants = extractEnumVariants(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: variants.length > 0 ? 
variants : undefined, + }); + } + break; + } + + case 'const_item': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), }); } break; @@ -170,6 +189,57 @@ export function extractRustSymbols(tree, _filePath) { return { definitions, calls, imports, classes, exports }; } +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractRustParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param) continue; + if (param.type === 'self_parameter') { + params.push({ name: 'self', kind: 'parameter', line: param.startPosition.row + 1 }); + } else if (param.type === 'parameter') { + const pattern = param.childForFieldName('pattern'); + if (pattern) { + params.push({ name: pattern.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractStructFields(structNode) { + const fields = []; + const fieldList = + structNode.childForFieldName('body') || findChild(structNode, 'field_declaration_list'); + if (!fieldList) return fields; + for (let i = 0; i < fieldList.childCount; i++) { + const field = fieldList.child(i); + if (!field || field.type !== 'field_declaration') continue; + const nameNode = field.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: field.startPosition.row + 1 }); + } + } + return fields; +} + +function extractEnumVariants(enumNode) { + const variants = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_variant_list'); + if (!body) return variants; + for (let i = 0; i < body.childCount; i++) { + const variant = body.child(i); + if (!variant || variant.type !== 'enum_variant') continue; + const nameNode = 
variant.childForFieldName('name'); + if (nameNode) { + variants.push({ name: nameNode.text, kind: 'constant', line: variant.startPosition.row + 1 }); + } + } + return variants; +} + function extractRustUsePath(node) { if (!node) return []; diff --git a/src/index.js b/src/index.js index 03be6853..973d2475 100644 --- a/src/index.js +++ b/src/index.js @@ -107,9 +107,13 @@ export { getActiveEngine, parseFileAuto, parseFilesAuto } from './parser.js'; // Query functions (data-returning) export { ALL_SYMBOL_KINDS, + CORE_SYMBOL_KINDS, + childrenData, contextData, diffImpactData, diffImpactMermaid, + EVERY_SYMBOL_KIND, + EXTENDED_SYMBOL_KINDS, explainData, FALSE_POSITIVE_CALLER_THRESHOLD, FALSE_POSITIVE_NAMES, diff --git a/src/mcp.js b/src/mcp.js index 405b09c2..d02cdf29 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -9,7 +9,7 @@ import { createRequire } from 'node:module'; import { findCycles } from './cycles.js'; import { findDbPath } from './db.js'; import { MCP_DEFAULTS, MCP_MAX_LIMIT } from './paginate.js'; -import { ALL_SYMBOL_KINDS, diffImpactMermaid, VALID_ROLES } from './queries.js'; +import { diffImpactMermaid, EVERY_SYMBOL_KIND, VALID_ROLES } from './queries.js'; const REPO_PROP = { repo: { @@ -47,7 +47,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind', }, to: { type: 'string', description: 'Target symbol for path mode (required in path mode)' }, @@ -129,7 +129,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, @@ -157,7 +157,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter to a specific symbol kind', }, no_source: { @@ -176,6 +176,22 @@ const BASE_TOOLS = [ required: ['name'], }, }, + { + name: 
'symbol_children', + description: + 'List sub-declaration children of a symbol: parameters, properties, constants. Answers "what fields does this class have?" without reading source.', + inputSchema: { + type: 'object', + properties: { + name: { type: 'string', description: 'Function/method/class name (partial match)' }, + file: { type: 'string', description: 'Scope to file (partial match)' }, + kind: { type: 'string', enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + required: ['name'], + }, + }, { name: 'explain', description: @@ -394,7 +410,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, @@ -560,7 +576,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, @@ -639,7 +655,7 @@ const BASE_TOOLS = [ }, depth: { type: 'number', description: 'Max depth for impact mode', default: 5 }, file: { type: 'string', description: 'Scope to file (partial match)' }, - kind: { type: 'string', enum: ALL_SYMBOL_KINDS, description: 'Filter by symbol kind' }, + kind: { type: 'string', enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind' }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, ...PAGINATION_PROPS, }, @@ -740,6 +756,7 @@ export async function startMCPServer(customDbPath, options = {}) { fnImpactData, pathData, contextData, + childrenData, explainData, whereData, diffImpactData, @@ -864,6 +881,15 @@ export async function startMCPServer(customDbPath, options = {}) { offset: args.offset ?? 
0, }); break; + case 'symbol_children': + result = childrenData(args.name, dbPath, { + file: args.file, + kind: args.kind, + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.context, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); + break; case 'explain': result = explainData(args.target, dbPath, { noTests: args.no_tests, diff --git a/src/parser.js b/src/parser.js index f70e67c2..54eb0820 100644 --- a/src/parser.js +++ b/src/parser.js @@ -142,6 +142,14 @@ function normalizeNativeSymbols(result) { maintainabilityIndex: d.complexity.maintainabilityIndex ?? null, } : null, + children: d.children?.length + ? d.children.map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.endLine ?? c.end_line ?? null, + })) + : undefined, })), calls: (result.calls || []).map((c) => ({ name: c.name, diff --git a/src/queries.js b/src/queries.js index e8874364..dc1fb1ad 100644 --- a/src/queries.js +++ b/src/queries.js @@ -59,7 +59,9 @@ export const FALSE_POSITIVE_NAMES = new Set([ export const FALSE_POSITIVE_CALLER_THRESHOLD = 20; const FUNCTION_KINDS = ['function', 'method', 'class']; -export const ALL_SYMBOL_KINDS = [ + +// Original 10 kinds — used as default query scope +export const CORE_SYMBOL_KINDS = [ 'function', 'method', 'class', @@ -72,6 +74,21 @@ export const ALL_SYMBOL_KINDS = [ 'module', ]; +// Sub-declaration kinds (Phase 1) +export const EXTENDED_SYMBOL_KINDS = [ + 'parameter', + 'property', + 'constant', + // Phase 2 (reserved, not yet extracted): + // 'constructor', 'namespace', 'decorator', 'getter', 'setter', +]; + +// Full set for --kind validation and MCP enum +export const EVERY_SYMBOL_KIND = [...CORE_SYMBOL_KINDS, ...EXTENDED_SYMBOL_KINDS]; + +// Backward compat: ALL_SYMBOL_KINDS stays as the core 10 +export const ALL_SYMBOL_KINDS = CORE_SYMBOL_KINDS; + export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; /** @@ -190,6 +207,12 @@ export function kindIcon(kind) { return 'I'; case 'type': 
return 'T'; + case 'parameter': + return 'p'; + case 'property': + return '.'; + case 'constant': + return 'C'; default: return '-'; } @@ -2224,6 +2247,17 @@ export function contextData(name, customDbPath, opts = {}) { /* table may not exist */ } + // Children (parameters, properties, constants) + let nodeChildren = []; + try { + nodeChildren = db + .prepare('SELECT name, kind, line, end_line FROM nodes WHERE parent_id = ? ORDER BY line') + .all(node.id) + .map((c) => ({ name: c.name, kind: c.kind, line: c.line, endLine: c.end_line || null })); + } catch { + /* parent_id column may not exist */ + } + return { name: node.name, kind: node.kind, @@ -2234,6 +2268,7 @@ export function contextData(name, customDbPath, opts = {}) { source, signature, complexity: complexityMetrics, + children: nodeChildren.length > 0 ? nodeChildren : undefined, callees, callers, relatedTests, @@ -2273,6 +2308,15 @@ export function context(name, customDbPath, opts = {}) { console.log(); } + // Children + if (r.children && r.children.length > 0) { + console.log(`## Children (${r.children.length})`); + for (const c of r.children) { + console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); + } + console.log(); + } + // Complexity if (r.complexity) { const cx = r.complexity; @@ -2345,6 +2389,69 @@ export function context(name, customDbPath, opts = {}) { } } +// ─── childrenData ─────────────────────────────────────────────────────── + +export function childrenData(name, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + + const nodes = findMatchingNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + if (nodes.length === 0) { + db.close(); + return { name, results: [] }; + } + + const results = nodes.map((node) => { + let children; + try { + children = db + .prepare('SELECT name, kind, line, end_line FROM nodes WHERE parent_id = ? 
ORDER BY line') + .all(node.id); + } catch { + children = []; + } + if (noTests) children = children.filter((c) => !isTestFile(c.file || node.file)); + return { + name: node.name, + kind: node.kind, + file: node.file, + line: node.line, + children: children.map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.end_line || null, + })), + }; + }); + + db.close(); + const base = { name, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); +} + +export function children(name, customDbPath, opts = {}) { + const data = childrenData(name, customDbPath, opts); + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + if (data.results.length === 0) { + console.log(`No symbol matching "${name}"`); + return; + } + for (const r of data.results) { + console.log(`\n${kindIcon(r.kind)} ${r.name} ${r.file}:${r.line}`); + if (r.children.length === 0) { + console.log(' (no children)'); + } else { + for (const c of r.children) { + console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); + } + } + } +} + // ─── explainData ──────────────────────────────────────────────────────── function isFileLikeTarget(target) { diff --git a/tests/integration/build-parity.test.js b/tests/integration/build-parity.test.js index 94097e7f..5651a61b 100644 --- a/tests/integration/build-parity.test.js +++ b/tests/integration/build-parity.test.js @@ -76,9 +76,14 @@ describeOrSkip('Build parity: native vs WASM', () => { }); it('produces identical nodes', () => { + // Filter out extended kinds (parameter, property, constant) — WASM extracts + // these as children but native engine defers child extraction for now. 
+ const EXTENDED = new Set(['parameter', 'property', 'constant']); + const filterCore = (nodes) => nodes.filter((n) => !EXTENDED.has(n.kind)); + const wasmGraph = readGraph(path.join(wasmDir, '.codegraph', 'graph.db')); const nativeGraph = readGraph(path.join(nativeDir, '.codegraph', 'graph.db')); - expect(nativeGraph.nodes).toEqual(wasmGraph.nodes); + expect(filterCore(nativeGraph.nodes)).toEqual(filterCore(wasmGraph.nodes)); }); it('produces identical edges', () => { diff --git a/tests/parsers/csharp.test.js b/tests/parsers/csharp.test.js index f49913d2..e8031262 100644 --- a/tests/parsers/csharp.test.js +++ b/tests/parsers/csharp.test.js @@ -108,7 +108,7 @@ public class Foo {}`); public string Name { get; set; } }`); expect(symbols.definitions).toContainEqual( - expect.objectContaining({ name: 'User.Name', kind: 'method' }), + expect.objectContaining({ name: 'User.Name', kind: 'property' }), ); }); }); diff --git a/tests/parsers/extended-kinds.test.js b/tests/parsers/extended-kinds.test.js new file mode 100644 index 00000000..266ac44a --- /dev/null +++ b/tests/parsers/extended-kinds.test.js @@ -0,0 +1,504 @@ +/** + * Extended kind extraction tests (parameters, properties, constants). + * + * Validates that each language extractor populates the `children` array + * on definitions with parameter, property, and constant entries. 
+ */ +import { beforeAll, describe, expect, it } from 'vitest'; +import { + createParsers, + extractCSharpSymbols, + extractGoSymbols, + extractJavaSymbols, + extractPHPSymbols, + extractPythonSymbols, + extractRubySymbols, + extractRustSymbols, + extractSymbols, +} from '../../src/parser.js'; + +// ── JavaScript ────────────────────────────────────────────────────────────── + +describe('JavaScript extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseJS(code) { + const parser = parsers.get('javascript'); + const tree = parser.parse(code); + return extractSymbols(tree, 'test.js'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function declarations', () => { + const symbols = parseJS('function greet(name, age) { }'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]), + ); + }); + + it('extracts parameters from arrow functions', () => { + const symbols = parseJS('const add = (a, b) => a + b;'); + const add = symbols.definitions.find((d) => d.name === 'add'); + expect(add).toBeDefined(); + expect(add.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'a', kind: 'parameter' }), + expect.objectContaining({ name: 'b', kind: 'parameter' }), + ]), + ); + }); + + it('extracts parameters from class methods', () => { + const symbols = parseJS('class Foo { bar(x, y) {} }'); + const bar = symbols.definitions.find((d) => d.name === 'Foo.bar'); + expect(bar).toBeDefined(); + expect(bar.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'parameter' }), + expect.objectContaining({ name: 'y', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', 
() => { + it('extracts class field properties', () => { + const symbols = parseJS('class User { name; age; greet() {} }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts constant definitions from const declarations', () => { + const symbols = parseJS('const MAX = 100;'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MAX', kind: 'constant' }), + ); + }); + }); +}); + +// ── Python ────────────────────────────────────────────────────────────────── + +describe('Python extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parsePython(code) { + const parser = parsers.get('python'); + if (!parser) throw new Error('Python parser not available'); + const tree = parser.parse(code); + return extractPythonSymbols(tree, 'test.py'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function definitions', () => { + const symbols = parsePython('def greet(name, age=30):\n pass'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts properties from __init__ self assignments', () => { + const symbols = parsePython( + ['class User:', ' def __init__(self, x, y):', ' self.x = x', ' self.y = y'].join( + '\n', + ), + ); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + 
expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'property' }), + expect.objectContaining({ name: 'y', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts module-level UPPER_CASE constants', () => { + const symbols = parsePython('MAX_RETRIES = 3'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MAX_RETRIES', kind: 'constant' }), + ); + }); + }); +}); + +// ── Go ────────────────────────────────────────────────────────────────────── + +describe('Go extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseGo(code) { + const parser = parsers.get('go'); + if (!parser) throw new Error('Go parser not available'); + const tree = parser.parse(code); + return extractGoSymbols(tree, 'test.go'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function declarations', () => { + const symbols = parseGo('package main\nfunc add(a int, b int) int { return a + b }'); + const add = symbols.definitions.find((d) => d.name === 'add'); + expect(add).toBeDefined(); + expect(add.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'a', kind: 'parameter' }), + expect.objectContaining({ name: 'b', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts struct fields as properties', () => { + const symbols = parseGo('package main\ntype User struct {\n Name string\n Age int\n}'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Name', kind: 'property' }), + expect.objectContaining({ name: 'Age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts const declarations', () => { + const symbols = 
parseGo('package main\nconst MaxRetries = 3'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MaxRetries', kind: 'constant' }), + ); + }); + }); +}); + +// ── Rust ───────────────────────────────────────────────────────────────────── + +describe('Rust extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseRust(code) { + const parser = parsers.get('rust'); + if (!parser) throw new Error('Rust parser not available'); + const tree = parser.parse(code); + return extractRustSymbols(tree, 'test.rs'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function declarations', () => { + const symbols = parseRust('fn add(a: i32, b: i32) -> i32 { a + b }'); + const add = symbols.definitions.find((d) => d.name === 'add'); + expect(add).toBeDefined(); + expect(add.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'a', kind: 'parameter' }), + expect.objectContaining({ name: 'b', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts struct fields as properties', () => { + const symbols = parseRust('struct User { name: String, age: u32 }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts const item declarations', () => { + const symbols = parseRust('const MAX: i32 = 100;'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MAX', kind: 'constant' }), + ); + }); + + it('extracts enum variants as constant children', () => { + const symbols = parseRust('enum Color { Red, Green, Blue }'); + const color = symbols.definitions.find((d) 
=> d.name === 'Color'); + expect(color).toBeDefined(); + expect(color.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Red', kind: 'constant' }), + expect.objectContaining({ name: 'Green', kind: 'constant' }), + expect.objectContaining({ name: 'Blue', kind: 'constant' }), + ]), + ); + }); + }); +}); + +// ── Java ───────────────────────────────────────────────────────────────────── + +describe('Java extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseJava(code) { + const parser = parsers.get('java'); + if (!parser) throw new Error('Java parser not available'); + const tree = parser.parse(code); + return extractJavaSymbols(tree, 'Test.java'); + } + + describe('parameter extraction', () => { + it('extracts method parameters', () => { + const symbols = parseJava('class Foo { void bar(int x, String y) {} }'); + const bar = symbols.definitions.find((d) => d.name === 'Foo.bar'); + expect(bar).toBeDefined(); + expect(bar.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'parameter' }), + expect.objectContaining({ name: 'y', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts class field declarations as properties', () => { + const symbols = parseJava('class User { String name; int age; }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts enum constants as children', () => { + const symbols = parseJava('enum Status { ACTIVE, INACTIVE }'); + const status = symbols.definitions.find((d) => d.name === 'Status'); + expect(status).toBeDefined(); + 
expect(status.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'ACTIVE', kind: 'constant' }), + expect.objectContaining({ name: 'INACTIVE', kind: 'constant' }), + ]), + ); + }); + }); +}); + +// ── C# ────────────────────────────────────────────────────────────────────── + +describe('C# extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseCSharp(code) { + const parser = parsers.get('csharp'); + if (!parser) throw new Error('C# parser not available'); + const tree = parser.parse(code); + return extractCSharpSymbols(tree, 'Test.cs'); + } + + describe('parameter extraction', () => { + it('extracts method parameters', () => { + const symbols = parseCSharp('class Foo { void Bar(int x, string y) {} }'); + const bar = symbols.definitions.find((d) => d.name === 'Foo.Bar'); + expect(bar).toBeDefined(); + expect(bar.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'parameter' }), + expect.objectContaining({ name: 'y', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts class field declarations as properties', () => { + const symbols = parseCSharp('class User { string Name; int Age; }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Name', kind: 'property' }), + expect.objectContaining({ name: 'Age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts enum member declarations as constants', () => { + const symbols = parseCSharp('enum Status { Active, Inactive }'); + const status = symbols.definitions.find((d) => d.name === 'Status'); + expect(status).toBeDefined(); + expect(status.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Active', kind: 'constant' 
}), + expect.objectContaining({ name: 'Inactive', kind: 'constant' }), + ]), + ); + }); + }); +}); + +// ── Ruby ───────────────────────────────────────────────────────────────────── + +describe('Ruby extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseRuby(code) { + const parser = parsers.get('ruby'); + if (!parser) throw new Error('Ruby parser not available'); + const tree = parser.parse(code); + return extractRubySymbols(tree, 'test.rb'); + } + + describe('parameter extraction', () => { + it('extracts method parameters', () => { + const symbols = parseRuby('def greet(name, age)\nend'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts instance variable assignments as properties', () => { + const symbols = parseRuby('class User\n @name = nil\nend'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([expect.objectContaining({ name: '@name', kind: 'property' })]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts class-level constant assignments', () => { + const symbols = parseRuby('class Foo\n MAX = 100\nend'); + const foo = symbols.definitions.find((d) => d.name === 'Foo'); + expect(foo).toBeDefined(); + expect(foo.children).toEqual( + expect.arrayContaining([expect.objectContaining({ name: 'MAX', kind: 'constant' })]), + ); + }); + }); +}); + +// ── PHP ────────────────────────────────────────────────────────────────────── + +describe('PHP extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + 
}); + + function parsePHP(code) { + const parser = parsers.get('php'); + if (!parser) throw new Error('PHP parser not available'); + const tree = parser.parse(code); + return extractPHPSymbols(tree, 'test.php'); + } + + describe('parameter extraction', () => { + it('extracts function parameters', () => { + const symbols = parsePHP(' d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: '$name', kind: 'parameter' }), + expect.objectContaining({ name: '$age', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts class property declarations', () => { + const symbols = parsePHP(' d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: '$name', kind: 'property' }), + expect.objectContaining({ name: '$age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts enum case declarations as constants', () => { + const symbols = parsePHP(' d.name === 'Status'); + expect(status).toBeDefined(); + expect(status.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Active', kind: 'constant' }), + expect.objectContaining({ name: 'Inactive', kind: 'constant' }), + ]), + ); + }); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index fc610c4b..3b38f590 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -16,6 +16,7 @@ const ALL_TOOL_NAMES = [ 'module_map', 'fn_impact', 'context', + 'symbol_children', 'explain', 'where', 'diff_impact', @@ -249,6 +250,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(() => ({ name: 'test', results: [] })), fnImpactData: vi.fn(() => ({ name: 'test', results: [] })), contextData: vi.fn(() => ({ name: 'test', results: [] })), + childrenData: vi.fn(() => ({ name: 'test', results: [] })), 
explainData: vi.fn(() => ({ target: 'test', kind: 'function', results: [] })), whereData: vi.fn(() => ({ target: 'test', mode: 'symbol', results: [] })), diffImpactData: vi.fn(() => ({ changedFiles: 0, affectedFunctions: [] })), @@ -312,6 +314,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: fnDepsMock, fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -371,6 +374,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: fnImpactMock, contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -427,6 +431,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: diffImpactMock, @@ -486,6 +491,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -546,6 +552,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: fnDepsMock, fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -604,6 +611,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -656,6 +664,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -710,6 +719,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: fnDepsMock, fnImpactData: vi.fn(), contextData: vi.fn(), + 
childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -774,6 +784,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -831,6 +842,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -879,6 +891,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -927,6 +940,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -975,6 +989,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -1024,6 +1039,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), From 3802b9f4b257d76d7b44cb3cd8d55071ede8a5d1 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:03:10 -0700 Subject: [PATCH 03/12] =?UTF-8?q?feat:=20add=20expanded=20edge=20types=20?= =?UTF-8?q?=E2=80=94=20contains,=20parameter=5Fof,=20receiver=20(Phase=202?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build file→definition and parent→child contains edges, parameter_of inverse edges, and receiver 
edges for method-call dispatch. Add CORE_EDGE_KINDS, STRUCTURAL_EDGE_KINDS, EVERY_EDGE_KIND constants. Exclude structural edges from moduleMapData coupling counts. Scope directory contains-edge cleanup to preserve symbol-level edges. Impact: 3 functions changed, 22 affected --- src/builder.js | 62 ++++++++++++++---- src/index.js | 3 + src/mcp.js | 4 +- src/queries.js | 24 ++++++- src/structure.js | 5 +- tests/integration/build-parity.test.js | 25 +++++++- tests/integration/queries.test.js | 87 +++++++++++++++++++++++++- 7 files changed, 187 insertions(+), 23 deletions(-) diff --git a/src/builder.js b/src/builder.js index 7a916647..79fd9d47 100644 --- a/src/builder.js +++ b/src/builder.js @@ -598,20 +598,32 @@ export async function buildGraph(rootDir, opts = {}) { fileSymbols.set(relPath, symbols); insertNode.run(relPath, 'file', relPath, 0, null, null); + const fileRow = getNodeId.get(relPath, 'file', relPath, 0); for (const def of symbols.definitions) { insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null, null); - if (def.children?.length) { - const parentRow = getNodeId.get(def.name, def.kind, relPath, def.line); - if (parentRow) { - for (const child of def.children) { - insertNode.run( - child.name, - child.kind, - relPath, - child.line, - child.endLine || null, - parentRow.id, - ); + const defRow = getNodeId.get(def.name, def.kind, relPath, def.line); + // File → top-level definition contains edge + if (fileRow && defRow) { + insertEdge.run(fileRow.id, defRow.id, 'contains', 1.0, 0); + } + if (def.children?.length && defRow) { + for (const child of def.children) { + insertNode.run( + child.name, + child.kind, + relPath, + child.line, + child.endLine || null, + defRow.id, + ); + // Parent → child contains edge + const childRow = getNodeId.get(child.name, child.kind, relPath, child.line); + if (childRow) { + insertEdge.run(defRow.id, childRow.id, 'contains', 1.0, 0); + // Parameter → parent parameter_of edge (inverse direction) + if 
(child.kind === 'parameter') { + insertEdge.run(childRow.id, defRow.id, 'parameter_of', 1.0, 0); + } } } } @@ -797,7 +809,7 @@ export async function buildGraph(rootDir, opts = {}) { // N+1 optimization: pre-load all nodes into a lookup map for edge building const allNodes = db .prepare( - `SELECT id, name, kind, file FROM nodes WHERE kind IN ('function','method','class','interface')`, + `SELECT id, name, kind, file FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`, ) .all(); const nodesByName = new Map(); @@ -956,6 +968,30 @@ export async function buildGraph(rootDir, opts = {}) { edgeCount++; } } + + // Receiver edge: caller → receiver type node + if ( + call.receiver && + !BUILTIN_RECEIVERS.has(call.receiver) && + call.receiver !== 'this' && + call.receiver !== 'self' && + call.receiver !== 'super' + ) { + const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); + // Same-file first, then global + const samefile = nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; + const candidates = samefile.length > 0 ? 
samefile : nodesByName.get(call.receiver) || []; + const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); + if (receiverNodes.length > 0 && caller) { + const recvTarget = receiverNodes[0]; + const recvKey = `recv|${caller.id}|${recvTarget.id}`; + if (!seenCallEdges.has(recvKey)) { + seenCallEdges.add(recvKey); + insertEdge.run(caller.id, recvTarget.id, 'receiver', 0.7, 0); + edgeCount++; + } + } + } } // Class extends edges (use pre-loaded maps instead of inline DB queries) diff --git a/src/index.js b/src/index.js index 973d2475..6774d54b 100644 --- a/src/index.js +++ b/src/index.js @@ -107,11 +107,13 @@ export { getActiveEngine, parseFileAuto, parseFilesAuto } from './parser.js'; // Query functions (data-returning) export { ALL_SYMBOL_KINDS, + CORE_EDGE_KINDS, CORE_SYMBOL_KINDS, childrenData, contextData, diffImpactData, diffImpactMermaid, + EVERY_EDGE_KIND, EVERY_SYMBOL_KIND, EXTENDED_SYMBOL_KINDS, explainData, @@ -130,6 +132,7 @@ export { pathData, queryNameData, rolesData, + STRUCTURAL_EDGE_KINDS, statsData, VALID_ROLES, whereData, diff --git a/src/mcp.js b/src/mcp.js index d02cdf29..cd0b8808 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -9,7 +9,7 @@ import { createRequire } from 'node:module'; import { findCycles } from './cycles.js'; import { findDbPath } from './db.js'; import { MCP_DEFAULTS, MCP_MAX_LIMIT } from './paginate.js'; -import { diffImpactMermaid, EVERY_SYMBOL_KIND, VALID_ROLES } from './queries.js'; +import { diffImpactMermaid, EVERY_EDGE_KIND, EVERY_SYMBOL_KIND, VALID_ROLES } from './queries.js'; const REPO_PROP = { repo: { @@ -53,7 +53,7 @@ const BASE_TOOLS = [ to: { type: 'string', description: 'Target symbol for path mode (required in path mode)' }, edge_kinds: { type: 'array', - items: { type: 'string' }, + items: { type: 'string', enum: EVERY_EDGE_KIND }, description: 'Edge kinds to follow in path mode (default: ["calls"])', }, reverse: { diff --git a/src/queries.js b/src/queries.js index dc1fb1ad..6d094108 100644 --- 
a/src/queries.js +++ b/src/queries.js @@ -89,6 +89,24 @@ export const EVERY_SYMBOL_KIND = [...CORE_SYMBOL_KINDS, ...EXTENDED_SYMBOL_KINDS // Backward compat: ALL_SYMBOL_KINDS stays as the core 10 export const ALL_SYMBOL_KINDS = CORE_SYMBOL_KINDS; +// ── Edge kind constants ───────────────────────────────────────────── +// Core edge kinds — coupling and dependency relationships +export const CORE_EDGE_KINDS = [ + 'imports', + 'imports-type', + 'reexports', + 'calls', + 'extends', + 'implements', + 'contains', +]; + +// Structural edge kinds — parent/child and type relationships +export const STRUCTURAL_EDGE_KINDS = ['parameter_of', 'receiver']; + +// Full set for MCP enum and validation +export const EVERY_EDGE_KIND = [...CORE_EDGE_KINDS, ...STRUCTURAL_EDGE_KINDS]; + export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; /** @@ -348,12 +366,12 @@ export function moduleMapData(customDbPath, limit = 20, opts = {}) { const nodes = db .prepare(` SELECT n.*, - (SELECT COUNT(*) FROM edges WHERE source_id = n.id AND kind != 'contains') as out_edges, - (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind != 'contains') as in_edges + (SELECT COUNT(*) FROM edges WHERE source_id = n.id AND kind NOT IN ('contains', 'parameter_of', 'receiver')) as out_edges, + (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind NOT IN ('contains', 'parameter_of', 'receiver')) as in_edges FROM nodes n WHERE n.kind = 'file' ${testFilter} - ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind != 'contains') DESC + ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind NOT IN ('contains', 'parameter_of', 'receiver')) DESC LIMIT ? 
`) .all(limit); diff --git a/src/structure.js b/src/structure.js index a4c28f41..6169795d 100644 --- a/src/structure.js +++ b/src/structure.js @@ -34,8 +34,11 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director `); // Clean previous directory nodes/edges (idempotent rebuild) + // Scope contains-edge delete to directory-sourced edges only, + // preserving symbol-level contains edges (file→def, class→method, etc.) db.exec(` - DELETE FROM edges WHERE kind = 'contains'; + DELETE FROM edges WHERE kind = 'contains' + AND source_id IN (SELECT id FROM nodes WHERE kind = 'directory'); DELETE FROM node_metrics; DELETE FROM nodes WHERE kind = 'directory'; `); diff --git a/tests/integration/build-parity.test.js b/tests/integration/build-parity.test.js index 5651a61b..7811f6df 100644 --- a/tests/integration/build-parity.test.js +++ b/tests/integration/build-parity.test.js @@ -87,8 +87,27 @@ describeOrSkip('Build parity: native vs WASM', () => { }); it('produces identical edges', () => { - const wasmGraph = readGraph(path.join(wasmDir, '.codegraph', 'graph.db')); - const nativeGraph = readGraph(path.join(nativeDir, '.codegraph', 'graph.db')); - expect(nativeGraph.edges).toEqual(wasmGraph.edges); + // Filter out edges involving extended-kind nodes (parameter, property, constant) + // — WASM extracts children but native engine defers child extraction for now. 
+ function readCoreEdges(dbPath) { + const db = new Database(dbPath, { readonly: true }); + const edges = db + .prepare(` + SELECT n1.name AS source_name, n2.name AS target_name, e.kind + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.kind NOT IN ('parameter', 'property', 'constant') + AND n2.kind NOT IN ('parameter', 'property', 'constant') + ORDER BY n1.name, n2.name, e.kind + `) + .all(); + db.close(); + return edges; + } + + const wasmEdges = readCoreEdges(path.join(wasmDir, '.codegraph', 'graph.db')); + const nativeEdges = readCoreEdges(path.join(nativeDir, '.codegraph', 'graph.db')); + expect(nativeEdges).toEqual(wasmEdges); }); }); diff --git a/tests/integration/queries.test.js b/tests/integration/queries.test.js index 0bb3b7dc..af288060 100644 --- a/tests/integration/queries.test.js +++ b/tests/integration/queries.test.js @@ -103,6 +103,24 @@ beforeAll(() => { // Low-confidence call edge for quality tests insertEdge(db, formatResponse, validateToken, 'calls', 0.3); + // ── Phase 2: expanded node/edge types ────────────────────────────── + // Class with method and property children + const userService = insertNode(db, 'UserService', 'class', 'auth.js', 40); + const getUser = insertNode(db, 'UserService.getUser', 'method', 'auth.js', 42); + const dbConn = insertNode(db, 'dbConn', 'property', 'auth.js', 41); + const userId = insertNode(db, 'userId', 'parameter', 'auth.js', 10); + + // Symbol-level contains edges (file → class, class → method/property) + insertEdge(db, fAuth, userService, 'contains'); + insertEdge(db, userService, getUser, 'contains'); + insertEdge(db, userService, dbConn, 'contains'); + + // parameter_of edge (parameter → owning function) + insertEdge(db, userId, authenticate, 'parameter_of'); + + // receiver edge (caller → receiver type) + insertEdge(db, handleRoute, userService, 'receiver', 0.7); + // File hashes (for fileHash exposure) for (const f of ['auth.js', 'middleware.js', 
'routes.js', 'utils.js', 'auth.test.js']) { db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run( @@ -448,7 +466,7 @@ describe('explainData', () => { const r = data.results[0]; expect(r.file).toBe('auth.js'); - expect(r.symbolCount).toBe(2); + expect(r.symbolCount).toBe(6); // Both authenticate and validateToken are called from middleware.js expect(r.publicApi.map((s) => s.name)).toContain('authenticate'); expect(r.publicApi.map((s) => s.name)).toContain('validateToken'); @@ -661,6 +679,73 @@ describe('noTests filtering', () => { }); }); +// ─── Expanded edge types (Phase 2) ───────────────────────────────────── + +describe('expanded edge types', () => { + test('statsData counts new edge kinds', () => { + const data = statsData(dbPath); + expect(data.edges.byKind.contains).toBeGreaterThanOrEqual(3); + expect(data.edges.byKind.parameter_of).toBeGreaterThanOrEqual(1); + expect(data.edges.byKind.receiver).toBeGreaterThanOrEqual(1); + }); + + test('moduleMapData excludes structural edges from coupling', () => { + const data = moduleMapData(dbPath); + // auth.js has contains, parameter_of, receiver edges but they should + // not inflate coupling counts — only imports/calls/etc. 
count + const authNode = data.topNodes.find((n) => n.file === 'auth.js'); + expect(authNode).toBeDefined(); + // in_edges should not include contains/parameter_of/receiver + // auth.js is imported by middleware.js and auth.test.js → in_edges = 2 + expect(authNode.inEdges).toBe(2); + }); + + test('queryNameData returns new edge kinds in callers/callees', () => { + // authenticate has a parameter_of edge from userId + const authData = queryNameData('authenticate', dbPath); + const fn = authData.results.find((r) => r.kind === 'function' && r.name === 'authenticate'); + expect(fn).toBeDefined(); + const paramCaller = fn.callers.find((c) => c.edgeKind === 'parameter_of'); + expect(paramCaller).toBeDefined(); + expect(paramCaller.name).toBe('userId'); + + // UserService has contains callees (method and property) + const usData = queryNameData('UserService', dbPath); + const cls = usData.results.find((r) => r.kind === 'class' && r.name === 'UserService'); + expect(cls).toBeDefined(); + const containsCallees = cls.callees.filter((c) => c.edgeKind === 'contains'); + expect(containsCallees.length).toBeGreaterThanOrEqual(2); + const names = containsCallees.map((c) => c.name); + expect(names).toContain('UserService.getUser'); + expect(names).toContain('dbConn'); + + // UserService has a receiver caller (handleRoute) + const receiverCaller = cls.callers.find((c) => c.edgeKind === 'receiver'); + expect(receiverCaller).toBeDefined(); + expect(receiverCaller.name).toBe('handleRoute'); + }); + + test('pathData traverses contains edges', () => { + const data = pathData('UserService', 'UserService.getUser', dbPath, { + edgeKinds: ['contains'], + }); + expect(data.found).toBe(true); + expect(data.hops).toBe(1); + expect(data.path[0].name).toBe('UserService'); + expect(data.path[1].name).toBe('UserService.getUser'); + expect(data.path[1].edgeKind).toBe('contains'); + }); + + test('pathData traverses receiver edges', () => { + const data = pathData('handleRoute', 'UserService', dbPath, 
{ + edgeKinds: ['receiver'], + }); + expect(data.found).toBe(true); + expect(data.hops).toBe(1); + expect(data.path[1].edgeKind).toBe('receiver'); + }); +}); + // ─── Stable symbol schema conformance ────────────────────────────────── const STABLE_FIELDS = ['name', 'kind', 'file', 'line', 'endLine', 'role', 'fileHash']; From 7633f1d3d10aaac9ffe5dc70bec586deae38f7a4 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:24:04 -0700 Subject: [PATCH 04/12] feat: add intraprocedural control flow graph (CFG) construction Add opt-in CFG analysis that builds basic-block control flow graphs from tree-sitter AST for individual functions. Enables complexity-aware impact analysis and opens the path to dataflow (def-use chains). - DB migration v12: cfg_blocks + cfg_edges tables - New src/cfg.js module: CFG_RULES, buildFunctionCFG, buildCFGData, cfgData, cfgToDOT, cfgToMermaid, cfg CLI printer - Builder integration: --cfg flag triggers CFG after complexity pass - CLI: `cfg ` command with --format text/dot/mermaid, -j, --ndjson - MCP: cfg tool with name, format, file, kind, pagination props - Exports findFunctionNode from complexity.js for reuse - 24 unit tests + 11 integration tests (35 total) Phase 1: JS/TS/TSX only. Handles if/else, for/while/do-while, switch, try/catch/finally, break/continue (with labels), return/throw. 
Impact: 27 functions changed, 36 affected --- src/builder.js | 13 + src/cfg.js | 1035 +++++++++++++++++++++++++++++++++ src/cli.js | 39 +- src/complexity.js | 2 +- src/db.js | 31 + src/index.js | 11 + src/mcp.js | 38 ++ tests/integration/cfg.test.js | 199 +++++++ tests/unit/cfg.test.js | 457 +++++++++++++++ tests/unit/mcp.test.js | 1 + 10 files changed, 1824 insertions(+), 2 deletions(-) create mode 100644 src/cfg.js create mode 100644 tests/integration/cfg.test.js create mode 100644 tests/unit/cfg.test.js diff --git a/src/builder.js b/src/builder.js index 79fd9d47..6ceec39e 100644 --- a/src/builder.js +++ b/src/builder.js @@ -1139,6 +1139,18 @@ export async function buildGraph(rootDir, opts = {}) { } _t.complexityMs = performance.now() - _t.complexity0; + // Opt-in CFG analysis (--cfg) + if (opts.cfg) { + _t.cfg0 = performance.now(); + try { + const { buildCFGData } = await import('./cfg.js'); + await buildCFGData(db, allSymbols, rootDir, engineOpts); + } catch (err) { + debug(`CFG analysis failed: ${err.message}`); + } + _t.cfgMs = performance.now() - _t.cfg0; + } + // Opt-in dataflow analysis (--dataflow) if (opts.dataflow) { _t.dataflow0 = performance.now(); @@ -1241,6 +1253,7 @@ export async function buildGraph(rootDir, opts = {}) { structureMs: +_t.structureMs.toFixed(1), rolesMs: +_t.rolesMs.toFixed(1), complexityMs: +_t.complexityMs.toFixed(1), + ...(_t.cfgMs != null && { cfgMs: +_t.cfgMs.toFixed(1) }), }, }; } diff --git a/src/cfg.js b/src/cfg.js new file mode 100644 index 00000000..0e6e49be --- /dev/null +++ b/src/cfg.js @@ -0,0 +1,1035 @@ +/** + * Intraprocedural Control Flow Graph (CFG) construction from tree-sitter AST. + * + * Builds basic-block CFGs for individual functions, stored in cfg_blocks + cfg_edges tables. + * Opt-in via `build --cfg`. JS/TS/TSX only for Phase 1. 
+ */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { COMPLEXITY_RULES } from './complexity.js'; +import { openReadonlyOrFail } from './db.js'; +import { info } from './logger.js'; +import { paginateResult, printNdjson } from './paginate.js'; +import { LANGUAGE_REGISTRY } from './parser.js'; +import { isTestFile } from './queries.js'; + +// ─── CFG Node Type Rules (extends COMPLEXITY_RULES) ────────────────────── + +const JS_TS_CFG = { + ifNode: 'if_statement', + elseClause: 'else_clause', + forNodes: new Set(['for_statement', 'for_in_statement']), + whileNode: 'while_statement', + doNode: 'do_statement', + switchNode: 'switch_statement', + caseNode: 'switch_case', + defaultNode: 'switch_default', + tryNode: 'try_statement', + catchNode: 'catch_clause', + finallyNode: 'finally_clause', + returnNode: 'return_statement', + throwNode: 'throw_statement', + breakNode: 'break_statement', + continueNode: 'continue_statement', + blockNode: 'statement_block', + labeledNode: 'labeled_statement', + functionNodes: new Set([ + 'function_declaration', + 'function_expression', + 'arrow_function', + 'method_definition', + 'generator_function', + 'generator_function_declaration', + ]), +}; + +export const CFG_RULES = new Map([ + ['javascript', JS_TS_CFG], + ['typescript', JS_TS_CFG], + ['tsx', JS_TS_CFG], +]); + +// Language IDs that support CFG (Phase 1: JS/TS/TSX only) +const CFG_LANG_IDS = new Set(['javascript', 'typescript', 'tsx']); + +// JS/TS extensions +const CFG_EXTENSIONS = new Set(); +for (const entry of LANGUAGE_REGISTRY) { + if (CFG_LANG_IDS.has(entry.id)) { + for (const ext of entry.extensions) CFG_EXTENSIONS.add(ext); + } +} + +// ─── Core Algorithm: AST → CFG ────────────────────────────────────────── + +/** + * Build a control flow graph for a single function AST node. 
+ * + * @param {object} functionNode - tree-sitter function AST node + * @param {string} langId - language identifier (javascript, typescript, tsx) + * @returns {{ blocks: object[], edges: object[] }} - CFG blocks and edges + */ +export function buildFunctionCFG(functionNode, langId) { + const rules = CFG_RULES.get(langId); + if (!rules) return { blocks: [], edges: [] }; + + const blocks = []; + const edges = []; + let nextIndex = 0; + + function makeBlock(type, startLine = null, endLine = null, label = null) { + const block = { + index: nextIndex++, + type, + startLine, + endLine, + label, + }; + blocks.push(block); + return block; + } + + function addEdge(source, target, kind) { + edges.push({ + sourceIndex: source.index, + targetIndex: target.index, + kind, + }); + } + + const entryBlock = makeBlock('entry'); + const exitBlock = makeBlock('exit'); + + // Loop context stack for break/continue resolution + const loopStack = []; + + // Label map for labeled break/continue + const labelMap = new Map(); + + /** + * Get the body node of a function (handles arrow functions with expression bodies). + */ + function getFunctionBody(fnNode) { + const body = fnNode.childForFieldName('body'); + if (!body) return null; + return body; + } + + /** + * Get statement children from a block or statement list. + */ + function getStatements(node) { + if (!node) return []; + // statement_block: get named children + if (node.type === rules.blockNode) { + const stmts = []; + for (let i = 0; i < node.namedChildCount; i++) { + stmts.push(node.namedChild(i)); + } + return stmts; + } + // Single statement (e.g., arrow fn with expression body, or unbraced if body) + return [node]; + } + + /** + * Process a list of statements, creating blocks and edges. + * Returns the last "current" block after processing, or null if all paths terminated. 
+ */ + function processStatements(stmts, currentBlock) { + let cur = currentBlock; + + for (const stmt of stmts) { + if (!cur) { + // Dead code after return/break/continue/throw — skip remaining + break; + } + cur = processStatement(stmt, cur); + } + + return cur; + } + + /** + * Process a single statement, returns the new current block or null if terminated. + */ + function processStatement(stmt, currentBlock) { + if (!stmt || !currentBlock) return currentBlock; + + const type = stmt.type; + + // Labeled statement: register label then process inner statement + if (type === rules.labeledNode) { + const labelNode = stmt.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + const body = stmt.childForFieldName('body'); + if (body && labelName) { + // Will be filled when we encounter the loop + const labelCtx = { headerBlock: null, exitBlock: null }; + labelMap.set(labelName, labelCtx); + const result = processStatement(body, currentBlock); + labelMap.delete(labelName); + return result; + } + return currentBlock; + } + + // If statement + if (type === rules.ifNode) { + return processIf(stmt, currentBlock); + } + + // For / for-in loops + if (rules.forNodes.has(type)) { + return processForLoop(stmt, currentBlock); + } + + // While loop + if (type === rules.whileNode) { + return processWhileLoop(stmt, currentBlock); + } + + // Do-while loop + if (type === rules.doNode) { + return processDoWhileLoop(stmt, currentBlock); + } + + // Switch statement + if (type === rules.switchNode) { + return processSwitch(stmt, currentBlock); + } + + // Try/catch/finally + if (type === rules.tryNode) { + return processTryCatch(stmt, currentBlock); + } + + // Return statement + if (type === rules.returnNode) { + currentBlock.endLine = stmt.startPosition.row + 1; + addEdge(currentBlock, exitBlock, 'return'); + return null; // path terminated + } + + // Throw statement + if (type === rules.throwNode) { + currentBlock.endLine = stmt.startPosition.row + 1; + 
addEdge(currentBlock, exitBlock, 'exception'); + return null; // path terminated + } + + // Break statement + if (type === rules.breakNode) { + const labelNode = stmt.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + + let target = null; + if (labelName && labelMap.has(labelName)) { + target = labelMap.get(labelName).exitBlock; + } else if (loopStack.length > 0) { + target = loopStack[loopStack.length - 1].exitBlock; + } + + if (target) { + currentBlock.endLine = stmt.startPosition.row + 1; + addEdge(currentBlock, target, 'break'); + return null; // path terminated + } + // break outside loop (switch case) — just continue + return currentBlock; + } + + // Continue statement + if (type === rules.continueNode) { + const labelNode = stmt.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + + let target = null; + if (labelName && labelMap.has(labelName)) { + target = labelMap.get(labelName).headerBlock; + } else if (loopStack.length > 0) { + target = loopStack[loopStack.length - 1].headerBlock; + } + + if (target) { + currentBlock.endLine = stmt.startPosition.row + 1; + addEdge(currentBlock, target, 'continue'); + return null; // path terminated + } + return currentBlock; + } + + // Regular statement — extend current block + if (!currentBlock.startLine) { + currentBlock.startLine = stmt.startPosition.row + 1; + } + currentBlock.endLine = stmt.endPosition.row + 1; + return currentBlock; + } + + /** + * Process an if/else-if/else chain. 
+ */ + function processIf(ifStmt, currentBlock) { + // Terminate current block at condition + currentBlock.endLine = ifStmt.startPosition.row + 1; + + const condBlock = makeBlock( + 'condition', + ifStmt.startPosition.row + 1, + ifStmt.startPosition.row + 1, + 'if', + ); + addEdge(currentBlock, condBlock, 'fallthrough'); + + const joinBlock = makeBlock('body'); + + // True branch (consequent) + const consequent = ifStmt.childForFieldName('consequence'); + const trueBlock = makeBlock('branch_true', null, null, 'then'); + addEdge(condBlock, trueBlock, 'branch_true'); + const trueStmts = getStatements(consequent); + const trueEnd = processStatements(trueStmts, trueBlock); + if (trueEnd) { + addEdge(trueEnd, joinBlock, 'fallthrough'); + } + + // False branch (alternative / else / else-if) + const alternative = ifStmt.childForFieldName('alternative'); + if (alternative) { + if (alternative.type === rules.elseClause) { + // else clause — may contain another if (else-if) or a block + const elseChildren = []; + for (let i = 0; i < alternative.namedChildCount; i++) { + elseChildren.push(alternative.namedChild(i)); + } + if (elseChildren.length === 1 && elseChildren[0].type === rules.ifNode) { + // else-if: recurse + const falseBlock = makeBlock('branch_false', null, null, 'else-if'); + addEdge(condBlock, falseBlock, 'branch_false'); + const elseIfEnd = processIf(elseChildren[0], falseBlock); + if (elseIfEnd) { + addEdge(elseIfEnd, joinBlock, 'fallthrough'); + } + } else { + // else block + const falseBlock = makeBlock('branch_false', null, null, 'else'); + addEdge(condBlock, falseBlock, 'branch_false'); + const falseEnd = processStatements(elseChildren, falseBlock); + if (falseEnd) { + addEdge(falseEnd, joinBlock, 'fallthrough'); + } + } + } + } else { + // No else: condition-false goes directly to join + addEdge(condBlock, joinBlock, 'branch_false'); + } + + return joinBlock; + } + + /** + * Process a for/for-in loop. 
+ */ + function processForLoop(forStmt, currentBlock) { + const headerBlock = makeBlock( + 'loop_header', + forStmt.startPosition.row + 1, + forStmt.startPosition.row + 1, + 'for', + ); + addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = makeBlock('body'); + + // Register loop context + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + loopStack.push(loopCtx); + + // Update label map if this is inside a labeled statement + for (const [, ctx] of labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = headerBlock; + ctx.exitBlock = loopExitBlock; + } + } + + // Loop body + const body = forStmt.childForFieldName('body'); + const bodyBlock = makeBlock('loop_body'); + addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getStatements(body); + const bodyEnd = processStatements(bodyStmts, bodyBlock); + + if (bodyEnd) { + addEdge(bodyEnd, headerBlock, 'loop_back'); + } + + // Loop exit + addEdge(headerBlock, loopExitBlock, 'loop_exit'); + + loopStack.pop(); + return loopExitBlock; + } + + /** + * Process a while loop. 
+ */ + function processWhileLoop(whileStmt, currentBlock) { + const headerBlock = makeBlock( + 'loop_header', + whileStmt.startPosition.row + 1, + whileStmt.startPosition.row + 1, + 'while', + ); + addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = makeBlock('body'); + + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + loopStack.push(loopCtx); + + for (const [, ctx] of labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = headerBlock; + ctx.exitBlock = loopExitBlock; + } + } + + const body = whileStmt.childForFieldName('body'); + const bodyBlock = makeBlock('loop_body'); + addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getStatements(body); + const bodyEnd = processStatements(bodyStmts, bodyBlock); + + if (bodyEnd) { + addEdge(bodyEnd, headerBlock, 'loop_back'); + } + + addEdge(headerBlock, loopExitBlock, 'loop_exit'); + + loopStack.pop(); + return loopExitBlock; + } + + /** + * Process a do-while loop. + */ + function processDoWhileLoop(doStmt, currentBlock) { + const bodyBlock = makeBlock('loop_body', doStmt.startPosition.row + 1, null, 'do'); + addEdge(currentBlock, bodyBlock, 'fallthrough'); + + const condBlock = makeBlock('loop_header', null, null, 'do-while'); + const loopExitBlock = makeBlock('body'); + + const loopCtx = { headerBlock: condBlock, exitBlock: loopExitBlock }; + loopStack.push(loopCtx); + + for (const [, ctx] of labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = condBlock; + ctx.exitBlock = loopExitBlock; + } + } + + const body = doStmt.childForFieldName('body'); + const bodyStmts = getStatements(body); + const bodyEnd = processStatements(bodyStmts, bodyBlock); + + if (bodyEnd) { + addEdge(bodyEnd, condBlock, 'fallthrough'); + } + + // Condition: loop_back or exit + addEdge(condBlock, bodyBlock, 'loop_back'); + addEdge(condBlock, loopExitBlock, 'loop_exit'); + + loopStack.pop(); + return loopExitBlock; + } + + /** + * Process a switch statement. 
+ */ + function processSwitch(switchStmt, currentBlock) { + currentBlock.endLine = switchStmt.startPosition.row + 1; + + const switchHeader = makeBlock( + 'condition', + switchStmt.startPosition.row + 1, + switchStmt.startPosition.row + 1, + 'switch', + ); + addEdge(currentBlock, switchHeader, 'fallthrough'); + + const joinBlock = makeBlock('body'); + + // Switch acts like a break target for contained break statements + const switchCtx = { headerBlock: switchHeader, exitBlock: joinBlock }; + loopStack.push(switchCtx); + + // Collect case clauses from the switch body + const switchBody = switchStmt.childForFieldName('body'); + if (switchBody) { + let hasDefault = false; + for (let i = 0; i < switchBody.namedChildCount; i++) { + const caseClause = switchBody.namedChild(i); + const isDefault = + caseClause.type === rules.defaultNode || + (caseClause.type === rules.caseNode && !caseClause.childForFieldName('value')); + + const caseLabel = isDefault ? 'default' : 'case'; + const caseBlock = makeBlock( + isDefault ? 'case' : 'case', + caseClause.startPosition.row + 1, + null, + caseLabel, + ); + addEdge(switchHeader, caseBlock, isDefault ? 'branch_false' : 'branch_true'); + if (isDefault) hasDefault = true; + + // Process case body statements + const caseStmts = []; + for (let j = 0; j < caseClause.namedChildCount; j++) { + const child = caseClause.namedChild(j); + // Skip the case value expression + if (child.type !== 'identifier' && child.type !== 'string' && child.type !== 'number') { + caseStmts.push(child); + } + } + + const caseEnd = processStatements(caseStmts, caseBlock); + if (caseEnd) { + // Fall-through to join (or next case, but we simplify to join) + addEdge(caseEnd, joinBlock, 'fallthrough'); + } + } + + // If no default case, switch header can skip to join + if (!hasDefault) { + addEdge(switchHeader, joinBlock, 'branch_false'); + } + } + + loopStack.pop(); + return joinBlock; + } + + /** + * Process try/catch/finally. 
   * Structure: try → (catch?) → (finally?) → join; only the try→catch
   * transition carries an 'exception' edge.
   */
  function processTryCatch(tryStmt, currentBlock) {
    // The preceding block ends where the `try` begins.
    currentBlock.endLine = tryStmt.startPosition.row + 1;

    // Join block reached after the whole construct completes normally.
    const joinBlock = makeBlock('body');

    // Try body
    const tryBody = tryStmt.childForFieldName('body');
    const tryBlock = makeBlock('body', tryBody ? tryBody.startPosition.row + 1 : null, null, 'try');
    addEdge(currentBlock, tryBlock, 'fallthrough');

    const tryStmts = getStatements(tryBody);
    const tryEnd = processStatements(tryStmts, tryBlock);

    // Catch handler — scan named children for catch/finally clauses
    // (field names vary per grammar, so we match on node type via rules).
    let catchHandler = null;
    let finallyHandler = null;
    for (let i = 0; i < tryStmt.namedChildCount; i++) {
      const child = tryStmt.namedChild(i);
      if (child.type === rules.catchNode) catchHandler = child;
      if (child.type === rules.finallyNode) finallyHandler = child;
    }

    if (catchHandler) {
      const catchBlock = makeBlock('catch', catchHandler.startPosition.row + 1, null, 'catch');
      // Exception edge from try to catch
      addEdge(tryBlock, catchBlock, 'exception');

      const catchBody = catchHandler.childForFieldName('body');
      const catchStmts = getStatements(catchBody);
      const catchEnd = processStatements(catchStmts, catchBlock);

      if (finallyHandler) {
        // try/catch/finally: both the normal and the handled-exception
        // paths funnel through the finally block before the join.
        const finallyBlock = makeBlock(
          'finally',
          finallyHandler.startPosition.row + 1,
          null,
          'finally',
        );
        if (tryEnd) addEdge(tryEnd, finallyBlock, 'fallthrough');
        if (catchEnd) addEdge(catchEnd, finallyBlock, 'fallthrough');

        const finallyBody = finallyHandler.childForFieldName('body');
        const finallyStmts = getStatements(finallyBody);
        const finallyEnd = processStatements(finallyStmts, finallyBlock);
        if (finallyEnd) addEdge(finallyEnd, joinBlock, 'fallthrough');
      } else {
        // try/catch only: both paths reconverge directly at the join.
        if (tryEnd) addEdge(tryEnd, joinBlock, 'fallthrough');
        if (catchEnd) addEdge(catchEnd, joinBlock, 'fallthrough');
      }
    } else if (finallyHandler) {
      // try/finally with no catch.
      // NOTE(review): no 'exception' edge is emitted on this path, so the
      // exceptional route through finally is not modeled — confirm intended.
      const finallyBlock = makeBlock(
        'finally',
        finallyHandler.startPosition.row + 1,
        null,
        'finally',
      );
      if (tryEnd) addEdge(tryEnd, finallyBlock, 'fallthrough');

      const finallyBody = finallyHandler.childForFieldName('body');
      const finallyStmts = getStatements(finallyBody);
      const finallyEnd = processStatements(finallyStmts, finallyBlock);
      if (finallyEnd) addEdge(finallyEnd, joinBlock, 'fallthrough');
    } else {
      // Bare try with neither catch nor finally clause.
      if (tryEnd) addEdge(tryEnd, joinBlock, 'fallthrough');
    }

    return joinBlock;
  }

  // ── Main entry point ──────────────────────────────────────────────────

  // Resolve the function body; empty or expression-bodied functions get a
  // trivial ENTRY → EXIT graph.
  const body = getFunctionBody(functionNode);
  if (!body) {
    // Empty function or expression body
    addEdge(entryBlock, exitBlock, 'fallthrough');
    return { blocks, edges };
  }

  const stmts = getStatements(body);
  if (stmts.length === 0) {
    addEdge(entryBlock, exitBlock, 'fallthrough');
    return { blocks, edges };
  }

  // First real block; statement processing threads the "current" block
  // through and returns null when the path terminated (return/throw/break).
  const firstBlock = makeBlock('body');
  addEdge(entryBlock, firstBlock, 'fallthrough');

  const lastBlock = processStatements(stmts, firstBlock);
  if (lastBlock) {
    // Path fell off the end of the function: connect it to EXIT.
    addEdge(lastBlock, exitBlock, 'fallthrough');
  }

  return { blocks, edges };
}

// ─── Build-Time: Compute CFG for Changed Files ───────────────────────────

/**
 * Build CFG data for all function/method definitions and persist to DB.
+ * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [_engineOpts] - engine options (unused; always uses WASM for AST) + */ +export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { + // Lazily init WASM parsers if needed + let parsers = null; + let extToLang = null; + let needsFallback = false; + + for (const [relPath, symbols] of fileSymbols) { + if (!symbols._tree) { + const ext = path.extname(relPath).toLowerCase(); + if (CFG_EXTENSIONS.has(ext)) { + needsFallback = true; + break; + } + } + } + + if (needsFallback) { + const { createParsers } = await import('./parser.js'); + parsers = await createParsers(); + extToLang = new Map(); + for (const entry of LANGUAGE_REGISTRY) { + for (const ext of entry.extensions) { + extToLang.set(ext, entry.id); + } + } + } + + let getParserFn = null; + if (parsers) { + const mod = await import('./parser.js'); + getParserFn = mod.getParser; + } + + const { findFunctionNode } = await import('./complexity.js'); + + const insertBlock = db.prepare( + `INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) + VALUES (?, ?, ?, ?, ?, ?)`, + ); + const insertEdge = db.prepare( + `INSERT INTO cfg_edges (function_node_id, source_block_id, target_block_id, kind) + VALUES (?, ?, ?, ?)`, + ); + const deleteBlocks = db.prepare('DELETE FROM cfg_blocks WHERE function_node_id = ?'); + const deleteEdges = db.prepare('DELETE FROM cfg_edges WHERE function_node_id = ?'); + const getNodeId = db.prepare( + "SELECT id FROM nodes WHERE name = ? AND kind IN ('function','method') AND file = ? 
AND line = ?", + ); + + let analyzed = 0; + + const tx = db.transaction(() => { + for (const [relPath, symbols] of fileSymbols) { + const ext = path.extname(relPath).toLowerCase(); + if (!CFG_EXTENSIONS.has(ext)) continue; + + let tree = symbols._tree; + let langId = symbols._langId; + + // WASM fallback if no cached tree + if (!tree) { + if (!extToLang || !getParserFn) continue; + langId = extToLang.get(ext); + if (!langId || !CFG_LANG_IDS.has(langId)) continue; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch { + continue; + } + + const parser = getParserFn(parsers, absPath); + if (!parser) continue; + + try { + tree = parser.parse(code); + } catch { + continue; + } + } + + if (!langId) { + langId = extToLang ? extToLang.get(ext) : null; + if (!langId) continue; + } + + const cfgRules = CFG_RULES.get(langId); + if (!cfgRules) continue; + + const complexityRules = COMPLEXITY_RULES.get(langId); + if (!complexityRules) continue; + + for (const def of symbols.definitions) { + if (def.kind !== 'function' && def.kind !== 'method') continue; + if (!def.line) continue; + + const row = getNodeId.get(def.name, relPath, def.line); + if (!row) continue; + + const funcNode = findFunctionNode(tree.rootNode, def.line, def.endLine, complexityRules); + if (!funcNode) continue; + + const cfg = buildFunctionCFG(funcNode, langId); + if (cfg.blocks.length === 0) continue; + + // Clear old CFG data for this function + deleteEdges.run(row.id); + deleteBlocks.run(row.id); + + // Insert blocks and build index→dbId mapping + const blockDbIds = new Map(); + for (const block of cfg.blocks) { + const result = insertBlock.run( + row.id, + block.index, + block.type, + block.startLine, + block.endLine, + block.label, + ); + blockDbIds.set(block.index, result.lastInsertRowid); + } + + // Insert edges + for (const edge of cfg.edges) { + const sourceDbId = blockDbIds.get(edge.sourceIndex); + const targetDbId = 
blockDbIds.get(edge.targetIndex); + if (sourceDbId && targetDbId) { + insertEdge.run(row.id, sourceDbId, targetDbId, edge.kind); + } + } + + analyzed++; + } + + // Don't release _tree here — complexity/dataflow may still need it + } + }); + + tx(); + + if (analyzed > 0) { + info(`CFG: ${analyzed} functions analyzed`); + } +} + +// ─── Query-Time Functions ─────────────────────────────────────────────── + +function hasCfgTables(db) { + try { + db.prepare('SELECT 1 FROM cfg_blocks LIMIT 0').get(); + return true; + } catch { + return false; + } +} + +function findNodes(db, name, opts = {}) { + const kinds = opts.kind ? [opts.kind] : ['function', 'method']; + const placeholders = kinds.map(() => '?').join(', '); + const params = [`%${name}%`, ...kinds]; + + let fileCondition = ''; + if (opts.file) { + fileCondition = ' AND n.file LIKE ?'; + params.push(`%${opts.file}%`); + } + + const rows = db + .prepare( + `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line + FROM nodes n + WHERE n.name LIKE ? AND n.kind IN (${placeholders})${fileCondition}`, + ) + .all(...params); + + return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; +} + +/** + * Load CFG data for a function from the database. + * + * @param {string} name - Function name (partial match) + * @param {string} [customDbPath] - Path to graph.db + * @param {object} [opts] - Options + * @returns {{ function: object, blocks: object[], edges: object[], summary: object }} + */ +export function cfgData(name, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + + if (!hasCfgTables(db)) { + db.close(); + return { + name, + results: [], + warning: 'No CFG data found. 
Run `codegraph build --cfg` first.', + }; + } + + const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + if (nodes.length === 0) { + db.close(); + return { name, results: [] }; + } + + const blockStmt = db.prepare( + `SELECT id, block_index, block_type, start_line, end_line, label + FROM cfg_blocks WHERE function_node_id = ? + ORDER BY block_index`, + ); + const edgeStmt = db.prepare( + `SELECT e.kind, + sb.block_index AS source_index, sb.block_type AS source_type, + tb.block_index AS target_index, tb.block_type AS target_type + FROM cfg_edges e + JOIN cfg_blocks sb ON e.source_block_id = sb.id + JOIN cfg_blocks tb ON e.target_block_id = tb.id + WHERE e.function_node_id = ? + ORDER BY sb.block_index, tb.block_index`, + ); + + const results = nodes.map((node) => { + const cfgBlocks = blockStmt.all(node.id); + const cfgEdges = edgeStmt.all(node.id); + + return { + name: node.name, + kind: node.kind, + file: node.file, + line: node.line, + blocks: cfgBlocks.map((b) => ({ + index: b.block_index, + type: b.block_type, + startLine: b.start_line, + endLine: b.end_line, + label: b.label, + })), + edges: cfgEdges.map((e) => ({ + source: e.source_index, + sourceType: e.source_type, + target: e.target_index, + targetType: e.target_type, + kind: e.kind, + })), + summary: { + blockCount: cfgBlocks.length, + edgeCount: cfgEdges.length, + }, + }; + }); + + db.close(); + return paginateResult({ name, results }, 'results', opts); +} + +// ─── Export Formats ───────────────────────────────────────────────────── + +/** + * Convert CFG data to DOT format for Graphviz rendering. + */ +export function cfgToDOT(cfgResult) { + const lines = []; + + for (const r of cfgResult.results) { + lines.push(`digraph "${r.name}" {`); + lines.push(' rankdir=TB;'); + lines.push(' node [shape=box, fontname="monospace", fontsize=10];'); + + for (const block of r.blocks) { + const label = blockLabel(block); + const shape = block.type === 'entry' || block.type === 'exit' ? 
'ellipse' : 'box'; + const style = + block.type === 'condition' || block.type === 'loop_header' + ? ', style=filled, fillcolor="#ffffcc"' + : ''; + lines.push(` B${block.index} [label="${label}", shape=${shape}${style}];`); + } + + for (const edge of r.edges) { + const style = edgeStyle(edge.kind); + lines.push(` B${edge.source} -> B${edge.target} [label="${edge.kind}"${style}];`); + } + + lines.push('}'); + } + + return lines.join('\n'); +} + +/** + * Convert CFG data to Mermaid format. + */ +export function cfgToMermaid(cfgResult) { + const lines = []; + + for (const r of cfgResult.results) { + lines.push(`graph TD`); + lines.push(` subgraph "${r.name}"`); + + for (const block of r.blocks) { + const label = blockLabel(block); + if (block.type === 'entry' || block.type === 'exit') { + lines.push(` B${block.index}(["${label}"])`); + } else if (block.type === 'condition' || block.type === 'loop_header') { + lines.push(` B${block.index}{"${label}"}`); + } else { + lines.push(` B${block.index}["${label}"]`); + } + } + + for (const edge of r.edges) { + const label = edge.kind; + lines.push(` B${edge.source} -->|${label}| B${edge.target}`); + } + + lines.push(' end'); + } + + return lines.join('\n'); +} + +function blockLabel(block) { + const loc = + block.startLine && block.endLine + ? ` L${block.startLine}${block.endLine !== block.startLine ? `-${block.endLine}` : ''}` + : ''; + const label = block.label ? 
` (${block.label})` : ''; + return `${block.type}${label}${loc}`; +} + +function edgeStyle(kind) { + if (kind === 'exception') return ', color=red, fontcolor=red'; + if (kind === 'branch_true') return ', color=green, fontcolor=green'; + if (kind === 'branch_false') return ', color=red, fontcolor=red'; + if (kind === 'loop_back') return ', style=dashed, color=blue'; + if (kind === 'loop_exit') return ', color=orange'; + if (kind === 'return') return ', color=purple'; + if (kind === 'break') return ', color=orange, style=dashed'; + if (kind === 'continue') return ', color=blue, style=dashed'; + return ''; +} + +// ─── CLI Printer ──────────────────────────────────────────────────────── + +/** + * CLI display for cfg command. + */ +export function cfg(name, customDbPath, opts = {}) { + const data = cfgData(name, customDbPath, opts); + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + if (opts.ndjson) { + printNdjson(data.results); + return; + } + + if (data.warning) { + console.log(`\u26A0 ${data.warning}`); + return; + } + if (data.results.length === 0) { + console.log(`No symbols matching "${name}".`); + return; + } + + const format = opts.format || 'text'; + if (format === 'dot') { + console.log(cfgToDOT(data)); + return; + } + if (format === 'mermaid') { + console.log(cfgToMermaid(data)); + return; + } + + // Text format + for (const r of data.results) { + console.log(`\n${r.kind} ${r.name} (${r.file}:${r.line})`); + console.log('\u2500'.repeat(60)); + console.log(` Blocks: ${r.summary.blockCount} Edges: ${r.summary.edgeCount}`); + + if (r.blocks.length > 0) { + console.log('\n Blocks:'); + for (const b of r.blocks) { + const loc = b.startLine + ? ` L${b.startLine}${b.endLine && b.endLine !== b.startLine ? `-${b.endLine}` : ''}` + : ''; + const label = b.label ? 
` (${b.label})` : ''; + console.log(` [${b.index}] ${b.type}${label}${loc}`); + } + } + + if (r.edges.length > 0) { + console.log('\n Edges:'); + for (const e of r.edges) { + console.log(` B${e.source} \u2192 B${e.target} [${e.kind}]`); + } + } + } +} diff --git a/src/cli.js b/src/cli.js index 391d2274..737ce4ae 100644 --- a/src/cli.js +++ b/src/cli.js @@ -98,10 +98,16 @@ program .description('Parse repo and build graph in .codegraph/graph.db') .option('--no-incremental', 'Force full rebuild (ignore file hashes)') .option('--dataflow', 'Extract data flow edges (flows_to, returns, mutates)') + .option('--cfg', 'Build intraprocedural control flow graphs') .action(async (dir, opts) => { const root = path.resolve(dir || '.'); const engine = program.opts().engine; - await buildGraph(root, { incremental: opts.incremental, engine, dataflow: opts.dataflow }); + await buildGraph(root, { + incremental: opts.incremental, + engine, + dataflow: opts.dataflow, + cfg: opts.cfg, + }); }); program @@ -994,6 +1000,37 @@ program }); }); +program + .command('cfg ') + .description('Show control flow graph for a function') + .option('-d, --db ', 'Path to graph.db') + .option('--format ', 'Output format: text, dot, mermaid', 'text') + .option('-f, --file ', 'Scope to file (partial match)') + .option('-k, --kind ', 'Filter by symbol kind') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .option('--ndjson', 'Newline-delimited JSON output') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .action(async (name, opts) => { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + process.exit(1); + } + const { cfg } = await import('./cfg.js'); + cfg(name, opts.db, { + format: opts.format, + file: opts.file, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + ndjson: opts.ndjson, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + }); + }); + program .command('complexity [target]') .description('Show per-function complexity metrics (cognitive, cyclomatic, nesting depth, MI)') diff --git a/src/complexity.js b/src/complexity.js index f97cb616..132ccb25 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -1574,7 +1574,7 @@ export function computeAllMetrics(functionNode, langId) { /** * Find the function body node in a parse tree that matches a given line range. */ -function findFunctionNode(rootNode, startLine, _endLine, rules) { +export function findFunctionNode(rootNode, startLine, _endLine, rules) { // tree-sitter lines are 0-indexed const targetStart = startLine - 1; diff --git a/src/db.js b/src/db.js index 9f40d7cc..ff31fd39 100644 --- a/src/db.js +++ b/src/db.js @@ -173,6 +173,37 @@ export const MIGRATIONS = [ CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); `, }, + { + version: 12, + up: ` + CREATE TABLE IF NOT EXISTS cfg_blocks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + block_index INTEGER NOT NULL, + block_type TEXT NOT NULL, + start_line INTEGER, + end_line INTEGER, + label TEXT, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + UNIQUE(function_node_id, block_index) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_blocks_fn ON cfg_blocks(function_node_id); + + CREATE TABLE IF NOT EXISTS cfg_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + source_block_id INTEGER NOT NULL, + target_block_id INTEGER NOT NULL, + kind TEXT NOT NULL, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + FOREIGN 
KEY(source_block_id) REFERENCES cfg_blocks(id), + FOREIGN KEY(target_block_id) REFERENCES cfg_blocks(id) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_fn ON cfg_edges(function_node_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_src ON cfg_edges(source_block_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); + `, + }, ]; export function getBuildMeta(db, key) { diff --git a/src/index.js b/src/index.js index 6774d54b..8d44699a 100644 --- a/src/index.js +++ b/src/index.js @@ -22,6 +22,16 @@ export { evaluateBoundaries, PRESETS, validateBoundaryConfig } from './boundarie export { branchCompareData, branchCompareMermaid } from './branch-compare.js'; // Graph building export { buildGraph, collectFiles, loadPathAliases, resolveImportPath } from './builder.js'; +// Control flow graph (intraprocedural) +export { + buildCFGData, + buildFunctionCFG, + CFG_RULES, + cfg, + cfgData, + cfgToDOT, + cfgToMermaid, +} from './cfg.js'; // Check (CI validation predicates) export { check, checkData } from './check.js'; // Co-change analysis @@ -44,6 +54,7 @@ export { computeHalsteadMetrics, computeLOCMetrics, computeMaintainabilityIndex, + findFunctionNode, HALSTEAD_RULES, iterComplexity, } from './complexity.js'; diff --git a/src/mcp.js b/src/mcp.js index cd0b8808..81cb1b16 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -641,6 +641,26 @@ const BASE_TOOLS = [ required: ['base', 'target'], }, }, + { + name: 'cfg', + description: 'Show intraprocedural control flow graph for a function. 
Requires build --cfg.', + inputSchema: { + type: 'object', + properties: { + name: { type: 'string', description: 'Function/method name (partial match)' }, + format: { + type: 'string', + enum: ['json', 'dot', 'mermaid'], + description: 'Output format (default: json)', + }, + file: { type: 'string', description: 'Scope to file (partial match)' }, + kind: { type: 'string', enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + required: ['name'], + }, + }, { name: 'dataflow', description: 'Show data flow edges or data-dependent blast radius. Requires build --dataflow.', @@ -1192,6 +1212,24 @@ export async function startMCPServer(customDbPath, options = {}) { result = args.format === 'mermaid' ? branchCompareMermaid(bcData) : bcData; break; } + case 'cfg': { + const { cfgData, cfgToDOT, cfgToMermaid } = await import('./cfg.js'); + const cfgResult = cfgData(args.name, dbPath, { + file: args.file, + kind: args.kind, + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.query, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); + if (args.format === 'dot') { + result = { text: cfgToDOT(cfgResult) }; + } else if (args.format === 'mermaid') { + result = { text: cfgToMermaid(cfgResult) }; + } else { + result = cfgResult; + } + break; + } case 'dataflow': { const dfMode = args.mode || 'edges'; if (dfMode === 'impact') { diff --git a/tests/integration/cfg.test.js b/tests/integration/cfg.test.js new file mode 100644 index 00000000..3fdbeab0 --- /dev/null +++ b/tests/integration/cfg.test.js @@ -0,0 +1,199 @@ +/** + * Integration tests for CFG queries. + * + * Uses a hand-crafted in-memory DB with known CFG topology. 
+ */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { cfgData, cfgToDOT, cfgToMermaid } from '../../src/cfg.js'; +import { initSchema } from '../../src/db.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertBlock(db, fnNodeId, blockIndex, blockType, startLine, endLine, label) { + return db + .prepare( + 'INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) VALUES (?, ?, ?, ?, ?, ?)', + ) + .run(fnNodeId, blockIndex, blockType, startLine, endLine, label).lastInsertRowid; +} + +function insertEdge(db, fnNodeId, sourceBlockId, targetBlockId, kind) { + db.prepare( + 'INSERT INTO cfg_edges (function_node_id, source_block_id, target_block_id, kind) VALUES (?, ?, ?, ?)', + ).run(fnNodeId, sourceBlockId, targetBlockId, kind); +} + +// ─── Fixture DB ──────────────────────────────────────────────────────── + +let tmpDir, dbPath; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-cfg-')); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // Insert function nodes + const processId = insertNode(db, 'processItems', 'function', 'src/process.js', 10); + const helperId = insertNode(db, 'helper', 'function', 'src/helper.js', 5); + insertNode(db, 'testFn', 'function', 'tests/process.test.js', 1); + + // CFG for processItems: entry → body → condition → [true, false] → join → exit + const b0 = insertBlock(db, processId, 0, 'entry', null, null, null); + const b1 = 
insertBlock(db, processId, 1, 'exit', null, null, null); + const b2 = insertBlock(db, processId, 2, 'body', 10, 12, null); + const b3 = insertBlock(db, processId, 3, 'condition', 13, 13, 'if'); + const b4 = insertBlock(db, processId, 4, 'branch_true', 14, 15, 'then'); + const b5 = insertBlock(db, processId, 5, 'branch_false', 16, 17, 'else'); + const b6 = insertBlock(db, processId, 6, 'body', 18, 19, null); + + insertEdge(db, processId, b0, b2, 'fallthrough'); + insertEdge(db, processId, b2, b3, 'fallthrough'); + insertEdge(db, processId, b3, b4, 'branch_true'); + insertEdge(db, processId, b3, b5, 'branch_false'); + insertEdge(db, processId, b4, b6, 'fallthrough'); + insertEdge(db, processId, b5, b6, 'fallthrough'); + insertEdge(db, processId, b6, b1, 'fallthrough'); + + // CFG for helper: entry → body → exit (simple) + const h0 = insertBlock(db, helperId, 0, 'entry', null, null, null); + const h1 = insertBlock(db, helperId, 1, 'exit', null, null, null); + const h2 = insertBlock(db, helperId, 2, 'body', 5, 8, null); + + insertEdge(db, helperId, h0, h2, 'fallthrough'); + insertEdge(db, helperId, h2, h1, 'return'); + + db.close(); +}); + +afterAll(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Tests ───────────────────────────────────────────────────────────── + +describe('cfgData', () => { + test('returns CFG blocks and edges for a known function', () => { + const data = cfgData('processItems', dbPath); + expect(data.results.length).toBe(1); + + const r = data.results[0]; + expect(r.name).toBe('processItems'); + expect(r.file).toBe('src/process.js'); + expect(r.summary.blockCount).toBe(7); + expect(r.summary.edgeCount).toBe(7); + expect(r.blocks[0].type).toBe('entry'); + expect(r.blocks[1].type).toBe('exit'); + }); + + test('returns edges with correct kinds', () => { + const data = cfgData('processItems', dbPath); + const r = data.results[0]; + const edgeKinds = r.edges.map((e) => e.kind); + expect(edgeKinds).toContain('branch_true'); 
+ expect(edgeKinds).toContain('branch_false'); + expect(edgeKinds).toContain('fallthrough'); + }); + + test('simple function has return edge', () => { + const data = cfgData('helper', dbPath); + expect(data.results.length).toBe(1); + const r = data.results[0]; + expect(r.summary.blockCount).toBe(3); + expect(r.edges.some((e) => e.kind === 'return')).toBe(true); + }); + + test('returns empty results for non-existent function', () => { + const data = cfgData('nonexistent', dbPath); + expect(data.results.length).toBe(0); + }); + + test('noTests option excludes test file functions', () => { + const data = cfgData('testFn', dbPath, { noTests: true }); + expect(data.results.length).toBe(0); + }); + + test('file filter scopes results', () => { + const data = cfgData('processItems', dbPath, { file: 'helper.js' }); + expect(data.results.length).toBe(0); + + const data2 = cfgData('processItems', dbPath, { file: 'process.js' }); + expect(data2.results.length).toBe(1); + }); +}); + +describe('cfgToDOT', () => { + test('produces valid DOT output', () => { + const data = cfgData('processItems', dbPath); + const dot = cfgToDOT(data); + expect(dot).toContain('digraph'); + expect(dot).toContain('B0'); + expect(dot).toContain('->'); + expect(dot).toContain('branch_true'); + expect(dot).toContain('}'); + }); + + test('entry/exit nodes use ellipse shape', () => { + const data = cfgData('processItems', dbPath); + const dot = cfgToDOT(data); + expect(dot).toMatch(/B0.*shape=ellipse/); + expect(dot).toMatch(/B1.*shape=ellipse/); + }); +}); + +describe('cfgToMermaid', () => { + test('produces valid Mermaid output', () => { + const data = cfgData('processItems', dbPath); + const mermaid = cfgToMermaid(data); + expect(mermaid).toContain('graph TD'); + expect(mermaid).toContain('B0'); + expect(mermaid).toContain('-->'); + expect(mermaid).toContain('branch_true'); + }); + + test('entry/exit use stadium shape', () => { + const data = cfgData('processItems', dbPath); + const mermaid = 
cfgToMermaid(data); + // Stadium shapes use (["..."]) + expect(mermaid).toMatch(/B0\(\[/); + expect(mermaid).toMatch(/B1\(\[/); + }); +}); + +describe('warning when no CFG tables', () => { + test('returns warning when DB has no CFG data', () => { + // Create a bare DB without cfg tables + const bareDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-cfg-bare-')); + fs.mkdirSync(path.join(bareDir, '.codegraph')); + const bareDbPath = path.join(bareDir, '.codegraph', 'graph.db'); + + const db = new Database(bareDbPath); + db.pragma('journal_mode = WAL'); + // Only create nodes table, skip migrations + db.exec(` + CREATE TABLE schema_version (version INTEGER NOT NULL DEFAULT 0); + INSERT INTO schema_version VALUES (8); + CREATE TABLE nodes (id INTEGER PRIMARY KEY, name TEXT, kind TEXT, file TEXT, line INTEGER); + `); + db.close(); + + const data = cfgData('anything', bareDbPath); + expect(data.warning).toMatch(/No CFG data/); + + fs.rmSync(bareDir, { recursive: true, force: true }); + }); +}); diff --git a/tests/unit/cfg.test.js b/tests/unit/cfg.test.js new file mode 100644 index 00000000..99a52471 --- /dev/null +++ b/tests/unit/cfg.test.js @@ -0,0 +1,457 @@ +/** + * Unit tests for src/cfg.js — buildFunctionCFG + * + * Hand-crafted code snippets parsed with tree-sitter to verify + * correct CFG block/edge construction. 
+ */ + +import { beforeAll, describe, expect, it } from 'vitest'; +import { buildFunctionCFG } from '../../src/cfg.js'; +import { COMPLEXITY_RULES } from '../../src/complexity.js'; +import { createParsers } from '../../src/parser.js'; + +let jsParser; + +beforeAll(async () => { + const parsers = await createParsers(); + jsParser = parsers.get('javascript'); +}); + +function parse(code) { + const tree = jsParser.parse(code); + return tree.rootNode; +} + +function getFunctionNode(root) { + const rules = COMPLEXITY_RULES.get('javascript'); + function find(node) { + if (rules.functionNodes.has(node.type)) return node; + for (let i = 0; i < node.childCount; i++) { + const result = find(node.child(i)); + if (result) return result; + } + return null; + } + return find(root); +} + +function buildCFG(code) { + const root = parse(code); + const funcNode = getFunctionNode(root); + if (!funcNode) throw new Error('No function found in code snippet'); + return buildFunctionCFG(funcNode, 'javascript'); +} + +function hasEdge(cfg, sourceIndex, targetIndex, kind) { + return cfg.edges.some( + (e) => e.sourceIndex === sourceIndex && e.targetIndex === targetIndex && e.kind === kind, + ); +} + +function blockByType(cfg, type) { + return cfg.blocks.filter((b) => b.type === type); +} + +// ─── Tests ────────────────────────────────────────────────────────────── + +describe('buildFunctionCFG', () => { + describe('empty / simple functions', () => { + it('empty function: ENTRY → EXIT', () => { + const cfg = buildCFG('function empty() {}'); + expect(cfg.blocks.length).toBeGreaterThanOrEqual(2); + const entry = cfg.blocks.find((b) => b.type === 'entry'); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(entry).toBeDefined(); + expect(exit).toBeDefined(); + expect(hasEdge(cfg, entry.index, exit.index, 'fallthrough')).toBe(true); + }); + + it('simple function with no branching: ENTRY → body → EXIT', () => { + const cfg = buildCFG(` + function simple() { + const a = 1; + const b 
= 2; + return a + b; + } + `); + const entry = cfg.blocks.find((b) => b.type === 'entry'); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(entry).toBeDefined(); + expect(exit).toBeDefined(); + // Should have return edge to exit + expect(cfg.edges.some((e) => e.targetIndex === exit.index && e.kind === 'return')).toBe(true); + }); + + it('function with only statements (no return): body falls through to EXIT', () => { + const cfg = buildCFG(` + function noReturn() { + const x = 1; + console.log(x); + } + `); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(cfg.edges.some((e) => e.targetIndex === exit.index && e.kind === 'fallthrough')).toBe( + true, + ); + }); + }); + + describe('if statements', () => { + it('single if (no else): condition → [true branch, join]', () => { + const cfg = buildCFG(` + function singleIf(x) { + if (x > 0) { + console.log('positive'); + } + return x; + } + `); + const conditions = blockByType(cfg, 'condition'); + expect(conditions.length).toBe(1); + const trueBlocks = blockByType(cfg, 'branch_true'); + expect(trueBlocks.length).toBe(1); + // Condition has branch_true and branch_false edges + const condIdx = conditions[0].index; + expect(cfg.edges.some((e) => e.sourceIndex === condIdx && e.kind === 'branch_true')).toBe( + true, + ); + expect(cfg.edges.some((e) => e.sourceIndex === condIdx && e.kind === 'branch_false')).toBe( + true, + ); + }); + + it('if/else: condition → [true, false] → join', () => { + const cfg = buildCFG(` + function ifElse(x) { + if (x > 0) { + return 'positive'; + } else { + return 'non-positive'; + } + } + `); + const conditions = blockByType(cfg, 'condition'); + expect(conditions.length).toBe(1); + const trueBlocks = blockByType(cfg, 'branch_true'); + const falseBlocks = blockByType(cfg, 'branch_false'); + expect(trueBlocks.length).toBe(1); + expect(falseBlocks.length).toBe(1); + }); + + it('if/else-if/else chain', () => { + const cfg = buildCFG(` + function chain(x) { + if (x > 
10) { + return 'big'; + } else if (x > 0) { + return 'small'; + } else { + return 'negative'; + } + } + `); + // Should have at least 2 conditions (if + else-if) + const conditions = blockByType(cfg, 'condition'); + expect(conditions.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('loops', () => { + it('while loop: header → [body → loop_back, exit]', () => { + const cfg = buildCFG(` + function whileLoop(n) { + let i = 0; + while (i < n) { + i++; + } + return i; + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + const bodyBlocks = blockByType(cfg, 'loop_body'); + expect(bodyBlocks.length).toBe(1); + // Header has branch_true to body and loop_exit + const hIdx = headers[0].index; + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'branch_true')).toBe(true); + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'loop_exit')).toBe(true); + // Body has loop_back to header + expect(cfg.edges.some((e) => e.kind === 'loop_back' && e.targetIndex === hIdx)).toBe(true); + }); + + it('for loop: header → [body → loop_back, exit]', () => { + const cfg = buildCFG(` + function forLoop() { + for (let i = 0; i < 10; i++) { + console.log(i); + } + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + expect(headers[0].label).toBe('for'); + expect(cfg.edges.some((e) => e.kind === 'loop_back')).toBe(true); + expect(cfg.edges.some((e) => e.kind === 'loop_exit')).toBe(true); + }); + + it('for-in loop', () => { + const cfg = buildCFG(` + function forIn(obj) { + for (const key in obj) { + console.log(key); + } + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + expect(cfg.edges.some((e) => e.kind === 'loop_back')).toBe(true); + }); + + it('do-while loop: body → condition → [loop_back, exit]', () => { + const cfg = buildCFG(` + function doWhile() { + let i = 0; + do { + i++; + } while (i < 10); + return i; + } + `); + 
const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + expect(headers[0].label).toBe('do-while'); + const bodyBlocks = blockByType(cfg, 'loop_body'); + expect(bodyBlocks.length).toBe(1); + // Condition has loop_back to body and loop_exit + const hIdx = headers[0].index; + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'loop_back')).toBe(true); + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'loop_exit')).toBe(true); + }); + }); + + describe('break and continue', () => { + it('break in loop: terminates → loop exit', () => { + const cfg = buildCFG(` + function withBreak() { + for (let i = 0; i < 10; i++) { + if (i === 5) break; + console.log(i); + } + } + `); + expect(cfg.edges.some((e) => e.kind === 'break')).toBe(true); + }); + + it('continue in loop: terminates → loop header', () => { + const cfg = buildCFG(` + function withContinue() { + for (let i = 0; i < 10; i++) { + if (i % 2 === 0) continue; + console.log(i); + } + } + `); + expect(cfg.edges.some((e) => e.kind === 'continue')).toBe(true); + }); + }); + + describe('switch statement', () => { + it('switch/case: header → each case → join', () => { + const cfg = buildCFG(` + function switchCase(x) { + switch (x) { + case 1: + return 'one'; + case 2: + return 'two'; + default: + return 'other'; + } + } + `); + const conditions = cfg.blocks.filter((b) => b.type === 'condition' && b.label === 'switch'); + expect(conditions.length).toBe(1); + const caseBlocks = blockByType(cfg, 'case'); + expect(caseBlocks.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('try/catch/finally', () => { + it('try/catch: try body → [catch via exception, join]', () => { + const cfg = buildCFG(` + function tryCatch() { + try { + riskyCall(); + } catch (e) { + console.error(e); + } + } + `); + const catchBlocks = blockByType(cfg, 'catch'); + expect(catchBlocks.length).toBe(1); + expect(cfg.edges.some((e) => e.kind === 'exception')).toBe(true); + }); + + 
it('try/catch/finally: try → [catch, finally] → exit', () => { + const cfg = buildCFG(` + function tryCatchFinally() { + try { + riskyCall(); + } catch (e) { + console.error(e); + } finally { + cleanup(); + } + } + `); + const catchBlocks = blockByType(cfg, 'catch'); + const finallyBlocks = blockByType(cfg, 'finally'); + expect(catchBlocks.length).toBe(1); + expect(finallyBlocks.length).toBe(1); + }); + + it('try/finally (no catch)', () => { + const cfg = buildCFG(` + function tryFinally() { + try { + riskyCall(); + } finally { + cleanup(); + } + } + `); + const finallyBlocks = blockByType(cfg, 'finally'); + expect(finallyBlocks.length).toBe(1); + }); + }); + + describe('early return and throw', () => { + it('early return terminates path → EXIT', () => { + const cfg = buildCFG(` + function earlyReturn(x) { + if (x < 0) { + return -1; + } + return x * 2; + } + `); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + const returnEdges = cfg.edges.filter( + (e) => e.targetIndex === exit.index && e.kind === 'return', + ); + // Two returns: the early return and the final return + expect(returnEdges.length).toBe(2); + }); + + it('throw terminates path → EXIT via exception', () => { + const cfg = buildCFG(` + function throwError(x) { + if (x < 0) { + throw new Error('negative'); + } + return x; + } + `); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(cfg.edges.some((e) => e.targetIndex === exit.index && e.kind === 'exception')).toBe( + true, + ); + }); + }); + + describe('nested structures', () => { + it('nested loops with break resolves to correct enclosing loop', () => { + const cfg = buildCFG(` + function nested() { + for (let i = 0; i < 10; i++) { + for (let j = 0; j < 10; j++) { + if (j === 5) break; + } + } + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(2); + expect(cfg.edges.some((e) => e.kind === 'break')).toBe(true); + }); + + it('if inside loop', () => { + const cfg = buildCFG(` + function 
ifInLoop() { + for (let i = 0; i < 10; i++) { + if (i > 5) { + console.log('big'); + } else { + console.log('small'); + } + } + } + `); + expect(blockByType(cfg, 'loop_header').length).toBe(1); + expect(blockByType(cfg, 'condition').length).toBe(1); + expect(blockByType(cfg, 'branch_true').length).toBe(1); + expect(blockByType(cfg, 'branch_false').length).toBe(1); + }); + }); + + describe('arrow functions and methods', () => { + it('arrow function with block body', () => { + const cfg = buildCFG(` + const fn = (x) => { + if (x) return 1; + return 0; + }; + `); + expect(cfg.blocks.find((b) => b.type === 'entry')).toBeDefined(); + expect(cfg.blocks.find((b) => b.type === 'exit')).toBeDefined(); + }); + + it('arrow function with expression body: ENTRY → EXIT', () => { + const cfg = buildCFG(` + const fn = (x) => x + 1; + `); + const entry = cfg.blocks.find((b) => b.type === 'entry'); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(entry).toBeDefined(); + expect(exit).toBeDefined(); + // Expression body: entry → body → exit + expect(cfg.blocks.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('block and edge counts', () => { + it('complex function has reasonable block/edge counts', () => { + const cfg = buildCFG(` + function complex(arr) { + if (!arr) return null; + const result = []; + for (const item of arr) { + if (item.skip) continue; + try { + result.push(transform(item)); + } catch (e) { + console.error(e); + } + } + return result; + } + `); + // Should have meaningful structure + expect(cfg.blocks.length).toBeGreaterThan(5); + expect(cfg.edges.length).toBeGreaterThan(5); + // Must have entry and exit + expect(cfg.blocks.find((b) => b.type === 'entry')).toBeDefined(); + expect(cfg.blocks.find((b) => b.type === 'exit')).toBeDefined(); + }); + }); + + describe('unsupported language', () => { + it('returns empty CFG for unsupported language', () => { + const root = parse('function foo() { return 1; }'); + const funcNode = 
getFunctionNode(root); + const cfg = buildFunctionCFG(funcNode, 'haskell'); + expect(cfg.blocks).toEqual([]); + expect(cfg.edges).toEqual([]); + }); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 3b38f590..7d14bffc 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -36,6 +36,7 @@ const ALL_TOOL_NAMES = [ 'batch_query', 'triage', 'branch_compare', + 'cfg', 'dataflow', 'check', 'list_repos', From 698540c313cf529a160cc2d1e3c229802b5a5ecb Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:16:16 -0700 Subject: [PATCH 05/12] feat: add stored queryable AST nodes (calls, new, string, regex, throw, await) Persist selected AST nodes in a dedicated ast_nodes SQLite table during build, queryable via CLI (codegraph ast), MCP (ast_query), and programmatic API. - DB migration v13: ast_nodes table with indexes on kind, name, file, parent, and (kind,name) - New src/ast.js module: buildAstNodes (extraction), astQueryData/ astQuery (query), AST_NODE_KINDS constant - Builder integration: full-rebuild deletion, incremental cleanup, always-on post-parse extraction (before complexity to preserve _tree) - CLI: codegraph ast [pattern] with -k, -f, -T, -j, --ndjson, --limit, --offset options - MCP: ast_query tool with pattern, kind, file, no_tests, pagination - JS/TS/TSX Phase 1: full AST walk for new/throw/await/string/regex; all languages get call nodes from symbols.calls - Pattern matching uses SQL GLOB with auto-wrapping for substring search - Parent node resolution via narrowest enclosing definition Impact: 12 functions changed, 26 affected --- src/ast.js | 392 ++++++++++++++++++++++++++++++++ src/builder.js | 21 +- src/cli.js | 29 +++ src/db.js | 21 ++ src/index.js | 2 + src/mcp.js | 34 +++ src/paginate.js | 1 + tests/integration/ast.test.js | 234 +++++++++++++++++++ tests/parsers/ast-nodes.test.js | 185 +++++++++++++++ tests/unit/mcp.test.js | 1 + 10 files changed, 919 
insertions(+), 1 deletion(-) create mode 100644 src/ast.js create mode 100644 tests/integration/ast.test.js create mode 100644 tests/parsers/ast-nodes.test.js diff --git a/src/ast.js b/src/ast.js new file mode 100644 index 00000000..8c349667 --- /dev/null +++ b/src/ast.js @@ -0,0 +1,392 @@ +/** + * Stored queryable AST nodes — build-time extraction + query functions. + * + * Persists selected AST nodes (calls, new, string, regex, throw, await) in the + * `ast_nodes` table during build. Queryable via CLI (`codegraph ast`), MCP + * (`ast_query`), and programmatic API. + */ + +import path from 'node:path'; +import { openReadonlyOrFail } from './db.js'; +import { debug } from './logger.js'; +import { paginateResult, printNdjson } from './paginate.js'; +import { LANGUAGE_REGISTRY } from './parser.js'; + +// ─── Constants ──────────────────────────────────────────────────────── + +export const AST_NODE_KINDS = ['call', 'new', 'string', 'regex', 'throw', 'await']; + +const KIND_ICONS = { + call: '\u0192', // ƒ + new: '\u2295', // ⊕ + string: '"', + regex: '/', + throw: '\u2191', // ↑ + await: '\u22B3', // ⊳ +}; + +/** Max length for the `text` column. */ +const TEXT_MAX = 200; + +/** tree-sitter node types that map to our AST node kinds (JS/TS/TSX). */ +const JS_TS_AST_TYPES = { + new_expression: 'new', + throw_statement: 'throw', + await_expression: 'await', + string: 'string', + template_string: 'string', + regex: 'regex', +}; + +/** Extensions that support full AST walk (new/throw/await/string/regex). */ +const WALK_EXTENSIONS = new Set(); +for (const lang of Object.values(LANGUAGE_REGISTRY)) { + if (['javascript', 'typescript', 'tsx'].includes(lang.id)) { + for (const ext of lang.extensions) WALK_EXTENSIONS.add(ext); + } +} + +// ─── Helpers ────────────────────────────────────────────────────────── + +function truncate(s, max = TEXT_MAX) { + if (!s) return null; + return s.length <= max ? 
s : `${s.slice(0, max - 1)}\u2026`; +} + +/** + * Extract the constructor name from a `new_expression` node. + * Handles `new Foo()`, `new a.Foo()`, `new Foo.Bar()`. + */ +function extractNewName(node) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child.type === 'identifier') return child.text; + if (child.type === 'member_expression') { + // e.g. new a.Foo() → "a.Foo" + return child.text; + } + } + return node.text?.split('(')[0]?.replace('new ', '').trim() || '?'; +} + +/** + * Extract the expression text from a throw/await node. + */ +function extractExpressionText(node) { + // Skip keyword child, take the rest + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child.type !== 'throw' && child.type !== 'await') { + return truncate(child.text); + } + } + return truncate(node.text); +} + +/** + * Extract a meaningful name from throw/await nodes. + * For throw: the constructor or expression type. + * For await: the called function name. + */ +function extractName(kind, node) { + if (kind === 'throw') { + // throw new Error(...) → "Error"; throw x → "x" + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child.type === 'new_expression') return extractNewName(child); + if (child.type === 'call_expression') { + const fn = child.childForFieldName('function'); + return fn ? fn.text : child.text?.split('(')[0] || '?'; + } + if (child.type === 'identifier') return child.text; + } + return truncate(node.text); + } + if (kind === 'await') { + // await fetch(...) → "fetch"; await this.foo() → "this.foo" + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child.type === 'call_expression') { + const fn = child.childForFieldName('function'); + return fn ? 
fn.text : child.text?.split('(')[0] || '?'; + } + if (child.type === 'identifier' || child.type === 'member_expression') { + return child.text; + } + } + return truncate(node.text); + } + return truncate(node.text); +} + +/** + * Find the narrowest enclosing definition for a given line. + */ +function findParentDef(defs, line) { + let best = null; + for (const def of defs) { + if (def.line <= line && (def.endLine == null || def.endLine >= line)) { + if (!best || def.endLine - def.line < best.endLine - best.line) { + best = def; + } + } + } + return best; +} + +// ─── Build ──────────────────────────────────────────────────────────── + +/** + * Extract AST nodes from parsed files and persist to the ast_nodes table. + * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [_engineOpts] - engine options (unused) + */ +export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { + // Ensure table exists (migration may not have run on older DBs) + let insertStmt; + try { + insertStmt = db.prepare( + 'INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) VALUES (?, ?, ?, ?, ?, ?, ?)', + ); + } catch { + debug('ast_nodes table not found — skipping AST extraction'); + return; + } + + const getNodeId = db.prepare( + 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?', + ); + + const tx = db.transaction((rows) => { + for (const r of rows) { + insertStmt.run(r.file, r.line, r.kind, r.name, r.text, r.receiver, r.parentNodeId); + } + }); + + let totalInserted = 0; + + for (const [relPath, symbols] of fileSymbols) { + const rows = []; + const defs = symbols.definitions || []; + + // 1. 
Call nodes from symbols.calls (all languages) + if (symbols.calls) { + for (const call of symbols.calls) { + const parentDef = findParentDef(defs, call.line); + let parentNodeId = null; + if (parentDef) { + const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line); + if (row) parentNodeId = row.id; + } + rows.push({ + file: relPath, + line: call.line, + kind: 'call', + name: call.name, + text: call.dynamic ? `[dynamic] ${call.name}` : null, + receiver: call.receiver || null, + parentNodeId, + }); + } + } + + // 2. AST walk for JS/TS/TSX — extract new, throw, await, string, regex + const ext = path.extname(relPath).toLowerCase(); + if (WALK_EXTENSIONS.has(ext) && symbols._tree) { + const astRows = []; + walkAst(symbols._tree.rootNode, defs, relPath, astRows, getNodeId); + rows.push(...astRows); + } + + if (rows.length > 0) { + tx(rows); + totalInserted += rows.length; + } + } + + debug(`AST extraction: ${totalInserted} nodes stored`); +} + +/** + * Walk a tree-sitter AST and collect new/throw/await/string/regex nodes. 
+ */ +function walkAst(node, defs, relPath, rows, getNodeId) { + const kind = JS_TS_AST_TYPES[node.type]; + if (kind) { + // tree-sitter lines are 0-indexed, our DB uses 1-indexed + const line = node.startPosition.row + 1; + + let name; + let text = null; + + if (kind === 'new') { + name = extractNewName(node); + text = truncate(node.text); + } else if (kind === 'throw') { + name = extractName('throw', node); + text = extractExpressionText(node); + } else if (kind === 'await') { + name = extractName('await', node); + text = extractExpressionText(node); + } else if (kind === 'string') { + // Skip trivial strings (length < 2 after removing quotes) + const content = node.text?.replace(/^['"`]|['"`]$/g, '') || ''; + if (content.length < 2) { + // Still recurse children + for (let i = 0; i < node.childCount; i++) { + walkAst(node.child(i), defs, relPath, rows, getNodeId); + } + return; + } + name = truncate(content, 100); + text = truncate(node.text); + } else if (kind === 'regex') { + name = node.text || '?'; + text = truncate(node.text); + } + + const parentDef = findParentDef(defs, line); + let parentNodeId = null; + if (parentDef) { + const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line); + if (row) parentNodeId = row.id; + } + + rows.push({ + file: relPath, + line, + kind, + name, + text, + receiver: null, + parentNodeId, + }); + + // Don't recurse into the children of matched nodes for new/throw/await + // (we already extracted what we need, and nested strings inside them are noise) + if (kind !== 'string' && kind !== 'regex') return; + } + + for (let i = 0; i < node.childCount; i++) { + walkAst(node.child(i), defs, relPath, rows, getNodeId); + } +} + +// ─── Query ──────────────────────────────────────────────────────────── + +/** + * Query AST nodes — data-returning function. 
+ * + * @param {string} [pattern] - GLOB pattern for node name (auto-wrapped in *..*) + * @param {string} [customDbPath] - path to graph.db + * @param {object} [opts] + * @returns {{ pattern, kind, count, results, _pagination? }} + */ +export function astQueryData(pattern, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const { kind, file, noTests, limit, offset } = opts; + + let where = 'WHERE 1=1'; + const params = []; + + // Pattern matching + if (pattern && pattern !== '*') { + // If user already uses wildcards, use as-is; otherwise wrap in *..* for substring + const globPattern = pattern.includes('*') ? pattern : `*${pattern}*`; + where += ' AND a.name GLOB ?'; + params.push(globPattern); + } + + if (kind) { + where += ' AND a.kind = ?'; + params.push(kind); + } + + if (file) { + where += ' AND a.file LIKE ?'; + params.push(`%${file}%`); + } + + if (noTests) { + where += ` AND a.file NOT LIKE '%.test.%' + AND a.file NOT LIKE '%.spec.%' + AND a.file NOT LIKE '%__test__%' + AND a.file NOT LIKE '%__tests__%' + AND a.file NOT LIKE '%.stories.%'`; + } + + const sql = ` + SELECT a.kind, a.name, a.file, a.line, a.text, a.receiver, a.parent_node_id, + p.name AS parent_name, p.kind AS parent_kind, p.file AS parent_file + FROM ast_nodes a + LEFT JOIN nodes p ON a.parent_node_id = p.id + ${where} + ORDER BY a.file, a.line + `; + + const rows = db.prepare(sql).all(...params); + db.close(); + + const results = rows.map((r) => ({ + kind: r.kind, + name: r.name, + file: r.file, + line: r.line, + text: r.text, + receiver: r.receiver, + parent: r.parent_node_id + ? { name: r.parent_name, kind: r.parent_kind, file: r.parent_file } + : null, + })); + + const data = { + pattern: pattern || '*', + kind: kind || null, + count: results.length, + results, + }; + + return paginateResult(data, 'results', { limit, offset }); +} + +/** + * Query AST nodes — display function (human/json/ndjson output). 
+ */ +export function astQuery(pattern, customDbPath, opts = {}) { + const data = astQueryData(pattern, customDbPath, opts); + + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + // Human-readable output + if (data.results.length === 0) { + console.log(`No AST nodes found${pattern ? ` matching "${pattern}"` : ''}.`); + return; + } + + const kindLabel = opts.kind ? ` (kind: ${opts.kind})` : ''; + console.log(`\n${data.count} AST nodes${pattern ? ` matching "${pattern}"` : ''}${kindLabel}:\n`); + + for (const r of data.results) { + const icon = KIND_ICONS[r.kind] || '?'; + const parentInfo = r.parent ? ` (in ${r.parent.name})` : ''; + console.log(` ${icon} ${r.name} -- ${r.file}:${r.line}${parentInfo}`); + } + + if (data._pagination?.hasMore) { + console.log( + `\n ... ${data._pagination.total - data._pagination.offset - data._pagination.returned} more (use --offset ${data._pagination.offset + data._pagination.limit})`, + ); + } + console.log(); +} diff --git a/src/builder.js b/src/builder.js index 6ceec39e..322ac552 100644 --- a/src/builder.js +++ b/src/builder.js @@ -435,7 +435,7 @@ export async function buildGraph(rootDir, opts = {}) { if (isFullBuild) { const deletions = - 'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; + 'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; db.exec( hasEmbeddings ? 
`${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;` @@ -513,12 +513,19 @@ export async function buildGraph(rootDir, opts = {}) { } catch { deleteDataflowForFile = null; } + let deleteAstNodesForFile; + try { + deleteAstNodesForFile = db.prepare('DELETE FROM ast_nodes WHERE file = ?'); + } catch { + deleteAstNodesForFile = null; + } for (const relPath of removed) { deleteEmbeddingsForFile?.run(relPath); deleteEdgesForFile.run({ f: relPath }); deleteMetricsForFile.run(relPath); deleteComplexityForFile?.run(relPath); deleteDataflowForFile?.run(relPath, relPath); + deleteAstNodesForFile?.run(relPath); deleteNodesForFile.run(relPath); } for (const item of parseChanges) { @@ -528,6 +535,7 @@ export async function buildGraph(rootDir, opts = {}) { deleteMetricsForFile.run(relPath); deleteComplexityForFile?.run(relPath); deleteDataflowForFile?.run(relPath, relPath); + deleteAstNodesForFile?.run(relPath); deleteNodesForFile.run(relPath); } @@ -1129,6 +1137,17 @@ export async function buildGraph(rootDir, opts = {}) { } _t.rolesMs = performance.now() - _t.roles0; + // Always-on AST node extraction (calls, new, string, regex, throw, await) + // Must run before complexity which releases _tree references + _t.ast0 = performance.now(); + try { + const { buildAstNodes } = await import('./ast.js'); + await buildAstNodes(db, allSymbols, rootDir, engineOpts); + } catch (err) { + debug(`AST node extraction failed: ${err.message}`); + } + _t.astMs = performance.now() - _t.ast0; + // Compute per-function complexity metrics (cognitive, cyclomatic, nesting) _t.complexity0 = performance.now(); try { diff --git a/src/cli.js b/src/cli.js index 737ce4ae..882c1c2d 100644 --- a/src/cli.js +++ b/src/cli.js @@ -1071,6 +1071,35 @@ program }); }); +program + .command('ast [pattern]') + .description('Search stored AST nodes (calls, new, string, regex, throw, await) by pattern') + .option('-d, --db ', 'Path to graph.db') + .option('-k, --kind ', 
'Filter by AST node kind (call, new, string, regex, throw, await)') + .option('-f, --file ', 'Scope to file (partial match)') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .option('--ndjson', 'Newline-delimited JSON output') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .action(async (pattern, opts) => { + const { AST_NODE_KINDS, astQuery } = await import('./ast.js'); + if (opts.kind && !AST_NODE_KINDS.includes(opts.kind)) { + console.error(`Invalid AST kind "${opts.kind}". Valid: ${AST_NODE_KINDS.join(', ')}`); + process.exit(1); + } + astQuery(pattern, opts.db, { + kind: opts.kind, + file: opts.file, + noTests: resolveNoTests(opts), + json: opts.json, + ndjson: opts.ndjson, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + }); + }); + program .command('manifesto') .description('Evaluate manifesto rules (pass/fail verdicts for code health)') diff --git a/src/db.js b/src/db.js index ff31fd39..3e17327e 100644 --- a/src/db.js +++ b/src/db.js @@ -204,6 +204,27 @@ export const MIGRATIONS = [ CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); `, }, + { + version: 13, + up: ` + CREATE TABLE IF NOT EXISTS ast_nodes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file TEXT NOT NULL, + line INTEGER NOT NULL, + kind TEXT NOT NULL, + name TEXT NOT NULL, + text TEXT, + receiver TEXT, + parent_node_id INTEGER, + FOREIGN KEY(parent_node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_ast_kind ON ast_nodes(kind); + CREATE INDEX IF NOT EXISTS idx_ast_name ON ast_nodes(name); + CREATE INDEX IF NOT EXISTS idx_ast_file ON ast_nodes(file); + CREATE INDEX IF NOT EXISTS idx_ast_parent ON ast_nodes(parent_node_id); + CREATE INDEX IF NOT EXISTS idx_ast_kind_name ON 
ast_nodes(kind, name); + `, + }, ]; export function getBuildMeta(db, key) { diff --git a/src/index.js b/src/index.js index 8d44699a..f4921d8f 100644 --- a/src/index.js +++ b/src/index.js @@ -5,6 +5,8 @@ * import { buildGraph, queryNameData, findCycles, exportDOT } from 'codegraph'; */ +// AST node queries +export { AST_NODE_KINDS, astQuery, astQueryData } from './ast.js'; // Audit (composite report) export { audit, auditData } from './audit.js'; // Batch querying diff --git a/src/mcp.js b/src/mcp.js index 81cb1b16..38cdbfec 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -6,6 +6,7 @@ */ import { createRequire } from 'node:module'; +import { AST_NODE_KINDS } from './ast.js'; import { findCycles } from './cycles.js'; import { findDbPath } from './db.js'; import { MCP_DEFAULTS, MCP_MAX_LIMIT } from './paginate.js'; @@ -703,6 +704,28 @@ const BASE_TOOLS = [ }, }, }, + { + name: 'ast_query', + description: + 'Search stored AST nodes (calls, literals, new, throw, await) by pattern. Requires a prior build.', + inputSchema: { + type: 'object', + properties: { + pattern: { + type: 'string', + description: 'GLOB pattern for node name (auto-wrapped in *..* for substring match)', + }, + kind: { + type: 'string', + enum: AST_NODE_KINDS, + description: 'Filter by AST node kind', + }, + file: { type: 'string', description: 'Scope to file (partial match)' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + }, + }, ]; const LIST_REPOS_TOOL = { @@ -1268,6 +1291,17 @@ export async function startMCPServer(customDbPath, options = {}) { }); break; } + case 'ast_query': { + const { astQueryData } = await import('./ast.js'); + result = astQueryData(args.pattern, dbPath, { + kind: args.kind, + file: args.file, + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.ast_query, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, + }); + break; + } case 'list_repos': { const { listRepos, pruneRegistry } = await import('./registry.js'); pruneRegistry(); diff --git a/src/paginate.js b/src/paginate.js index 8802b65a..5b768993 100644 --- a/src/paginate.js +++ b/src/paginate.js @@ -29,6 +29,7 @@ export const MCP_DEFAULTS = { communities: 20, structure: 30, triage: 20, + ast_query: 50, }; /** Hard cap to prevent abuse via MCP. */ diff --git a/tests/integration/ast.test.js b/tests/integration/ast.test.js new file mode 100644 index 00000000..60cee696 --- /dev/null +++ b/tests/integration/ast.test.js @@ -0,0 +1,234 @@ +/** + * Integration tests for AST node queries. + * + * Uses a hand-crafted in-memory DB with known AST nodes. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { AST_NODE_KINDS, astQueryData } from '../../src/ast.js'; +import { initSchema } from '../../src/db.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertAstNode(db, file, line, kind, name, text, receiver, parentNodeId) { + return db + .prepare( + 'INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) VALUES (?, ?, ?, ?, ?, ?, ?)', + ) + .run(file, line, kind, name, text, receiver, parentNodeId).lastInsertRowid; +} + +// ─── Fixture DB ──────────────────────────────────────────────────────── + +let tmpDir, dbPath; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-ast-')); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // 
Insert function nodes + const processId = insertNode(db, 'processInput', 'function', 'src/utils.js', 10); + const loaderId = insertNode(db, 'loadModule', 'function', 'src/loader.js', 5); + const handlerId = insertNode(db, 'handleRequest', 'function', 'src/handler.js', 20); + const defaultsId = insertNode(db, 'defaults', 'function', 'src/config.js', 1); + const testFnId = insertNode(db, 'testUtils', 'function', 'tests/utils.test.js', 1); + + // Calls + insertAstNode(db, 'src/utils.js', 42, 'call', 'eval', null, null, processId); + insertAstNode(db, 'src/loader.js', 8, 'call', 'require', null, null, loaderId); + insertAstNode(db, 'src/handler.js', 25, 'call', 'console.log', null, 'console', handlerId); + insertAstNode(db, 'src/handler.js', 30, 'call', 'console.error', null, 'console', handlerId); + insertAstNode(db, 'src/utils.js', 50, 'call', 'fetch', null, null, processId); + + // new expressions + insertAstNode(db, 'src/handler.js', 30, 'new', 'Error', 'new Error("bad")', null, handlerId); + insertAstNode(db, 'src/loader.js', 12, 'new', 'Map', 'new Map()', null, loaderId); + + // strings + insertAstNode( + db, + 'src/config.js', + 18, + 'string', + 'password123', + '"password123"', + null, + defaultsId, + ); + insertAstNode( + db, + 'src/config.js', + 19, + 'string', + 'localhost:3000', + '"localhost:3000"', + null, + defaultsId, + ); + + // throw + insertAstNode( + db, + 'src/handler.js', + 35, + 'throw', + 'Error', + 'new Error("not found")', + null, + handlerId, + ); + + // await + insertAstNode(db, 'src/utils.js', 55, 'await', 'fetch', 'fetch(url)', null, processId); + + // regex + insertAstNode(db, 'src/utils.js', 60, 'regex', '/\\d+/g', '/\\d+/g', null, processId); + + // Test file nodes (should be excluded by noTests) + insertAstNode(db, 'tests/utils.test.js', 5, 'call', 'eval', null, null, testFnId); + + db.close(); +}); + +afterAll(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Tests 
───────────────────────────────────────────────────────────── + +describe('AST_NODE_KINDS', () => { + test('exports all expected kinds', () => { + expect(AST_NODE_KINDS).toEqual(['call', 'new', 'string', 'regex', 'throw', 'await']); + }); +}); + +describe('astQueryData', () => { + test('returns all nodes when no pattern given', () => { + const data = astQueryData(undefined, dbPath); + expect(data.count).toBeGreaterThan(0); + expect(data.pattern).toBe('*'); + }); + + test('substring pattern match', () => { + const data = astQueryData('eval', dbPath); + // Should match 'eval' in src/utils.js and tests/utils.test.js + expect(data.results.length).toBeGreaterThanOrEqual(2); + expect(data.results.every((r) => r.name.includes('eval'))).toBe(true); + }); + + test('glob wildcard pattern', () => { + const data = astQueryData('console.*', dbPath); + expect(data.results.length).toBe(2); + expect(data.results.every((r) => r.name.startsWith('console.'))).toBe(true); + }); + + test('exact pattern with star', () => { + const data = astQueryData('*', dbPath); + expect(data.count).toBeGreaterThan(0); + }); + + test('kind filter — call', () => { + const data = astQueryData(undefined, dbPath, { kind: 'call' }); + expect(data.results.every((r) => r.kind === 'call')).toBe(true); + expect(data.results.length).toBeGreaterThanOrEqual(5); + }); + + test('kind filter — string', () => { + const data = astQueryData(undefined, dbPath, { kind: 'string' }); + expect(data.results.every((r) => r.kind === 'string')).toBe(true); + expect(data.results.length).toBe(2); + }); + + test('kind filter — new', () => { + const data = astQueryData(undefined, dbPath, { kind: 'new' }); + expect(data.results.every((r) => r.kind === 'new')).toBe(true); + expect(data.results.length).toBe(2); + }); + + test('kind filter — throw', () => { + const data = astQueryData(undefined, dbPath, { kind: 'throw' }); + expect(data.results.every((r) => r.kind === 'throw')).toBe(true); + expect(data.results.length).toBe(1); + }); + 
+ test('kind filter — await', () => { + const data = astQueryData(undefined, dbPath, { kind: 'await' }); + expect(data.results.every((r) => r.kind === 'await')).toBe(true); + expect(data.results.length).toBe(1); + }); + + test('kind filter — regex', () => { + const data = astQueryData(undefined, dbPath, { kind: 'regex' }); + expect(data.results.every((r) => r.kind === 'regex')).toBe(true); + expect(data.results.length).toBe(1); + }); + + test('file filter', () => { + const data = astQueryData(undefined, dbPath, { file: 'config' }); + expect(data.results.every((r) => r.file.includes('config'))).toBe(true); + expect(data.results.length).toBe(2); + }); + + test('noTests excludes test files', () => { + const withTests = astQueryData('eval', dbPath); + const noTests = astQueryData('eval', dbPath, { noTests: true }); + expect(noTests.results.length).toBeLessThan(withTests.results.length); + expect(noTests.results.every((r) => !r.file.includes('.test.'))).toBe(true); + }); + + test('pagination — limit', () => { + const data = astQueryData(undefined, dbPath, { limit: 3 }); + expect(data.results.length).toBe(3); + expect(data._pagination).toBeDefined(); + expect(data._pagination.total).toBeGreaterThan(3); + expect(data._pagination.hasMore).toBe(true); + }); + + test('pagination — offset', () => { + const page1 = astQueryData(undefined, dbPath, { limit: 3, offset: 0 }); + const page2 = astQueryData(undefined, dbPath, { limit: 3, offset: 3 }); + expect(page1.results[0].name).not.toBe(page2.results[0].name); + }); + + test('parent node resolution', () => { + const data = astQueryData('eval', dbPath, { noTests: true }); + expect(data.results.length).toBe(1); + const r = data.results[0]; + expect(r.parent).toBeDefined(); + expect(r.parent.name).toBe('processInput'); + expect(r.parent.kind).toBe('function'); + }); + + test('receiver field for calls', () => { + const data = astQueryData('console.log', dbPath); + expect(data.results.length).toBe(1); + 
expect(data.results[0].receiver).toBe('console'); + }); + + test('empty results for non-matching pattern', () => { + const data = astQueryData('nonexistent_xyz', dbPath); + expect(data.results.length).toBe(0); + expect(data.count).toBe(0); + }); + + test('combined kind + file filter', () => { + const data = astQueryData(undefined, dbPath, { kind: 'call', file: 'handler' }); + expect(data.results.every((r) => r.kind === 'call' && r.file.includes('handler'))).toBe(true); + expect(data.results.length).toBe(2); + }); +}); diff --git a/tests/parsers/ast-nodes.test.js b/tests/parsers/ast-nodes.test.js new file mode 100644 index 00000000..d9ca53f7 --- /dev/null +++ b/tests/parsers/ast-nodes.test.js @@ -0,0 +1,185 @@ +/** + * Tests for AST node extraction from parsed source code. + * + * Parses JS fixtures through tree-sitter, runs AST extraction via buildAstNodes, + * and verifies the correct nodes are captured in the DB. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { buildAstNodes } from '../../src/ast.js'; +import { initSchema } from '../../src/db.js'; +import { parseFilesAuto } from '../../src/parser.js'; + +// ─── Fixture ────────────────────────────────────────────────────────── + +const FIXTURE_CODE = ` +export function processData(input) { + const result = new Map(); + const pattern = /^[a-z]+$/i; + const greeting = "hello world"; + + if (typeof input === 'string') { + eval(input); + } + + try { + const data = await fetch('/api/data'); + result.set('data', data); + } catch (err) { + throw new Error('fetch failed'); + } + + console.log(result); + return result; +} + +function helper() { + const re = /\\d{3}-\\d{4}/; + const msg = \`template string value\`; + return msg; +} +`; + +// ─── Setup ──────────────────────────────────────────────────────────── + +let tmpDir, dbPath, db; + +beforeAll(async () => 
{ + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-ast-extract-')); + const srcDir = path.join(tmpDir, 'src'); + fs.mkdirSync(srcDir, { recursive: true }); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + + // Write fixture file + const fixturePath = path.join(srcDir, 'fixture.js'); + fs.writeFileSync(fixturePath, FIXTURE_CODE); + + // Parse fixture using parseFilesAuto (preserves _tree for AST walk) + const allSymbols = await parseFilesAuto([fixturePath], tmpDir, { engine: 'wasm' }); + const symbols = allSymbols.get('src/fixture.js'); + if (!symbols) throw new Error('Failed to parse fixture file'); + + // Create DB and schema + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // Insert nodes for definitions so parent resolution works + const insertNode = db.prepare( + 'INSERT INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + ); + for (const def of symbols.definitions) { + insertNode.run(def.name, def.kind, 'src/fixture.js', def.line, def.endLine); + } + + // Build AST nodes + await buildAstNodes(db, allSymbols, tmpDir); +}); + +afterAll(() => { + if (db) db.close(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Helpers ────────────────────────────────────────────────────────── + +function queryAstNodes(kind) { + return db.prepare('SELECT * FROM ast_nodes WHERE kind = ? 
ORDER BY line').all(kind); +} + +function queryAllAstNodes() { + return db.prepare('SELECT * FROM ast_nodes ORDER BY line').all(); +} + +// ─── Tests ──────────────────────────────────────────────────────────── + +describe('buildAstNodes — JS extraction', () => { + test('captures call nodes from symbols.calls', () => { + const calls = queryAstNodes('call'); + expect(calls.length).toBeGreaterThanOrEqual(1); + const callNames = calls.map((c) => c.name); + // eval, fetch, console.log should be among calls (depending on parser extraction) + expect(callNames.some((n) => n === 'eval' || n === 'fetch' || n === 'console.log')).toBe(true); + }); + + test('captures new_expression as kind:new', () => { + const nodes = queryAstNodes('new'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + const names = nodes.map((n) => n.name); + expect(names).toContain('Map'); + // Note: `throw new Error(...)` is captured as kind:throw, not kind:new + // The new_expression inside throw is not separately emitted + }); + + test('captures string literals as kind:string', () => { + const nodes = queryAstNodes('string'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + const names = nodes.map((n) => n.name); + // "hello world" should be captured, short strings like 'string' might vary + expect(names.some((n) => n.includes('hello world'))).toBe(true); + }); + + test('skips trivial strings shorter than 2 chars', () => { + const nodes = queryAstNodes('string'); + // No single-char or empty strings should be present + for (const node of nodes) { + expect(node.name.length).toBeGreaterThanOrEqual(2); + } + }); + + test('captures regex as kind:regex', () => { + const nodes = queryAstNodes('regex'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + // At least one regex pattern should be present + expect(nodes.some((n) => n.name.includes('[a-z]') || n.name.includes('\\d'))).toBe(true); + }); + + test('captures throw as kind:throw', () => { + const nodes = queryAstNodes('throw'); + 
expect(nodes.length).toBeGreaterThanOrEqual(1); + // throw new Error('fetch failed') → name should be "Error" + expect(nodes.some((n) => n.name === 'Error')).toBe(true); + }); + + test('captures await as kind:await', () => { + const nodes = queryAstNodes('await'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + // await fetch('/api/data') → name should include "fetch" + expect(nodes.some((n) => n.name.includes('fetch'))).toBe(true); + }); + + test('parent_node_id is resolved for nodes inside functions', () => { + const all = queryAllAstNodes(); + const withParent = all.filter((n) => n.parent_node_id != null); + expect(withParent.length).toBeGreaterThan(0); + + // Verify the parent exists in the nodes table + for (const node of withParent) { + const parent = db.prepare('SELECT * FROM nodes WHERE id = ?').get(node.parent_node_id); + expect(parent).toBeDefined(); + expect(['function', 'method', 'class']).toContain(parent.kind); + } + }); + + test('all inserted nodes have valid kinds', () => { + const all = queryAllAstNodes(); + const validKinds = new Set(['call', 'new', 'string', 'regex', 'throw', 'await']); + for (const node of all) { + expect(validKinds.has(node.kind)).toBe(true); + } + }); + + test('text column is truncated to max length', () => { + const all = queryAllAstNodes(); + for (const node of all) { + if (node.text) { + expect(node.text.length).toBeLessThanOrEqual(201); // 200 + possible ellipsis char + } + } + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 7d14bffc..e0b309f7 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -39,6 +39,7 @@ const ALL_TOOL_NAMES = [ 'cfg', 'dataflow', 'check', + 'ast_query', 'list_repos', ]; From 8341c59fb58cfbad4e644267c88b323906a715eb Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:17:37 -0700 Subject: [PATCH 06/12] fix: correct misleading comment for break without enclosing loop/switch The comment 
incorrectly suggested this code path handled break inside switch cases. It actually handles break with no enclosing loop/switch context (invalid syntax) as a no-op. Impact: 2 functions changed, 9 affected --- src/cfg.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cfg.js b/src/cfg.js index 0e6e49be..c9f7dd0f 100644 --- a/src/cfg.js +++ b/src/cfg.js @@ -236,7 +236,7 @@ export function buildFunctionCFG(functionNode, langId) { addEdge(currentBlock, target, 'break'); return null; // path terminated } - // break outside loop (switch case) — just continue + // break with no enclosing loop/switch — treat as no-op return currentBlock; } From 0809f67a711b679c2cf3fbaf150dd2b29d603c94 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:43:41 -0700 Subject: [PATCH 07/12] feat: add `exports ` command for per-symbol consumer analysis Show all exported symbols of a file with their consumers (who calls each export from other files), re-export detection, and counts. Available as CLI command, MCP tool (file_exports), batch command, and programmatic API (exportsData/fileExports). 
Impact: 5 functions changed, 8 affected --- src/batch.js | 2 + src/cli.js | 21 ++++ src/index.js | 2 + src/mcp.js | 22 +++++ src/paginate.js | 1 + src/queries.js | 154 +++++++++++++++++++++++++++++ tests/integration/exports.test.js | 157 ++++++++++++++++++++++++++++++ 7 files changed, 359 insertions(+) create mode 100644 tests/integration/exports.test.js diff --git a/src/batch.js b/src/batch.js index 2a703a3c..17494dc0 100644 --- a/src/batch.js +++ b/src/batch.js @@ -11,6 +11,7 @@ import { flowData } from './flow.js'; import { contextData, explainData, + exportsData, fileDepsData, fnDepsData, fnImpactData, @@ -34,6 +35,7 @@ export const BATCH_COMMANDS = { query: { fn: fnDepsData, sig: 'name' }, impact: { fn: impactAnalysisData, sig: 'file' }, deps: { fn: fileDepsData, sig: 'file' }, + exports: { fn: exportsData, sig: 'file' }, flow: { fn: flowData, sig: 'name' }, dataflow: { fn: dataflowData, sig: 'name' }, complexity: { fn: complexityData, sig: 'dbOnly' }, diff --git a/src/cli.js b/src/cli.js index 882c1c2d..54fa84d5 100644 --- a/src/cli.js +++ b/src/cli.js @@ -26,6 +26,7 @@ import { EVERY_SYMBOL_KIND, explain, fileDeps, + fileExports, fnDeps, fnImpact, impactAnalysis, @@ -224,6 +225,26 @@ program }); }); +program + .command('exports ') + .description('Show exported symbols with per-symbol consumers') + .option('-d, --db ', 'Path to graph.db') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') + .action((file, opts) => { + fileExports(file, opts.db, { + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, + }); + }); + program .command('fn-impact ') .description('Function-level impact: what functions break if this one changes') diff --git a/src/index.js b/src/index.js index f4921d8f..14d59542 100644 --- a/src/index.js +++ b/src/index.js @@ -130,9 +130,11 @@ export { EVERY_SYMBOL_KIND, EXTENDED_SYMBOL_KINDS, explainData, + exportsData, FALSE_POSITIVE_CALLER_THRESHOLD, FALSE_POSITIVE_NAMES, fileDepsData, + fileExports, fnDepsData, fnImpactData, impactAnalysisData, diff --git a/src/mcp.js b/src/mcp.js index 38cdbfec..55aa8958 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -83,6 +83,20 @@ const BASE_TOOLS = [ required: ['file'], }, }, + { + name: 'file_exports', + description: + 'Show exported symbols of a file with per-symbol consumers — who calls each export and from where', + inputSchema: { + type: 'object', + properties: { + file: { type: 'string', description: 'File path (partial match supported)' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + required: ['file'], + }, + }, { name: 'impact_analysis', description: 'Show files affected by changes to a given file (transitive)', @@ -795,6 +809,7 @@ export async function startMCPServer(customDbPath, options = {}) { impactAnalysisData, moduleMapData, fileDepsData, + exportsData, fnDepsData, fnImpactData, pathData, @@ -885,6 +900,13 @@ export async function startMCPServer(customDbPath, options = {}) { offset: args.offset ?? 0, }); break; + case 'file_exports': + result = exportsData(args.file, dbPath, { + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.file_exports, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, + }); + break; case 'impact_analysis': result = impactAnalysisData(args.file, dbPath, { noTests: args.no_tests, diff --git a/src/paginate.js b/src/paginate.js index 5b768993..09cc03b7 100644 --- a/src/paginate.js +++ b/src/paginate.js @@ -18,6 +18,7 @@ export const MCP_DEFAULTS = { context: 5, explain: 10, file_deps: 20, + file_exports: 20, diff_impact: 30, impact_analysis: 20, semantic_search: 20, diff --git a/src/queries.js b/src/queries.js index 6d094108..2cff1357 100644 --- a/src/queries.js +++ b/src/queries.js @@ -3233,3 +3233,157 @@ export function diffImpact(customDbPath, opts = {}) { console.log(`${summaryLine}\n`); } } + +// ─── File Exports ─────────────────────────────────────────────────────── + +function exportsFileImpl(db, file, noTests, getFileLines) { + const fileNodes = db + .prepare(`SELECT * FROM nodes WHERE file LIKE ? AND kind = 'file'`) + .all(`%${file}%`); + if (fileNodes.length === 0) return []; + + return fileNodes.map((fn) => { + const symbols = db + .prepare(`SELECT * FROM nodes WHERE file = ? AND kind != 'file' ORDER BY line`) + .all(fn.file); + + // IDs of symbols that have incoming calls from other files (exported) + const exportedIds = new Set( + db + .prepare( + `SELECT DISTINCT e.target_id FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE target.file = ? AND caller.file != ? AND e.kind = 'calls'`, + ) + .all(fn.file, fn.file) + .map((r) => r.target_id), + ); + + const fileLines = getFileLines(fn.file); + + const exported = symbols.filter((s) => exportedIds.has(s.id)); + const internal = symbols.filter((s) => !exportedIds.has(s.id)); + + const results = exported.map((s) => { + let consumers = db + .prepare( + `SELECT n.name, n.kind, n.file, n.line FROM edges e + JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? 
AND e.kind = 'calls' AND n.file != ?`, + ) + .all(s.id, fn.file); + if (noTests) consumers = consumers.filter((c) => !isTestFile(c.file)); + + return { + name: s.name, + kind: s.kind, + line: s.line, + endLine: s.end_line || null, + role: s.role || null, + signature: fileLines ? extractSignature(fileLines, s.line) : null, + summary: fileLines ? extractSummary(fileLines, s.line) : null, + consumers: consumers.map((c) => ({ + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + })), + consumerCount: consumers.length, + }; + }); + + // Re-exports: files that re-export this file + const reexports = db + .prepare( + `SELECT DISTINCT n.file FROM edges e + JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind = 'reexports'`, + ) + .all(fn.id) + .map((r) => ({ file: r.file })); + + return { + file: fn.file, + results, + reexports, + totalExported: exported.length, + totalInternal: internal.length, + }; + }); +} + +export function exportsData(file, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + + const dbPath = findDbPath(customDbPath); + const repoRoot = path.resolve(path.dirname(dbPath), '..'); + + const fileCache = new Map(); + function getFileLines(f) { + if (fileCache.has(f)) return fileCache.get(f); + try { + const absPath = safePath(repoRoot, f); + if (!absPath) { + fileCache.set(f, null); + return null; + } + const lines = fs.readFileSync(absPath, 'utf-8').split('\n'); + fileCache.set(f, lines); + return lines; + } catch (e) { + debug(`getFileLines failed for ${f}: ${e.message}`); + fileCache.set(f, null); + return null; + } + } + + const all = exportsFileImpl(db, file, noTests, getFileLines); + db.close(); + + // Single-file command: take first match + const match = + all.length > 0 + ? 
all[0] + : { file, results: [], reexports: [], totalExported: 0, totalInternal: 0 }; + return paginateResult(match, 'results', { limit: opts.limit, offset: opts.offset }); +} + +export function fileExports(file, customDbPath, opts = {}) { + const data = exportsData(file, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + if (data.results.length === 0 && data.totalExported === 0 && data.totalInternal === 0) { + console.log(`No file matching "${file}" in graph`); + return; + } + + console.log( + `\n# ${data.file} — ${data.totalExported} exported, ${data.totalInternal} internal\n`, + ); + + for (const s of data.results) { + const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}`); + if (s.consumers.length > 0) { + for (const c of s.consumers) { + console.log(` <- ${c.name} ${c.file}:${c.line}`); + } + } else { + console.log(' (no consumers)'); + } + } + + if (data.reexports.length > 0) { + console.log(`\n Re-exports: ${data.reexports.map((r) => r.file).join(', ')}`); + } + console.log(); +} diff --git a/tests/integration/exports.test.js b/tests/integration/exports.test.js new file mode 100644 index 00000000..0088c91e --- /dev/null +++ b/tests/integration/exports.test.js @@ -0,0 +1,157 @@ +/** + * Integration tests for the `exports` command (exportsData). 
+ * + * Test graph: + * + * Files: lib.js, app.js, barrel.js, lib.test.js + * + * Symbols in lib.js: add (function, line 1), multiply (function, line 10), helper (function, line 20) + * Symbols in app.js: main (function, line 1) + * Symbols in lib.test.js: testAdd (function, line 1) + * + * Call edges: + * main → add (cross-file) + * main → multiply (cross-file) + * add → helper (same-file, internal) + * testAdd → add (cross-file, from test) + * + * Reexport edge: + * barrel.js → lib.js (kind: 'reexports') + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { initSchema } from '../../src/db.js'; +import { exportsData } from '../../src/queries.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertEdge(db, sourceId, targetId, kind, confidence = 1.0) { + db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, 0)', + ).run(sourceId, targetId, kind, confidence); +} + +// ─── Fixture DB ──────────────────────────────────────────────────────── + +let tmpDir, dbPath; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-exports-')); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // File nodes + const fLib = insertNode(db, 'lib.js', 'file', 'lib.js', 0); + const fApp = insertNode(db, 'app.js', 'file', 'app.js', 0); + const fBarrel = insertNode(db, 'barrel.js', 'file', 'barrel.js', 0); + const fTest = insertNode(db, 'lib.test.js', 'file', 'lib.test.js', 0); + + 
// Function nodes in lib.js + const add = insertNode(db, 'add', 'function', 'lib.js', 1); + const multiply = insertNode(db, 'multiply', 'function', 'lib.js', 10); + const helper = insertNode(db, 'helper', 'function', 'lib.js', 20); + + // Function nodes in app.js + const main = insertNode(db, 'main', 'function', 'app.js', 1); + + // Function nodes in lib.test.js + const testAdd = insertNode(db, 'testAdd', 'function', 'lib.test.js', 1); + + // Import edges + insertEdge(db, fApp, fLib, 'imports'); + insertEdge(db, fTest, fLib, 'imports'); + + // Call edges + insertEdge(db, main, add, 'calls'); // cross-file: app.js → lib.js + insertEdge(db, main, multiply, 'calls'); // cross-file: app.js → lib.js + insertEdge(db, add, helper, 'calls'); // same-file: lib.js internal + insertEdge(db, testAdd, add, 'calls'); // cross-file: test → lib.js + + // Reexport edge: barrel.js re-exports lib.js + insertEdge(db, fBarrel, fLib, 'reexports'); + + db.close(); +}); + +afterAll(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Tests ───────────────────────────────────────────────────────────── + +describe('exportsData', () => { + test('returns exported symbols with consumers', () => { + const data = exportsData('lib.js', dbPath); + expect(data.file).toBe('lib.js'); + expect(data.results.length).toBe(2); // add, multiply + + const addExport = data.results.find((r) => r.name === 'add'); + expect(addExport).toBeDefined(); + expect(addExport.kind).toBe('function'); + expect(addExport.line).toBe(1); + // main and testAdd both call add from other files + expect(addExport.consumers.length).toBe(2); + expect(addExport.consumers.map((c) => c.name).sort()).toEqual(['main', 'testAdd']); + + const mulExport = data.results.find((r) => r.name === 'multiply'); + expect(mulExport).toBeDefined(); + expect(mulExport.consumers.length).toBe(1); + expect(mulExport.consumers[0].name).toBe('main'); + + // helper is internal (same-file caller only) + const helperExport 
= data.results.find((r) => r.name === 'helper'); + expect(helperExport).toBeUndefined(); + }); + + test('totalExported and totalInternal counts', () => { + const data = exportsData('lib.js', dbPath); + expect(data.totalExported).toBe(2); + expect(data.totalInternal).toBe(1); // helper + }); + + test('reexports detected', () => { + const data = exportsData('lib.js', dbPath); + expect(data.reexports.length).toBe(1); + expect(data.reexports[0].file).toBe('barrel.js'); + }); + + test('noTests filters test consumers', () => { + const data = exportsData('lib.js', dbPath, { noTests: true }); + const addExport = data.results.find((r) => r.name === 'add'); + expect(addExport).toBeDefined(); + // testAdd from lib.test.js should be filtered out + expect(addExport.consumers.length).toBe(1); + expect(addExport.consumers[0].name).toBe('main'); + expect(addExport.consumerCount).toBe(1); + }); + + test('empty result for unknown file', () => { + const data = exportsData('nonexistent.js', dbPath); + expect(data.results).toEqual([]); + expect(data.totalExported).toBe(0); + expect(data.totalInternal).toBe(0); + }); + + test('pagination works', () => { + const data = exportsData('lib.js', dbPath, { limit: 1 }); + expect(data.results.length).toBe(1); + expect(data._pagination).toBeDefined(); + expect(data._pagination.total).toBe(2); + expect(data._pagination.hasMore).toBe(true); + expect(data._pagination.returned).toBe(1); + }); +}); From 96273f377c5ea62e22cce583f787c8ad91f7619e Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 22:27:10 -0700 Subject: [PATCH 08/12] refactor: consolidate CLI by removing 5 redundant commands - Remove `batch-query` (use `batch where` instead) - Fold `hotspots` into `triage --level file|directory` - Merge `manifesto` into `check` (no args = manifesto, --rules for both) - Replace `explain` with `audit --quick` - Add standalone `path `, deprecate `query --path` All data functions, MCP tools, and 
programmatic API unchanged. --- src/cli.js | 276 +++++++++++++++----------------- tests/integration/batch.test.js | 37 ----- tests/integration/cli.test.js | 51 +++++- 3 files changed, 177 insertions(+), 187 deletions(-) diff --git a/src/cli.js b/src/cli.js index 500031f7..b557c500 100644 --- a/src/cli.js +++ b/src/cli.js @@ -4,7 +4,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { Command } from 'commander'; import { audit } from './audit.js'; -import { BATCH_COMMANDS, batch, batchQuery, multiBatchData, splitTargets } from './batch.js'; +import { BATCH_COMMANDS, batch, multiBatchData, splitTargets } from './batch.js'; import { buildGraph } from './builder.js'; import { loadConfig } from './config.js'; import { findCycles, formatCycles } from './cycles.js'; @@ -142,6 +142,7 @@ program process.exit(1); } if (opts.path) { + console.error('Note: "query --path" is deprecated, use "codegraph path " instead'); symbolPath(name, opts.path, opts.db, { maxDepth: opts.depth ? parseInt(opts.depth, 10) : 10, edgeKinds: opts.kinds ? 
opts.kinds.split(',').map((s) => s.trim()) : undefined, @@ -166,6 +167,39 @@ program } }); +program + .command('path ') + .description('Find shortest path between two symbols') + .option('-d, --db ', 'Path to graph.db') + .option('--reverse', 'Follow edges backward') + .option('--kinds ', 'Comma-separated edge kinds to follow (default: calls)') + .option('--from-file ', 'Disambiguate source symbol by file') + .option('--to-file ', 'Disambiguate target symbol by file') + .option('--depth ', 'Max traversal depth', '10') + .option('-k, --kind ', 'Filter to a specific symbol kind') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') + .action((from, to, opts) => { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + process.exit(1); + } + symbolPath(from, to, opts.db, { + maxDepth: opts.depth ? parseInt(opts.depth, 10) : 10, + edgeKinds: opts.kinds ? 
opts.kinds.split(',').map((s) => s.trim()) : undefined, + reverse: opts.reverse, + fromFile: opts.fromFile, + toFile: opts.toFile, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + }); + }); + program .command('impact ') .description('Show what depends on this file (transitive)') @@ -341,43 +375,36 @@ program }); }); -program - .command('explain ') - .description('Structural summary of a file or function (no LLM needed)') - .option('-d, --db ', 'Path to graph.db') - .option('--depth ', 'Recursively explain dependencies up to N levels deep', '0') - .option('-T, --no-tests', 'Exclude test/spec files from results') - .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') - .option('-j, --json', 'Output as JSON') - .option('--limit ', 'Max results to return') - .option('--offset ', 'Skip N results (default: 0)') - .option('--ndjson', 'Newline-delimited JSON output') - .action((target, opts) => { - explain(target, opts.db, { - depth: parseInt(opts.depth, 10), - noTests: resolveNoTests(opts), - json: opts.json, - limit: opts.limit ? parseInt(opts.limit, 10) : undefined, - offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, - ndjson: opts.ndjson, - }); - }); - program .command('audit ') .description('Composite report: explain + impact + health metrics per function') .option('-d, --db ', 'Path to graph.db') - .option('--depth ', 'Impact analysis depth', '3') + .option('--quick', 'Structural summary only (skip impact analysis and health metrics)') + .option('--depth ', 'Impact/explain depth', '3') .option('-f, --file ', 'Scope to file (partial match)') .option('-k, --kind ', 'Filter by symbol kind') .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return (quick mode)') + .option('--offset ', 'Skip N results (quick mode)') + .option('--ndjson', 'Newline-delimited JSON output (quick mode)') .action((target, opts) => { if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } + if (opts.quick) { + explain(target, opts.db, { + depth: parseInt(opts.depth, 10), + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, + }); + return; + } audit(target, opts.db, { depth: parseInt(opts.depth, 10), file: opts.file, @@ -443,18 +470,48 @@ program program .command('check [ref]') - .description('Run validation predicates against git changes (CI gate)') + .description( + 'CI gate: run manifesto rules (no args), diff predicates (with ref/--staged), or both (--rules)', + ) .option('-d, --db ', 'Path to graph.db') .option('--staged', 'Analyze staged changes') + .option('--rules', 'Also run manifesto rules alongside diff predicates') .option('--cycles', 'Assert no dependency cycles involve changed files') .option('--blast-radius ', 'Assert no function exceeds N transitive callers') .option('--signatures', 'Assert no function declaration lines were modified') .option('--boundaries', 'Assert no cross-owner boundary violations') .option('--depth ', 'Max BFS depth for blast radius (default: 3)') + .option('-f, --file ', 'Scope to file (partial match, manifesto mode)') + .option('-k, --kind ', 'Filter by symbol kind (manifesto mode)') .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return (manifesto mode)') + .option('--offset ', 'Skip N results (manifesto mode)') + .option('--ndjson', 'Newline-delimited JSON output (manifesto mode)') .action(async (ref, opts) => { + const isDiffMode = ref || opts.staged; + + if (!isDiffMode && !opts.rules) { + // No ref, no --staged → run manifesto rules on whole codebase + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + process.exit(1); + } + const { manifesto } = await import('./manifesto.js'); + manifesto(opts.db, { + file: opts.file, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, + }); + return; + } + + // Diff predicates mode const { check } = await import('./check.js'); check(opts.db, { ref, @@ -467,6 +524,24 @@ program noTests: resolveNoTests(opts), json: opts.json, }); + + // If --rules, also run manifesto after diff predicates + if (opts.rules) { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + process.exit(1); + } + const { manifesto } = await import('./manifesto.js'); + manifesto(opts.db, { + file: opts.file, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, + }); + } }); // ─── New commands ──────────────────────────────────────────────────────── @@ -925,38 +1000,6 @@ program } }); -program - .command('hotspots') - .description( - 'Find structural hotspots: files or directories with extreme fan-in, fan-out, or symbol density', - ) - .option('-d, --db ', 'Path to graph.db') - .option('-n, --limit ', 'Number of results', '10') - .option('--metric ', 'fan-in | fan-out | density | coupling', 'fan-in') - .option('--level ', 'file | directory', 'file') - .option('-T, --no-tests', 'Exclude test/spec files from results') - .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') - .option('-j, --json', 'Output as JSON') - .option('--offset ', 'Skip N results (default: 0)') - .option('--ndjson', 'Newline-delimited JSON output') - .action(async (opts) => { - const { hotspotsData, formatHotspots } = await import('./structure.js'); - const data = hotspotsData(opts.db, { - metric: opts.metric, - level: opts.level, - limit: parseInt(opts.limit, 10), - offset: opts.offset ? 
parseInt(opts.offset, 10) : undefined, - noTests: resolveNoTests(opts), - }); - if (opts.ndjson) { - printNdjson(data, 'hotspots'); - } else if (opts.json) { - console.log(JSON.stringify(data, null, 2)); - } else { - console.log(formatHotspots(data)); - } - }); - program .command('roles') .description('Show node role classification: entry, core, utility, adapter, dead, leaf') @@ -1226,35 +1269,6 @@ program }); }); -program - .command('manifesto') - .description('Evaluate manifesto rules (pass/fail verdicts for code health)') - .option('-d, --db ', 'Path to graph.db') - .option('-T, --no-tests', 'Exclude test/spec files from results') - .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') - .option('-f, --file ', 'Scope to file (partial match)') - .option('-k, --kind ', 'Filter by symbol kind') - .option('-j, --json', 'Output as JSON') - .option('--limit ', 'Max results to return') - .option('--offset ', 'Skip N results (default: 0)') - .option('--ndjson', 'Newline-delimited JSON output') - .action(async (opts) => { - if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); - process.exit(1); - } - const { manifesto } = await import('./manifesto.js'); - manifesto(opts.db, { - file: opts.file, - kind: opts.kind, - noTests: resolveNoTests(opts), - json: opts.json, - limit: opts.limit ? parseInt(opts.limit, 10) : undefined, - offset: opts.offset ? parseInt(opts.offset, 10) : undefined, - ndjson: opts.ndjson, - }); - }); - program .command('communities') .description('Detect natural module boundaries using Louvain community detection') @@ -1289,7 +1303,16 @@ program ) .option('-d, --db ', 'Path to graph.db') .option('-n, --limit ', 'Max results to return', '20') - .option('--sort ', 'Sort metric: risk | complexity | churn | fan-in | mi', 'risk') + .option( + '--level ', + 'Granularity: function (default) | file | directory. 
File/directory level shows hotspots', + 'function', + ) + .option( + '--sort ', + 'Sort metric: risk | complexity | churn | fan-in | mi (function level); fan-in | fan-out | density | coupling (file/directory level)', + 'risk', + ) .option('--min-score ', 'Only show symbols with risk score >= threshold') .option('--role ', 'Filter by role (entry, core, utility, adapter, leaf, dead)') .option('-f, --file ', 'Scope to a specific file (partial match)') @@ -1301,6 +1324,27 @@ program .option('--ndjson', 'Newline-delimited JSON output') .option('--weights ', 'Custom weights JSON (e.g. \'{"fanIn":1,"complexity":0}\')') .action(async (opts) => { + if (opts.level === 'file' || opts.level === 'directory') { + // Delegate to hotspots for file/directory level + const { hotspotsData, formatHotspots } = await import('./structure.js'); + const metric = opts.sort === 'risk' ? 'fan-in' : opts.sort; + const data = hotspotsData(opts.db, { + metric, + level: opts.level, + limit: parseInt(opts.limit, 10), + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + noTests: resolveNoTests(opts), + }); + if (opts.ndjson) { + printNdjson(data, 'hotspots'); + } else if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + } else { + console.log(formatHotspots(data)); + } + return; + } + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); @@ -1513,62 +1557,4 @@ program } }); -program - .command('batch-query [targets...]') - .description( - `Batch symbol lookup — resolve multiple references in one call.\nDefaults to 'where' command. 
Accepts comma-separated targets.\nValid commands: ${Object.keys(BATCH_COMMANDS).join(', ')}`, - ) - .option('-d, --db ', 'Path to graph.db') - .option('-c, --command ', 'Query command to run (default: where)', 'where') - .option('--from-file ', 'Read targets from file (JSON array or newline-delimited)') - .option('--stdin', 'Read targets from stdin (JSON array)') - .option('--depth ', 'Traversal depth passed to underlying command') - .option('-f, --file ', 'Scope to file (partial match)') - .option('-k, --kind ', 'Filter by symbol kind') - .option('-T, --no-tests', 'Exclude test/spec files from results') - .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') - .action(async (positionalTargets, opts) => { - if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); - process.exit(1); - } - - let targets; - try { - if (opts.fromFile) { - const raw = fs.readFileSync(opts.fromFile, 'utf-8').trim(); - if (raw.startsWith('[')) { - targets = JSON.parse(raw); - } else { - targets = raw.split(/\r?\n/).filter(Boolean); - } - } else if (opts.stdin) { - const chunks = []; - for await (const chunk of process.stdin) chunks.push(chunk); - const raw = Buffer.concat(chunks).toString('utf-8').trim(); - targets = raw.startsWith('[') ? JSON.parse(raw) : raw.split(/\r?\n/).filter(Boolean); - } else { - targets = splitTargets(positionalTargets); - } - } catch (err) { - console.error(`Failed to parse targets: ${err.message}`); - process.exit(1); - } - - if (!targets || targets.length === 0) { - console.error('No targets provided. Pass targets as arguments, --from-file, or --stdin.'); - process.exit(1); - } - - const batchOpts = { - command: opts.command, - depth: opts.depth ? 
parseInt(opts.depth, 10) : undefined, - file: opts.file, - kind: opts.kind, - noTests: resolveNoTests(opts), - }; - - batchQuery(targets, opts.db, batchOpts); - }); - program.parse(); diff --git a/tests/integration/batch.test.js b/tests/integration/batch.test.js index 85d7775d..a4c5e6db 100644 --- a/tests/integration/batch.test.js +++ b/tests/integration/batch.test.js @@ -327,40 +327,3 @@ describe('multiBatchData', () => { } }); }); - -// ─── batch-query CLI ────────────────────────────────────────────────── - -describe('batch-query CLI', () => { - const cliPath = path.resolve( - path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1')), - '../../src/cli.js', - ); - - test('comma-separated targets default to where command', () => { - const out = execFileSync( - 'node', - [cliPath, 'batch-query', 'authenticate,validateToken', '--db', dbPath], - { encoding: 'utf-8', timeout: 30_000 }, - ); - const parsed = JSON.parse(out); - expect(parsed.command).toBe('where'); - expect(parsed.total).toBe(2); - expect(parsed.results).toHaveLength(2); - expect(parsed.results.map((r) => r.target)).toEqual(['authenticate', 'validateToken']); - for (const r of parsed.results) { - expect(r.ok).toBe(true); - } - }); - - test('--command override works', () => { - const out = execFileSync( - 'node', - [cliPath, 'batch-query', 'authenticate', '--command', 'fn-impact', '--db', dbPath], - { encoding: 'utf-8', timeout: 30_000 }, - ); - const parsed = JSON.parse(out); - expect(parsed.command).toBe('fn-impact'); - expect(parsed.total).toBe(1); - expect(parsed.results[0].ok).toBe(true); - }); -}); diff --git a/tests/integration/cli.test.js b/tests/integration/cli.test.js index c225ae19..a366cd8c 100644 --- a/tests/integration/cli.test.js +++ b/tests/integration/cli.test.js @@ -160,21 +160,62 @@ describe('CLI smoke tests', () => { expect(data).toHaveProperty('count'); }); - // ─── Hotspots ────────────────────────────────────────────────────── - test('hotspots --json returns 
valid JSON with hotspots', () => { - const out = run('hotspots', '--db', dbPath, '--json'); + // ─── Triage --level (formerly hotspots) ───────────────────────────── + test('triage --level file --json returns valid JSON with hotspots', () => { + const out = run('triage', '--level', 'file', '--db', dbPath, '--json'); const data = JSON.parse(out); expect(data).toHaveProperty('hotspots'); expect(data).toHaveProperty('metric'); expect(data).toHaveProperty('level'); }); - test('hotspots --level directory returns directory hotspots', () => { - const out = run('hotspots', '--db', dbPath, '--level', 'directory', '--json'); + test('triage --level directory --json returns directory hotspots', () => { + const out = run('triage', '--level', 'directory', '--db', dbPath, '--json'); const data = JSON.parse(out); expect(data.level).toBe('directory'); }); + // ─── Audit --quick (formerly explain) ────────────────────────────── + test('audit --quick --json returns structural summary', () => { + const out = run('audit', 'math.js', '--quick', '--db', dbPath, '--json'); + const data = JSON.parse(out); + expect(data).toHaveProperty('target'); + }); + + // ─── Path (standalone) ───────────────────────────────────────────── + test('path --json returns valid JSON with path info', () => { + const out = run('path', 'sumOfSquares', 'add', '--db', dbPath, '--json'); + const data = JSON.parse(out); + expect(data).toHaveProperty('found'); + expect(data).toHaveProperty('path'); + expect(data).toHaveProperty('hops'); + }); + + // ─── Query --path deprecation ────────────────────────────────────── + test('query --path prints deprecation warning to stderr', () => { + const { spawnSync } = require('node:child_process'); + const result = spawnSync( + 'node', + [CLI, 'query', 'sumOfSquares', '--path', 'add', '--db', dbPath, '--json'], + { + cwd: tmpDir, + encoding: 'utf-8', + timeout: 30_000, + env: { ...process.env, HOME: tmpHome, USERPROFILE: tmpHome }, + }, + ); + 
expect(result.stderr).toContain('deprecated'); + }); + + // ─── Check (manifesto mode) ──────────────────────────────────────── + test('check --json with no ref/staged runs manifesto rules', () => { + const out = run('check', '--db', dbPath, '--json'); + const data = JSON.parse(out); + expect(data).toHaveProperty('rules'); + expect(data).toHaveProperty('summary'); + expect(data).toHaveProperty('passed'); + }); + // ─── Info ──────────────────────────────────────────────────────────── test('info outputs engine diagnostics', () => { const out = run('info'); From 09f1f7547f361401cb8d93d7ef6a067666746a17 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 22:36:47 -0700 Subject: [PATCH 09/12] docs: update all docs to reflect CLI consolidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update command references across all documentation: - `codegraph explain` → `codegraph audit --quick` - `codegraph hotspots` → `codegraph triage --level` - `codegraph manifesto` → `codegraph check` - Add `codegraph path` references MCP tool references are left unchanged (backward compat). 
--- README.md | 46 ++++++++++++++----------- docs/examples/CLI.md | 28 ++++++++++------ docs/guides/ai-agent-guide.md | 50 ++++++++++++++-------------- docs/guides/recommended-practices.md | 14 ++++---- docs/use-cases/titan-paradigm.md | 20 +++++------ 5 files changed, 85 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index aba7ae02..97ec8c50 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU | 📁 | **File dependencies** | See what a file imports and what imports it | | 💥 | **Impact analysis** | Trace every file affected by a change (transitive) | | 🧬 | **Function-level tracing** | Call chains, caller trees, function-level impact, and A→B pathfinding with qualified call resolution | -| 🎯 | **Deep context** | `context` gives AI agents source, deps, callers, signature, and tests for a function in one call; `explain` gives structural summaries of files or functions | +| 🎯 | **Deep context** | `context` gives AI agents source, deps, callers, signature, and tests for a function in one call; `audit --quick` gives structural summaries of files or functions | | 📍 | **Fast lookup** | `where` shows exactly where a symbol is defined and used — minimal, fast | | 📊 | **Diff impact** | Parse `git diff`, find overlapping functions, trace their callers | | 🔗 | **Co-change analysis** | Analyze git history for files that always change together — surfaces hidden coupling the static graph can't see; enriches `diff-impact` with historically coupled files | @@ -184,7 +184,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU | ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases | | 🧮 | **Complexity metrics** | Cognitive, cyclomatic, nesting depth, Halstead, and Maintainability Index per function | | 🏘️ | **Community detection** | Louvain clustering to discover natural module boundaries and architectural drift | -| 
📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates (exit code 1 on fail) | +| 📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates via `check` (exit code 1 on fail) | | 👥 | **CODEOWNERS integration** | Map graph nodes to CODEOWNERS entries — see who owns each function, ownership boundaries in `diff-impact` | | 💾 | **Graph snapshots** | `snapshot save`/`restore` for instant DB backup and rollback — checkpoint before refactoring, restore without rebuilding | | 🔎 | **Hybrid BM25 + semantic search** | FTS5 keyword search + embedding-based semantic search fused via Reciprocal Rank Fusion — `hybrid`, `semantic`, or `keyword` modes | @@ -229,8 +229,8 @@ codegraph roles --role core --file src/ # Core symbols in src/ ```bash codegraph context # Full context: source, deps, callers, signature, tests codegraph context --depth 2 --no-tests # Include callee source 2 levels deep -codegraph explain # Structural summary: public API, internals, data flow -codegraph explain # Function summary: signature, calls, callers, tests +codegraph audit --quick # Structural summary: public API, internals, data flow +codegraph audit --quick # Function summary: signature, calls, callers, tests ``` ### Impact Analysis @@ -240,9 +240,9 @@ codegraph impact # Transitive reverse dependency trace codegraph query # Function-level: callers, callees, call chain codegraph query --no-tests --depth 5 codegraph fn-impact # What functions break if this one changes -codegraph query --path # Shortest path between two symbols (A calls...calls B) -codegraph query --path --reverse # Follow edges backward -codegraph query --path --depth 5 --kinds calls,imports +codegraph path # Shortest path between two symbols (A calls...calls B) +codegraph path --reverse # Follow edges backward +codegraph path --depth 5 --kinds calls,imports codegraph diff-impact # Impact of unstaged git changes codegraph diff-impact --staged # Impact 
of staged changes codegraph diff-impact HEAD~3 # Impact vs a specific ref @@ -273,8 +273,8 @@ Co-change data also enriches `diff-impact` — historically coupled files appear ```bash codegraph structure # Directory overview with cohesion scores -codegraph hotspots # Files with extreme fan-in, fan-out, or density -codegraph hotspots --metric coupling --level directory --no-tests +codegraph triage --level file # Files with extreme fan-in, fan-out, or density +codegraph triage --level directory --sort coupling --no-tests ``` ### Code Health & Architecture @@ -287,8 +287,8 @@ codegraph complexity --above-threshold -T # Only functions exceeding warn thres codegraph communities # Louvain community detection — natural module boundaries codegraph communities --drift -T # Drift analysis only — split/merge candidates codegraph communities --functions # Function-level community detection -codegraph manifesto # Pass/fail rule engine (exit code 1 on fail) -codegraph manifesto -T # Exclude test files from rule evaluation +codegraph check # Pass/fail rule engine (exit code 1 on fail) +codegraph check -T # Exclude test files from rule evaluation ``` ### Audit, Triage & Batch @@ -296,10 +296,13 @@ codegraph manifesto -T # Exclude test files from rule evaluation Composite commands for risk-driven workflows and multi-agent dispatch. ```bash -codegraph audit # Combined explain + impact + health in one report +codegraph audit # Combined structural summary + impact + health in one report +codegraph audit --quick # Structural summary only (skip impact and health) codegraph audit src/queries.js -T # Audit all functions in a file codegraph triage # Ranked audit priority queue (connectivity + hotspots + roles) codegraph triage -T --limit 20 # Top 20 riskiest functions, excluding tests +codegraph triage --level file -T # File-level hotspot analysis +codegraph triage --level directory -T # Directory-level hotspot analysis codegraph batch target1 target2 ... 
# Batch query multiple targets in one call codegraph batch --json targets.json # Batch from a JSON file ``` @@ -309,7 +312,9 @@ codegraph batch --json targets.json # Batch from a JSON file `codegraph check` provides configurable pass/fail predicates for CI gates and state machines. Exit code 0 = pass, 1 = fail. ```bash -codegraph check --staged # Check staged changes +codegraph check # Run manifesto rules on whole codebase +codegraph check --staged # Check staged changes (diff predicates) +codegraph check --staged --rules # Run both diff predicates AND manifesto rules codegraph check --no-new-cycles # Fail if staged changes introduce cycles codegraph check --max-complexity 30 # Fail if any function exceeds complexity threshold codegraph check --max-blast-radius 50 # Fail if blast radius exceeds limit @@ -413,7 +418,7 @@ codegraph registry remove # Unregister | Flag | Description | |---|---| | `-d, --db ` | Custom path to `graph.db` | -| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files (available on `fn`, `fn-impact`, `path`, `context`, `explain`, `where`, `diff-impact`, `search`, `map`, `hotspots`, `roles`, `co-change`, `deps`, `impact`, `complexity`, `communities`, `manifesto`, `branch-compare`, `audit`, `triage`, `check`) | +| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files (available on `fn`, `fn-impact`, `path`, `context`, `where`, `diff-impact`, `search`, `map`, `roles`, `co-change`, `deps`, `impact`, `complexity`, `communities`, `branch-compare`, `audit`, `triage`, `check`) | | `--depth ` | Transitive trace depth (default varies by command) | | `-j, --json` | Output as JSON | | `-v, --verbose` | Enable debug output | @@ -556,7 +561,7 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. ### Before modifying code, always: 1. `codegraph where ` — find where the symbol lives -2. `codegraph explain ` — understand the structure +2. `codegraph audit --quick` — understand the structure 3. 
`codegraph context -T` — get full context (source, deps, callers) 4. `codegraph fn-impact -T` — check blast radius before editing @@ -567,16 +572,17 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. - `codegraph build .` — rebuild the graph (incremental by default) - `codegraph map` — module overview - `codegraph query -T` — function call chain (callers + callees) -- `codegraph query --path -T` — shortest call path between two symbols +- `codegraph path -T` — shortest call path between two symbols - `codegraph deps ` — file-level dependencies - `codegraph roles --role dead -T` — find dead code (unreferenced symbols) - `codegraph roles --role core -T` — find core symbols (high fan-in) - `codegraph co-change ` — files that historically change together - `codegraph complexity -T` — per-function complexity metrics (cognitive, cyclomatic, MI) - `codegraph communities --drift -T` — module boundary drift analysis -- `codegraph manifesto -T` — pass/fail rule check (CI gate, exit code 1 on fail) -- `codegraph audit -T` — combined explain + impact + health in one report +- `codegraph check -T` — pass/fail rule check (CI gate, exit code 1 on fail) +- `codegraph audit -T` — combined structural summary + impact + health in one report - `codegraph triage -T` — ranked audit priority queue +- `codegraph triage --level file -T` — file-level hotspot analysis - `codegraph check --staged` — CI validation predicates (exit code 0/1) - `codegraph batch target1 target2` — batch query multiple targets at once - `codegraph owners [target]` — CODEOWNERS mapping for symbols @@ -665,7 +671,7 @@ Create a `.codegraphrc.json` in your project root to customize behavior: ### Manifesto rules -Configure pass/fail thresholds for `codegraph manifesto`: +Configure pass/fail thresholds for `codegraph check` (manifesto mode): ```json { @@ -681,7 +687,7 @@ Configure pass/fail thresholds for `codegraph manifesto`: } ``` -When any function exceeds a `fail` threshold, `codegraph 
manifesto` exits with code 1 — perfect for CI gates. +When any function exceeds a `fail` threshold, `codegraph check` exits with code 1 — perfect for CI gates. ### LLM credentials diff --git a/docs/examples/CLI.md b/docs/examples/CLI.md index b3ff00a9..703bf325 100644 --- a/docs/examples/CLI.md +++ b/docs/examples/CLI.md @@ -95,12 +95,12 @@ codegraph where -f src/db.js -T --- -## explain — Structural summary (file or function) +## audit --quick — Structural summary (file or function) ### On a file ```bash -codegraph explain src/builder.js -T +codegraph audit src/builder.js --quick -T ``` ``` @@ -132,7 +132,7 @@ codegraph explain src/builder.js -T ### On a function ```bash -codegraph explain buildGraph -T +codegraph audit buildGraph --quick -T ``` ``` @@ -450,10 +450,10 @@ tests/ (0 files, 32 symbols, <-0 ->6 cohesion=0.00) --- -## hotspots — Find structural hotspots +## triage --level — Find structural hotspots ```bash -codegraph hotspots --metric fan-in -T +codegraph triage --level file --sort fan-in -T ``` ``` @@ -469,7 +469,7 @@ Hotspots by fan-in (file-level, top 10): 8. src/cli.js <-0 ->10 (570L, 1 symbols) ``` -Other metrics: `fan-out`, `density`, `coupling`. +Other metrics: `fan-out`, `density`, `coupling`. Use `--level directory` for directory-level hotspots. --- @@ -903,14 +903,16 @@ codegraph communities --drift -T --- -## manifesto — Rule engine pass/fail +## check — Rule engine pass/fail (manifesto mode) + +Running `check` with no ref or `--staged` runs manifesto rules on the whole codebase: ```bash -codegraph manifesto -T +codegraph check -T ``` ``` -# Manifesto Results +# Manifesto Rules Rule Status Threshold Violations ────────────────────────── ──────── ──────────────── ────────── @@ -927,7 +929,7 @@ codegraph manifesto -T ## audit — Composite risk report -Combines explain + fn-impact + complexity metrics into one structured report per function. One call instead of 3-4. 
+Combines structural summary + fn-impact + complexity metrics into one structured report per function. Use `--quick` for structural summary only (skip impact and health metrics). ```bash codegraph audit src/builder.js -T @@ -962,7 +964,7 @@ codegraph audit buildGraph -T ## triage — Risk-ranked audit queue -Merges connectivity, hotspots, node roles, and complexity into a prioritized audit queue. +Merges connectivity, hotspots, node roles, and complexity into a prioritized audit queue. Use `--level file` or `--level directory` for file/directory-level hotspot analysis. ```bash codegraph triage -T --limit 5 @@ -1008,6 +1010,10 @@ codegraph batch buildGraph openDb parseFile -T --json Configurable pass/fail gates. Exit code 0 = pass, 1 = fail. +- `check` (no args) — runs manifesto rules on whole codebase (see above) +- `check --staged` / `check ` — runs diff predicates against changes +- `check --staged --rules` — runs both diff predicates AND manifesto rules + ```bash codegraph check --staged --no-new-cycles --max-complexity 30 ``` diff --git a/docs/guides/ai-agent-guide.md b/docs/guides/ai-agent-guide.md index 575ff12a..1cdc8242 100644 --- a/docs/guides/ai-agent-guide.md +++ b/docs/guides/ai-agent-guide.md @@ -18,7 +18,7 @@ Codegraph solves these problems by providing a pre-built dependency graph that a | Task | Without codegraph | With codegraph | Savings | |------|------------------|----------------|---------| | Understand a function | Read 3–5 full files (~10K tokens) | `context ` (~400 tokens) | ~96% | -| Find what a file does | Read the file + imports (~4K tokens) | `explain ` (~300 tokens) | ~92% | +| Find what a file does | Read the file + imports (~4K tokens) | `audit --quick ` (~300 tokens) | ~92% | | Locate a symbol | Grep + read matches (~3K tokens) | `where ` (~60 tokens) | ~98% | | Assess change impact | Read callers manually (~5K tokens) | `fn-impact ` (~200 tokens) | ~96% | | Pre-commit check | Manual review (~8K tokens) | `diff-impact --staged` (~300 
tokens) | ~96% | @@ -58,8 +58,8 @@ codegraph search "error handling" # Semantic search (requires prior `embed`) Get a structural summary without reading raw source. ```bash -codegraph explain # File summary: public API, internal API, data flow -codegraph explain # Function summary: signature, calls, callers, tests +codegraph audit --quick # File summary: public API, internal API, data flow +codegraph audit --quick # Function summary: signature, calls, callers, tests ``` **When to use:** Before modifying anything. Understand the shape of the code first. @@ -124,19 +124,19 @@ codegraph where --file # File overview: symbols, imports, exports | **When to use** | First step when you know a name but not where it lives | | **Output** | Definition location (file:line), usage sites, export status | -#### `explain` — Structural summary +#### `audit --quick` — Structural summary -Get a human-readable summary of a file or function without reading raw source. +Get a human-readable summary of a file or function without reading raw source. (`audit --quick` replaces the former `explain` CLI command.) ```bash -codegraph explain src/parser.js # File: public API, internal functions, data flow -codegraph explain buildGraph # Function: signature, what it calls, who calls it +codegraph audit --quick src/parser.js # File: public API, internal functions, data flow +codegraph audit --quick buildGraph # Function: signature, what it calls, who calls it ``` | | | |---|---| | **MCP tool** | `explain` | -| **Key flags** | `-T` (no tests), `-j` (JSON) | +| **Key flags** | `--quick`, `-T` (no tests), `-j` (JSON) | | **When to use** | Before modifying code — understand structure first | | **Output** | For files: public/internal API, imports, dependents. 
For functions: signature, callees, callers, tests | @@ -326,19 +326,19 @@ codegraph structure --depth 2 --sort cohesion | **When to use** | Understanding project layout and identifying well/poorly-cohesive modules | | **Output** | Tree with per-directory metrics | -#### `hotspots` — Structural hotspots +#### `triage --level` — Structural hotspots -Find files or directories with extreme fan-in, fan-out, or symbol density. +Find files or directories with extreme fan-in, fan-out, or symbol density. (`triage --level file|directory` replaces the former `hotspots` CLI command.) ```bash -codegraph hotspots --metric coupling --limit 5 -codegraph hotspots --level directory --metric fan-out +codegraph triage --level file --sort coupling --limit 5 +codegraph triage --level directory --sort fan-out ``` | | | |---|---| | **MCP tool** | `hotspots` | -| **Key flags** | `--metric` (fan-in, fan-out, density, coupling; default: fan-in), `--level` (file, directory), `-n, --limit` (default: 10), `-T` (no tests), `-j` (JSON) | +| **Key flags** | `--level` (file, directory), `--sort` (fan-in, fan-out, density, coupling; default: fan-in), `-n, --limit` (default: 10), `-T` (no tests), `-j` (JSON) | | **When to use** | Finding the most critical or problematic parts of the codebase | | **Output** | Ranked list of files/directories by the chosen metric | @@ -362,7 +362,7 @@ codegraph cycles --functions #### `audit` — Composite risk report -Combines explain + impact + complexity metrics in one call per function or file. +Combines structural summary + impact + complexity metrics in one call per function or file. Use `--quick` for just the structural summary (no impact or health metrics). 
```bash codegraph audit src/parser.js -T # Audit all functions in a file @@ -587,21 +587,21 @@ codegraph mcp --repos "myapp,lib" # Restricted repo list | `fn_impact` | `fn-impact ` | Function-level blast radius | | `symbol_path` | `path ` | Shortest path between two symbols | | `context` | `context ` | Full function context | -| `explain` | `explain ` | Structural summary | +| `explain` | `audit --quick ` | Structural summary | | `where` | `where ` | Symbol definition and usage | | `diff_impact` | `diff-impact [ref]` | Git diff impact analysis | | `semantic_search` | `search ` | Natural language code search | | `export_graph` | `export` | Graph export (DOT/Mermaid/JSON) | | `list_functions` | *(MCP only)* | List/filter symbols | | `structure` | `structure [dir]` | Directory tree with metrics | -| `hotspots` | `hotspots` | Structural hotspot detection | +| `hotspots` | `triage --level file` | Structural hotspot detection | | `node_roles` | `roles` | Node role classification | | `co_changes` | `co-change` | Git co-change analysis | | `execution_flow` | `flow` | Execution flow tracing | | `list_entry_points` | `flow --entry-points` | Framework entry point detection | | `complexity` | `complexity` | Per-function complexity metrics | | `communities` | `communities` | Community detection & drift | -| `manifesto` | `manifesto` | Rule engine pass/fail | +| `manifesto` | `check` (no args) | Rule engine pass/fail | | `code_owners` | `owners` | CODEOWNERS integration | | `audit` | `audit ` | Composite risk report | | `batch_query` | `batch ` | Multi-target batch querying | @@ -684,13 +684,13 @@ Hooks automate codegraph integration so the agent gets structural context withou **Trigger:** Before any Edit or Write operation (PreToolUse). -**What it does:** The first time the agent edits a source file, the hook injects a reminder via `additionalContext` to run `where`, `explain`, `context`, and `fn-impact` before proceeding. 
Subsequent edits to the same file in the same session are silently allowed (tracked in `.claude/codegraph-checked.log`). +**What it does:** The first time the agent edits a source file, the hook injects a reminder via `additionalContext` to run `where`, `audit --quick`, `context`, and `fn-impact` before proceeding. Subsequent edits to the same file in the same session are silently allowed (tracked in `.claude/codegraph-checked.log`). **Example output the agent sees:** ``` [codegraph reminder] You are about to edit src/parser.js. Did you run codegraph first? -Before editing, always: (1) where , (2) explain src/parser.js, +Before editing, always: (1) where , (2) audit --quick src/parser.js, (3) context -T, (4) fn-impact -T. If you already did this, proceed. ``` @@ -801,7 +801,7 @@ This project uses codegraph for dependency analysis. The graph is at `.codegraph ### Before modifying code, always: 1. `codegraph where ` — find where the symbol lives -2. `codegraph explain ` — understand the structure +2. `codegraph audit --quick ` — understand the structure 3. `codegraph context -T` — get full context (source, deps, callers) 4. `codegraph fn-impact -T` — check blast radius before editing @@ -812,7 +812,7 @@ This project uses codegraph for dependency analysis. The graph is at `.codegraph - `codegraph build .` — rebuild the graph (incremental by default) - `codegraph map` — module overview - `codegraph stats` — graph health and quality score -- `codegraph audit -T` — combined explain + impact + health in one report +- `codegraph audit -T` — combined structural summary + impact + health in one report - `codegraph triage -T` — ranked audit priority queue - `codegraph check --staged` — CI validation predicates (exit code 0/1) - `codegraph batch target1 target2` — batch query multiple targets at once @@ -954,7 +954,7 @@ fi | I want to... 
| Command | |---------------|---------| | Find where a function is defined | `codegraph where ` | -| See what a file does | `codegraph explain ` | +| See what a file does | `codegraph audit --quick ` | | Understand a function fully | `codegraph context -T` | | See what calls a function | `codegraph fn -T` | | See what a function calls | `codegraph fn -T` | @@ -965,7 +965,7 @@ fi | Get a codebase overview | `codegraph map` | | Check graph health | `codegraph stats` | | Find circular dependencies | `codegraph cycles` | -| Find hotspots | `codegraph hotspots --metric coupling` | +| Find hotspots | `codegraph triage --level file --sort coupling` | | See project structure | `codegraph structure --depth 2` | | List symbols in a file | `codegraph where --file ` | | Get a full risk report for a function | `codegraph audit -T` | @@ -986,7 +986,7 @@ fi | Flag | Short | Description | Available on | |------|-------|-------------|-------------| -| `--no-tests` | `-T` | Exclude test/spec files | All query commands (fn, fn-impact, context, explain, where, diff-impact, search, map, deps, impact, query, stats, hotspots, cycles, export, structure, audit, triage, check, batch, owners, branch-compare) | +| `--no-tests` | `-T` | Exclude test/spec files | All query commands (fn, fn-impact, context, where, diff-impact, search, map, deps, impact, query, path, stats, cycles, export, structure, audit, triage, check, batch, owners, branch-compare) | | `--json` | `-j` | JSON output | Most commands | | `--file ` | `-f` | Scope to a file | fn, fn-impact, context, where | | `--kind ` | `-k` | Filter by symbol kind | fn, fn-impact, context | @@ -1009,7 +1009,7 @@ fi 4. **Check impact before and after.** Run `fn-impact` before editing to know the blast radius. Run `diff-impact --staged` after to verify your changes. -5. **Use `explain` for orientation, `context` for implementation.** `explain` gives you the shape of the code. `context` gives you the actual source you need to write changes. +5. 
**Use `audit --quick` for orientation, `context` for implementation.** `audit --quick` gives you the shape of the code. `context` gives you the actual source you need to write changes. 6. **Multi-query semantic search.** When searching, phrase the same intent multiple ways: `codegraph search "parse imports, resolve require, extract dependencies"`. RRF ranking combines the results. diff --git a/docs/guides/recommended-practices.md b/docs/guides/recommended-practices.md index 43deee62..177e3791 100644 --- a/docs/guides/recommended-practices.md +++ b/docs/guides/recommended-practices.md @@ -112,13 +112,13 @@ Add a threshold check to your CI pipeline: ### Code health gate -Use `manifesto` to enforce code health rules in CI — it exits with code 1 when any function exceeds a fail-level threshold: +Use `check` to enforce code health rules in CI — it exits with code 1 when any function exceeds a fail-level threshold: ```yaml - name: Code health gate run: | npx codegraph build - npx codegraph manifesto -T # exits 1 on fail-level breach + npx codegraph check -T # exits 1 on fail-level breach (manifesto mode) ``` ### Change validation gate @@ -206,7 +206,7 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. ### Before modifying code, always: 1. `codegraph where ` — find where the symbol lives -2. `codegraph explain ` — understand the structure +2. `codegraph audit --quick ` — understand the structure 3. `codegraph context -T` — get full context (source, deps, callers) 4. `codegraph fn-impact -T` — check blast radius before editing @@ -224,8 +224,8 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. 
- `codegraph co-change ` — files that historically change together - `codegraph complexity -T` — per-function complexity metrics (cognitive, cyclomatic, MI) - `codegraph communities --drift -T` — module boundary drift analysis -- `codegraph manifesto -T` — pass/fail rule check (CI gate, exit code 1 on fail) -- `codegraph audit -T` — combined explain + impact + health in one report +- `codegraph check -T` — pass/fail rule check (CI gate, exit code 1 on fail) +- `codegraph audit -T` — combined structural summary + impact + health in one report - `codegraph triage -T` — ranked audit priority queue - `codegraph check --staged` — CI validation predicates (exit code 0/1) - `codegraph batch target1 target2` — batch query multiple targets at once @@ -326,7 +326,7 @@ You can configure [Claude Code hooks](https://docs.anthropic.com/en/docs/claude- **Doc check hook** (PreToolUse on Bash): when Claude runs `git commit` with source files staged (anything under `src/`, `cli.js`, `constants.js`, `parser.js`, `package.json`, or `grammars/`), the hook checks whether `README.md`, `CLAUDE.md`, and `ROADMAP.md` are also staged. If any are missing, it blocks the commit with a `deny` decision listing which docs weren't staged and what to review in each (language support tables, architecture docs, roadmap phases, etc.). Non-source-only commits (tests, docs, config) pass through without checks. -**Edit reminder hook** (PreToolUse on Edit/Write): before the agent writes code, a reminder is injected via `additionalContext` prompting it to check `where`, `explain`, `context`, and `fn-impact` first. Only fires once per file per session (tracks in `.claude/codegraph-checked.log`, gitignored). Non-blocking — it nudges but never prevents the edit. Skips non-source files like `.md`, `.json`, `.yml`. 
+**Edit reminder hook** (PreToolUse on Edit/Write): before the agent writes code, a reminder is injected via `additionalContext` prompting it to check `where`, `audit --quick`, `context`, and `fn-impact` first. Only fires once per file per session (tracks in `.claude/codegraph-checked.log`, gitignored). Non-blocking — it nudges but never prevents the edit. Skips non-source files like `.md`, `.json`, `.yml`. **Graph update hook** (PostToolUse on Edit/Write): keeps the graph incrementally updated after each file edit. Only changed files are re-parsed. @@ -659,7 +659,7 @@ cp node_modules/@optave/codegraph/.github/workflows/codegraph-impact.yml .github codegraph co-change --analyze # 7. (Optional) Verify code health rules pass -codegraph manifesto -T +codegraph check -T # 8. (Optional) Set up CI validation gate # codegraph check --staged --no-new-cycles --max-blast-radius 50 -T diff --git a/docs/use-cases/titan-paradigm.md b/docs/use-cases/titan-paradigm.md index 3b9402e8..73cb1fbb 100644 --- a/docs/use-cases/titan-paradigm.md +++ b/docs/use-cases/titan-paradigm.md @@ -41,7 +41,7 @@ codegraph build . codegraph map --limit 30 --no-tests # Find structural hotspots — extreme fan-in, fan-out, coupling -codegraph hotspots --no-tests +codegraph triage --level file --no-tests # Graph health overview — node/edge counts, quality score codegraph stats @@ -70,7 +70,7 @@ For deeper structural understanding before touching anything: ```bash # Structural summary of a high-traffic file — public API, internals, data flow -codegraph explain src/builder.js +codegraph audit --quick src/builder.js # Understand a specific function before auditing it codegraph context buildGraph -T @@ -108,10 +108,10 @@ codegraph complexity --file src/parser.js -T codegraph complexity --file src/parser.js --health -T # Pass/fail rule check — does this file meet the manifesto? -codegraph manifesto -T +codegraph check -T # Architecture boundary violations — are cross-module dependencies allowed? 
-codegraph manifesto -T # boundaries are enforced as manifesto rules +codegraph check -T # boundaries are enforced as manifesto rules ``` When a sub-agent decides a function needs decomposition (complexity > 7, nesting > 3, 10+ mocks), it needs to know what breaks. `fn-impact` gives the complete blast radius **before** the agent writes a single line of code. @@ -191,14 +191,14 @@ codegraph snapshot save pre-gauntlet codegraph snapshot restore pre-gauntlet ``` -Use `manifesto` as an additional CI gate — it exits with code 1 when any function exceeds a fail-level threshold: +Use `check` as an additional CI gate — it exits with code 1 when any function exceeds a fail-level threshold: ```bash # Pass/fail rule check — exit code 1 = fail → rollback trigger -codegraph manifesto -T +codegraph check -T ``` -The orchestrator can gate every commit: run `check --staged` for pass/fail validation, `diff-impact --staged --json` for detailed blast radius, and `manifesto -T` to verify code health rules. Auto-rollback if any exceeds thresholds. Combined with `codegraph watch` for real-time graph updates, the state machine always has a current picture of the codebase. +The orchestrator can gate every commit: run `check --staged` for pass/fail validation, `diff-impact --staged --json` for detailed blast radius, and `check -T` to verify code health rules (or `check --staged --rules` for both at once). Auto-rollback if any exceeds thresholds. Combined with `codegraph watch` for real-time graph updates, the state machine always has a current picture of the codebase. ```bash # Watch mode — graph updates automatically as agents edit files @@ -218,7 +218,7 @@ Several planned features would make codegraph even more powerful for the Titan P | Feature | Status | How it helps | |---------|--------|-------------| -| **Node classification** ([Backlog #4](../../roadmap/BACKLOG.md)) | **Done** | Auto-tags every symbol as Entry Point, Core, Utility, or Adapter based on fan-in/fan-out. 
Available via `codegraph roles`, `where`, `explain`, `context`, and the `node_roles` MCP tool | +| **Node classification** ([Backlog #4](../../roadmap/BACKLOG.md)) | **Done** | Auto-tags every symbol as Entry Point, Core, Utility, or Adapter based on fan-in/fan-out. Available via `codegraph roles`, `where`, `audit --quick`, `context`, and the `node_roles` MCP tool | | **Git change coupling** ([Backlog #9](../../roadmap/BACKLOG.md)) | **Done** | `codegraph co-change` analyzes git history for files that always change together. Integrated into `diff-impact` output via `historicallyCoupled` section. MCP tool `co_changes` | ### For THE GAUNTLET @@ -226,7 +226,7 @@ Several planned features would make codegraph even more powerful for the Titan P | Feature | Status | How it helps | |---------|--------|-------------| | **Formal code health metrics** ([Backlog #6](../../roadmap/BACKLOG.md)) | **Done** | `codegraph complexity` provides cognitive, cyclomatic, nesting depth, Halstead (volume, effort, bugs), and Maintainability Index per function. `--health` for full view, `--sort mi` to rank by MI, `--above-threshold` for flagged functions. Maps directly to the Gauntlet's "complexity > 7 is a failure" rule. PR #130 + #139 | -| **Manifesto-driven pass/fail** ([Backlog #22](../../roadmap/BACKLOG.md)) | **Done** | `codegraph manifesto` with 9 configurable rules and warn/fail thresholds. Exit code 1 on fail — the Gauntlet gets first-class pass/fail signals without parsing JSON. PR #138 | +| **Manifesto-driven pass/fail** ([Backlog #22](../../roadmap/BACKLOG.md)) | **Done** | `codegraph check` (manifesto mode) with 9 configurable rules and warn/fail thresholds. Exit code 1 on fail — the Gauntlet gets first-class pass/fail signals without parsing JSON. PR #138 | | **Community detection** ([Backlog #11](../../roadmap/BACKLOG.md)) | **Done** | `codegraph communities` with Louvain algorithm discovers natural module boundaries vs actual file organization. 
`--drift` reveals which directories should be split or merged. `--functions` for function-level clustering. PR #133/#134 | | **Build-time semantic metadata** ([Roadmap Phase 4.4](../../roadmap/ROADMAP.md#44--build-time-semantic-metadata)) | Planned | LLM-generated `complexity_notes`, `risk_score`, and `side_effects` per function. A sub-agent could query `codegraph assess ` and get "3 responsibilities, low cohesion — consider splitting" without analyzing the code itself | @@ -234,7 +234,7 @@ Several planned features would make codegraph even more powerful for the Titan P | Feature | Status | How it helps | |---------|--------|-------------| -| **Architecture boundary rules** ([Backlog #13](../../roadmap/BACKLOG.md)) | **Done** | `manifesto.boundaries` config defines allowed/forbidden dependencies between modules. Onion architecture preset available via `manifesto.boundaryPreset: "onion"`. Violations flagged in `manifesto` and enforceable via `check --no-boundary-violations`. PR #228 + #229 | +| **Architecture boundary rules** ([Backlog #13](../../roadmap/BACKLOG.md)) | **Done** | `manifesto.boundaries` config defines allowed/forbidden dependencies between modules. Onion architecture preset available via `manifesto.boundaryPreset: "onion"`. Violations flagged in `check` and enforceable via `check --no-boundary-violations`. PR #228 + #229 | | **CODEOWNERS integration** ([Backlog #18](../../roadmap/BACKLOG.md)) | **Done** | `codegraph owners` maps graph nodes to CODEOWNERS entries. Shows who owns each function, surfaces ownership boundaries in `diff-impact`. The GLOBAL SYNC agent can identify which teams need to coordinate. 
PR #195 | | **Refactoring analysis** ([Roadmap Phase 8.5](../../roadmap/ROADMAP.md#85--refactoring-analysis)) | Planned | `split_analysis`, `extraction_candidates`, `boundary_analysis` — LLM-powered structural analysis that identifies exactly where shared abstractions should be created | | **Dead code detection** ([Backlog #1](../../roadmap/BACKLOG.md)) | **Done** | `codegraph roles --role dead -T` lists all symbols with zero fan-in that aren't exported. Delivered as part of node classification | From 61c1232e757ec43e1a7200144a95aaafc974b937 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 22:45:45 -0700 Subject: [PATCH 10/12] revert: remove docs changes from CLI consolidation PR Docs updates belong in a separate PR. This reverts commit bd7af72 and the remind-codegraph.sh change from the merge commit. --- README.md | 46 ++++++++--------- docs/examples/CLI.md | 28 ++++------- .../claude-code-hooks/remind-codegraph.sh | 2 +- docs/guides/ai-agent-guide.md | 50 +++++++++---------- docs/guides/recommended-practices.md | 14 +++--- docs/use-cases/titan-paradigm.md | 20 ++++---- 6 files changed, 74 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index 97ec8c50..aba7ae02 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU | 📁 | **File dependencies** | See what a file imports and what imports it | | 💥 | **Impact analysis** | Trace every file affected by a change (transitive) | | 🧬 | **Function-level tracing** | Call chains, caller trees, function-level impact, and A→B pathfinding with qualified call resolution | -| 🎯 | **Deep context** | `context` gives AI agents source, deps, callers, signature, and tests for a function in one call; `audit --quick` gives structural summaries of files or functions | +| 🎯 | **Deep context** | `context` gives AI agents source, deps, callers, signature, and tests for a function in one 
call; `explain` gives structural summaries of files or functions | | 📍 | **Fast lookup** | `where` shows exactly where a symbol is defined and used — minimal, fast | | 📊 | **Diff impact** | Parse `git diff`, find overlapping functions, trace their callers | | 🔗 | **Co-change analysis** | Analyze git history for files that always change together — surfaces hidden coupling the static graph can't see; enriches `diff-impact` with historically coupled files | @@ -184,7 +184,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU | ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases | | 🧮 | **Complexity metrics** | Cognitive, cyclomatic, nesting depth, Halstead, and Maintainability Index per function | | 🏘️ | **Community detection** | Louvain clustering to discover natural module boundaries and architectural drift | -| 📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates via `check` (exit code 1 on fail) | +| 📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates (exit code 1 on fail) | | 👥 | **CODEOWNERS integration** | Map graph nodes to CODEOWNERS entries — see who owns each function, ownership boundaries in `diff-impact` | | 💾 | **Graph snapshots** | `snapshot save`/`restore` for instant DB backup and rollback — checkpoint before refactoring, restore without rebuilding | | 🔎 | **Hybrid BM25 + semantic search** | FTS5 keyword search + embedding-based semantic search fused via Reciprocal Rank Fusion — `hybrid`, `semantic`, or `keyword` modes | @@ -229,8 +229,8 @@ codegraph roles --role core --file src/ # Core symbols in src/ ```bash codegraph context # Full context: source, deps, callers, signature, tests codegraph context --depth 2 --no-tests # Include callee source 2 levels deep -codegraph audit --quick # Structural summary: public API, internals, data flow -codegraph audit --quick # Function summary: 
signature, calls, callers, tests +codegraph explain # Structural summary: public API, internals, data flow +codegraph explain # Function summary: signature, calls, callers, tests ``` ### Impact Analysis @@ -240,9 +240,9 @@ codegraph impact # Transitive reverse dependency trace codegraph query # Function-level: callers, callees, call chain codegraph query --no-tests --depth 5 codegraph fn-impact # What functions break if this one changes -codegraph path # Shortest path between two symbols (A calls...calls B) -codegraph path --reverse # Follow edges backward -codegraph path --depth 5 --kinds calls,imports +codegraph query --path # Shortest path between two symbols (A calls...calls B) +codegraph query --path --reverse # Follow edges backward +codegraph query --path --depth 5 --kinds calls,imports codegraph diff-impact # Impact of unstaged git changes codegraph diff-impact --staged # Impact of staged changes codegraph diff-impact HEAD~3 # Impact vs a specific ref @@ -273,8 +273,8 @@ Co-change data also enriches `diff-impact` — historically coupled files appear ```bash codegraph structure # Directory overview with cohesion scores -codegraph triage --level file # Files with extreme fan-in, fan-out, or density -codegraph triage --level directory --sort coupling --no-tests +codegraph hotspots # Files with extreme fan-in, fan-out, or density +codegraph hotspots --metric coupling --level directory --no-tests ``` ### Code Health & Architecture @@ -287,8 +287,8 @@ codegraph complexity --above-threshold -T # Only functions exceeding warn thres codegraph communities # Louvain community detection — natural module boundaries codegraph communities --drift -T # Drift analysis only — split/merge candidates codegraph communities --functions # Function-level community detection -codegraph check # Pass/fail rule engine (exit code 1 on fail) -codegraph check -T # Exclude test files from rule evaluation +codegraph manifesto # Pass/fail rule engine (exit code 1 on fail) +codegraph 
manifesto -T # Exclude test files from rule evaluation ``` ### Audit, Triage & Batch @@ -296,13 +296,10 @@ codegraph check -T # Exclude test files from rule evaluation Composite commands for risk-driven workflows and multi-agent dispatch. ```bash -codegraph audit # Combined structural summary + impact + health in one report -codegraph audit --quick # Structural summary only (skip impact and health) +codegraph audit # Combined explain + impact + health in one report codegraph audit src/queries.js -T # Audit all functions in a file codegraph triage # Ranked audit priority queue (connectivity + hotspots + roles) codegraph triage -T --limit 20 # Top 20 riskiest functions, excluding tests -codegraph triage --level file -T # File-level hotspot analysis -codegraph triage --level directory -T # Directory-level hotspot analysis codegraph batch target1 target2 ... # Batch query multiple targets in one call codegraph batch --json targets.json # Batch from a JSON file ``` @@ -312,9 +309,7 @@ codegraph batch --json targets.json # Batch from a JSON file `codegraph check` provides configurable pass/fail predicates for CI gates and state machines. Exit code 0 = pass, 1 = fail. 
```bash -codegraph check # Run manifesto rules on whole codebase -codegraph check --staged # Check staged changes (diff predicates) -codegraph check --staged --rules # Run both diff predicates AND manifesto rules +codegraph check --staged # Check staged changes codegraph check --no-new-cycles # Fail if staged changes introduce cycles codegraph check --max-complexity 30 # Fail if any function exceeds complexity threshold codegraph check --max-blast-radius 50 # Fail if blast radius exceeds limit @@ -418,7 +413,7 @@ codegraph registry remove # Unregister | Flag | Description | |---|---| | `-d, --db ` | Custom path to `graph.db` | -| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files (available on `fn`, `fn-impact`, `path`, `context`, `where`, `diff-impact`, `search`, `map`, `roles`, `co-change`, `deps`, `impact`, `complexity`, `communities`, `branch-compare`, `audit`, `triage`, `check`) | +| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files (available on `fn`, `fn-impact`, `path`, `context`, `explain`, `where`, `diff-impact`, `search`, `map`, `hotspots`, `roles`, `co-change`, `deps`, `impact`, `complexity`, `communities`, `manifesto`, `branch-compare`, `audit`, `triage`, `check`) | | `--depth ` | Transitive trace depth (default varies by command) | | `-j, --json` | Output as JSON | | `-v, --verbose` | Enable debug output | @@ -561,7 +556,7 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. ### Before modifying code, always: 1. `codegraph where ` — find where the symbol lives -2. `codegraph audit --quick` — understand the structure +2. `codegraph explain ` — understand the structure 3. `codegraph context -T` — get full context (source, deps, callers) 4. `codegraph fn-impact -T` — check blast radius before editing @@ -572,17 +567,16 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. 
- `codegraph build .` — rebuild the graph (incremental by default) - `codegraph map` — module overview - `codegraph query -T` — function call chain (callers + callees) -- `codegraph path -T` — shortest call path between two symbols +- `codegraph query --path -T` — shortest call path between two symbols - `codegraph deps ` — file-level dependencies - `codegraph roles --role dead -T` — find dead code (unreferenced symbols) - `codegraph roles --role core -T` — find core symbols (high fan-in) - `codegraph co-change ` — files that historically change together - `codegraph complexity -T` — per-function complexity metrics (cognitive, cyclomatic, MI) - `codegraph communities --drift -T` — module boundary drift analysis -- `codegraph check -T` — pass/fail rule check (CI gate, exit code 1 on fail) -- `codegraph audit -T` — combined structural summary + impact + health in one report +- `codegraph manifesto -T` — pass/fail rule check (CI gate, exit code 1 on fail) +- `codegraph audit -T` — combined explain + impact + health in one report - `codegraph triage -T` — ranked audit priority queue -- `codegraph triage --level file -T` — file-level hotspot analysis - `codegraph check --staged` — CI validation predicates (exit code 0/1) - `codegraph batch target1 target2` — batch query multiple targets at once - `codegraph owners [target]` — CODEOWNERS mapping for symbols @@ -671,7 +665,7 @@ Create a `.codegraphrc.json` in your project root to customize behavior: ### Manifesto rules -Configure pass/fail thresholds for `codegraph check` (manifesto mode): +Configure pass/fail thresholds for `codegraph manifesto`: ```json { @@ -687,7 +681,7 @@ Configure pass/fail thresholds for `codegraph check` (manifesto mode): } ``` -When any function exceeds a `fail` threshold, `codegraph check` exits with code 1 — perfect for CI gates. +When any function exceeds a `fail` threshold, `codegraph manifesto` exits with code 1 — perfect for CI gates. 
### LLM credentials diff --git a/docs/examples/CLI.md b/docs/examples/CLI.md index 703bf325..b3ff00a9 100644 --- a/docs/examples/CLI.md +++ b/docs/examples/CLI.md @@ -95,12 +95,12 @@ codegraph where -f src/db.js -T --- -## audit --quick — Structural summary (file or function) +## explain — Structural summary (file or function) ### On a file ```bash -codegraph audit src/builder.js --quick -T +codegraph explain src/builder.js -T ``` ``` @@ -132,7 +132,7 @@ codegraph audit src/builder.js --quick -T ### On a function ```bash -codegraph audit buildGraph --quick -T +codegraph explain buildGraph -T ``` ``` @@ -450,10 +450,10 @@ tests/ (0 files, 32 symbols, <-0 ->6 cohesion=0.00) --- -## triage --level — Find structural hotspots +## hotspots — Find structural hotspots ```bash -codegraph triage --level file --sort fan-in -T +codegraph hotspots --metric fan-in -T ``` ``` @@ -469,7 +469,7 @@ Hotspots by fan-in (file-level, top 10): 8. src/cli.js <-0 ->10 (570L, 1 symbols) ``` -Other metrics: `fan-out`, `density`, `coupling`. Use `--level directory` for directory-level hotspots. +Other metrics: `fan-out`, `density`, `coupling`. --- @@ -903,16 +903,14 @@ codegraph communities --drift -T --- -## check — Rule engine pass/fail (manifesto mode) - -Running `check` with no ref or `--staged` runs manifesto rules on the whole codebase: +## manifesto — Rule engine pass/fail ```bash -codegraph check -T +codegraph manifesto -T ``` ``` -# Manifesto Rules +# Manifesto Results Rule Status Threshold Violations ────────────────────────── ──────── ──────────────── ────────── @@ -929,7 +927,7 @@ codegraph check -T ## audit — Composite risk report -Combines structural summary + fn-impact + complexity metrics into one structured report per function. Use `--quick` for structural summary only (skip impact and health metrics). +Combines explain + fn-impact + complexity metrics into one structured report per function. One call instead of 3-4. 
```bash codegraph audit src/builder.js -T @@ -964,7 +962,7 @@ codegraph audit buildGraph -T ## triage — Risk-ranked audit queue -Merges connectivity, hotspots, node roles, and complexity into a prioritized audit queue. Use `--level file` or `--level directory` for file/directory-level hotspot analysis. +Merges connectivity, hotspots, node roles, and complexity into a prioritized audit queue. ```bash codegraph triage -T --limit 5 @@ -1010,10 +1008,6 @@ codegraph batch buildGraph openDb parseFile -T --json Configurable pass/fail gates. Exit code 0 = pass, 1 = fail. -- `check` (no args) — runs manifesto rules on whole codebase (see above) -- `check --staged` / `check ` — runs diff predicates against changes -- `check --staged --rules` — runs both diff predicates AND manifesto rules - ```bash codegraph check --staged --no-new-cycles --max-complexity 30 ``` diff --git a/docs/examples/claude-code-hooks/remind-codegraph.sh b/docs/examples/claude-code-hooks/remind-codegraph.sh index 710afe35..d0a5d630 100644 --- a/docs/examples/claude-code-hooks/remind-codegraph.sh +++ b/docs/examples/claude-code-hooks/remind-codegraph.sh @@ -61,7 +61,7 @@ cat <' to locate the symbol, (2) 'codegraph audit --quick ${REL_PATH}' to understand the file, (3) 'codegraph context -T' for full context, (4) 'codegraph fn-impact -T' to check blast radius. If you already did this, proceed." + "additionalContext": "[codegraph reminder] You are about to edit ${REL_PATH}. Did you run codegraph first? Before editing, always: (1) 'codegraph where ' to locate the symbol, (2) 'codegraph explain ${REL_PATH}' to understand the file, (3) 'codegraph context -T' for full context, (4) 'codegraph fn-impact -T' to check blast radius. If you already did this, proceed." 
} } HOOK_OUTPUT diff --git a/docs/guides/ai-agent-guide.md b/docs/guides/ai-agent-guide.md index b9ef0712..a378278a 100644 --- a/docs/guides/ai-agent-guide.md +++ b/docs/guides/ai-agent-guide.md @@ -18,7 +18,7 @@ Codegraph solves these problems by providing a pre-built dependency graph that a | Task | Without codegraph | With codegraph | Savings | |------|------------------|----------------|---------| | Understand a function | Read 3–5 full files (~10K tokens) | `context ` (~400 tokens) | ~96% | -| Find what a file does | Read the file + imports (~4K tokens) | `audit --quick ` (~300 tokens) | ~92% | +| Find what a file does | Read the file + imports (~4K tokens) | `explain ` (~300 tokens) | ~92% | | Locate a symbol | Grep + read matches (~3K tokens) | `where ` (~60 tokens) | ~98% | | Assess change impact | Read callers manually (~5K tokens) | `fn-impact ` (~200 tokens) | ~96% | | Pre-commit check | Manual review (~8K tokens) | `diff-impact --staged` (~300 tokens) | ~96% | @@ -58,8 +58,8 @@ codegraph search "error handling" # Semantic search (requires prior `embed`) Get a structural summary without reading raw source. ```bash -codegraph audit --quick # File summary: public API, internal API, data flow -codegraph audit --quick # Function summary: signature, calls, callers, tests +codegraph explain # File summary: public API, internal API, data flow +codegraph explain # Function summary: signature, calls, callers, tests ``` **When to use:** Before modifying anything. Understand the shape of the code first. @@ -124,19 +124,19 @@ codegraph where --file # File overview: symbols, imports, exports | **When to use** | First step when you know a name but not where it lives | | **Output** | Definition location (file:line), usage sites, export status | -#### `audit --quick` — Structural summary +#### `explain` — Structural summary -Get a human-readable summary of a file or function without reading raw source. (`audit --quick` replaces the former `explain` CLI command.) 
+Get a human-readable summary of a file or function without reading raw source. ```bash -codegraph audit --quick src/parser.js # File: public API, internal functions, data flow -codegraph audit --quick buildGraph # Function: signature, what it calls, who calls it +codegraph explain src/parser.js # File: public API, internal functions, data flow +codegraph explain buildGraph # Function: signature, what it calls, who calls it ``` | | | |---|---| | **MCP tool** | `explain` | -| **Key flags** | `--quick`, `-T` (no tests), `-j` (JSON) | +| **Key flags** | `-T` (no tests), `-j` (JSON) | | **When to use** | Before modifying code — understand structure first | | **Output** | For files: public/internal API, imports, dependents. For functions: signature, callees, callers, tests | @@ -326,19 +326,19 @@ codegraph structure --depth 2 --sort cohesion | **When to use** | Understanding project layout and identifying well/poorly-cohesive modules | | **Output** | Tree with per-directory metrics | -#### `triage --level` — Structural hotspots +#### `hotspots` — Structural hotspots -Find files or directories with extreme fan-in, fan-out, or symbol density. (`triage --level file|directory` replaces the former `hotspots` CLI command.) +Find files or directories with extreme fan-in, fan-out, or symbol density. 
```bash -codegraph triage --level file --sort coupling --limit 5 -codegraph triage --level directory --sort fan-out +codegraph hotspots --metric coupling --limit 5 +codegraph hotspots --level directory --metric fan-out ``` | | | |---|---| | **MCP tool** | `hotspots` | -| **Key flags** | `--level` (file, directory), `--sort` (fan-in, fan-out, density, coupling; default: fan-in), `-n, --limit` (default: 10), `-T` (no tests), `-j` (JSON) | +| **Key flags** | `--metric` (fan-in, fan-out, density, coupling; default: fan-in), `--level` (file, directory), `-n, --limit` (default: 10), `-T` (no tests), `-j` (JSON) | | **When to use** | Finding the most critical or problematic parts of the codebase | | **Output** | Ranked list of files/directories by the chosen metric | @@ -362,7 +362,7 @@ codegraph cycles --functions #### `audit` — Composite risk report -Combines structural summary + impact + complexity metrics in one call per function or file. Use `--quick` for just the structural summary (no impact or health metrics). +Combines explain + impact + complexity metrics in one call per function or file. 
```bash codegraph audit src/parser.js -T # Audit all functions in a file @@ -586,20 +586,20 @@ codegraph mcp --repos "myapp,lib" # Restricted repo list | `fn_impact` | `fn-impact ` | Function-level blast radius | | `context` | `context ` | Full function context | | `symbol_children` | `children ` | Sub-declaration children (parameters, properties, constants) | -| `explain` | `audit --quick ` | Structural summary | +| `explain` | `explain ` | Structural summary | | `where` | `where ` | Symbol definition and usage | | `diff_impact` | `diff-impact [ref]` | Git diff impact analysis | | `semantic_search` | `search ` | Natural language code search | | `export_graph` | `export` | Graph export (DOT/Mermaid/JSON) | | `list_functions` | *(MCP only)* | List/filter symbols | | `structure` | `structure [dir]` | Directory tree with metrics | -| `hotspots` | `triage --level file` | Structural hotspot detection | +| `hotspots` | `hotspots` | Structural hotspot detection | | `node_roles` | `roles` | Node role classification | | `co_changes` | `co-change` | Git co-change analysis | | `execution_flow` | `flow` | Execution flow tracing and entry point detection | | `complexity` | `complexity` | Per-function complexity metrics | | `communities` | `communities` | Community detection & drift | -| `manifesto` | `check` (no args) | Rule engine pass/fail | +| `manifesto` | `manifesto` | Rule engine pass/fail | | `code_owners` | `owners` | CODEOWNERS integration | | `audit` | `audit ` | Composite risk report | | `batch_query` | `batch ` | Multi-target batch querying | @@ -685,13 +685,13 @@ Hooks automate codegraph integration so the agent gets structural context withou **Trigger:** Before any Edit or Write operation (PreToolUse). -**What it does:** The first time the agent edits a source file, the hook injects a reminder via `additionalContext` to run `where`, `audit --quick`, `context`, and `fn-impact` before proceeding. 
Subsequent edits to the same file in the same session are silently allowed (tracked in `.claude/codegraph-checked.log`). +**What it does:** The first time the agent edits a source file, the hook injects a reminder via `additionalContext` to run `where`, `explain`, `context`, and `fn-impact` before proceeding. Subsequent edits to the same file in the same session are silently allowed (tracked in `.claude/codegraph-checked.log`). **Example output the agent sees:** ``` [codegraph reminder] You are about to edit src/parser.js. Did you run codegraph first? -Before editing, always: (1) where , (2) audit --quick src/parser.js, +Before editing, always: (1) where , (2) explain src/parser.js, (3) context -T, (4) fn-impact -T. If you already did this, proceed. ``` @@ -802,7 +802,7 @@ This project uses codegraph for dependency analysis. The graph is at `.codegraph ### Before modifying code, always: 1. `codegraph where ` — find where the symbol lives -2. `codegraph audit --quick ` — understand the structure +2. `codegraph explain ` — understand the structure 3. `codegraph context -T` — get full context (source, deps, callers) 4. `codegraph fn-impact -T` — check blast radius before editing @@ -813,7 +813,7 @@ This project uses codegraph for dependency analysis. The graph is at `.codegraph - `codegraph build .` — rebuild the graph (incremental by default) - `codegraph map` — module overview - `codegraph stats` — graph health and quality score -- `codegraph audit -T` — combined structural summary + impact + health in one report +- `codegraph audit -T` — combined explain + impact + health in one report - `codegraph triage -T` — ranked audit priority queue - `codegraph check --staged` — CI validation predicates (exit code 0/1) - `codegraph batch target1 target2` — batch query multiple targets at once @@ -955,7 +955,7 @@ fi | I want to... 
| Command | |---------------|---------| | Find where a function is defined | `codegraph where ` | -| See what a file does | `codegraph audit --quick ` | +| See what a file does | `codegraph explain ` | | Understand a function fully | `codegraph context -T` | | See what calls a function | `codegraph fn -T` | | See what a function calls | `codegraph fn -T` | @@ -966,7 +966,7 @@ fi | Get a codebase overview | `codegraph map` | | Check graph health | `codegraph stats` | | Find circular dependencies | `codegraph cycles` | -| Find hotspots | `codegraph triage --level file --sort coupling` | +| Find hotspots | `codegraph hotspots --metric coupling` | | See project structure | `codegraph structure --depth 2` | | List symbols in a file | `codegraph where --file ` | | Get a full risk report for a function | `codegraph audit -T` | @@ -987,7 +987,7 @@ fi | Flag | Short | Description | Available on | |------|-------|-------------|-------------| -| `--no-tests` | `-T` | Exclude test/spec files | All query commands (fn, fn-impact, context, where, diff-impact, search, map, deps, impact, query, path, stats, cycles, export, structure, audit, triage, check, batch, owners, branch-compare) | +| `--no-tests` | `-T` | Exclude test/spec files | All query commands (fn, fn-impact, context, explain, where, diff-impact, search, map, deps, impact, query, stats, hotspots, cycles, export, structure, audit, triage, check, batch, owners, branch-compare) | | `--json` | `-j` | JSON output | Most commands | | `--file ` | `-f` | Scope to a file | fn, fn-impact, context, where | | `--kind ` | `-k` | Filter by symbol kind | fn, fn-impact, context | @@ -1010,7 +1010,7 @@ fi 4. **Check impact before and after.** Run `fn-impact` before editing to know the blast radius. Run `diff-impact --staged` after to verify your changes. -5. **Use `audit --quick` for orientation, `context` for implementation.** `audit --quick` gives you the shape of the code. 
`context` gives you the actual source you need to write changes. +5. **Use `explain` for orientation, `context` for implementation.** `explain` gives you the shape of the code. `context` gives you the actual source you need to write changes. 6. **Multi-query semantic search.** When searching, phrase the same intent multiple ways: `codegraph search "parse imports, resolve require, extract dependencies"`. RRF ranking combines the results. diff --git a/docs/guides/recommended-practices.md b/docs/guides/recommended-practices.md index a8694c2d..c349191f 100644 --- a/docs/guides/recommended-practices.md +++ b/docs/guides/recommended-practices.md @@ -112,13 +112,13 @@ Add a threshold check to your CI pipeline: ### Code health gate -Use `check` to enforce code health rules in CI — it exits with code 1 when any function exceeds a fail-level threshold: +Use `manifesto` to enforce code health rules in CI — it exits with code 1 when any function exceeds a fail-level threshold: ```yaml - name: Code health gate run: | npx codegraph build - npx codegraph check -T # exits 1 on fail-level breach (manifesto mode) + npx codegraph manifesto -T # exits 1 on fail-level breach ``` ### Change validation gate @@ -206,7 +206,7 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. ### Before modifying code, always: 1. `codegraph where ` — find where the symbol lives -2. `codegraph audit --quick ` — understand the structure +2. `codegraph explain ` — understand the structure 3. `codegraph context -T` — get full context (source, deps, callers) 4. `codegraph fn-impact -T` — check blast radius before editing @@ -224,8 +224,8 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. 
- `codegraph co-change ` — files that historically change together - `codegraph complexity -T` — per-function complexity metrics (cognitive, cyclomatic, MI) - `codegraph communities --drift -T` — module boundary drift analysis -- `codegraph check -T` — pass/fail rule check (CI gate, exit code 1 on fail) -- `codegraph audit -T` — combined structural summary + impact + health in one report +- `codegraph manifesto -T` — pass/fail rule check (CI gate, exit code 1 on fail) +- `codegraph audit -T` — combined explain + impact + health in one report - `codegraph triage -T` — ranked audit priority queue - `codegraph check --staged` — CI validation predicates (exit code 0/1) - `codegraph batch target1 target2` — batch query multiple targets at once @@ -326,7 +326,7 @@ You can configure [Claude Code hooks](https://docs.anthropic.com/en/docs/claude- **Doc check hook** (PreToolUse on Bash): when Claude runs `git commit` with source files staged (anything under `src/`, `cli.js`, `constants.js`, `parser.js`, `package.json`, or `grammars/`), the hook checks whether `README.md`, `CLAUDE.md`, and `ROADMAP.md` are also staged. If any are missing, it blocks the commit with a `deny` decision listing which docs weren't staged and what to review in each (language support tables, architecture docs, roadmap phases, etc.). Non-source-only commits (tests, docs, config) pass through without checks. -**Edit reminder hook** (PreToolUse on Edit/Write): before the agent writes code, a reminder is injected via `additionalContext` prompting it to check `where`, `audit --quick`, `context`, and `fn-impact` first. Only fires once per file per session (tracks in `.claude/codegraph-checked.log`, gitignored). Non-blocking — it nudges but never prevents the edit. Skips non-source files like `.md`, `.json`, `.yml`. 
+**Edit reminder hook** (PreToolUse on Edit/Write): before the agent writes code, a reminder is injected via `additionalContext` prompting it to check `where`, `explain`, `context`, and `fn-impact` first. Only fires once per file per session (tracks in `.claude/codegraph-checked.log`, gitignored). Non-blocking — it nudges but never prevents the edit. Skips non-source files like `.md`, `.json`, `.yml`. **Graph update hook** (PostToolUse on Edit/Write): keeps the graph incrementally updated after each file edit. Only changed files are re-parsed. @@ -659,7 +659,7 @@ cp node_modules/@optave/codegraph/.github/workflows/codegraph-impact.yml .github codegraph co-change --analyze # 7. (Optional) Verify code health rules pass -codegraph check -T +codegraph manifesto -T # 8. (Optional) Set up CI validation gate # codegraph check --staged --no-new-cycles --max-blast-radius 50 -T diff --git a/docs/use-cases/titan-paradigm.md b/docs/use-cases/titan-paradigm.md index 73cb1fbb..3b9402e8 100644 --- a/docs/use-cases/titan-paradigm.md +++ b/docs/use-cases/titan-paradigm.md @@ -41,7 +41,7 @@ codegraph build . codegraph map --limit 30 --no-tests # Find structural hotspots — extreme fan-in, fan-out, coupling -codegraph triage --level file --no-tests +codegraph hotspots --no-tests # Graph health overview — node/edge counts, quality score codegraph stats @@ -70,7 +70,7 @@ For deeper structural understanding before touching anything: ```bash # Structural summary of a high-traffic file — public API, internals, data flow -codegraph audit --quick src/builder.js +codegraph explain src/builder.js # Understand a specific function before auditing it codegraph context buildGraph -T @@ -108,10 +108,10 @@ codegraph complexity --file src/parser.js -T codegraph complexity --file src/parser.js --health -T # Pass/fail rule check — does this file meet the manifesto? -codegraph check -T +codegraph manifesto -T # Architecture boundary violations — are cross-module dependencies allowed? 
-codegraph check -T # boundaries are enforced as manifesto rules +codegraph manifesto -T # boundaries are enforced as manifesto rules ``` When a sub-agent decides a function needs decomposition (complexity > 7, nesting > 3, 10+ mocks), it needs to know what breaks. `fn-impact` gives the complete blast radius **before** the agent writes a single line of code. @@ -191,14 +191,14 @@ codegraph snapshot save pre-gauntlet codegraph snapshot restore pre-gauntlet ``` -Use `check` as an additional CI gate — it exits with code 1 when any function exceeds a fail-level threshold: +Use `manifesto` as an additional CI gate — it exits with code 1 when any function exceeds a fail-level threshold: ```bash # Pass/fail rule check — exit code 1 = fail → rollback trigger -codegraph check -T +codegraph manifesto -T ``` -The orchestrator can gate every commit: run `check --staged` for pass/fail validation, `diff-impact --staged --json` for detailed blast radius, and `check -T` to verify code health rules (or `check --staged --rules` for both at once). Auto-rollback if any exceeds thresholds. Combined with `codegraph watch` for real-time graph updates, the state machine always has a current picture of the codebase. +The orchestrator can gate every commit: run `check --staged` for pass/fail validation, `diff-impact --staged --json` for detailed blast radius, and `manifesto -T` to verify code health rules. Auto-rollback if any exceeds thresholds. Combined with `codegraph watch` for real-time graph updates, the state machine always has a current picture of the codebase. ```bash # Watch mode — graph updates automatically as agents edit files @@ -218,7 +218,7 @@ Several planned features would make codegraph even more powerful for the Titan P | Feature | Status | How it helps | |---------|--------|-------------| -| **Node classification** ([Backlog #4](../../roadmap/BACKLOG.md)) | **Done** | Auto-tags every symbol as Entry Point, Core, Utility, or Adapter based on fan-in/fan-out. 
Available via `codegraph roles`, `where`, `audit --quick`, `context`, and the `node_roles` MCP tool | +| **Node classification** ([Backlog #4](../../roadmap/BACKLOG.md)) | **Done** | Auto-tags every symbol as Entry Point, Core, Utility, or Adapter based on fan-in/fan-out. Available via `codegraph roles`, `where`, `explain`, `context`, and the `node_roles` MCP tool | | **Git change coupling** ([Backlog #9](../../roadmap/BACKLOG.md)) | **Done** | `codegraph co-change` analyzes git history for files that always change together. Integrated into `diff-impact` output via `historicallyCoupled` section. MCP tool `co_changes` | ### For THE GAUNTLET @@ -226,7 +226,7 @@ Several planned features would make codegraph even more powerful for the Titan P | Feature | Status | How it helps | |---------|--------|-------------| | **Formal code health metrics** ([Backlog #6](../../roadmap/BACKLOG.md)) | **Done** | `codegraph complexity` provides cognitive, cyclomatic, nesting depth, Halstead (volume, effort, bugs), and Maintainability Index per function. `--health` for full view, `--sort mi` to rank by MI, `--above-threshold` for flagged functions. Maps directly to the Gauntlet's "complexity > 7 is a failure" rule. PR #130 + #139 | -| **Manifesto-driven pass/fail** ([Backlog #22](../../roadmap/BACKLOG.md)) | **Done** | `codegraph check` (manifesto mode) with 9 configurable rules and warn/fail thresholds. Exit code 1 on fail — the Gauntlet gets first-class pass/fail signals without parsing JSON. PR #138 | +| **Manifesto-driven pass/fail** ([Backlog #22](../../roadmap/BACKLOG.md)) | **Done** | `codegraph manifesto` with 9 configurable rules and warn/fail thresholds. Exit code 1 on fail — the Gauntlet gets first-class pass/fail signals without parsing JSON. PR #138 | | **Community detection** ([Backlog #11](../../roadmap/BACKLOG.md)) | **Done** | `codegraph communities` with Louvain algorithm discovers natural module boundaries vs actual file organization. 
`--drift` reveals which directories should be split or merged. `--functions` for function-level clustering. PR #133/#134 | | **Build-time semantic metadata** ([Roadmap Phase 4.4](../../roadmap/ROADMAP.md#44--build-time-semantic-metadata)) | Planned | LLM-generated `complexity_notes`, `risk_score`, and `side_effects` per function. A sub-agent could query `codegraph assess ` and get "3 responsibilities, low cohesion — consider splitting" without analyzing the code itself | @@ -234,7 +234,7 @@ Several planned features would make codegraph even more powerful for the Titan P | Feature | Status | How it helps | |---------|--------|-------------| -| **Architecture boundary rules** ([Backlog #13](../../roadmap/BACKLOG.md)) | **Done** | `manifesto.boundaries` config defines allowed/forbidden dependencies between modules. Onion architecture preset available via `manifesto.boundaryPreset: "onion"`. Violations flagged in `check` and enforceable via `check --no-boundary-violations`. PR #228 + #229 | +| **Architecture boundary rules** ([Backlog #13](../../roadmap/BACKLOG.md)) | **Done** | `manifesto.boundaries` config defines allowed/forbidden dependencies between modules. Onion architecture preset available via `manifesto.boundaryPreset: "onion"`. Violations flagged in `manifesto` and enforceable via `check --no-boundary-violations`. PR #228 + #229 | | **CODEOWNERS integration** ([Backlog #18](../../roadmap/BACKLOG.md)) | **Done** | `codegraph owners` maps graph nodes to CODEOWNERS entries. Shows who owns each function, surfaces ownership boundaries in `diff-impact`. The GLOBAL SYNC agent can identify which teams need to coordinate. 
PR #195 | | **Refactoring analysis** ([Roadmap Phase 8.5](../../roadmap/ROADMAP.md#85--refactoring-analysis)) | Planned | `split_analysis`, `extraction_candidates`, `boundary_analysis` — LLM-powered structural analysis that identifies exactly where shared abstractions should be created | | **Dead code detection** ([Backlog #1](../../roadmap/BACKLOG.md)) | **Done** | `codegraph roles --role dead -T` lists all symbols with zero fan-in that aren't exported. Delivered as part of node classification | From b7883ca8addf948d1a63a7f72b01fb687eebd303 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 22:49:22 -0700 Subject: [PATCH 11/12] revert: remove docs changes from CLI consolidation PR Docs updates moved to separate PR #282. --- README.md | 46 ++++++++--------- docs/examples/CLI.md | 28 ++++------- .../claude-code-hooks/remind-codegraph.sh | 2 +- docs/guides/ai-agent-guide.md | 50 +++++++++---------- docs/guides/recommended-practices.md | 14 +++--- docs/use-cases/titan-paradigm.md | 20 ++++---- 6 files changed, 74 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index 97ec8c50..aba7ae02 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU | 📁 | **File dependencies** | See what a file imports and what imports it | | 💥 | **Impact analysis** | Trace every file affected by a change (transitive) | | 🧬 | **Function-level tracing** | Call chains, caller trees, function-level impact, and A→B pathfinding with qualified call resolution | -| 🎯 | **Deep context** | `context` gives AI agents source, deps, callers, signature, and tests for a function in one call; `audit --quick` gives structural summaries of files or functions | +| 🎯 | **Deep context** | `context` gives AI agents source, deps, callers, signature, and tests for a function in one call; `explain` gives structural summaries of files or functions | | 📍 | **Fast 
lookup** | `where` shows exactly where a symbol is defined and used — minimal, fast | | 📊 | **Diff impact** | Parse `git diff`, find overlapping functions, trace their callers | | 🔗 | **Co-change analysis** | Analyze git history for files that always change together — surfaces hidden coupling the static graph can't see; enriches `diff-impact` with historically coupled files | @@ -184,7 +184,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU | ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases | | 🧮 | **Complexity metrics** | Cognitive, cyclomatic, nesting depth, Halstead, and Maintainability Index per function | | 🏘️ | **Community detection** | Louvain clustering to discover natural module boundaries and architectural drift | -| 📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates via `check` (exit code 1 on fail) | +| 📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates (exit code 1 on fail) | | 👥 | **CODEOWNERS integration** | Map graph nodes to CODEOWNERS entries — see who owns each function, ownership boundaries in `diff-impact` | | 💾 | **Graph snapshots** | `snapshot save`/`restore` for instant DB backup and rollback — checkpoint before refactoring, restore without rebuilding | | 🔎 | **Hybrid BM25 + semantic search** | FTS5 keyword search + embedding-based semantic search fused via Reciprocal Rank Fusion — `hybrid`, `semantic`, or `keyword` modes | @@ -229,8 +229,8 @@ codegraph roles --role core --file src/ # Core symbols in src/ ```bash codegraph context # Full context: source, deps, callers, signature, tests codegraph context --depth 2 --no-tests # Include callee source 2 levels deep -codegraph audit --quick # Structural summary: public API, internals, data flow -codegraph audit --quick # Function summary: signature, calls, callers, tests +codegraph explain # Structural summary: public 
API, internals, data flow +codegraph explain # Function summary: signature, calls, callers, tests ``` ### Impact Analysis @@ -240,9 +240,9 @@ codegraph impact # Transitive reverse dependency trace codegraph query # Function-level: callers, callees, call chain codegraph query --no-tests --depth 5 codegraph fn-impact # What functions break if this one changes -codegraph path # Shortest path between two symbols (A calls...calls B) -codegraph path --reverse # Follow edges backward -codegraph path --depth 5 --kinds calls,imports +codegraph query --path # Shortest path between two symbols (A calls...calls B) +codegraph query --path --reverse # Follow edges backward +codegraph query --path --depth 5 --kinds calls,imports codegraph diff-impact # Impact of unstaged git changes codegraph diff-impact --staged # Impact of staged changes codegraph diff-impact HEAD~3 # Impact vs a specific ref @@ -273,8 +273,8 @@ Co-change data also enriches `diff-impact` — historically coupled files appear ```bash codegraph structure # Directory overview with cohesion scores -codegraph triage --level file # Files with extreme fan-in, fan-out, or density -codegraph triage --level directory --sort coupling --no-tests +codegraph hotspots # Files with extreme fan-in, fan-out, or density +codegraph hotspots --metric coupling --level directory --no-tests ``` ### Code Health & Architecture @@ -287,8 +287,8 @@ codegraph complexity --above-threshold -T # Only functions exceeding warn thres codegraph communities # Louvain community detection — natural module boundaries codegraph communities --drift -T # Drift analysis only — split/merge candidates codegraph communities --functions # Function-level community detection -codegraph check # Pass/fail rule engine (exit code 1 on fail) -codegraph check -T # Exclude test files from rule evaluation +codegraph manifesto # Pass/fail rule engine (exit code 1 on fail) +codegraph manifesto -T # Exclude test files from rule evaluation ``` ### Audit, Triage & Batch @@ 
-296,13 +296,10 @@ codegraph check -T # Exclude test files from rule evaluation Composite commands for risk-driven workflows and multi-agent dispatch. ```bash -codegraph audit # Combined structural summary + impact + health in one report -codegraph audit --quick # Structural summary only (skip impact and health) +codegraph audit # Combined explain + impact + health in one report codegraph audit src/queries.js -T # Audit all functions in a file codegraph triage # Ranked audit priority queue (connectivity + hotspots + roles) codegraph triage -T --limit 20 # Top 20 riskiest functions, excluding tests -codegraph triage --level file -T # File-level hotspot analysis -codegraph triage --level directory -T # Directory-level hotspot analysis codegraph batch target1 target2 ... # Batch query multiple targets in one call codegraph batch --json targets.json # Batch from a JSON file ``` @@ -312,9 +309,7 @@ codegraph batch --json targets.json # Batch from a JSON file `codegraph check` provides configurable pass/fail predicates for CI gates and state machines. Exit code 0 = pass, 1 = fail. 
```bash -codegraph check # Run manifesto rules on whole codebase -codegraph check --staged # Check staged changes (diff predicates) -codegraph check --staged --rules # Run both diff predicates AND manifesto rules +codegraph check --staged # Check staged changes codegraph check --no-new-cycles # Fail if staged changes introduce cycles codegraph check --max-complexity 30 # Fail if any function exceeds complexity threshold codegraph check --max-blast-radius 50 # Fail if blast radius exceeds limit @@ -418,7 +413,7 @@ codegraph registry remove # Unregister | Flag | Description | |---|---| | `-d, --db ` | Custom path to `graph.db` | -| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files (available on `fn`, `fn-impact`, `path`, `context`, `where`, `diff-impact`, `search`, `map`, `roles`, `co-change`, `deps`, `impact`, `complexity`, `communities`, `branch-compare`, `audit`, `triage`, `check`) | +| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files (available on `fn`, `fn-impact`, `path`, `context`, `explain`, `where`, `diff-impact`, `search`, `map`, `hotspots`, `roles`, `co-change`, `deps`, `impact`, `complexity`, `communities`, `manifesto`, `branch-compare`, `audit`, `triage`, `check`) | | `--depth ` | Transitive trace depth (default varies by command) | | `-j, --json` | Output as JSON | | `-v, --verbose` | Enable debug output | @@ -561,7 +556,7 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. ### Before modifying code, always: 1. `codegraph where ` — find where the symbol lives -2. `codegraph audit --quick` — understand the structure +2. `codegraph explain ` — understand the structure 3. `codegraph context -T` — get full context (source, deps, callers) 4. `codegraph fn-impact -T` — check blast radius before editing @@ -572,17 +567,16 @@ This project uses codegraph. The database is at `.codegraph/graph.db`. 
- `codegraph build .` — rebuild the graph (incremental by default) - `codegraph map` — module overview - `codegraph query -T` — function call chain (callers + callees) -- `codegraph path -T` — shortest call path between two symbols +- `codegraph query --path -T` — shortest call path between two symbols - `codegraph deps ` — file-level dependencies - `codegraph roles --role dead -T` — find dead code (unreferenced symbols) - `codegraph roles --role core -T` — find core symbols (high fan-in) - `codegraph co-change ` — files that historically change together - `codegraph complexity -T` — per-function complexity metrics (cognitive, cyclomatic, MI) - `codegraph communities --drift -T` — module boundary drift analysis -- `codegraph check -T` — pass/fail rule check (CI gate, exit code 1 on fail) -- `codegraph audit -T` — combined structural summary + impact + health in one report +- `codegraph manifesto -T` — pass/fail rule check (CI gate, exit code 1 on fail) +- `codegraph audit -T` — combined explain + impact + health in one report - `codegraph triage -T` — ranked audit priority queue -- `codegraph triage --level file -T` — file-level hotspot analysis - `codegraph check --staged` — CI validation predicates (exit code 0/1) - `codegraph batch target1 target2` — batch query multiple targets at once - `codegraph owners [target]` — CODEOWNERS mapping for symbols @@ -671,7 +665,7 @@ Create a `.codegraphrc.json` in your project root to customize behavior: ### Manifesto rules -Configure pass/fail thresholds for `codegraph check` (manifesto mode): +Configure pass/fail thresholds for `codegraph manifesto`: ```json { @@ -687,7 +681,7 @@ Configure pass/fail thresholds for `codegraph check` (manifesto mode): } ``` -When any function exceeds a `fail` threshold, `codegraph check` exits with code 1 — perfect for CI gates. +When any function exceeds a `fail` threshold, `codegraph manifesto` exits with code 1 — perfect for CI gates. 
### LLM credentials diff --git a/docs/examples/CLI.md b/docs/examples/CLI.md index 703bf325..b3ff00a9 100644 --- a/docs/examples/CLI.md +++ b/docs/examples/CLI.md @@ -95,12 +95,12 @@ codegraph where -f src/db.js -T --- -## audit --quick — Structural summary (file or function) +## explain — Structural summary (file or function) ### On a file ```bash -codegraph audit src/builder.js --quick -T +codegraph explain src/builder.js -T ``` ``` @@ -132,7 +132,7 @@ codegraph audit src/builder.js --quick -T ### On a function ```bash -codegraph audit buildGraph --quick -T +codegraph explain buildGraph -T ``` ``` @@ -450,10 +450,10 @@ tests/ (0 files, 32 symbols, <-0 ->6 cohesion=0.00) --- -## triage --level — Find structural hotspots +## hotspots — Find structural hotspots ```bash -codegraph triage --level file --sort fan-in -T +codegraph hotspots --metric fan-in -T ``` ``` @@ -469,7 +469,7 @@ Hotspots by fan-in (file-level, top 10): 8. src/cli.js <-0 ->10 (570L, 1 symbols) ``` -Other metrics: `fan-out`, `density`, `coupling`. Use `--level directory` for directory-level hotspots. +Other metrics: `fan-out`, `density`, `coupling`. --- @@ -903,16 +903,14 @@ codegraph communities --drift -T --- -## check — Rule engine pass/fail (manifesto mode) - -Running `check` with no ref or `--staged` runs manifesto rules on the whole codebase: +## manifesto — Rule engine pass/fail ```bash -codegraph check -T +codegraph manifesto -T ``` ``` -# Manifesto Rules +# Manifesto Results Rule Status Threshold Violations ────────────────────────── ──────── ──────────────── ────────── @@ -929,7 +927,7 @@ codegraph check -T ## audit — Composite risk report -Combines structural summary + fn-impact + complexity metrics into one structured report per function. Use `--quick` for structural summary only (skip impact and health metrics). +Combines explain + fn-impact + complexity metrics into one structured report per function. One call instead of 3-4. 
```bash codegraph audit src/builder.js -T @@ -964,7 +962,7 @@ codegraph audit buildGraph -T ## triage — Risk-ranked audit queue -Merges connectivity, hotspots, node roles, and complexity into a prioritized audit queue. Use `--level file` or `--level directory` for file/directory-level hotspot analysis. +Merges connectivity, hotspots, node roles, and complexity into a prioritized audit queue. ```bash codegraph triage -T --limit 5 @@ -1010,10 +1008,6 @@ codegraph batch buildGraph openDb parseFile -T --json Configurable pass/fail gates. Exit code 0 = pass, 1 = fail. -- `check` (no args) — runs manifesto rules on whole codebase (see above) -- `check --staged` / `check ` — runs diff predicates against changes -- `check --staged --rules` — runs both diff predicates AND manifesto rules - ```bash codegraph check --staged --no-new-cycles --max-complexity 30 ``` diff --git a/docs/examples/claude-code-hooks/remind-codegraph.sh b/docs/examples/claude-code-hooks/remind-codegraph.sh index 710afe35..d0a5d630 100644 --- a/docs/examples/claude-code-hooks/remind-codegraph.sh +++ b/docs/examples/claude-code-hooks/remind-codegraph.sh @@ -61,7 +61,7 @@ cat <' to locate the symbol, (2) 'codegraph audit --quick ${REL_PATH}' to understand the file, (3) 'codegraph context -T' for full context, (4) 'codegraph fn-impact -T' to check blast radius. If you already did this, proceed." + "additionalContext": "[codegraph reminder] You are about to edit ${REL_PATH}. Did you run codegraph first? Before editing, always: (1) 'codegraph where ' to locate the symbol, (2) 'codegraph explain ${REL_PATH}' to understand the file, (3) 'codegraph context -T' for full context, (4) 'codegraph fn-impact -T' to check blast radius. If you already did this, proceed." 
} } HOOK_OUTPUT diff --git a/docs/guides/ai-agent-guide.md b/docs/guides/ai-agent-guide.md index b9ef0712..a378278a 100644 --- a/docs/guides/ai-agent-guide.md +++ b/docs/guides/ai-agent-guide.md @@ -18,7 +18,7 @@ Codegraph solves these problems by providing a pre-built dependency graph that a | Task | Without codegraph | With codegraph | Savings | |------|------------------|----------------|---------| | Understand a function | Read 3–5 full files (~10K tokens) | `context ` (~400 tokens) | ~96% | -| Find what a file does | Read the file + imports (~4K tokens) | `audit --quick ` (~300 tokens) | ~92% | +| Find what a file does | Read the file + imports (~4K tokens) | `explain ` (~300 tokens) | ~92% | | Locate a symbol | Grep + read matches (~3K tokens) | `where ` (~60 tokens) | ~98% | | Assess change impact | Read callers manually (~5K tokens) | `fn-impact ` (~200 tokens) | ~96% | | Pre-commit check | Manual review (~8K tokens) | `diff-impact --staged` (~300 tokens) | ~96% | @@ -58,8 +58,8 @@ codegraph search "error handling" # Semantic search (requires prior `embed`) Get a structural summary without reading raw source. ```bash -codegraph audit --quick # File summary: public API, internal API, data flow -codegraph audit --quick # Function summary: signature, calls, callers, tests +codegraph explain # File summary: public API, internal API, data flow +codegraph explain # Function summary: signature, calls, callers, tests ``` **When to use:** Before modifying anything. Understand the shape of the code first. @@ -124,19 +124,19 @@ codegraph where --file # File overview: symbols, imports, exports | **When to use** | First step when you know a name but not where it lives | | **Output** | Definition location (file:line), usage sites, export status | -#### `audit --quick` — Structural summary +#### `explain` — Structural summary -Get a human-readable summary of a file or function without reading raw source. (`audit --quick` replaces the former `explain` CLI command.) 
+Get a human-readable summary of a file or function without reading raw source. ```bash -codegraph audit --quick src/parser.js # File: public API, internal functions, data flow -codegraph audit --quick buildGraph # Function: signature, what it calls, who calls it +codegraph explain src/parser.js # File: public API, internal functions, data flow +codegraph explain buildGraph # Function: signature, what it calls, who calls it ``` | | | |---|---| | **MCP tool** | `explain` | -| **Key flags** | `--quick`, `-T` (no tests), `-j` (JSON) | +| **Key flags** | `-T` (no tests), `-j` (JSON) | | **When to use** | Before modifying code — understand structure first | | **Output** | For files: public/internal API, imports, dependents. For functions: signature, callees, callers, tests | @@ -326,19 +326,19 @@ codegraph structure --depth 2 --sort cohesion | **When to use** | Understanding project layout and identifying well/poorly-cohesive modules | | **Output** | Tree with per-directory metrics | -#### `triage --level` — Structural hotspots +#### `hotspots` — Structural hotspots -Find files or directories with extreme fan-in, fan-out, or symbol density. (`triage --level file|directory` replaces the former `hotspots` CLI command.) +Find files or directories with extreme fan-in, fan-out, or symbol density. 
```bash -codegraph triage --level file --sort coupling --limit 5 -codegraph triage --level directory --sort fan-out +codegraph hotspots --metric coupling --limit 5 +codegraph hotspots --level directory --metric fan-out ``` | | | |---|---| | **MCP tool** | `hotspots` | -| **Key flags** | `--level` (file, directory), `--sort` (fan-in, fan-out, density, coupling; default: fan-in), `-n, --limit` (default: 10), `-T` (no tests), `-j` (JSON) | +| **Key flags** | `--metric` (fan-in, fan-out, density, coupling; default: fan-in), `--level` (file, directory), `-n, --limit` (default: 10), `-T` (no tests), `-j` (JSON) | | **When to use** | Finding the most critical or problematic parts of the codebase | | **Output** | Ranked list of files/directories by the chosen metric | @@ -362,7 +362,7 @@ codegraph cycles --functions #### `audit` — Composite risk report -Combines structural summary + impact + complexity metrics in one call per function or file. Use `--quick` for just the structural summary (no impact or health metrics). +Combines explain + impact + complexity metrics in one call per function or file. 
```bash codegraph audit src/parser.js -T # Audit all functions in a file @@ -586,20 +586,20 @@ codegraph mcp --repos "myapp,lib" # Restricted repo list | `fn_impact` | `fn-impact ` | Function-level blast radius | | `context` | `context ` | Full function context | | `symbol_children` | `children ` | Sub-declaration children (parameters, properties, constants) | -| `explain` | `audit --quick ` | Structural summary | +| `explain` | `explain ` | Structural summary | | `where` | `where ` | Symbol definition and usage | | `diff_impact` | `diff-impact [ref]` | Git diff impact analysis | | `semantic_search` | `search ` | Natural language code search | | `export_graph` | `export` | Graph export (DOT/Mermaid/JSON) | | `list_functions` | *(MCP only)* | List/filter symbols | | `structure` | `structure [dir]` | Directory tree with metrics | -| `hotspots` | `triage --level file` | Structural hotspot detection | +| `hotspots` | `hotspots` | Structural hotspot detection | | `node_roles` | `roles` | Node role classification | | `co_changes` | `co-change` | Git co-change analysis | | `execution_flow` | `flow` | Execution flow tracing and entry point detection | | `complexity` | `complexity` | Per-function complexity metrics | | `communities` | `communities` | Community detection & drift | -| `manifesto` | `check` (no args) | Rule engine pass/fail | +| `manifesto` | `manifesto` | Rule engine pass/fail | | `code_owners` | `owners` | CODEOWNERS integration | | `audit` | `audit ` | Composite risk report | | `batch_query` | `batch ` | Multi-target batch querying | @@ -685,13 +685,13 @@ Hooks automate codegraph integration so the agent gets structural context withou **Trigger:** Before any Edit or Write operation (PreToolUse). -**What it does:** The first time the agent edits a source file, the hook injects a reminder via `additionalContext` to run `where`, `audit --quick`, `context`, and `fn-impact` before proceeding. 
Subsequent edits to the same file in the same session are silently allowed (tracked in `.claude/codegraph-checked.log`). +**What it does:** The first time the agent edits a source file, the hook injects a reminder via `additionalContext` to run `where`, `explain`, `context`, and `fn-impact` before proceeding. Subsequent edits to the same file in the same session are silently allowed (tracked in `.claude/codegraph-checked.log`). **Example output the agent sees:** ``` [codegraph reminder] You are about to edit src/parser.js. Did you run codegraph first? -Before editing, always: (1) where , (2) audit --quick src/parser.js, +Before editing, always: (1) where , (2) explain src/parser.js, (3) context -T, (4) fn-impact -T. If you already did this, proceed. ``` @@ -802,7 +802,7 @@ This project uses codegraph for dependency analysis. The graph is at `.codegraph ### Before modifying code, always: 1. `codegraph where ` — find where the symbol lives -2. `codegraph audit --quick ` — understand the structure +2. `codegraph explain ` — understand the structure 3. `codegraph context -T` — get full context (source, deps, callers) 4. `codegraph fn-impact -T` — check blast radius before editing @@ -813,7 +813,7 @@ This project uses codegraph for dependency analysis. The graph is at `.codegraph - `codegraph build .` — rebuild the graph (incremental by default) - `codegraph map` — module overview - `codegraph stats` — graph health and quality score -- `codegraph audit -T` — combined structural summary + impact + health in one report +- `codegraph audit -T` — combined explain + impact + health in one report - `codegraph triage -T` — ranked audit priority queue - `codegraph check --staged` — CI validation predicates (exit code 0/1) - `codegraph batch target1 target2` — batch query multiple targets at once @@ -955,7 +955,7 @@ fi | I want to... 
| Command | |---------------|---------| | Find where a function is defined | `codegraph where ` | -| See what a file does | `codegraph audit --quick ` | +| See what a file does | `codegraph explain ` | | Understand a function fully | `codegraph context -T` | | See what calls a function | `codegraph fn -T` | | See what a function calls | `codegraph fn -T` | @@ -966,7 +966,7 @@ fi | Get a codebase overview | `codegraph map` | | Check graph health | `codegraph stats` | | Find circular dependencies | `codegraph cycles` | -| Find hotspots | `codegraph triage --level file --sort coupling` | +| Find hotspots | `codegraph hotspots --metric coupling` | | See project structure | `codegraph structure --depth 2` | | List symbols in a file | `codegraph where --file ` | | Get a full risk report for a function | `codegraph audit -T` | @@ -987,7 +987,7 @@ fi | Flag | Short | Description | Available on | |------|-------|-------------|-------------| -| `--no-tests` | `-T` | Exclude test/spec files | All query commands (fn, fn-impact, context, where, diff-impact, search, map, deps, impact, query, path, stats, cycles, export, structure, audit, triage, check, batch, owners, branch-compare) | +| `--no-tests` | `-T` | Exclude test/spec files | All query commands (fn, fn-impact, context, explain, where, diff-impact, search, map, deps, impact, query, stats, hotspots, cycles, export, structure, audit, triage, check, batch, owners, branch-compare) | | `--json` | `-j` | JSON output | Most commands | | `--file ` | `-f` | Scope to a file | fn, fn-impact, context, where | | `--kind ` | `-k` | Filter by symbol kind | fn, fn-impact, context | @@ -1010,7 +1010,7 @@ fi 4. **Check impact before and after.** Run `fn-impact` before editing to know the blast radius. Run `diff-impact --staged` after to verify your changes. -5. **Use `audit --quick` for orientation, `context` for implementation.** `audit --quick` gives you the shape of the code. 
`context` gives you the actual source you need to write changes.

6. **Multi-query semantic search.** When searching, phrase the same intent multiple ways: `codegraph search "parse imports, resolve require, extract dependencies"`. RRF ranking combines the results.

diff --git a/docs/guides/recommended-practices.md b/docs/guides/recommended-practices.md
index a8694c2d..c349191f 100644
--- a/docs/guides/recommended-practices.md
+++ b/docs/guides/recommended-practices.md
@@ -112,13 +112,13 @@ Add a threshold check to your CI pipeline:
 
 ### Code health gate
 
-Use `check` to enforce code health rules in CI — it exits with code 1 when any function exceeds a fail-level threshold:
+Use `manifesto` to enforce code health rules in CI — it exits with code 1 when any function exceeds a fail-level threshold:
 
 ```yaml
 - name: Code health gate
   run: |
     npx codegraph build
-    npx codegraph check -T # exits 1 on fail-level breach (manifesto mode)
+    npx codegraph manifesto -T # exits 1 on fail-level breach
 ```
 
 ### Change validation gate
@@ -206,7 +206,7 @@ This project uses codegraph. The database is at `.codegraph/graph.db`.
 ### Before modifying code, always:
 
 1. `codegraph where <symbol>` — find where the symbol lives
-2. `codegraph audit --quick <file>` — understand the structure
+2. `codegraph explain <file>` — understand the structure
 3. `codegraph context <fn> -T` — get full context (source, deps, callers)
 4. `codegraph fn-impact <fn> -T` — check blast radius before editing
 
@@ -224,8 +224,8 @@ This project uses codegraph. The database is at `.codegraph/graph.db`.
- `codegraph co-change ` — files that historically change together - `codegraph complexity -T` — per-function complexity metrics (cognitive, cyclomatic, MI) - `codegraph communities --drift -T` — module boundary drift analysis -- `codegraph check -T` — pass/fail rule check (CI gate, exit code 1 on fail) -- `codegraph audit -T` — combined structural summary + impact + health in one report +- `codegraph manifesto -T` — pass/fail rule check (CI gate, exit code 1 on fail) +- `codegraph audit -T` — combined explain + impact + health in one report - `codegraph triage -T` — ranked audit priority queue - `codegraph check --staged` — CI validation predicates (exit code 0/1) - `codegraph batch target1 target2` — batch query multiple targets at once @@ -326,7 +326,7 @@ You can configure [Claude Code hooks](https://docs.anthropic.com/en/docs/claude- **Doc check hook** (PreToolUse on Bash): when Claude runs `git commit` with source files staged (anything under `src/`, `cli.js`, `constants.js`, `parser.js`, `package.json`, or `grammars/`), the hook checks whether `README.md`, `CLAUDE.md`, and `ROADMAP.md` are also staged. If any are missing, it blocks the commit with a `deny` decision listing which docs weren't staged and what to review in each (language support tables, architecture docs, roadmap phases, etc.). Non-source-only commits (tests, docs, config) pass through without checks. -**Edit reminder hook** (PreToolUse on Edit/Write): before the agent writes code, a reminder is injected via `additionalContext` prompting it to check `where`, `audit --quick`, `context`, and `fn-impact` first. Only fires once per file per session (tracks in `.claude/codegraph-checked.log`, gitignored). Non-blocking — it nudges but never prevents the edit. Skips non-source files like `.md`, `.json`, `.yml`. 
+**Edit reminder hook** (PreToolUse on Edit/Write): before the agent writes code, a reminder is injected via `additionalContext` prompting it to check `where`, `explain`, `context`, and `fn-impact` first. Only fires once per file per session (tracks in `.claude/codegraph-checked.log`, gitignored). Non-blocking — it nudges but never prevents the edit. Skips non-source files like `.md`, `.json`, `.yml`. **Graph update hook** (PostToolUse on Edit/Write): keeps the graph incrementally updated after each file edit. Only changed files are re-parsed. @@ -659,7 +659,7 @@ cp node_modules/@optave/codegraph/.github/workflows/codegraph-impact.yml .github codegraph co-change --analyze # 7. (Optional) Verify code health rules pass -codegraph check -T +codegraph manifesto -T # 8. (Optional) Set up CI validation gate # codegraph check --staged --no-new-cycles --max-blast-radius 50 -T diff --git a/docs/use-cases/titan-paradigm.md b/docs/use-cases/titan-paradigm.md index 73cb1fbb..3b9402e8 100644 --- a/docs/use-cases/titan-paradigm.md +++ b/docs/use-cases/titan-paradigm.md @@ -41,7 +41,7 @@ codegraph build . codegraph map --limit 30 --no-tests # Find structural hotspots — extreme fan-in, fan-out, coupling -codegraph triage --level file --no-tests +codegraph hotspots --no-tests # Graph health overview — node/edge counts, quality score codegraph stats @@ -70,7 +70,7 @@ For deeper structural understanding before touching anything: ```bash # Structural summary of a high-traffic file — public API, internals, data flow -codegraph audit --quick src/builder.js +codegraph explain src/builder.js # Understand a specific function before auditing it codegraph context buildGraph -T @@ -108,10 +108,10 @@ codegraph complexity --file src/parser.js -T codegraph complexity --file src/parser.js --health -T # Pass/fail rule check — does this file meet the manifesto? -codegraph check -T +codegraph manifesto -T # Architecture boundary violations — are cross-module dependencies allowed? 
-codegraph check -T # boundaries are enforced as manifesto rules +codegraph manifesto -T # boundaries are enforced as manifesto rules ``` When a sub-agent decides a function needs decomposition (complexity > 7, nesting > 3, 10+ mocks), it needs to know what breaks. `fn-impact` gives the complete blast radius **before** the agent writes a single line of code. @@ -191,14 +191,14 @@ codegraph snapshot save pre-gauntlet codegraph snapshot restore pre-gauntlet ``` -Use `check` as an additional CI gate — it exits with code 1 when any function exceeds a fail-level threshold: +Use `manifesto` as an additional CI gate — it exits with code 1 when any function exceeds a fail-level threshold: ```bash # Pass/fail rule check — exit code 1 = fail → rollback trigger -codegraph check -T +codegraph manifesto -T ``` -The orchestrator can gate every commit: run `check --staged` for pass/fail validation, `diff-impact --staged --json` for detailed blast radius, and `check -T` to verify code health rules (or `check --staged --rules` for both at once). Auto-rollback if any exceeds thresholds. Combined with `codegraph watch` for real-time graph updates, the state machine always has a current picture of the codebase. +The orchestrator can gate every commit: run `check --staged` for pass/fail validation, `diff-impact --staged --json` for detailed blast radius, and `manifesto -T` to verify code health rules. Auto-rollback if any exceeds thresholds. Combined with `codegraph watch` for real-time graph updates, the state machine always has a current picture of the codebase. ```bash # Watch mode — graph updates automatically as agents edit files @@ -218,7 +218,7 @@ Several planned features would make codegraph even more powerful for the Titan P | Feature | Status | How it helps | |---------|--------|-------------| -| **Node classification** ([Backlog #4](../../roadmap/BACKLOG.md)) | **Done** | Auto-tags every symbol as Entry Point, Core, Utility, or Adapter based on fan-in/fan-out. 
Available via `codegraph roles`, `where`, `audit --quick`, `context`, and the `node_roles` MCP tool | +| **Node classification** ([Backlog #4](../../roadmap/BACKLOG.md)) | **Done** | Auto-tags every symbol as Entry Point, Core, Utility, or Adapter based on fan-in/fan-out. Available via `codegraph roles`, `where`, `explain`, `context`, and the `node_roles` MCP tool | | **Git change coupling** ([Backlog #9](../../roadmap/BACKLOG.md)) | **Done** | `codegraph co-change` analyzes git history for files that always change together. Integrated into `diff-impact` output via `historicallyCoupled` section. MCP tool `co_changes` | ### For THE GAUNTLET @@ -226,7 +226,7 @@ Several planned features would make codegraph even more powerful for the Titan P | Feature | Status | How it helps | |---------|--------|-------------| | **Formal code health metrics** ([Backlog #6](../../roadmap/BACKLOG.md)) | **Done** | `codegraph complexity` provides cognitive, cyclomatic, nesting depth, Halstead (volume, effort, bugs), and Maintainability Index per function. `--health` for full view, `--sort mi` to rank by MI, `--above-threshold` for flagged functions. Maps directly to the Gauntlet's "complexity > 7 is a failure" rule. PR #130 + #139 | -| **Manifesto-driven pass/fail** ([Backlog #22](../../roadmap/BACKLOG.md)) | **Done** | `codegraph check` (manifesto mode) with 9 configurable rules and warn/fail thresholds. Exit code 1 on fail — the Gauntlet gets first-class pass/fail signals without parsing JSON. PR #138 | +| **Manifesto-driven pass/fail** ([Backlog #22](../../roadmap/BACKLOG.md)) | **Done** | `codegraph manifesto` with 9 configurable rules and warn/fail thresholds. Exit code 1 on fail — the Gauntlet gets first-class pass/fail signals without parsing JSON. PR #138 | | **Community detection** ([Backlog #11](../../roadmap/BACKLOG.md)) | **Done** | `codegraph communities` with Louvain algorithm discovers natural module boundaries vs actual file organization. 
`--drift` reveals which directories should be split or merged. `--functions` for function-level clustering. PR #133/#134 | | **Build-time semantic metadata** ([Roadmap Phase 4.4](../../roadmap/ROADMAP.md#44--build-time-semantic-metadata)) | Planned | LLM-generated `complexity_notes`, `risk_score`, and `side_effects` per function. A sub-agent could query `codegraph assess ` and get "3 responsibilities, low cohesion — consider splitting" without analyzing the code itself | @@ -234,7 +234,7 @@ Several planned features would make codegraph even more powerful for the Titan P | Feature | Status | How it helps | |---------|--------|-------------| -| **Architecture boundary rules** ([Backlog #13](../../roadmap/BACKLOG.md)) | **Done** | `manifesto.boundaries` config defines allowed/forbidden dependencies between modules. Onion architecture preset available via `manifesto.boundaryPreset: "onion"`. Violations flagged in `check` and enforceable via `check --no-boundary-violations`. PR #228 + #229 | +| **Architecture boundary rules** ([Backlog #13](../../roadmap/BACKLOG.md)) | **Done** | `manifesto.boundaries` config defines allowed/forbidden dependencies between modules. Onion architecture preset available via `manifesto.boundaryPreset: "onion"`. Violations flagged in `manifesto` and enforceable via `check --no-boundary-violations`. PR #228 + #229 | | **CODEOWNERS integration** ([Backlog #18](../../roadmap/BACKLOG.md)) | **Done** | `codegraph owners` maps graph nodes to CODEOWNERS entries. Shows who owns each function, surfaces ownership boundaries in `diff-impact`. The GLOBAL SYNC agent can identify which teams need to coordinate. 
PR #195 | | **Refactoring analysis** ([Roadmap Phase 8.5](../../roadmap/ROADMAP.md#85--refactoring-analysis)) | Planned | `split_analysis`, `extraction_candidates`, `boundary_analysis` — LLM-powered structural analysis that identifies exactly where shared abstractions should be created | | **Dead code detection** ([Backlog #1](../../roadmap/BACKLOG.md)) | **Done** | `codegraph roles --role dead -T` lists all symbols with zero fan-in that aren't exported. Delivered as part of node classification | From 82360a68176d0385152c0a2998b468dd492f2204 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 23:22:51 -0700 Subject: [PATCH 12/12] fix: remove unused --limit, --offset, --ndjson from path command --- src/cli.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cli.js b/src/cli.js index b557c500..48e8d29a 100644 --- a/src/cli.js +++ b/src/cli.js @@ -180,9 +180,6 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') - .option('--limit ', 'Max results to return') - .option('--offset ', 'Skip N results (default: 0)') - .option('--ndjson', 'Newline-delimited JSON output') .action((from, to, opts) => { if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`);