From 03b11a5d87fbfcf1154dbf39e8249f590bdab6c9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 2 Mar 2026 04:48:54 -0700 Subject: [PATCH 01/30] docs: update incremental benchmarks (2.6.0) (#251) Co-authored-by: github-actions[bot] --- .../benchmarks/INCREMENTAL-BENCHMARKS.md | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/generated/benchmarks/INCREMENTAL-BENCHMARKS.md b/generated/benchmarks/INCREMENTAL-BENCHMARKS.md index acd0e365..4ab0041b 100644 --- a/generated/benchmarks/INCREMENTAL-BENCHMARKS.md +++ b/generated/benchmarks/INCREMENTAL-BENCHMARKS.md @@ -6,28 +6,30 @@ Import resolution: native batch vs JS fallback throughput. | Version | Engine | Files | Full Build | No-op | 1-File | Resolve (native) | Resolve (JS) | |---------|--------|------:|-----------:|------:|-------:|------------------:|-------------:| +| 2.6.0 | native | 146 | 286ms ↑3% | 4ms ↓33% | 135ms ↑5% | 3ms ~ | 3ms ↓3% | +| 2.6.0 | wasm | 146 | 899ms ~ | 4ms ↓20% | 503ms ↑37% | 3ms ~ | 3ms ↓3% | | 2.5.1 | native | 142 | 277ms | 6ms | 129ms | 3ms | 3ms | | 2.5.1 | wasm | 142 | 888ms | 5ms | 368ms | 3ms | 3ms | ### Latest results -**Version:** 2.5.1 | **Files:** 142 | **Date:** 2026-03-02 +**Version:** 2.6.0 | **Files:** 146 | **Date:** 2026-03-02 #### Native (Rust) | Metric | Value | |--------|------:| -| Full build | 277ms | -| No-op rebuild | 6ms | -| 1-file rebuild | 129ms | +| Full build | 286ms | +| No-op rebuild | 4ms | +| 1-file rebuild | 135ms | #### WASM | Metric | Value | |--------|------:| -| Full build | 888ms | -| No-op rebuild | 5ms | -| 1-file rebuild | 368ms | +| Full build | 899ms | +| No-op rebuild | 4ms | +| 1-file rebuild | 503ms | #### Import Resolution @@ -38,10 +40,32 @@ Import resolution: native batch vs JS fallback throughput. 
| JS fallback | 3ms | | Per-import (native) | 0ms | | Per-import (JS) | 0ms | -| Speedup ratio | 1.2x | +| Speedup ratio | 1.1x | Phase 2 (Foundation Hardening) + |--> Phase 2.5 (Analysis Expansion) + |--> Phase 3 (Architectural Refactoring) + |--> Phase 4 (TypeScript Migration) + |--> Phase 5 (Embeddings + Metadata) --> Phase 6 (NL Queries + Narration) + |--> Phase 7 (Languages) + |--> Phase 8 (GitHub/CI) <-- Phase 5 (risk_score, side_effects) +Phases 1-6 --> Phase 9 (Visualization + Refactoring Analysis) ``` --- -## Phase 1 — Rust Core ✅ +## Phase 1 -- Rust Core ✅ -> **Status:** Complete — shipped in v1.3.0 +> **Status:** Complete -- shipped in v1.3.0 **Goal:** Move the CPU-intensive parsing and graph engine to Rust, keeping JS for CLI orchestration, MCP, and embeddings. This unlocks parallel parsing, incremental tree-sitter, lower memory usage, and optional standalone binary distribution. -### 1.1 — Rust Workspace & napi-rs Setup ✅ +### 1.1 -- Rust Workspace & napi-rs Setup ✅ Bootstrap the Rust side of the project. - Create `crates/codegraph-core/` with a Cargo workspace -- Set up [napi-rs](https://napi.rs/) to compile Rust → `.node` native addon +- Set up [napi-rs](https://napi.rs/) to compile Rust -> `.node` native addon - Configure CI matrix for prebuilt binaries: `linux-x64`, `darwin-arm64`, `darwin-x64`, `win32-x64` - Add npm optionalDependencies for platform-specific packages (same pattern as SWC/esbuild) - Fallback to existing JS/WASM path if native addon is unavailable **Result:** `npm install` pulls a prebuilt binary; no Rust toolchain required for end users. -### 1.2 — Native tree-sitter Parsing ✅ +### 1.2 -- Native tree-sitter Parsing ✅ Replace WASM-based parsing with native tree-sitter in Rust. @@ -68,7 +70,7 @@ Replace WASM-based parsing with native tree-sitter in Rust. 
**Affected files:** `src/parser.js` (becomes a thin JS wrapper over native addon) -### 1.3 — Incremental Parsing ✅ +### 1.3 -- Incremental Parsing ✅ Leverage native tree-sitter's `edit + re-parse` API. @@ -80,7 +82,7 @@ Leverage native tree-sitter's `edit + re-parse` API. **Affected files:** `src/watcher.js`, `src/parser.js` -### 1.4 — Import Resolution & Graph Algorithms in Rust ✅ +### 1.4 -- Import Resolution & Graph Algorithms in Rust ✅ Move the hot-path graph logic to Rust. @@ -91,12 +93,12 @@ Move the hot-path graph logic to Rust. **Result:** Import resolution and cycle detection run in Rust with full type safety. Complex state machines benefit from Rust's type system. -### 1.5 — Graceful Degradation & Migration ✅ +### 1.5 -- Graceful Degradation & Migration ✅ Ensure the transition is seamless. - Keep the existing JS/WASM parser as a fallback when the native addon is unavailable -- Auto-detect at startup: native addon available → use Rust path; otherwise → WASM path +- Auto-detect at startup: native addon available -> use Rust path; otherwise -> WASM path - No breaking changes to CLI, MCP, or programmatic API - Add `--engine native|wasm` flag for explicit selection - Migrate existing tests to validate both engines produce identical output @@ -105,13 +107,13 @@ Ensure the transition is seamless. --- -## Phase 2 — Foundation Hardening ✅ +## Phase 2 -- Foundation Hardening ✅ -> **Status:** Complete — shipped in v1.4.0 +> **Status:** Complete -- shipped in v1.4.0 **Goal:** Fix structural issues that make subsequent phases harder. -### 2.1 — Language Parser Registry ✅ +### 2.1 -- Language Parser Registry ✅ Replace scattered parser init/selection logic with a single declarative registry. 
@@ -125,9 +127,9 @@ Replace scattered parser init/selection logic with a single declarative registry **Affected files:** `src/parser.js`, `src/constants.js` -### 2.2 — Complete MCP Server ✅ +### 2.2 -- Complete MCP Server ✅ -Expose all CLI capabilities through MCP, going from 5 → 11 tools. +Expose all CLI capabilities through MCP, going from 5 -> 11 tools. | New tool | Wraps | Description | |----------|-------|-------------| @@ -136,11 +138,11 @@ Expose all CLI capabilities through MCP, going from 5 → 11 tools. | ✅ `diff_impact` | `diffImpactData` | Git diff impact analysis | | ✅ `semantic_search` | `searchData` | Embedding-powered search | | ✅ `export_graph` | export functions | DOT/Mermaid/JSON export | -| ✅ `list_functions` | — | List functions in a file or by pattern | +| ✅ `list_functions` | -- | List functions in a file or by pattern | **Affected files:** `src/mcp.js` -### 2.3 — Test Coverage Gaps ✅ +### 2.3 -- Test Coverage Gaps ✅ Add tests for currently untested modules. @@ -149,9 +151,9 @@ Add tests for currently untested modules. | ✅ `tests/unit/mcp.test.js` | All MCP tools (mock stdio transport) | | ✅ `tests/unit/config.test.js` | Config loading, defaults, env overrides, apiKeyCommand | | ✅ `tests/integration/cli.test.js` | End-to-end CLI smoke tests | -| ✅ `tests/unit/*.test.js` | Unit tests for 8 core modules (coverage 62% → 75%) | +| ✅ `tests/unit/*.test.js` | Unit tests for 8 core modules (coverage 62% -> 75%) | -### 2.4 — Enhanced Configuration ✅ +### 2.4 -- Enhanced Configuration ✅ New configuration options in `.codegraphrc.json`: @@ -171,11 +173,11 @@ New configuration options in `.codegraphrc.json`: ``` - ✅ Environment variable fallbacks: `CODEGRAPH_LLM_PROVIDER`, `CODEGRAPH_LLM_API_KEY`, `CODEGRAPH_LLM_MODEL` -- ✅ `apiKeyCommand` — shell out to external secret managers (1Password, Bitwarden, Vault, pass, macOS Keychain) at runtime via `execFileSync` (no shell injection). Priority: command output > env var > file config > defaults. 
Graceful fallback on failure. +- ✅ `apiKeyCommand` -- shell out to external secret managers (1Password, Bitwarden, Vault, pass, macOS Keychain) at runtime via `execFileSync` (no shell injection). Priority: command output > env var > file config > defaults. Graceful fallback on failure. **Affected files:** `src/config.js` -### 2.5 — Multi-Repo MCP ✅ +### 2.5 -- Multi-Repo MCP ✅ Support querying multiple codebases from a single MCP server instance. @@ -191,299 +193,457 @@ Support querying multiple codebases from a single MCP server instance. --- -## Phase 3 — Architectural Refactoring +## Phase 2.5 -- Analysis Expansion ✅ -**Goal:** Restructure the codebase for modularity, testability, and long-term maintainability. These are internal improvements — no new user-facing features, but they make every subsequent phase easier to build and maintain. +> **Status:** Complete -- shipped across v2.0.0 -> v2.6.0 -> Reference: [generated/architecture.md](../generated/architecture.md) — full analysis with code examples and rationale. +**Goal:** Build a comprehensive analysis toolkit on top of the graph -- complexity metrics, community detection, risk triage, architecture boundary enforcement, CI validation, and hybrid search. This phase emerged organically as features were needed and wasn't in the original roadmap. -### 3.1 — Parser Plugin System +### 2.5.1 -- Complexity Metrics ✅ -Split `parser.js` (2,200+ lines) into a modular directory structure with isolated per-language extractors. +Per-function complexity analysis using language-specific AST rules. 
-``` -src/parser/ - index.js # Public API: parseFileAuto, parseFilesAuto - registry.js # LANGUAGE_REGISTRY + extension mapping - engine.js # Native/WASM init, engine resolution, grammar loading - tree-utils.js # findChild, findParentClass, walkTree helpers - base-extractor.js # Shared walk loop + accumulator framework - extractors/ - javascript.js # JS/TS/TSX - python.js - go.js - rust.js - java.js - csharp.js - ruby.js - php.js - hcl.js -``` +- ✅ Cognitive complexity, cyclomatic complexity, max nesting depth for 8 languages +- ✅ Halstead metrics (vocabulary, volume, difficulty, effort, bugs) +- ✅ LOC, SLOC, comment lines per function +- ✅ Maintainability Index (MI) computation +- ✅ Native Rust engine support for all complexity metrics +- ✅ CLI: `codegraph complexity [target]` with `--sort`, `--limit`, `--kind` options +- ✅ `function_complexity` DB table for persistent storage -Introduce a `BaseExtractor` that owns the tree walk loop. Each language extractor declares a `nodeType → handler` map instead of reimplementing the traversal. Eliminates repeated walk-and-switch boilerplate across 9+ extractors. +**New file:** `src/complexity.js` (2,163 lines) -**Affected files:** `src/parser.js` → split into `src/parser/` +### 2.5.2 -- Community Detection & Drift ✅ -### 3.2 — Repository Pattern for Data Access +Louvain community detection at file or function level. -Consolidate all SQL into a single `Repository` class. Currently SQL is scattered across `builder.js`, `queries.js`, `embedder.js`, `watcher.js`, and `cycles.js`. 
+- ✅ Graphology-based Louvain algorithm for community assignment +- ✅ Modularity score computation +- ✅ Drift analysis: identify split/merge candidates between communities +- ✅ CLI: `codegraph communities` with `--level file|function` -``` -src/db/ - connection.js # Open, WAL mode, pragma tuning - migrations.js # Schema versions - repository.js # ALL data access methods (reads + writes) -``` +**New file:** `src/communities.js` (310 lines) -All prepared statements, index tuning, and schema knowledge live in one place. Consumers never see SQL. Enables an `InMemoryRepository` for fast unit tests. +### 2.5.3 -- Structure & Role Classification ✅ -**Affected files:** `src/db.js` → split into `src/db/`, SQL extracted from `builder.js`, `queries.js`, `embedder.js`, `watcher.js`, `cycles.js` +Directory structure graph with node role classification. -### 3.3 — Analysis / Formatting Separation +- ✅ Directory nodes and edges with cohesion, density, fan-in/fan-out metrics +- ✅ Node role classification: entry, core, utility, adapter, leaf, dead +- ✅ Framework entry point detection (route:, event:, command: prefixes) +- ✅ Hotspot detection: high fan-in x high complexity +- ✅ Module boundary analysis: high-cohesion directories with cross-boundary imports +- ✅ CLI: `codegraph structure`, `codegraph hotspots`, `codegraph roles` -Split `queries.js` (800+ lines) into pure analysis modules and presentation formatters. +**New file:** `src/structure.js` (668 lines) -``` -src/analysis/ # Pure data: take repository, return typed results - impact.js - call-chain.js - diff-impact.js - module-map.js - class-hierarchy.js +### 2.5.4 -- Execution Flow Tracing ✅ -src/formatters/ # Presentation: take data, produce strings - cli-formatter.js - json-formatter.js - table-formatter.js -``` +Forward BFS from framework entry points through callees to leaves. -Analysis modules return pure data. The CLI, MCP server, and programmatic API each pick their own formatter (or none). 
Eliminates the `*Data()` / `*()` dual-function pattern. +- ✅ Entry point enumeration with type classification +- ✅ Forward BFS trace with cycle detection +- ✅ CLI: `codegraph flow [name]` with `--list` and `--depth` options -**Affected files:** `src/queries.js` → split into `src/analysis/` + `src/formatters/` +**New file:** `src/flow.js` (362 lines) -### 3.4 — Builder Pipeline Architecture +### 2.5.5 -- Temporal Coupling (Co-change Analysis) ✅ -Refactor `buildGraph()` from a monolithic mega-function into explicit, independently testable pipeline stages. +Git history analysis for temporal file coupling. -```js -const pipeline = [ - collectFiles, // (rootDir, config) => filePaths[] - detectChanges, // (filePaths, db) => { changed, removed, isFullBuild } - parseFiles, // (filePaths, engineOpts) => Map - insertNodes, // (symbolMap, db) => nodeIndex - resolveImports, // (symbolMap, rootDir, aliases) => importEdges[] - buildCallEdges, // (symbolMap, nodeIndex) => callEdges[] - buildClassEdges, // (symbolMap, nodeIndex) => classEdges[] - resolveBarrels, // (edges, symbolMap) => resolvedEdges[] - insertEdges, // (allEdges, db) => stats -] -``` +- ✅ Jaccard similarity computation from commit history +- ✅ `co_changes`, `co_change_meta`, `file_commit_counts` DB tables +- ✅ Per-file and global co-change queries +- ✅ CLI: `codegraph co-change [file]` -Watch mode reuses the same stages (triggered per-file instead of per-project), eliminating the divergence between `watcher.js` and `builder.js` where bug fixes must be applied separately. +**New file:** `src/cochange.js` (502 lines) -**Affected files:** `src/builder.js`, `src/watcher.js` +### 2.5.6 -- Manifesto Rule Engine ✅ -### 3.5 — Unified Engine Interface +Configurable rule engine with warn/fail thresholds for function, file, and graph rules. -Replace scattered `engine.name === 'native'` branching with a Strategy pattern. Every consumer receives an engine object with the same API regardless of backend. 
+- ✅ Function rules: cognitive, cyclomatic, nesting depth +- ✅ File rules: imports, exports, LOC, fan-in, fan-out +- ✅ Graph rules: cycles, boundary violations +- ✅ Configurable via `.codegraphrc.json` `manifesto` section +- ✅ CLI: `codegraph manifesto` with table format -```js -const engine = createEngine(opts) // returns same interface for native or WASM -engine.parseFile(path, source) -engine.resolveImports(batch, rootDir, aliases) -engine.detectCycles(db) -``` +**New file:** `src/manifesto.js` (511 lines) -Consumers never branch on native vs WASM. Adding a third backend (e.g., remote parsing service) requires zero consumer changes. +### 2.5.7 -- Architecture Boundary Rules ✅ -**Affected files:** `src/parser.js`, `src/resolve.js`, `src/cycles.js`, `src/builder.js`, `src/native.js` +Architecture enforcement using glob patterns and presets. -### 3.6 — Qualified Names & Hierarchical Scoping +- ✅ Presets: hexagonal, layered, clean, onion +- ✅ Custom boundary definitions with allow/deny rules +- ✅ Violation detection from DB edges +- ✅ Integration with manifesto and check commands -Enrich the node model with scope information to reduce ambiguity. +**New file:** `src/boundaries.js` (347 lines) -```sql -ALTER TABLE nodes ADD COLUMN qualified_name TEXT; -- 'DateHelper.format' -ALTER TABLE nodes ADD COLUMN scope TEXT; -- 'DateHelper' -ALTER TABLE nodes ADD COLUMN visibility TEXT; -- 'public' | 'private' | 'protected' -``` +### 2.5.8 -- CI Validation Predicates (`check`) ✅ -Enables queries like "all methods of class X" without traversing edges. Reduces reliance on heuristic confidence scoring for name collisions. +Structured pass/fail checks for CI pipelines. 
-**Affected files:** `src/db.js`, `src/parser.js` (extractors), `src/queries.js`, `src/builder.js` +- ✅ `checkNoNewCycles` -- cycle predicate +- ✅ `checkMaxBlastRadius` -- blast radius predicate +- ✅ `checkNoSignatureChanges` -- signature stability predicate +- ✅ `checkNoBoundaryViolations` -- architecture predicate +- ✅ Composable result objects with pass/fail semantics +- ✅ MCP tool: `check` +- ✅ CLI: `codegraph check [ref]` with exit code 0/1 + +**New file:** `src/check.js` (433 lines) + +### 2.5.9 -- Composite Analysis Commands ✅ + +High-level commands that compose multiple analysis steps. + +- ✅ **Audit:** explain + impact + health + manifesto breaches in one call +- ✅ **Batch:** run same query against multiple targets for multi-agent dispatch +- ✅ **Triage:** risk-ranked audit queue using normalized fan-in, complexity, churn, MI signals + +**New files:** `src/audit.js` (424 lines), `src/batch.js` (91 lines), `src/triage.js` (274 lines) + +### 2.5.10 -- Hybrid Search ✅ + +BM25 keyword search + semantic vector search with RRF fusion. + +- ✅ FTS5 full-text index on node names and source previews +- ✅ BM25 keyword search via `ftsSearchData()` +- ✅ Hybrid search with configurable RRF fusion via `hybridSearchData()` +- ✅ Three search modes: `hybrid` (default), `semantic`, `keyword` +- ✅ 8 embedding model options (minilm, jina-small/base/code, nomic/v1.5, bge-large) + +**Affected file:** `src/embedder.js` (grew from 525 -> 1,113 lines) -### 3.7 — Composable MCP Tool Registry +### 2.5.11 -- Supporting Infrastructure ✅ -Replace the monolithic `TOOLS` array + `switch` dispatch in `mcp.js` with self-contained tool modules. +Cross-cutting utilities added during the expansion. 
+ +- ✅ **Pagination:** offset/limit with MCP defaults per command (`src/paginate.js`, 106 lines) +- ✅ **Snapshot:** SQLite DB backup/restore via VACUUM INTO (`src/snapshot.js`, 150 lines) +- ✅ **CODEOWNERS:** ownership integration for boundary analysis (`src/owners.js`, 360 lines) +- ✅ **Branch Compare:** structural diff between git refs (`src/branch-compare.js`, 569 lines) +- ✅ **Change Journal:** NDJSON event log for watch mode (`src/change-journal.js`, 131 lines) +- ✅ **Journal:** change journal validation/management (`src/journal.js`, 110 lines) +- ✅ **Update Check:** npm registry polling with 24h cache (`src/update-check.js`, 161 lines) + +### 2.5.12 -- MCP Tool Expansion ✅ + +MCP grew from 12 -> 25 tools, covering all new analysis capabilities. + +| New tool | Wraps | +|----------|-------| +| ✅ `structure` | `structureData` | +| ✅ `node_roles` | `rolesData` | +| ✅ `hotspots` | `hotspotsData` | +| ✅ `co_changes` | `coChangeData` | +| ✅ `execution_flow` | `flowData` | +| ✅ `list_entry_points` | `listEntryPointsData` | +| ✅ `complexity` | `complexityData` | +| ✅ `manifesto` | `manifestoData` | +| ✅ `communities` | `communitiesData` | +| ✅ `code_owners` | `ownersData` | +| ✅ `audit` | `auditData` | +| ✅ `batch_query` | `batchData` | +| ✅ `triage` | `triageData` | +| ✅ `branch_compare` | `branchCompareData` | +| ✅ `check` | `checkData` | + +**Affected file:** `src/mcp.js` (grew from 354 -> 1,212 lines) + +--- + +## Phase 3 -- Architectural Refactoring + +**Goal:** Restructure the codebase for modularity, testability, and long-term maintainability. These are internal improvements -- no new user-facing features, but they make every subsequent phase easier to build and maintain. + +> Reference: [generated/architecture.md](../../generated/architecture.md) -- full analysis with code examples and rationale. + +**Context:** Phase 2.5 added 18 modules and doubled the codebase without introducing shared abstractions. 
The original Phase 3 recommendations (designed for a 5K-line codebase) are now even more urgent at 17,830 lines. The priority ordering has been revised based on the actual growth patterns. + +### 3.1 -- Command/Query Separation ★ Critical + +Eliminate the `*Data()` / `*()` dual-function pattern replicated across 15 modules. Every analysis module (queries, audit, batch, check, cochange, communities, complexity, flow, manifesto, owners, structure, triage, branch-compare) currently implements both data extraction AND CLI formatting. + +Introduce a shared `CommandRunner` that handles the open-DB -> validate -> execute -> format -> paginate -> output lifecycle. Each command only implements unique query + analysis logic. Formatting is always separate and pluggable (CLI text, JSON, NDJSON, Mermaid). ``` -src/mcp/ - server.js # MCP server setup, transport, lifecycle - tool-registry.js # Dynamic tool registration + auto-discovery - tools/ - query-function.js # { schema, handler } per tool - file-deps.js - impact-analysis.js +src/ + commands/ # One file per command + query.js # { execute(args, ctx) -> data, format(data, opts) -> string } + impact.js + audit.js + check.js ... + + infrastructure/ + command-runner.js # Shared lifecycle + result-formatter.js # Shared formatting: table, JSON, NDJSON, Mermaid + test-filter.js # Shared --no-tests / isTestFile logic ``` -Adding a new MCP tool = adding a file. No other files change. +**Affected files:** All 15 modules with dual-function pattern, `src/cli.js`, `src/mcp.js` + +### 3.2 -- Repository Pattern for Data Access ★ Critical + +Consolidate all SQL into a single `Repository` class. Currently SQL is scattered across 20+ modules that each independently open the DB and write raw SQL inline. 
+ +``` +src/ + db/ + connection.js # Open, WAL mode, pragma tuning + migrations.js # Schema versions (currently 9 migrations) + repository.js # ALL data access methods across all 9+ tables + query-builder.js # Lightweight SQL builder for common filtered queries +``` -**Affected files:** `src/mcp.js` → split into `src/mcp/` +Add a query builder for the common pattern "find nodes WHERE kind IN (...) AND file NOT LIKE '%test%' ORDER BY ... LIMIT ? OFFSET ?". Not an ORM -- a thin SQL builder that eliminates string construction across 20 modules. -### 3.8 — CLI Command Objects +**Affected files:** `src/db.js` -> split into `src/db/`, SQL extracted from all modules -Move from inline Commander chains in `cli.js` to self-contained command modules. +### 3.3 -- Decompose queries.js (3,110 Lines) + +Split into pure analysis modules that return data and share no formatting concerns. ``` -src/cli/ - index.js # Commander setup, auto-discover commands - commands/ - build.js # { name, description, options, validate, execute } - query.js - impact.js - ... +src/ + analysis/ + symbol-lookup.js # queryNameData, whereData, listFunctionsData + impact.js # impactAnalysisData, fnImpactData, diffImpactData + dependencies.js # fileDepsData, fnDepsData, pathData + module-map.js # moduleMapData, statsData + context.js # contextData, explainData + roles.js # rolesData + + shared/ + constants.js # SYMBOL_KINDS, ALL_SYMBOL_KINDS, VALID_ROLES + filters.js # isTestFile, normalizeSymbol, kindIcon + generators.js # iterListFunctions, iterRoles, iterWhere ``` -Each command is independently testable by calling `execute()` directly. The CLI index auto-discovers and registers them. 
+**Affected files:** `src/queries.js` -> split into `src/analysis/` + `src/shared/` -**Affected files:** `src/cli.js` → split into `src/cli/` +### 3.4 -- Composable MCP Tool Registry -### 3.9 — Domain Error Hierarchy +Replace the monolithic 1,212-line `mcp.js` (25 tools in one switch dispatch) with self-contained tool modules. -Replace ad-hoc error handling (mix of thrown `Error`, returned `null`, `logger.warn()`, `process.exit(1)`) with structured domain errors. +``` +src/ + mcp/ + server.js # MCP server setup, transport, lifecycle + tool-registry.js # Auto-discovery + dynamic registration + middleware.js # Pagination, error handling, repo resolution + tools/ + query-function.js # { schema, handler } -- one per tool (25 files) + ... +``` + +Adding a new MCP tool = adding a file. No other files change. + +**Affected files:** `src/mcp.js` -> split into `src/mcp/` + +### 3.5 -- CLI Command Objects + +Move from 1,285 lines of inline Commander chains to self-contained command modules. -```js -class CodegraphError extends Error { constructor(message, { code, file, cause }) { ... } } -class ParseError extends CodegraphError { code = 'PARSE_FAILED' } -class DbError extends CodegraphError { code = 'DB_ERROR' } -class ConfigError extends CodegraphError { code = 'CONFIG_INVALID' } -class ResolutionError extends CodegraphError { code = 'RESOLUTION_FAILED' } -class EngineError extends CodegraphError { code = 'ENGINE_UNAVAILABLE' } +``` +src/ + cli/ + index.js # Commander setup, auto-discover commands + shared/ + output.js # --json, --ndjson, table, plain text + options.js # Shared options (--no-tests, --json, --db, etc.) + commands/ # 45 files, one per command + build.js # { name, description, options, validate, execute } + ... ``` -CLI catches domain errors and formats for humans. MCP returns structured error responses. No more `process.exit()` from library code. +Each command is independently testable by calling `execute()` directly. 
-**New file:** `src/errors.js` +**Affected files:** `src/cli.js` -> split into `src/cli/` -### 3.10 — Curated Public API Surface +### 3.6 -- Curated Public API Surface -Reduce `index.js` from ~40 re-exports to a curated public API. Use `package.json` `exports` field to enforce module boundaries. +Reduce `index.js` from 120+ exports to ~30 curated exports. Use `package.json` `exports` field to enforce module boundaries. ```json { "exports": { ".": "./src/index.js", "./cli": "./src/cli.js" } } ``` -Internal modules become truly internal. Consumers can only import from documented entry points. +Export only `*Data()` functions (the command execute functions). Never export CLI formatters. Group by domain. **Affected files:** `src/index.js`, `package.json` -### 3.11 — Embedder Subsystem Extraction +### 3.7 -- Domain Error Hierarchy -Restructure `embedder.js` (525 lines) into a standalone subsystem with pluggable vector storage. +Replace ad-hoc error handling (mix of thrown `Error`, returned `null`, `logger.warn()`, `process.exit(1)`) across 35 modules with structured domain errors. -``` -src/embeddings/ - index.js # Public API - model-registry.js # Model definitions, batch sizes, loading - generator.js # Source → text preparation → batch embedding - store.js # Vector storage (pluggable: SQLite blob, HNSW index) - search.js # Similarity search, RRF multi-query fusion +```js +class CodegraphError extends Error { constructor(message, { code, file, cause }) { ... 
} } +class ParseError extends CodegraphError { code = 'PARSE_FAILED' } +class DbError extends CodegraphError { code = 'DB_ERROR' } +class ConfigError extends CodegraphError { code = 'CONFIG_INVALID' } +class ResolutionError extends CodegraphError { code = 'RESOLUTION_FAILED' } +class EngineError extends CodegraphError { code = 'ENGINE_UNAVAILABLE' } +class AnalysisError extends CodegraphError { code = 'ANALYSIS_FAILED' } +class BoundaryError extends CodegraphError { code = 'BOUNDARY_VIOLATION' } ``` -Decouples embedding schema from the graph DB. The pluggable store interface enables future O(log n) ANN search (e.g., `hnswlib-node`) when symbol counts reach 50K+. +The CLI catches domain errors and formats for humans. MCP returns structured error responses. No more `process.exit()` from library code. -**Affected files:** `src/embedder.js` → split into `src/embeddings/` +**New file:** `src/errors.js` -### 3.12 — Testing Pyramid +### 3.8 -- Decompose complexity.js (2,163 Lines) -Add proper unit test layer below the existing integration tests. +Split the largest source file into a rules/engine architecture mirroring the parser plugin concept. -- Pure unit tests for extractors (pass AST node, assert symbols — no file I/O) -- Pure unit tests for BFS/Tarjan algorithms (pass adjacency list, assert result) -- Pure unit tests for confidence scoring (pass parameters, assert score) -- Repository mock for query tests (in-memory data, no SQLite) -- E2E tests that invoke the CLI binary and assert exit codes + stdout +``` +src/ + complexity/ + index.js # Public API: computeComplexity, complexityData + metrics.js # Halstead, MI, LOC/SLOC (language-agnostic) + engine.js # Walk AST + apply rules -> raw values + rules/ + javascript.js # JS/TS/TSX rules + python.js + go.js + rust.js + java.js + csharp.js + php.js + ruby.js +``` -The repository pattern (3.2) directly enables this: unit tests use `InMemoryRepository`, integration tests use `SqliteRepository`. 
+**Affected files:** `src/complexity.js` -> split into `src/complexity/` -### 3.13 — Event-Driven Pipeline +### 3.9 -- Builder Pipeline Architecture -Add an event/streaming architecture to the build pipeline for progress reporting, cancellation, and large-repo support. +Refactor `buildGraph()` (1,173 lines) from a mega-function into explicit, independently testable pipeline stages. ```js -pipeline.on('file:parsed', (file, symbols) => { /* progress */ }) -pipeline.on('file:indexed', (file, nodeCount) => { /* progress */ }) -pipeline.on('build:complete', (stats) => { /* summary */ }) -pipeline.on('error', (file, err) => { /* continue or abort */ }) -await pipeline.run(rootDir) +const pipeline = [ + collectFiles, // (rootDir, config) => filePaths[] + detectChanges, // (filePaths, db) => { changed, removed, isFullBuild } + parseFiles, // (filePaths, engineOpts) => Map + insertNodes, // (symbolMap, db) => nodeIndex + resolveImports, // (symbolMap, rootDir, aliases) => importEdges[] + buildCallEdges, // (symbolMap, nodeIndex) => callEdges[] + buildClassEdges, // (symbolMap, nodeIndex) => classEdges[] + resolveBarrels, // (edges, symbolMap) => resolvedEdges[] + insertEdges, // (allEdges, db) => stats + buildStructure, // (db, fileSymbols, rootDir) => structureStats + classifyRoles, // (db) => roleStats + computeComplexity, // (db, rootDir, engine) => complexityStats + emitChangeJournal, // (rootDir, changes) => void +] ``` -Unifies build and watch code paths. Large builds stream results to the DB incrementally instead of buffering in memory. +Watch mode reuses the same stages triggered per-file, eliminating the `watcher.js` divergence. -**Affected files:** `src/builder.js`, `src/watcher.js`, `src/cli.js` +**Affected files:** `src/builder.js`, `src/watcher.js` -### 3.14 — Subgraph Export Filtering +### 3.10 -- Embedder Subsystem Extraction -Add focus/filter options to the export module so visualizations are usable for real projects. 
+Restructure `embedder.js` (1,113 lines) -- which now contains 3 search engines -- into a standalone subsystem. -```bash -codegraph export --format dot --focus src/builder.js --depth 2 -codegraph export --format mermaid --filter "src/api/**" --kind function -codegraph export --format json --changed +``` +src/ + embeddings/ + index.js # Public API + models.js # 8 model definitions, batch sizes, loading + generator.js # Source -> text preparation -> batch embedding + stores/ + sqlite-blob.js # Current O(n) cosine similarity + fts5.js # BM25 keyword search + search/ + semantic.js # Vector similarity + keyword.js # FTS5 BM25 + hybrid.js # RRF fusion + strategies/ + structured.js # Structured text preparation + source.js # Raw source preparation ``` -The export module receives a subgraph specification (focus node + depth, file pattern, kind filter) and extracts the relevant subgraph before formatting. +The pluggable store interface enables future O(log n) ANN search (e.g., `hnswlib-node`) when symbol counts reach 50K+. -**Affected files:** `src/export.js`, `src/cli.js` +**Affected files:** `src/embedder.js` -> split into `src/embeddings/` -### 3.15 — Transitive Import-Aware Confidence +### 3.11 -- Unified Graph Model -Before falling back to proximity heuristics, walk the import graph from the caller file. If any import path (even indirect through barrel files) reaches a candidate, score it 0.9. Only fall back to proximity when no import path exists. +Unify the three parallel graph representations (structure.js, cochange.js, communities.js) into a shared in-memory graph model. 
-**Affected files:** `src/resolve.js`, `src/builder.js` +``` +src/ + graph/ + model.js # Shared in-memory graph (nodes + edges + metadata) + builders/ + dependency.js # Build from SQLite edges + structure.js # Build from file/directory hierarchy + temporal.js # Build from git history (co-changes) + algorithms/ + bfs.js # Breadth-first traversal + shortest-path.js # Path finding + tarjan.js # Cycle detection + louvain.js # Community detection + centrality.js # Fan-in/fan-out, betweenness + clustering.js # Cohesion, coupling, density + classifiers/ + roles.js # Node role classification + risk.js # Risk scoring +``` -### 3.16 — Query Result Caching +Algorithms become composable -- run community detection on the dependency graph, the temporal graph, or a merged graph. -Add a TTL/LRU cache between the analysis layer and the repository. Particularly valuable for MCP where an agent session may repeatedly query related symbols. +**Affected files:** `src/structure.js`, `src/cochange.js`, `src/communities.js`, `src/cycles.js`, `src/triage.js` -```js -class QueryCache { - constructor(db, maxAge = 60_000) { ... } - get(key) { ... } // key = query name + args hash - set(key, value) { ... } - invalidate() { ... } // called after any DB mutation -} +### 3.12 -- Qualified Names & Hierarchical Scoping + +Enrich the node model with scope information to reduce ambiguity. + +```sql +ALTER TABLE nodes ADD COLUMN qualified_name TEXT; -- 'DateHelper.format' +ALTER TABLE nodes ADD COLUMN scope TEXT; -- 'DateHelper' +ALTER TABLE nodes ADD COLUMN visibility TEXT; -- 'public' | 'private' | 'protected' ``` -### 3.17 — Configuration Profiles +Enables queries like "all methods of class X" without traversing edges. Reduces reliance on heuristic confidence scoring. -Support profile-based configuration for monorepos with multiple services. 
+**Affected files:** `src/db.js`, `src/parser.js` (extractors), `src/queries.js`, `src/builder.js` -```json -{ - "profiles": { - "backend": { "include": ["services/api/**"], "build": { "dbPath": ".codegraph/api.db" } }, - "frontend": { "include": ["apps/web/**"], "build": { "dbPath": ".codegraph/web.db" } } - } -} -``` +### 3.13 -- Testing Pyramid with InMemoryRepository -```bash -codegraph build --profile backend -``` +The repository pattern (3.2) enables true unit testing: + +- Pure unit tests for graph algorithms (pass adjacency list, assert result) +- Pure unit tests for risk/confidence scoring (pass parameters, assert score) +- `InMemoryRepository` for query tests (no SQLite, instant setup) +- Existing 59 test files continue as integration tests + +**Current gap:** Many "unit" tests still hit SQLite because there's no repository abstraction. + +### 3.14 -- Remaining Items (Lower Priority) + +These items from the original Phase 3 are still valid but less urgent: -**Affected files:** `src/config.js`, `src/cli.js` +- **Event-driven pipeline:** Add event/streaming architecture for progress reporting, cancellation, and large-repo support. +- **Unified engine interface (Strategy):** Replace scattered `engine.name === 'native'` branching. Less critical now that native is the primary path. +- **Subgraph export filtering:** `codegraph export --focus src/builder.js --depth 2` for usable visualizations. +- **Transitive import-aware confidence:** Walk import graph before falling back to proximity heuristics. +- **Query result caching:** LRU/TTL cache between analysis layer and repository. More valuable now with 25 MCP tools. +- **Configuration profiles:** `--profile backend` for monorepos with multiple services. +- **Pagination standardization:** SQL-level LIMIT/OFFSET in repository + command runner shaping. 
--- -## Phase 4 — TypeScript Migration +## Phase 4 -- TypeScript Migration **Goal:** Migrate the codebase from plain JavaScript to TypeScript, leveraging the clean module boundaries established in Phase 3. Incremental module-by-module migration starting from leaf modules inward. -**Why after Phase 3:** The architectural refactoring creates small, well-bounded modules with explicit interfaces (Repository, Engine, BaseExtractor, Pipeline stages, Command objects). These are natural type boundaries — typing monolithic 2,000-line files that are about to be split would be double work. +**Why after Phase 3:** The architectural refactoring creates small, well-bounded modules with explicit interfaces (Repository, Engine, BaseExtractor, Pipeline stages, Command objects). These are natural type boundaries -- typing monolithic 2,000-line files that are about to be split would be double work. -### 4.1 — Project Setup +### 4.1 -- Project Setup - Add `typescript` as a devDependency - Create `tsconfig.json` with strict mode, ES module output, path aliases matching the Phase 3 module structure @@ -494,7 +654,7 @@ codegraph build --profile backend **Affected files:** `package.json`, `biome.json`, new `tsconfig.json` -### 4.2 — Core Type Definitions +### 4.2 -- Core Type Definitions Define TypeScript interfaces for all abstractions introduced in Phase 3: @@ -512,28 +672,28 @@ interface Extractor { language: string; handlers: Record; } interface Command { name: string; options: OptionDef[]; validate(args: unknown, opts: unknown): void; execute(args: unknown, opts: unknown): Promise; } ``` -These interfaces serve as the migration contract — each module is migrated to satisfy its interface. +These interfaces serve as the migration contract -- each module is migrated to satisfy its interface. 
**New file:** `src/types.ts` -### 4.3 — Leaf Module Migration +### 4.3 -- Leaf Module Migration Migrate modules with no internal dependencies first: | Module | Notes | |--------|-------| -| `src/errors.ts` | Domain error hierarchy (Phase 3.9) | +| `src/errors.ts` | Domain error hierarchy (Phase 3.7) | | `src/logger.ts` | Minimal, no internal deps | | `src/constants.ts` | Pure data | | `src/config.ts` | Config types derived from `.codegraphrc.json` schema | | `src/db/connection.ts` | SQLite connection wrapper | | `src/db/migrations.ts` | Schema version management | -| `src/formatters/*.ts` | Pure input→string transforms | +| `src/formatters/*.ts` | Pure input->string transforms | | `src/paginate.ts` | Generic pagination helpers | Allow `.js` and `.ts` to coexist during migration (`allowJs: true` in tsconfig). -### 4.4 — Core Module Migration +### 4.4 -- Core Module Migration Migrate modules that implement Phase 3 interfaces: @@ -548,7 +708,7 @@ Migrate modules that implement Phase 3 interfaces: | `src/analysis/*.ts` | Typed analysis results (impact scores, call chains) | | `src/resolve.ts` | Import resolution with confidence types | -### 4.5 — Orchestration & Public API Migration +### 4.5 -- Orchestration & Public API Migration Migrate top-level orchestration and entry points: @@ -561,7 +721,7 @@ Migrate top-level orchestration and entry points: | `src/cli/*.ts` | Command objects with typed options | | `src/index.ts` | Curated public API with proper export types | -### 4.6 — Test Migration +### 4.6 -- Test Migration - Migrate test files from `.js` to `.ts` - Add type-safe test utilities and fixture builders @@ -570,15 +730,17 @@ Migrate top-level orchestration and entry points: **Verification:** All existing tests pass. `tsc --noEmit` succeeds with zero errors. No `any` escape hatches except at FFI boundaries (napi-rs addon, tree-sitter WASM). 
-**Affected files:** All `src/**/*.js` → `src/**/*.ts`, all `tests/**/*.js` → `tests/**/*.ts`, `package.json`, `biome.json` +**Affected files:** All `src/**/*.js` -> `src/**/*.ts`, all `tests/**/*.js` -> `tests/**/*.ts`, `package.json`, `biome.json` --- -## Phase 5 — Intelligent Embeddings +## Phase 5 -- Intelligent Embeddings **Goal:** Dramatically improve semantic search quality by embedding natural-language descriptions instead of raw code. -### 5.1 — LLM Description Generator +> **Phase 5.3 (Hybrid Search) was completed early** during Phase 2.5 -- FTS5 BM25 + semantic search with RRF fusion is already shipped in v2.6.0. + +### 5.1 -- LLM Description Generator For each function/method/class node, generate a concise natural-language description: @@ -606,7 +768,7 @@ For each function/method/class node, generate a concise natural-language descrip **New file:** `src/describer.js` -### 5.2 — Enhanced Embedding Pipeline +### 5.2 -- Enhanced Embedding Pipeline - When descriptions exist, embed the description text instead of raw code - Keep raw code as fallback when no description is available @@ -617,41 +779,32 @@ For each function/method/class node, generate a concise natural-language descrip **Affected files:** `src/embedder.js` -### 5.3 — Hybrid Search - -Combine vector similarity with keyword matching. - -- **Vector search:** Cosine similarity against embeddings (existing) -- **Keyword search:** SQLite FTS5 full-text index on `nodes.name` + `descriptions` -- **Fusion:** Weighted RRF — `score = a * vector_rank + (1-a) * keyword_rank` -- Default `a = 0.7` (favor semantic), configurable - -**New DB migration:** Add FTS5 virtual table for text search. +### ~~5.3 -- Hybrid Search~~ ✅ Completed in Phase 2.5 -**Affected files:** `src/embedder.js`, `src/db.js` +Shipped in v2.6.0. FTS5 BM25 keyword search + semantic vector search with RRF fusion. Three search modes: `hybrid` (default), `semantic`, `keyword`. 
-### 5.4 — Build-time Semantic Metadata +### 5.4 -- Build-time Semantic Metadata Enrich nodes with LLM-generated metadata beyond descriptions. Computed incrementally at build time (only for changed nodes), stored as columns on the `nodes` table. | Column | Content | Example | |--------|---------|---------| | `side_effects` | Mutation/IO tags | `"writes DB"`, `"sends email"`, `"mutates state"` | -| `complexity_notes` | Responsibility count, cohesion rating | `"3 responsibilities, low cohesion — consider splitting"` | +| `complexity_notes` | Responsibility count, cohesion rating | `"3 responsibilities, low cohesion -- consider splitting"` | | `risk_score` | Fragility metric from graph centrality + LLM assessment | `0.82` (high fan-in + complex logic) | -- MCP tool: `assess ` — returns complexity rating + specific concerns +- MCP tool: `assess ` -- returns complexity rating + specific concerns - Cascade invalidation: when a node changes, mark dependents for re-enrichment **Depends on:** 5.1 (LLM provider abstraction) -### 5.5 — Module Summaries +### 5.5 -- Module Summaries Aggregate function descriptions + dependency direction into file-level narratives. 
-- `module_summaries` table — one entry per file, re-rolled when any contained node changes -- MCP tool: `explain_module ` — returns module purpose, key exports, role in the system -- `naming_conventions` metadata per module — detected patterns (camelCase, snake_case, verb-first), flag outliers +- `module_summaries` table -- one entry per file, re-rolled when any contained node changes +- MCP tool: `explain_module ` -- returns module purpose, key exports, role in the system +- `naming_conventions` metadata per module -- detected patterns (camelCase, snake_case, verb-first), flag outliers **Depends on:** 5.1 (function-level descriptions must exist first) @@ -659,11 +812,11 @@ Aggregate function descriptions + dependency direction into file-level narrative --- -## Phase 6 — Natural Language Queries +## Phase 6 -- Natural Language Queries **Goal:** Allow developers to ask questions about their codebase in plain English. -### 6.1 — Query Engine +### 6.1 -- Query Engine ```bash codegraph ask "How does the authentication flow work?" @@ -685,11 +838,11 @@ codegraph ask "How does the authentication flow work?" - 1-hop caller/callee names for each match - Total context budget: ~8K tokens (configurable) -**Requires:** LLM API key configured (no fallback — this is inherently an LLM feature). +**Requires:** LLM API key configured (no fallback -- this is inherently an LLM feature). **New file:** `src/nlquery.js` -### 6.2 — Conversational Sessions +### 6.2 -- Conversational Sessions Multi-turn conversations with session memory. @@ -703,21 +856,21 @@ codegraph sessions clear - Store conversation history in SQLite table `sessions` - Include prior Q&A pairs in subsequent prompts -### 6.3 — MCP Integration +### 6.3 -- MCP Integration -New MCP tool: `ask_codebase` — natural language query via MCP. +New MCP tool: `ask_codebase` -- natural language query via MCP. Enables AI coding agents (Claude Code, Cursor, etc.) to ask codegraph questions about the codebase. 
**Affected files:** `src/mcp.js` -### 6.4 — LLM-Narrated Graph Queries +### 6.4 -- LLM-Narrated Graph Queries Graph traversal + LLM narration for questions that require both structural data and natural-language explanation. Each query walks the graph first, then sends the structural result to the LLM for narration. | Query | Graph operation | LLM adds | |-------|----------------|----------| -| `trace_flow ` | BFS from entry point to leaves | Sequential narrative: "1. handler validates → 2. calls createOrder → 3. writes DB" | +| `trace_flow ` | BFS from entry point to leaves | Sequential narrative: "1. handler validates -> 2. calls createOrder -> 3. writes DB" | | `trace_upstream ` | Recursive caller walk | Ranked suspects: "most likely cause is X because it modifies the same state" | | `effect_analysis ` | Full callee tree walk, aggregate `side_effects` | "Calling X will: write to DB (via Y), send email (via Z)" | | `dependency_path ` | Shortest path(s) between two symbols | Narrates each hop: "A imports X from B because A needs to validate tokens" | @@ -726,24 +879,24 @@ Pre-computed `flow_narratives` table caches results for key entry points at buil **Depends on:** 5.4 (`side_effects` metadata), 5.1 (descriptions for narration context) -### 6.5 — Onboarding & Navigation Tools +### 6.5 -- Onboarding & Navigation Tools Help new contributors and AI agents orient in an unfamiliar codebase. 
-- `entry_points` query — graph finds roots (high fan-out, low fan-in) + LLM ranks by importance -- `onboarding_guide` command — generates a reading order based on dependency layers -- MCP tool: `get_started` — returns ordered list: "start here, then read this, then this" -- `change_plan ` — LLM reads description, graph identifies relevant modules, returns touch points and test coverage gaps +- `entry_points` query -- graph finds roots (high fan-out, low fan-in) + LLM ranks by importance +- `onboarding_guide` command -- generates a reading order based on dependency layers +- MCP tool: `get_started` -- returns ordered list: "start here, then read this, then this" +- `change_plan ` -- LLM reads description, graph identifies relevant modules, returns touch points and test coverage gaps **Depends on:** 5.5 (module summaries for context), 6.1 (query engine) --- -## Phase 7 — Expanded Language Support +## Phase 7 -- Expanded Language Support -**Goal:** Go from 12 → 20 supported languages. +**Goal:** Go from 11 -> 19 supported languages. -### 7.1 — Batch 1: High Demand +### 7.1 -- Batch 1: High Demand | Language | Extensions | Grammar | Effort | |----------|-----------|---------|--------| @@ -752,7 +905,7 @@ Help new contributors and AI agents orient in an unfamiliar codebase. | Kotlin | `.kt`, `.kts` | `tree-sitter-kotlin` | Low | | Swift | `.swift` | `tree-sitter-swift` | Medium | -### 7.2 — Batch 2: Growing Ecosystems +### 7.2 -- Batch 2: Growing Ecosystems | Language | Extensions | Grammar | Effort | |----------|-----------|---------|--------| @@ -761,7 +914,7 @@ Help new contributors and AI agents orient in an unfamiliar codebase. | Lua | `.lua` | `tree-sitter-lua` | Low | | Zig | `.zig` | `tree-sitter-zig` | Low | -### 7.3 — Parser Abstraction Layer +### 7.3 -- Parser Abstraction Layer Extract shared patterns from existing extractors into reusable helpers. @@ -777,20 +930,23 @@ Extract shared patterns from existing extractors into reusable helpers. 
--- -## Phase 8 — GitHub Integration & CI +## Phase 8 -- GitHub Integration & CI **Goal:** Bring codegraph's analysis into pull request workflows. -### 8.1 — Reusable GitHub Action +> **Note:** Phase 2.5 delivered `codegraph check` (CI validation predicates with exit code 0/1), which provides the foundation for GitHub Action integration. The boundary violation, blast radius, and cycle detection predicates are already available. + +### 8.1 -- Reusable GitHub Action A reusable GitHub Action that runs on PRs: 1. `codegraph build` on the repository 2. `codegraph diff-impact` against the PR's base branch -3. `codegraph cycles` to detect new circular dependencies +3. `codegraph check --staged` to run CI predicates (cycles, blast radius, signatures, boundaries) 4. Posts a PR comment summarizing: - Number of affected functions and files - New cycles introduced (if any) + - Boundary violations - Top impacted functions with caller counts **Configuration via `.codegraphrc.json`:** @@ -799,11 +955,11 @@ A reusable GitHub Action that runs on PRs: { "ci": { "failOnCycles": true, "impactThreshold": 50 } } ``` -**Fail conditions:** Configurable — fail if new cycles or impact exceeds threshold. +**Fail conditions:** Configurable -- fail if new cycles or impact exceeds threshold. **New file:** `.github/actions/codegraph-ci/action.yml` -### 8.2 — PR Review Integration +### 8.2 -- PR Review Integration ```bash codegraph review --pr @@ -820,36 +976,36 @@ Requires `gh` CLI. 
For each changed function: **LLM-enhanced mode** (when LLM provider configured): - **Risk labels per node**: `low` (cosmetic / internal), `medium` (behavior change), `high` (breaking / public API) -- **Review focus ranking**: rank affected files by risk × blast radius — "review this file first" +- **Review focus ranking**: rank affected files by risk x blast radius -- "review this file first" - **Critical path highlighting**: shortest path from a changed function to a high-fan-in entry point - **Test coverage gaps**: cross-reference affected code with test file graph edges **New file:** `src/github.js` -### 8.3 — Visual Impact Graphs for PRs +### 8.3 -- Visual Impact Graphs for PRs Extend the existing `diff-impact --format mermaid` foundation with CI automation and LLM annotations. **CI automation** (GitHub Action): 1. `codegraph build .` (incremental, fast on CI cache) 2. `codegraph diff-impact $BASE_REF --format mermaid -T` to generate the graph -3. Post as PR comment — GitHub renders Mermaid natively in markdown +3. Post as PR comment -- GitHub renders Mermaid natively in markdown 4. 
Update on new pushes (edit the existing comment) **LLM-enriched annotations** (when provider configured): - For each changed function: one-line summary of WHAT changed (from diff hunks) -- For each affected caller: WHY it's affected — what behavior might change downstream -- Node colors shift from green → yellow → red based on risk labels +- For each affected caller: WHY it's affected -- what behavior might change downstream +- Node colors shift from green -> yellow -> red based on risk labels - Overall PR risk score (aggregate of node risks weighted by centrality) **Historical context overlay:** - Annotate nodes with churn data: "this function changed 12 times in the last 30 days" - Highlight fragile nodes: high churn + high fan-in = high breakage risk -- Track blast radius trends: "this PR's blast radius is 2× larger than your average" +- Track blast radius trends: "this PR's blast radius is 2x larger than your average" **Depends on:** 8.1 (GitHub Action), 5.4 (`risk_score`, `side_effects`) -### 8.4 — SARIF Output +### 8.4 -- SARIF Output Add SARIF output format for cycle detection. SARIF integrates with GitHub Code Scanning, showing issues inline in the PR. @@ -857,9 +1013,9 @@ Add SARIF output format for cycle detection. 
SARIF integrates with GitHub Code S --- -## Phase 9 — Interactive Visualization & Advanced Features +## Phase 9 -- Interactive Visualization & Advanced Features -### 9.1 — Interactive Web Visualization +### 9.1 -- Interactive Web Visualization ```bash codegraph viz @@ -867,19 +1023,21 @@ codegraph viz Opens a local web UI at `localhost:3000` with: -- Force-directed graph layout (D3.js, inline — no external dependencies) +- Force-directed graph layout (D3.js, inline -- no external dependencies) - Zoom, pan, click-to-expand - Node coloring by type (file=blue, function=green, class=purple) - Edge styling by type (imports=solid, calls=dashed, extends=bold) - Search bar for finding nodes by name - Filter panel: toggle node kinds, confidence thresholds, test files - Code preview on hover (reads from source files) +- **Role-based coloring:** entry=orange, core=blue, utility=green, adapter=yellow, dead=gray (from structure.js roles) +- **Community overlay:** color by Louvain community assignment **Data source:** Export JSON from DB, serve via lightweight HTTP server. **New file:** `src/visualizer.js` -### 9.2 — Dead Code Detection +### 9.2 -- Dead Code Detection ```bash codegraph dead @@ -888,9 +1046,11 @@ codegraph dead --exclude-exports --exclude-tests Find functions/methods/classes with zero incoming edges (never called). Filters for exports, test files, and entry points. +> **Note:** Phase 2.5 added role classification (`dead` role in structure.js) which provides the foundation. This extends it with a dedicated command and smarter filtering. + **Affected files:** `src/queries.js` -### 9.3 — Cross-Repository Support (Monorepo) +### 9.3 -- Cross-Repository Support (Monorepo) Support multi-package monorepos with cross-package edges. @@ -900,7 +1060,7 @@ Support multi-package monorepos with cross-package edges. 
- `codegraph build --workspace` to scan all packages - Impact analysis across package boundaries -### 9.4 — Agentic Search +### 9.4 -- Agentic Search Recursive reference-following search that traces connections. @@ -916,13 +1076,13 @@ codegraph agent-search "payment processing" 4. Follow the most relevant references (up to configurable depth) 5. Return the full chain of related code -**Use case:** "Find everything related to payment processing" → finds payment functions → follows to validation → follows to database layer → returns complete picture. +**Use case:** "Find everything related to payment processing" -> finds payment functions -> follows to validation -> follows to database layer -> returns complete picture. -**Requires:** LLM for relevance re-ranking (optional — degrades to BFS without LLM). +**Requires:** LLM for relevance re-ranking (optional -- degrades to BFS without LLM). **New file:** `src/agentic-search.js` -### 9.5 — Refactoring Analysis +### 9.5 -- Refactoring Analysis LLM-powered structural analysis that identifies refactoring opportunities. The graph provides the structural data; the LLM interprets it. @@ -935,16 +1095,18 @@ LLM-powered structural analysis that identifies refactoring opportunities. The g | `hotspots` | High fan-in + high fan-out + on many paths | Ranked fragility report with explanations, `risk_score` per node | | `boundary_analysis` | Graph clustering (tightly-coupled groups spanning modules) | Reorganization suggestions: "these 4 functions in 3 files all deal with auth" | +> **Note:** `hotspots` and `boundary_analysis` already have data foundations from Phase 2.5 (structure.js hotspots, boundaries.js evaluation). This phase adds LLM interpretation on top. 
+ **Depends on:** 5.4 (`risk_score`, `complexity_notes`), 5.5 (module summaries) -### 9.6 — Auto-generated Docstrings +### 9.6 -- Auto-generated Docstrings ```bash codegraph annotate codegraph annotate --changed-only ``` -LLM-generated docstrings aware of callers, callees, and types. Diff-aware: only regenerate for functions whose code or dependencies changed. Stores in `docstrings` column on nodes table — does not modify source files unless explicitly requested. +LLM-generated docstrings aware of callers, callees, and types. Diff-aware: only regenerate for functions whose code or dependencies changed. Stores in `docstrings` column on nodes table -- does not modify source files unless explicitly requested. **Depends on:** 5.1 (LLM provider abstraction), 5.4 (side effects context) @@ -960,13 +1122,14 @@ Each phase includes targeted verification: |-------|-------------| | **1** | Benchmark native vs WASM parsing on a large repo, verify identical output from both engines | | **2** | `npm test`, manual MCP client test for all tools, config loading tests | -| **3** | All existing tests pass; each refactored module produces identical output to the pre-refactoring version; unit tests for pure analysis modules | +| **2.5** | All 59 test files pass; integration tests for every new command; engine parity tests | +| **3** | All existing tests pass; each refactored module produces identical output to the pre-refactoring version; unit tests for pure analysis modules; InMemoryRepository tests | | **4** | `tsc --noEmit` passes with zero errors; all existing tests pass after migration; no runtime behavior changes | | **5** | Compare `codegraph search` quality before/after descriptions; verify `side_effects` and `risk_score` populated for LLM-enriched builds | | **6** | `codegraph ask "How does import resolution work?"` against codegraph itself; verify `trace_flow` and `get_started` produce coherent narration | | **7** | Parse sample files for each new language, verify 
definitions/calls/imports | | **8** | Test PR in a fork, verify GitHub Action comment with Mermaid graph and risk labels is posted | -| **9** | `codegraph viz` loads; `hotspots` returns ranked list; `split_analysis` produces actionable output | +| **9** | `codegraph viz` loads; `hotspots` returns ranked list with LLM commentary; `split_analysis` produces actionable output | **Full integration test** after all phases: @@ -988,8 +1151,8 @@ codegraph viz Technology changes to monitor that may unlock future improvements. -- **`node:sqlite` (Node.js built-in)** — **primary target.** Zero native dependencies, eliminates C++ addon breakage on Node major releases (`better-sqlite3` already broken on Node 24/25). Currently Stability 1.1 (Active Development) as of Node 25.x. Adopt when it reaches Stability 2, or use as a fallback alongside `better-sqlite3` (dual-engine pattern like native/WASM parsing). Backed by the Node.js project — no startup risk. -- **`libsql` (SQLite fork by Turso)** — monitor only. Drop-in `better-sqlite3` replacement with built-in DiskANN vector search. However, Turso is pivoting engineering focus to Limbo (full Rust SQLite rewrite), leaving libsql as legacy. Pre-1.0 (v0.5.x) with uncertain long-term maintenance. Low switching cost (API-compatible, data is standard SQLite), but not worth adopting until the Turso/Limbo situation clarifies. +- **`node:sqlite` (Node.js built-in)** -- **primary target.** Zero native dependencies, eliminates C++ addon breakage on Node major releases (`better-sqlite3` already broken on Node 24/25). Currently Stability 1.1 (Active Development) as of Node 25.x. Adopt when it reaches Stability 2, or use as a fallback alongside `better-sqlite3` (dual-engine pattern like native/WASM parsing). Backed by the Node.js project -- no startup risk. +- **`libsql` (SQLite fork by Turso)** -- monitor only. Drop-in `better-sqlite3` replacement with built-in DiskANN vector search. 
However, Turso is pivoting engineering focus to Limbo (full Rust SQLite rewrite), leaving libsql as legacy. Pre-1.0 (v0.5.x) with uncertain long-term maintenance. Low switching cost (API-compatible, data is standard SQLite), but not worth adopting until the Turso/Limbo situation clarifies. --- diff --git a/generated/architecture.md b/generated/architecture.md index 1c3f4db0..bc9e5fa6 100644 --- a/generated/architecture.md +++ b/generated/architecture.md @@ -1,522 +1,402 @@ -# Codegraph Architectural Audit — Cold Analysis +# Codegraph Architectural Audit — Revised Analysis > **Scope:** Unconstrained redesign proposals. No consideration for migration effort or backwards compatibility. What would the ideal architecture look like? +> +> **Revision context:** The original audit (Feb 22, 2026) analyzed v1.4.0 with ~12 source modules totaling ~5K lines. Since then, the codebase grew to v2.6.0 with 35 source modules totaling 17,830 lines — a 3.5x expansion. 18 new modules were added, MCP tools went from 12 to 25, CLI commands from ~20 to 45, and `index.js` exports from ~40 to 120+. This revision re-evaluates every recommendation against the actual codebase as it stands today. --- -## 1. parser.js Is a Monolith — Split Into a Plugin System +## What Changed Since the Original Audit -**Current state:** `parser.js` is 2,215 lines containing 9 language extractors, the WASM/native engine abstraction, the language registry, tree walking helpers, and the unified parse API — all in one file. +Before diving into recommendations, here's what happened: -**Problem:** Adding or modifying a language extractor forces you to work inside a 2K-line file alongside unrelated extractors. The extractors share repetitive patterns (walk tree → switch on node type → push to arrays) but each reimplements the loop. Testing a single language requires importing the entire parser surface. 
+| Metric | Feb 2026 (v1.4.0) | Mar 2026 (v2.6.0) | Growth | +|--------|-------------------|-------------------|--------| +| Source modules | ~12 | 35 | 2.9x | +| Total source lines | ~5,000 | 17,830 | 3.5x | +| `queries.js` | 823 lines | 3,110 lines | 3.8x | +| `mcp.js` | 354 lines | 1,212 lines | 3.4x | +| `cli.js` | -- | 1,285 lines | -- | +| `builder.js` | 554 lines | 1,173 lines | 2.1x | +| `embedder.js` | 525 lines | 1,113 lines | 2.1x | +| `complexity.js` | -- | 2,163 lines | New | +| MCP tools | 12 | 25 | 2.1x | +| CLI commands | ~20 | 45 | 2.3x | +| `index.js` exports | ~40 | 120+ | 3x | +| Test files | ~15 | 59 | 3.9x | -**Ideal architecture:** - -``` -src/ - parser/ - index.js # Public API: parseFileAuto, parseFilesAuto, resolveEngine - registry.js # LANGUAGE_REGISTRY + extension mapping - engine.js # Native/WASM init, engine resolution, grammar loading - tree-utils.js # findChild, findParentClass, walkTree helpers - base-extractor.js # Shared extraction framework (the walk loop + accumulator) - extractors/ - javascript.js # JS/TS/TSX extractor - python.js - go.js - rust.js - java.js - csharp.js - ruby.js - php.js - hcl.js -``` - -**Key design change:** Introduce a `BaseExtractor` that owns the tree walk loop and provides hook methods per node type. Each language extractor declares a node-type → handler map instead of reimplementing the traversal: - -```js -// Conceptual — not real API -export default { - language: 'python', - handlers: { - function_definition: (node, ctx) => { ctx.addDefinition(...) }, - call: (node, ctx) => { ctx.addCall(...) }, - import_statement: (node, ctx) => { ctx.addImport(...) }, - } -} -``` - -This eliminates the repeated walk-and-switch boilerplate across 9 extractors while keeping language-specific logic isolated. Each extractor becomes independently testable and the registration is declarative. 
+**Key pattern observed:** Every new feature (audit, batch, boundaries, check, cochange, communities, complexity, flow, manifesto, owners, structure, triage) was added as a standalone module following the same internal pattern: raw SQL + BFS/traversal logic + CLI formatting + JSON output + `*Data()` / `*()` dual functions. No shared abstractions were introduced. The original architectural debt wasn't addressed -- it was replicated 15 times. --- -## 2. The Database Layer Is Too Thin — Introduce a Repository Pattern +## 1. The Dual-Function Anti-Pattern Is Now the Dominant Architecture Problem -**Current state:** `db.js` is 130 lines — it opens SQLite, runs migrations, and that's it. All actual SQL lives scattered across `builder.js`, `queries.js`, `embedder.js`, `watcher.js`, and `cycles.js`. Every consumer writes raw SQL inline. +**Original analysis (S3):** `queries.js` mixes data access, graph algorithms, and presentation. The `*Data()` / `*()` dual-function pattern was identified as a workaround for coupling. -**Problems:** -- SQL duplication (similar node/edge lookups written multiple times in different modules) -- No single place to understand or optimize the query surface -- Schema knowledge leaks everywhere — if a column changes, you grep the entire codebase -- No abstraction boundary for swapping storage engines (e.g., moving to DuckDB or an in-memory graph for tests) - -**Ideal architecture:** +**What happened:** Every new module adopted the same pattern. There are now **15+ modules** each implementing both data extraction AND CLI formatting: ``` -src/ - db/ - connection.js # Open, WAL mode, pragma tuning - migrations.js # Schema versions - repository.js # ALL data access methods - types.js # TS-style JSDoc type defs for Node, Edge, Embedding +queries.js -> queryNameData() / queryName(), impactAnalysisData() / impactAnalysis(), ... 
+audit.js -> auditData() / audit() +batch.js -> batchData() / batch() +check.js -> checkData() / check() +cochange.js -> coChangeData() / coChange(), coChangeTopData() / coChangeTop() +communities.js -> communitiesData() / communities() +complexity.js -> complexityData() / complexity() +flow.js -> flowData() / flow() +manifesto.js -> manifestoData() / manifesto() +owners.js -> ownersData() / owners() +structure.js -> structureData() / structure(), hotspotsData() / hotspots() +triage.js -> triageData() / triage() +branch-compare -> branchCompareData() / branchCompare() ``` -`repository.js` would expose a complete data access API: +Each of these modules independently handles: DB opening, SQL execution, result shaping, pagination integration, CLI formatting, JSON output, and `--no-tests` filtering. The repetition is massive. -```js -// Writes -insertNode(node) -insertEdge(edge) -insertEmbeddings(batch) -upsertFileHash(file, hash, mtime) -deleteFileNodes(file) -deleteFileEdges(file) - -// Reads -findNodesByName(name, opts?) -findNodesByFile(file, opts?) -findEdgesFrom(nodeId, kind?) -findEdgesTo(nodeId, kind?) -getFileHash(file) -getChangedFiles(allFiles) -getAllEmbeddings() -getEmbeddingMeta() - -// Graph traversals (currently in queries.js as raw SQL + BFS) -getTransitiveCallers(nodeId, depth) -getTransitiveDependents(file, depth) -getClassHierarchy(classNodeId) -``` - -All prepared statements live here. All index tuning happens here. Consumers never see SQL. - -**Secondary benefit:** This enables an `InMemoryRepository` for tests — no temp file cleanup, instant setup, true unit isolation. - ---- - -## 3. queries.js Mixes Data Access, Graph Algorithms, and Presentation - -**Current state:** `queries.js` (823 lines) contains SQL queries, BFS traversal logic, formatting/printing, JSON serialization, and CLI output — all interleaved. Each "query command" exists as both a `*Data()` function (returns object) and a presentation function (prints to stdout). 
- -**Problem:** The presentation layer (stdout formatting, `kindIcon()`, table printing) is coupled to the analysis layer (BFS, impact scoring). You can't reuse the BFS logic in the MCP server without also pulling in the CLI formatting. The `*Data()`/`*()` dual-function pattern is a workaround for this coupling. - -**Ideal architecture — three layers:** +**Ideal architecture -- Command + Query separation with shared infrastructure:** ``` src/ - analysis/ - impact.js # impactAnalysis: BFS over edges, returns typed result - call-chain.js # fnDeps, fnImpact: transitive caller/callee traversal - diff-impact.js # Git diff → affected functions → blast radius - module-map.js # Connectivity ranking - class-hierarchy.js # Inheritance resolution - - formatters/ - cli-formatter.js # Human-readable stdout output - json-formatter.js # --json flag handling - table-formatter.js # Tabular output for module-map, list-functions + commands/ # One file per command + query.js # { execute(args, ctx) -> data, format(data, opts) -> string } + impact.js + audit.js + check.js + ... + + infrastructure/ + command-runner.js # Shared lifecycle: open DB -> validate -> execute -> format -> paginate + result-formatter.js # Shared formatting: table, JSON, NDJSON, Mermaid + pagination.js # Shared pagination with consistent interface + test-filter.js # Shared --no-tests / isTestFile logic + + analysis/ # Pure algorithms -- no I/O, no formatting + bfs.js # Graph traversals (BFS, DFS, shortest path) + impact.js # Blast radius computation + confidence.js # Import resolution scoring + clustering.js # Community detection, coupling analysis + risk.js # Triage scoring, hotspot detection ``` -Analysis modules take a repository and return pure data. Formatters take data and produce strings. The CLI, MCP server, and programmatic API all consume analysis modules directly and pick their own formatter (or none). - ---- - -## 4. 
builder.js Orchestrates Too Many Concerns — Extract a Pipeline - -**Current state:** `builder.js` (554 lines) handles file collection, config loading, alias resolution, incremental change detection, parsing, node insertion, edge building, barrel file resolution, and statistics — all in `buildGraph()`. - -**Problem:** `buildGraph()` is a mega-function that's hard to test in parts. You can't test edge building without running the full parse phase. You can't test barrel resolution without a populated database. - -**Ideal architecture — explicit pipeline stages:** - -```js -// Each stage is a pure-ish function: (input, config) => output -const pipeline = [ - collectFiles, // (rootDir, config) => filePaths[] - detectChanges, // (filePaths, db) => { changed, removed, isFullBuild } - parseFiles, // (filePaths, engineOpts) => Map - insertNodes, // (symbolMap, db) => nodeIndex - resolveImports, // (symbolMap, rootDir, aliases) => importEdges[] - buildCallEdges, // (symbolMap, nodeIndex) => callEdges[] - buildClassEdges, // (symbolMap, nodeIndex) => classEdges[] - resolveBarrels, // (edges, symbolMap) => resolvedEdges[] - insertEdges, // (allEdges, db) => stats -] -``` +The key insight: every command follows the same lifecycle -- `(args) -> open DB -> query -> analyze -> format -> output`. A shared `CommandRunner` handles the lifecycle. Each command only implements the unique query + analysis logic. Formatting is always separate and pluggable (CLI text, JSON, NDJSON, Mermaid). -Each stage is independently testable. The pipeline runner handles transactions, logging, and statistics. Stages can be composed differently for watch mode (skip collectFiles, skip detectChanges, run single-file variant). +This eliminates the dual-function pattern entirely. `index.js` exports `auditData` (the command's execute function) -- the CLI formatter is internal to the CLI layer and never exported. --- -## 5. Embedder Should Be a Standalone Subsystem +## 2. 
The Database Layer Needs a Repository -- Now More Than Ever -**Current state:** `embedder.js` (525 lines) creates its own DB tables (`embeddings`, `embedding_meta`), manages its own model lifecycle, and implements both vector storage and search. It's effectively a mini vector database bolted onto the side of the graph database. +**Original analysis (S2):** SQL scattered across `builder.js`, `queries.js`, `embedder.js`, `watcher.js`, `cycles.js`. -**Problem:** Embedding concerns bleed into the graph DB schema. The cosine similarity search is O(n) full scan — fine for thousands of symbols, will not scale. The model registry, embedding generation, and search are all tangled in one file. +**What happened:** SQL is now scattered across **20+ modules**: all of the above plus `audit.js`, `check.js`, `cochange.js`, `communities.js`, `complexity.js`, `flow.js`, `manifesto.js`, `owners.js`, `structure.js`, `triage.js`, `snapshot.js`, `branch-compare.js`. Each module opens the DB independently with `openDb()`, creates its own prepared statements, and writes raw SQL inline. -**Ideal architecture:** +The schema grew to 9 tables: `nodes`, `edges`, `node_metrics`, `file_hashes`, `co_changes`, `co_change_meta`, `file_commit_counts`, `build_meta`, `function_complexity`. Plus embeddings and FTS5 tables in `embedder.js`. + +**Ideal architecture** (unchanged from original, but now higher priority): ``` src/ - embeddings/ - index.js # Public API - model-registry.js # Model definitions, batch sizes, loading - generator.js # Source → text preparation → batch embedding - store.js # Vector storage (pluggable: SQLite blob, flat file, HNSW index) - search.js # Similarity search, RRF multi-query fusion -``` - -**Key design change:** Make the vector store pluggable. The current SQLite blob approach works but is a linear scan. A future `HNSWStore` (using `hnswlib-node` or similar) would give O(log n) approximate nearest neighbor search — critical when the symbol count reaches 50K+. 
- -The store interface would be: - -```js -// Abstract store -insert(nodeId, vector, preview) -search(queryVector, topK, minScore) → results[] -delete(nodeId) -rebuild() + db/ + connection.js # Open, WAL mode, pragma tuning, connection pooling + migrations.js # Schema versions (currently 9 migrations) + repository.js # ALL read/write operations across all 9+ tables + types.js # JSDoc type definitions for all entities ``` -This also enables storing embeddings in a separate file from the graph DB, which avoids bloating `graph.db` with large binary blobs. +**New addition -- query builders for common patterns:** ---- - -## 6. The Native/WASM Abstraction Leaks - -**Current state:** `parser.js` has `resolveEngine()` that returns `{ name, native }`, then every call site branches on `engine.name === 'native'`. `resolve.js` has its own native check. `cycles.js` has its own native check. `builder.js` passes engine options through. - -**Problem:** The dual-engine strategy is a great idea but its implementation is scattered. Every consumer needs to know about native vs. WASM and handle both paths. - -**Ideal architecture — unified engine interface:** +Many modules do the same filtered query: "find nodes WHERE kind IN (...) AND file NOT LIKE '%test%' AND name LIKE ? ORDER BY ... LIMIT ? OFFSET ?". A lightweight query builder eliminates this SQL duplication: ```js -// engine.js — returns an object with the same API regardless of backend -export function createEngine(opts) { - const backend = resolveBackend(opts) // 'native' | 'wasm' - - return { - name: backend, - parseFile(filePath, source) { ... }, - parseFiles(filePaths, rootDir) { ... }, - resolveImport(from, source, rootDir, aliases) { ... }, - resolveImports(batch, rootDir, aliases) { ... }, - detectCycles(db) { ... }, - computeConfidence(caller, target, imported) { ... }, - createCache() { ... 
}, - } -} +repo.nodes() + .where({ kind: ['function', 'method'], file: { notLike: '%test%' } }) + .matching(name) + .orderBy('name') + .paginate(opts) + .all() ``` -Consumers receive an engine object and call methods on it. They never branch on native vs. WASM. The engine internally dispatches to the right implementation. This is the Strategy pattern properly applied. - -**Bonus:** This makes it trivial to add a third engine backend (e.g., a remote parsing service for very large repos) without touching any consumer code. +Not an ORM -- a thin SQL builder that generates the same prepared statements but eliminates string construction across 20 modules. --- -## 7. No Streaming / Event Architecture — Everything Is Batch - -**Current state:** The entire build pipeline is synchronous batch processing. Parse all files → insert all nodes → build all edges. The watcher does per-file updates but reimplements the pipeline in a simpler form. +## 3. queries.js at 3,110 Lines Must Be Decomposed -**Problem:** For large repos (10K+ files), the user waits for the entire pipeline to complete before seeing anything. There's no progress reporting during parsing. There's no way to cancel a build mid-flight. The watcher's simplified pipeline diverges from the main build path (different code, different edge cases). *(Note: two concrete edge cases — concurrent file edits causing EBUSY/EACCES during read, and symlink loops causing infinite recursion in `collectFiles` — have been fixed. `readFileSafe` retries on transient OS errors and is shared between `builder.js` and `watcher.js`. `collectFiles` tracks visited real paths to break symlink cycles.)* +**Original analysis (S3):** 823 lines mixing data access, algorithms, and presentation. -**Ideal architecture — event-driven pipeline:** +**Current state:** 3,110 lines -- nearly 4x growth. 
Contains 15+ data functions, 15+ display functions, constants (`SYMBOL_KINDS`, `ALL_SYMBOL_KINDS`, `VALID_ROLES`, `FALSE_POSITIVE_NAMES`), icon helpers (`kindIcon`), normalization (`normalizeSymbol`), test filtering (`isTestFile`), and generator functions (`iterListFunctions`, `iterRoles`, `iterWhere`). -```js -const pipeline = createPipeline(config) +This is now the second-largest file in the codebase (after `complexity.js` at 2,163 lines) and the most interconnected -- almost every other module imports from it. -pipeline.on('file:parsed', (file, symbols) => { /* progress */ }) -pipeline.on('file:indexed', (file, nodeCount) => { /* progress */ }) -pipeline.on('edge:built', (edge) => { /* streaming insert */ }) -pipeline.on('build:complete', (stats) => { /* summary */ }) -pipeline.on('error', (file, err) => { /* continue or abort */ }) +**Ideal decomposition:** -await pipeline.run(rootDir) -// or for watch mode: -await pipeline.watch(rootDir) // reuses same stages, different trigger ``` - -This unifies the build and watch code paths. Progress is naturally reported via events. Cancellation is a `pipeline.abort()`. Large builds can stream results to the DB incrementally instead of buffering everything in memory. - ---- - -## 8. Configuration Is Fine but Should Support Project Profiles - -**Current state:** Single `.codegraphrc.json` file, flat config, env var overrides. Clean and simple. - -**What's missing for real-world use:** - -**Profile-based configuration.** A monorepo with 3 services needs different settings per service (different `include`/`exclude`, different `ignoreDirs`, different `dbPath`). Currently you'd need 3 separate config files and run from 3 different directories. 
- -```json -{ - "profiles": { - "backend": { - "include": ["services/api/**"], - "build": { "dbPath": ".codegraph/api.db" } - }, - "frontend": { - "include": ["apps/web/**"], - "extensions": [".ts", ".tsx"], - "build": { "dbPath": ".codegraph/web.db" } - } - } -} -``` - -```bash -codegraph build --profile backend -codegraph build --profile frontend -codegraph build # default = all +src/ + analysis/ + symbol-lookup.js # queryNameData, whereData, listFunctionsData + impact.js # impactAnalysisData, fnImpactData, diffImpactData + dependencies.js # fileDepsData, fnDepsData, pathData + module-map.js # moduleMapData, statsData + context.js # contextData, explainData + roles.js # rolesData (currently delegates to structure.js) + + shared/ + constants.js # SYMBOL_KINDS, ALL_SYMBOL_KINDS, VALID_ROLES, FALSE_POSITIVE_NAMES + filters.js # isTestFile, normalizeSymbol, kindIcon + generators.js # iterListFunctions, iterRoles, iterWhere ``` -This maps cleanly to the multi-repo registry concept already in the codebase, but works within a single repo. - ---- - -## 9. Import Resolution Confidence Scoring Is Heuristic — Add Import-Graph Awareness - -**Current state:** `computeConfidence()` uses file proximity (same dir = 0.7, parent dir = 0.5, fallback = 0.3) to disambiguate when multiple functions share a name. - -**Problem:** Proximity is a weak signal. If `src/utils/format.js` exports `format()` and `src/api/format.js` also exports `format()`, and the caller is in `src/api/handler.js`, proximity correctly scores `src/api/format.js` higher. But if the caller explicitly imports from `src/utils/format.js`, the import graph already tells us the answer with certainty — and the current code does use imports when available (score 1.0). The gap is in the fallback path where there's no import but there IS an import chain (A imports B which imports C which exports the function). 
- -**Ideal enhancement — transitive import awareness:** - -Before falling back to proximity, walk the import graph from the caller file. If there's any import path (even indirect through barrel files) that reaches one of the candidates, that candidate gets a 0.9 score. Only if no import path exists at all do we fall back to proximity heuristics. - -This is a targeted algorithmic improvement, not a structural change, but it significantly improves edge accuracy for large codebases with many same-named functions. +Each analysis module is purely data -- no CLI output, no JSON formatting, no `console.log`. The `*Data()` suffix disappears because there's no `*()` counterpart. These are just functions that return data. --- -## 10. The MCP Server Should Be Composable, Not Monolithic +## 4. MCP at 1,212 Lines with 25 Tools Needs Composability -**Current state:** `mcp.js` (354 lines) has a hardcoded `TOOLS` array with 12 tool definitions, each with inline JSON schemas, and a `switch` statement dispatching to handler functions. +**Original analysis (S10):** 354 lines, 12 tools, monolithic switch dispatch. -**Problem:** Adding a new MCP tool requires editing the TOOLS array (schema), the switch statement (dispatch), and importing the handler — three changes in one file. The tool schemas are verbose JSON objects mixed with implementation logic. +**Current state:** 1,212 lines, 25 tools. The `buildToolList()` function dynamically builds tool definitions, and a large switch/dispatch handles all 25 tools. Adding a tool still requires editing the tool list, the dispatch block, and importing the handler -- three changes in one file. 
-**Ideal architecture:** +**Ideal architecture** (unchanged from original, now critical): ``` src/ mcp/ - server.js # MCP server setup, transport, connection lifecycle - tool-registry.js # Dynamic tool registration + server.js # MCP server setup, transport, connection lifecycle + tool-registry.js # Auto-discovery + dynamic registration + middleware.js # Pagination, error handling, repo resolution tools/ - query-function.js # { schema, handler } per tool + query-function.js # { schema, handler } file-deps.js impact-analysis.js - find-cycles.js - semantic-search.js - ... + check.js + audit.js + complexity.js + co-changes.js + structure.js + ... (25 files, one per tool) ``` -Each tool is a self-contained module: +Each tool is self-contained: ```js -// tools/query-function.js export const schema = { - name: 'query_function', + name: 'audit', description: '...', inputSchema: { ... } } export async function handler(args, context) { - const dbPath = context.resolveDb(args.repo) - return queryNameData(args.name, dbPath) + return auditData(args.target, context.resolveDb(args.repo), args) } ``` -The registry auto-discovers tools from the `tools/` directory. Adding a tool = adding a file. No other files change. - ---- - -## 11. Testing Strategy Needs Layers - -**Current state:** Tests are a mix of integration tests (full pipeline through SQLite) and pseudo-unit tests that still often hit the filesystem or database. There's no clear boundary between "test the algorithm" and "test the integration." 
- -**Ideal testing pyramid:** - -``` - ╱╲ - ╱ ╲ E2E (2-3 tests) - ╱ E2E╲ Full CLI invocation, real project, assert output - ╱──────╲ - ╱ ╲ Integration (current tests, refined) - ╱Integration╲ Build pipeline, query results, MCP responses - ╱────────────╲ - ╱ ╲ Unit (new layer) - ╱ Unit ╲ Extractors, algorithms, formatters — no I/O - ╱──────────────────╲ -``` - -**What's missing:** -- **Pure unit tests** for extractors (pass AST node, assert symbols — no file I/O) -- **Pure unit tests** for BFS/Tarjan algorithms (pass adjacency list, assert result) -- **Pure unit tests** for confidence scoring (pass parameters, assert score) -- **Repository mock** for query tests (in-memory data, no SQLite) -- **E2E tests** that invoke the CLI binary on a real (small) project and assert exit codes + stdout - -The repository pattern from point #2 directly enables this: unit tests use `InMemoryRepository`, integration tests use `SqliteRepository`. +The registry auto-discovers tools from the directory. Shared middleware handles pagination (the `MCP_DEFAULTS` logic currently in `paginate.js`), error wrapping, and multi-repo resolution. Adding a tool = adding a file. --- -## 12. CLI Architecture — Move to Command Objects +## 5. CLI at 1,285 Lines with 45 Commands Needs Command Objects -**Current state:** `cli.js` defines all commands inline with Commander.js. Each command is a `.command().description().option().action()` chain that directly calls functions. +**Original analysis (S12):** CLI was mentioned as a future concern. -**Problem:** The CLI file grows linearly with every new command. Command logic (option parsing, validation, output formatting) is mixed with framework wiring. You can't test a command's behavior without invoking Commander. +**Current state:** 1,285 lines of inline Commander.js chains. 45 commands registered with `.command().description().option().action()` patterns. 
Each action handler directly calls module functions, handles `--json` output, and manages error display. **Ideal architecture:** ``` src/ cli/ - index.js # Commander setup, command registration + index.js # Commander setup, auto-discover commands + shared/ + output.js # --json, --ndjson, table, plain text output + options.js # Shared options (--no-tests, --json, --db, --engine, --limit, --offset) + validation.js # Argument validation, path resolution commands/ - build.js # { name, description, options, validate, execute } + build.js # { name, description, options, validate, execute } query.js impact.js - deps.js - export.js - search.js - watch.js - registry.js - ... + audit.js + check.js + ... (45 files) ``` -Each command is a plain object: +Each command: ```js export default { - name: 'impact', - description: 'Show what depends on a file', - arguments: [{ name: 'file', required: true }], + name: 'audit', + description: 'Combined explain + impact + health report', + arguments: [{ name: 'target', required: true }], options: [ - { flags: '--depth ', description: 'Traversal depth', default: 3 }, - { flags: '--json', description: 'JSON output' }, + { flags: '-T, --no-tests', description: 'Exclude test files' }, + { flags: '-j, --json', description: 'JSON output' }, + { flags: '--db ', description: 'Custom DB path' }, ], - validate(args, opts) { /* pre-flight checks */ }, - async execute(args, opts) { /* the actual work */ }, + async execute(args, opts) { + const data = await auditData(args.target, opts.db, opts) + return data // CommandRunner handles formatting + }, } ``` -The CLI index auto-discovers commands and registers them with Commander. Each command is independently testable by calling `execute()` directly. +The CLI index auto-discovers commands. Shared options (`--no-tests`, `--json`, `--db`, `--engine`, `--limit`, `--offset`) are applied uniformly. The `CommandRunner` handles the open-DB -> execute -> format -> output lifecycle. --- -## 13. 
Graph Model Is Flat — Consider Hierarchical Scoping +## 6. complexity.js at 2,163 Lines Is a Hidden Monolith -**Current state:** The `nodes` table has `(name, kind, file, line)`. A function named `format` in `src/a.js` and a method named `format` on class `DateHelper` in `src/b.js` are both just nodes with `name=format`. The class membership is encoded as an edge, not as a structural property. +**Not in original analysis** -- this module didn't exist in Feb 2026. -**Problem:** Name collisions are resolved through the confidence scoring heuristic. But the graph has no concept of scope — there's no way to express "this `format` belongs to `DateHelper`" as a structural property of the node. This makes queries ambiguous: `codegraph query format` returns all `format` symbols across the entire graph. +**Current state:** 2,163 lines containing language-specific AST complexity rules for 8 languages (JS/TS, Python, Go, Rust, Java, C#, PHP, Ruby), plus Halstead metrics computation, maintainability index calculation, LOC/SLOC counting, and CLI formatting. It's the largest file in the codebase. -**Ideal enhancement — qualified names:** +**Problem:** The file is structured as a giant map of language to rules, but the rules for each language are deeply nested objects with inline AST traversal logic. Adding a new language or modifying a rule requires working inside a 2K-line file. -```sql -CREATE TABLE nodes ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL, -- 'format' - qualified_name TEXT, -- 'DateHelper.format' or 'utils/date::format' - kind TEXT NOT NULL, - file TEXT NOT NULL, - scope TEXT, -- 'DateHelper' (parent class/module/namespace) - line INTEGER, - end_line INTEGER, - visibility TEXT, -- 'public' | 'private' | 'protected' | 'internal' - UNIQUE(qualified_name, kind, file) -); -``` +**Ideal architecture:** -The `qualified_name` gives every symbol a unique identity within its file. The `scope` field enables queries like "all methods of class X" without traversing edges. 
The `visibility` field enables filtering out private implementation details from impact analysis. +``` +src/ + complexity/ + index.js # Public API: computeComplexity, complexityData + metrics.js # Halstead, MI, LOC/SLOC computation (language-agnostic) + engine.js # Walk AST + apply rules -> raw metric values + rules/ + javascript.js # JS/TS/TSX complexity rules + python.js + go.js + rust.js + java.js + csharp.js + php.js + ruby.js +``` -This doesn't change the edge model — it enriches the node model to reduce ambiguity at the source. +Each rules file exports a declarative complexity rule set. The engine applies rules to AST nodes. Metrics computation is shared. This mirrors the parser plugin system concept -- same pattern, applied to complexity. --- -## 14. No Caching Layer Between DB and Queries +## 7. builder.js at 1,173 Lines -- Pipeline Architecture -**Current state:** Every query function opens the DB, runs SQL, returns results, and closes. There's no caching of query results, no materialized views, no precomputed aggregates. +**Original analysis (S4):** 554 lines, mega-function that's hard to test in parts. -**Fine for now.** SQLite is fast and the graph fits in memory. But as graphs grow (50K+ nodes), repeated queries (especially from MCP where an AI agent may query the same function multiple times in a conversation) will redundantly hit disk. +**Current state:** 1,173 lines -- doubled. Now includes change journal integration, structure building, role classification, incremental verification, and more complex edge building. The `buildGraph()` function is even more of a mega-function. -**Ideal enhancement — query result cache:** +**Ideal architecture** (unchanged, reinforced): ```js -class QueryCache { - constructor(db, maxAge = 60_000) { ... } - - // Cache key = query name + args hash - // Invalidated on DB write (build, watch update) - get(key) { ... } - set(key, value) { ... } - invalidate() { ... 
} // Called after any DB mutation -} +const pipeline = [ + collectFiles, // (rootDir, config) => filePaths[] + detectChanges, // (filePaths, db) => { changed, removed, isFullBuild } + parseFiles, // (filePaths, engineOpts) => Map + insertNodes, // (symbolMap, db) => nodeIndex + resolveImports, // (symbolMap, rootDir, aliases) => importEdges[] + buildCallEdges, // (symbolMap, nodeIndex) => callEdges[] + buildClassEdges, // (symbolMap, nodeIndex) => classEdges[] + resolveBarrels, // (edges, symbolMap) => resolvedEdges[] + insertEdges, // (allEdges, db) => stats + buildStructure, // (db, fileSymbols, rootDir) => structureStats + classifyRoles, // (db) => roleStats + computeComplexity, // (db, rootDir, engine) => complexityStats + emitChangeJournal, // (rootDir, changes) => void +] ``` -This is a simple LRU or TTL cache that sits between the analysis layer and the repository. It's transparent to consumers. Particularly valuable for MCP where the same agent session may repeatedly query related symbols. +The pipeline grew -- four new stages since the original analysis. This reinforces the need: each stage is independently testable and the pipeline runner handles transactions, logging, progress, and statistics. + +**Watch mode** reuses the same stages triggered per-file, eliminating the `watcher.js` divergence. `change-journal.js` and `journal.js` integrate as pipeline hooks rather than separate code paths. --- -## 15. Watcher and Builder Share Logic But Don't Share Code +## 8. embedder.js at 1,113 Lines -- Now Includes Three Search Engines + +**Original analysis (S5):** 525 lines, mini vector database bolted onto the graph DB. -**Current state:** `watcher.js` reimplements parts of `builder.js` — node insertion, edge building, prepared statement setup — in a simplified single-file form. The two implementations can drift. +**Current state:** 1,113 lines. 
Now contains: +- 8 embedding model definitions with batch sizes and dimensions +- 2 embedding strategies (structured, source) +- Vector storage in SQLite blobs +- Cosine similarity search (O(n) linear scan) +- **FTS5 full-text index with BM25 scoring** (new) +- **Hybrid search with RRF fusion** (new) +- Model lifecycle management (lazy loading, caching) -**Problem:** Bug fixes to edge building in `builder.js` must be separately applied to `watcher.js`. The watcher's edge building is simpler (no barrel resolution, simpler confidence) which means watch-mode graphs are subtly different from full-build graphs. +Hybrid search (originally planned as Phase 5.3) is already implemented -- but inside the monolith. -**Partial progress:** `readFileSafe` (exported from `builder.js`, imported by `watcher.js`) is the first shared utility between the two modules. It retries on transient OS errors (EBUSY/EACCES/EPERM) that occur when editors perform non-atomic saves, replacing bare `readFileSync` calls in both code paths. This is a small step toward the shared-stages goal. +**Ideal architecture** (updated): -**Ideal fix:** The pipeline architecture from point #4 eliminates this entirely. Watch mode uses the same pipeline stages, just triggered per-file instead of per-project. The `insertNodes` and `buildEdges` stages are literally the same functions. +``` +src/ + embeddings/ + index.js # Public API + models.js # Model definitions, batch sizes, loading + generator.js # Source -> text preparation -> batch embedding + stores/ + sqlite-blob.js # Current O(n) cosine similarity + fts5.js # BM25 keyword search via FTS5 + search/ + semantic.js # Vector similarity search + keyword.js # FTS5 BM25 search + hybrid.js # RRF fusion of semantic + keyword + strategies/ + structured.js # Structured text preparation + source.js # Raw source preparation +``` + +The three search modes (semantic, keyword, hybrid) become composable search strategies rather than three code paths in one file. 
The store abstraction enables future pluggable backends (HNSW, DiskANN) without touching search logic. --- -## 16. Export Module Should Support Filtering and Subgraph Extraction +## 9. parser.js Is No Longer a Monolith -- Downgrade Priority -**Current state:** `export.js` exports the entire graph or nothing. DOT/Mermaid/JSON always include all nodes and edges. +**Original analysis (S1):** 2,215 lines, 9 language extractors in one file. Highest priority. -**Problem:** For a 5K-node graph, the DOT output is unusable — Graphviz chokes, Mermaid renders an incomprehensible hairball. +**Current state:** 404 lines. The native Rust engine now handles the heavy parsing. `parser.js` is a thin WASM fallback with `LANGUAGE_REGISTRY`, engine resolution, and minimal extraction. The extractors still exist but are much smaller per-language. -**Ideal enhancement:** +**Revised recommendation:** This is no longer urgent. The Rust engine already implements the plugin system concept natively. The WASM path in `parser.js` at 404 lines is manageable. If the parser ever grows again (new languages added to WASM fallback), revisit -- but for now, this is fine. -```bash -codegraph export --format dot --focus src/builder.js --depth 2 -# Exports only builder.js and its 2-hop neighborhood +--- -codegraph export --format mermaid --filter "src/api/**" --kind function -# Only functions in the api directory +## 10. The Native/WASM Abstraction -- Less Critical Now -codegraph export --format json --changed # Only files changed since last commit -``` +**Original analysis (S6):** Scattered `engine.name === 'native'` branching across multiple files. -The export module receives a subgraph specification (focus node + depth, file pattern, kind filter) and extracts the relevant subgraph before formatting. This makes visualization actually useful for real projects. +**Current state:** The native engine is the primary path. WASM is a fallback. 
The branching still exists but is less problematic because most users never hit the WASM path. The unified engine interface is still the right design but it's a polish item, not a structural problem. + +**Revised priority:** Low-Medium. Do it when touching these files for other reasons. --- -## 17. Error Handling Is Ad-Hoc — Introduce Domain Errors +## 11. Qualified Names + Hierarchical Scoping -- Still Important -**Current state:** Errors are handled inconsistently: -- Some functions throw generic `Error` -- Some return null/undefined on failure -- Some call `logger.warn()` and continue -- Some call `process.exit(1)` +**Original analysis (S13):** Flat node model with name collisions resolved by heuristics. -**Problem:** Callers can't distinguish "file not found" from "parse failed" from "DB corrupt" without inspecting error message strings. The MCP server wraps everything in try-catch and returns generic error text. +**Current state:** Unchanged. The `nodes` table still has `(name, kind, file, line)` with no scope or qualified name. The `structure.js` module added `role` classification but not scoping. With the codebase now handling more complex analysis (communities, boundaries, flow tracing), the lack of qualified names creates more ambiguity in more places. -**Ideal architecture:** +**Ideal enhancement** (unchanged): + +```sql +ALTER TABLE nodes ADD COLUMN qualified_name TEXT; -- 'DateHelper.format' +ALTER TABLE nodes ADD COLUMN scope TEXT; -- 'DateHelper' +ALTER TABLE nodes ADD COLUMN visibility TEXT; -- 'public' | 'private' | 'protected' +``` + +--- + +## 12. Domain Error Hierarchy -- More Urgent with 35 Modules + +**Original analysis (S17):** Inconsistent error handling across ~12 modules. + +**Current state:** 35 modules with inconsistent error handling. Some throw, some return null, some `logger.warn()` and continue, some `process.exit(1)`. The MCP server wraps everything in generic try-catch. 
The `check.js` module returns structured pass/fail objects but other modules don't. + +**`check.js` already demonstrates the right pattern** -- structured result objects with clear pass/fail semantics. This should be generalized: ```js // errors.js export class CodegraphError extends Error { - constructor(message, { code, file, cause } = {}) { ... } + constructor(message, { code, file, cause } = {}) { + super(message) + this.code = code + this.file = file + this.cause = cause + } } export class ParseError extends CodegraphError { code = 'PARSE_FAILED' } @@ -524,32 +404,56 @@ export class DbError extends CodegraphError { code = 'DB_ERROR' } export class ConfigError extends CodegraphError { code = 'CONFIG_INVALID' } export class ResolutionError extends CodegraphError { code = 'RESOLUTION_FAILED' } export class EngineError extends CodegraphError { code = 'ENGINE_UNAVAILABLE' } +export class AnalysisError extends CodegraphError { code = 'ANALYSIS_FAILED' } +export class BoundaryError extends CodegraphError { code = 'BOUNDARY_VIOLATION' } ``` -The CLI catches domain errors and formats them for humans. The MCP server catches them and returns structured error responses. The programmatic API lets them propagate. No more `process.exit()` from library code. - --- -## 18. The Programmatic API (index.js) Exposes Too Much +## 13. Public API Surface -- 120+ Exports Is Unsustainable -**Current state:** `index.js` re-exports ~40 functions from every module — internal helpers, data functions, presentation functions, DB utilities, everything. +**Original analysis (S18):** ~40 re-exports, no distinction between public and internal. -**Problem:** There's no distinction between public API and internal implementation. A consumer importing `buildGraph` also sees `findChild` (a tree-sitter helper) and `openDb` (internal DB function). Any refactoring risks breaking unnamed consumers. +**Current state:** 120+ exports from `index.js`. 
Every `*Data()` function, every CLI display function, every constant, every utility is exported. The public API is the entire internal surface. -**Ideal architecture — explicit public surface:** +**The problem is now 3x worse** and directly blocks any refactoring -- every internal rename could break an unnamed consumer. + +**Ideal architecture** (reinforced): ```js -// index.js — curated public API only +// index.js -- curated public API (~30 exports) +// Build export { buildGraph } from './builder.js' -export { queryFunction, impactAnalysis, fileDeps, fnDeps, diffImpact } from './analysis/index.js' -export { search, multiSearch, embedSymbols } from './embeddings/index.js' + +// Analysis (data functions only -- no CLI formatters) +export { queryNameData, impactAnalysisData, fileDepsData, fnDepsData, + fnImpactData, diffImpactData, moduleMapData, statsData, + contextData, explainData, whereData, listFunctionsData, + rolesData } from './analysis/index.js' + +// New analysis modules +export { auditData } from './commands/audit.js' +export { checkData } from './commands/check.js' +export { complexityData } from './commands/complexity.js' +export { manifestoData } from './commands/manifesto.js' +export { triageData } from './commands/triage.js' +export { flowData } from './commands/flow.js' +export { communitiesData } from './commands/communities.js' + +// Search +export { searchData, hybridSearchData, embedSymbols } from './embeddings/index.js' + +// Infrastructure export { detectCycles } from './analysis/cycles.js' export { exportGraph } from './export.js' export { startMcpServer } from './mcp/server.js' export { loadConfig } from './config.js' + +// Constants +export { SYMBOL_KINDS, ALL_SYMBOL_KINDS } from './shared/constants.js' ``` -Everything else is internal. Use `package.json` `exports` field to enforce module boundaries: +Lock it with `package.json` exports: ```json { @@ -560,35 +464,143 @@ Everything else is internal. 
Use `package.json` `exports` field to enforce modul } ``` -Consumers can only import from the documented entry points. Internal modules are truly internal. +--- + +## 14. Structure + Cochange + Communities -- Parallel Graph Models Need Unification + +**Not in original analysis** -- these modules didn't exist. + +**Current state:** Three separate analytical subsystems each build their own graph representation: + +- **`structure.js`** (668 lines): Builds directory nodes, computes cohesion/density/coupling metrics, classifies roles (entry, core, utility, adapter, leaf, dead). Has its own BFS and metrics computation. +- **`cochange.js`** (502 lines): Builds temporal coupling graph from git history. Stores in `co_changes` table with Jaccard coefficients. Independent of the dependency graph. +- **`communities.js`** (310 lines): Uses graphology to build an in-memory graph from edges, runs Louvain community detection, computes modularity and drift. + +Each constructs its own graph representation independently. There's no shared graph abstraction they all operate on. + +**Ideal architecture -- unified graph model:** + +``` +src/ + graph/ + model.js # In-memory graph representation (nodes + edges + metadata) + builders/ + dependency.js # Build from SQLite edges (imports, calls, extends) + structure.js # Build from file/directory hierarchy + temporal.js # Build from git history (co-changes) + algorithms/ + bfs.js # Breadth-first traversal (used by impact, flow, etc.) 
+ shortest-path.js # Path finding (used by path command) + tarjan.js # Cycle detection (currently in cycles.js) + louvain.js # Community detection (currently uses graphology) + centrality.js # Fan-in/fan-out, betweenness (used by triage, hotspots) + clustering.js # Cohesion, coupling, density metrics + classifiers/ + roles.js # Node role classification + risk.js # Risk scoring (currently in triage.js) +``` + +The graph model is a shared in-memory structure that multiple builders can populate and multiple algorithms can query. This eliminates the repeated graph construction across modules and makes algorithms composable -- you can run community detection on the dependency graph, the temporal graph, or a merged graph. + +--- + +## 15. Pagination Pattern Needs Standardization + +**Not in original analysis** -- paginate.js was just introduced. + +**Current state:** `paginate.js` (106 lines) provides `paginate()` and `paginateResult()` helpers plus `MCP_DEFAULTS` with per-command limits. But each module integrates pagination differently -- some pass `opts` to paginate, some manually slice arrays, some use `LIMIT/OFFSET` in SQL, some paginate in memory after fetching all results. + +**Ideal architecture:** Pagination belongs in the repository layer (SQL `LIMIT/OFFSET`) for data fetching and in the command runner for result shaping. The current pattern of fetching all data then slicing in memory doesn't scale. The repository should accept pagination parameters directly: + +```js +// In repository +findNodes(filters, { limit, offset, orderBy }) { + // Generates SQL with LIMIT/OFFSET -- never fetches more than needed +} + +// In command runner (after execute) +runner.paginate(result, 'functions', opts) // Consistent shaping for all commands +``` + +--- + +## 16. Testing -- Good Coverage, Wrong Distribution + +**Original analysis (S11):** Missing proper unit tests. + +**Current state:** 59 test files -- major improvement. 
Tests exist across: +- `tests/unit/` -- 18 files +- `tests/integration/` -- 18 files +- `tests/parsers/` -- 8 files +- `tests/engines/` -- 2 files (parity tests) +- `tests/search/` -- 3 files +- `tests/incremental/` -- 2 files + +**What's still missing:** +- Unit tests for pure graph algorithms (BFS, Tarjan) in isolation +- Unit tests for confidence scoring with various inputs +- Unit tests for the triage risk scoring formula +- Mock-based tests (the repository pattern would enable `InMemoryRepository`) +- Many "unit" tests still hit SQLite -- they're integration tests in the unit directory + +The test count is adequate. The issue is that without the repository pattern, true unit testing is impossible for most modules -- they all need a real SQLite DB. + +--- + +## 17. Event-Driven Pipeline -- Still Relevant for Scale + +**Original analysis (S7):** Batch pipeline with no progress reporting. + +**Current state:** Still batch. The `change-journal.js` module adds NDJSON event logging for watch mode, which is a step toward events -- but the build pipeline itself is still synchronous batch. For repos with 10K+ files, users still see no progress during builds. + +**Ideal architecture** (unchanged, lower priority than structural issues): + +```js +pipeline.on('file:parsed', (file, symbols) => { /* progress */ }) +pipeline.on('file:indexed', (file, nodeCount) => { /* progress */ }) +pipeline.on('build:complete', (stats) => { /* summary */ }) +await pipeline.run(rootDir) +``` + +--- + +## Remaining Items (Unchanged from Original) + +- **Config profiles (S8):** Single flat config, no monorepo profiles. Still relevant but not blocking anything. +- **Transitive import-aware confidence (S9):** Walk import graph before falling back to proximity heuristics. Targeted algorithmic improvement. +- **Query result caching (S14):** LRU/TTL cache between analysis and repository. More valuable now with 25 MCP tools. +- **Subgraph export filtering (S16):** Export the full graph or nothing. 
Still relevant for usability. --- -## Summary — Priority Ordering by Architectural Impact - -| # | Change | Impact | Category | -|---|--------|--------|----------| -| 1 | Split parser.js into plugin system | High | Modularity | -| 2 | Repository pattern for data access | High | Testability, maintainability | -| 3 | Separate analysis / formatting layers | High | Separation of concerns | -| 4 | Pipeline architecture for builder | High | Testability, reuse | -| 6 | Unified engine interface (Strategy) | Medium-High | Abstraction | -| 5 | Embedder as standalone subsystem | Medium | Extensibility | -| 13 | Qualified names + scoping in graph model | Medium | Data model accuracy | -| 7 | Event-driven pipeline for streaming | Medium | Scalability, UX | -| 10 | Composable MCP tool registry | Medium | Extensibility | -| 12 | CLI command objects | Medium | Maintainability | -| 17 | Domain error hierarchy | Medium | Reliability | -| 18 | Curated public API surface | Medium | API stability | -| 11 | Testing pyramid with proper layers | Medium | Quality | -| 16 | Subgraph export with filtering | Low-Medium | Usability | -| 9 | Transitive import-aware confidence | Low-Medium | Accuracy | -| 14 | Query result caching | Low | Performance | -| 8 | Config profiles for monorepos | Low | Feature | -| 15 | Unify watcher/builder code paths | Low | Falls out of #4 (partial: `readFileSafe` shared) | - -Items 1–4 and 6 are foundational — they restructure the core and everything else becomes easier after them. Items 13 and 7 are the most impactful feature-level changes. Items 14–15 are natural consequences of earlier changes. 
+## Revised Summary -- Priority Ordering by Architectural Impact + +| # | Change | Impact | Category | Original # | +|---|--------|--------|----------|------------| +| **1** | **Command/Query separation -- eliminate dual-function pattern across 15 modules** | **Critical** | Separation of concerns | S3 (was High) | +| **2** | **Repository pattern for data access -- SQL in 20+ modules** | **Critical** | Testability, maintainability | S2 (was High) | +| **3** | **Decompose queries.js (3,110 lines) into analysis modules** | **Critical** | Modularity | S3 (was High) | +| **4** | **Composable MCP tool registry (25 tools in 1,212 lines)** | **High** | Extensibility | S10 (was Medium) | +| **5** | **CLI command objects (45 commands in 1,285 lines)** | **High** | Maintainability | S12 (was Medium) | +| **6** | **Curated public API surface (120+ to ~30 exports)** | **High** | API stability | S18 (was Medium) | +| **7** | **Domain error hierarchy (35 modules, inconsistent handling)** | **High** | Reliability | S17 (was Medium) | +| **8** | **Decompose complexity.js (2,163 lines) into rules/engine** | **High** | Modularity | New | +| **9** | **Builder pipeline architecture (1,173 lines)** | **High** | Testability, reuse | S4 (was High) | +| **10** | **Embedder subsystem (1,113 lines, 3 search engines)** | **Medium-High** | Extensibility | S5 (was Medium) | +| **11** | **Unified graph model for structure/cochange/communities** | **Medium-High** | Cohesion | New | +| **12** | **Qualified names + hierarchical scoping** | **Medium** | Data model accuracy | S13 (unchanged) | +| **13** | **Pagination standardization (SQL-level + command runner)** | **Medium** | Consistency | New | +| **14** | **Testing pyramid with InMemoryRepository** | **Medium** | Quality | S11 (unchanged) | +| **15** | **Event-driven pipeline for streaming** | **Medium** | Scalability, UX | S7 (unchanged) | +| **16** | **Query result caching (25 MCP tools)** | **Low-Medium** | Performance | S14 (unchanged) | +| 
**17** | **Unified engine interface (Strategy)** | **Low-Medium** | Abstraction | S6 (was Medium-High) | +| **18** | **Subgraph export with filtering** | **Low-Medium** | Usability | S16 (unchanged) | +| **19** | **Transitive import-aware confidence** | **Low** | Accuracy | S9 (unchanged) | +| **20** | **Parser plugin system** | **Low** | Modularity | S1 (was High -- parser.js shrank to 404 lines) | +| **21** | **Config profiles for monorepos** | **Low** | Feature | S8 (unchanged) | + +**The structural priority shifted.** In the original analysis, the parser monolith was #1 -- it's now #20 because the native engine solved it. The new #1 is the command/query separation: the dual-function anti-pattern replicated across 15 modules is the single biggest source of code duplication and coupling in the codebase. Items 1-3 are the foundation -- they restructure the core and everything else becomes easier. Items 4-7 are high-impact but can be done in parallel. Items 8-10 are large-file decompositions that follow naturally once the shared infrastructure exists. --- -*Generated 2026-02-22. Cold architectural analysis — no implementation constraints applied.* +*Revised 2026-03-02. Cold architectural analysis -- no implementation constraints applied.* From b45b36ca8095f8e2a92308ef0f30b3d768375b75 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 18:54:14 -0700 Subject: [PATCH 18/30] docs: add Narsil-MCP competitive deep-dive with feature candidates (#265) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: add competitive deep-dive for Joern and reorganize competitive folder Move COMPETITIVE_ANALYSIS.md into generated/competitive/ and add a comprehensive feature-by-feature comparison against joernio/joern (our #1-ranked competitor). 
Covers parsing, graph model, query language, performance, installation, AI/MCP integration, security analysis, developer productivity, and ecosystem across 100+ individual features. Update FOUNDATION.md reference to the new path. * fix: update broken links to moved COMPETITIVE_ANALYSIS.md README.md and docs/roadmap/BACKLOG.md still referenced the old path at generated/COMPETITIVE_ANALYSIS.md after the file was moved to generated/competitive/COMPETITIVE_ANALYSIS.md in #260. * docs: add Joern-inspired feature candidates with BACKLOG-style grading Append a new "Joern-Inspired Feature Candidates" section to the Joern competitive deep-dive. Lists 11 actionable features extracted from Parsing & Language Support, Graph Model & Analysis Depth, and Query Language & Interface sections — assessed with the same tier/grading system used in BACKLOG.md (zero-dep, foundation-aligned, problem-fit, breaking). Tier 1 non-breaking: call-chain slicing, type-informed resolution, error-tolerant parsing, regex filtering, Kotlin, Swift, script execution. Tier 1 breaking: expanded node/edge types, intraprocedural CFG, stored AST. Not adopted: 9 features with FOUNDATION.md reasoning. Cross-references BACKLOG IDs 14 and 7. * docs: add competitive deep-dive for Narsil-MCP with feature candidates Comprehensive comparison across 10 dimensions: parsing (32 vs 11 languages), graph model (CFG/DFG/type inference vs complexity/roles/ communities), search (similarity/chunking vs RRF hybrid), security (147 rules vs none), queries (90 tools vs 21 + compound commands), performance (cold start vs incremental), install, MCP integration, developer productivity, and ecosystem. 
Feature candidates section covers all comparison sections: - Tier 1 non-breaking (10): MCP presets, AST chunking, code similarity, git blame/symbol history, remote repo indexing, config wizard, Kotlin, Swift, Bash, Scala language support - Tier 1 breaking (1): export map per module - Tier 2 (2): interactive HTML viz, multiple embedding backends - Tier 3 (2): OWASP patterns, SBOM generation - Not adopted (10): taint, type inference, SPARQL/RDF, CCG, in-memory arch, 90-tool surface, browser WASM, Forgemax, LSP, license scanning - Cross-references to BACKLOG IDs 7, 8, 10, 14 and Joern candidates J4, J5, J8, J9 --- generated/competitive/narsil-mcp.md | 565 +++++++++++++++------------- 1 file changed, 313 insertions(+), 252 deletions(-) diff --git a/generated/competitive/narsil-mcp.md b/generated/competitive/narsil-mcp.md index 03017048..0bab58d0 100644 --- a/generated/competitive/narsil-mcp.md +++ b/generated/competitive/narsil-mcp.md @@ -1,25 +1,25 @@ -# Competitive Deep-Dive: Codegraph vs narsil-mcp +# Competitive Deep-Dive: Codegraph vs Narsil-MCP **Date:** 2026-03-02 -**Competitors:** `@optave/codegraph` v0.x (Apache-2.0) vs `postrv/narsil-mcp` v1.6.x (Apache-2.0 / MIT) -**Context:** narsil-mcp is ranked #2 in our [competitive analysis](../COMPETITIVE_ANALYSIS.md) with a score of 4.5, tied with Joern at #1. Unlike Joern (which targets security researchers), narsil-mcp competes head-to-head with codegraph — same parsing technology (tree-sitter), same delivery mechanism (MCP), same target audience (AI agents), same local-first philosophy. +**Competitors:** `@optave/codegraph` v2.x (Apache-2.0) vs `postrv/narsil-mcp` v1.6 (Apache-2.0 OR MIT) +**Context:** Both are Apache-2.0-licensed code analysis tools with MCP interfaces. Narsil-MCP is ranked #2 in our [competitive analysis](./COMPETITIVE_ANALYSIS.md) with a score of 4.5 vs codegraph's 4.0 at #8. --- ## Executive Summary -Narsil-mcp and codegraph are the two closest competitors in the code intelligence MCP space. 
Both use tree-sitter for parsing, both expose tools via MCP, and both target AI coding agents. They diverge sharply in philosophy: narsil-mcp maximizes surface area (90 tools, 32 languages, security scanning, SPARQL, CCG standard), while codegraph maximizes depth-per-tool and always-current guarantees (persistent incremental graph, confidence-scored edges, compound commands, CI gates). +Narsil-MCP and codegraph share more DNA than any other pair in the competitive landscape — both use tree-sitter, both serve AI agents via MCP, both are local-first. But they diverge sharply in philosophy: -| Dimension | narsil-mcp | Codegraph | +| Dimension | Narsil-MCP | Codegraph | |-----------|------------|-----------| -| **Primary mission** | Comprehensive code intelligence for AI agents via maximum tool coverage | Always-current structural code intelligence with scored, actionable results | -| **Target user** | AI coding agents (Claude, Cursor, Windsurf) | Developers, AI coding agents, CI pipelines | -| **Graph model** | RDF knowledge graph (Oxigraph) + in-memory symbol maps | Structural dependency graph (SQLite) with confidence-scored edges | -| **Core question answered** | "What does this code do and is it secure?" | "What breaks if I change this function?" 
| -| **Rebuild model** | In-memory incremental; full re-index on restart unless `--persist` | Persistent incremental (SQLite); sub-second rebuilds survive restarts | -| **Runtime** | Rust binary (~30-50 MB) | Node.js + optional native Rust addon (<100 MB working set) | +| **Primary mission** | Maximum-breadth code intelligence in a single binary | Always-current structural intelligence with sub-second rebuilds | +| **Target user** | AI agents needing comprehensive analysis (security, types, dataflow) | Developers, AI coding agents, CI pipelines needing fast feedback | +| **Architecture** | MCP-first, no standalone CLI queries | Full CLI + MCP server + programmatic JS API | +| **Core question answered** | "Tell me everything about this code" (90 tools) | "What breaks if I change this function?" (focused commands) | +| **Rebuild model** | In-memory index, opt-in persistence, file watcher | SQLite-persisted, incremental hash-based rebuilds | +| **Runtime** | Single Rust binary (~30 MB) | Node.js + optional native Rust addon | -**Bottom line:** narsil-mcp casts the widest net — more languages, more tools, more analysis types. Codegraph goes deeper on the problems that matter most for iterative development — persistent incremental builds, confidence scoring, impact analysis, and CI integration. narsil-mcp is a feature-rich index; codegraph is an always-current dependency graph with actionable intelligence. +**Bottom line:** Narsil-MCP is broader (90 tools, 32 languages, security scanning, taint analysis, SBOM, type inference). Codegraph is deeper on developer productivity (impact analysis, complexity metrics, community detection, architecture boundaries, manifesto rules) and faster for iterative workflows (incremental rebuilds, CI gates). Where they overlap (call graphs, dead code, search, MCP), narsil has more tools while codegraph has more purpose-built commands. They are the closest competitors in the landscape. 
--- @@ -29,18 +29,18 @@ Codegraph's foundation document defines the problem as: *"Fast local analysis wi ### Principle-by-principle evaluation -| # | Principle | Codegraph | narsil-mcp | Verdict | +| # | Principle | Codegraph | Narsil-MCP | Verdict | |---|-----------|-----------|------------|---------| -| 1 | **The graph is always current** — rebuild on every commit/save/agent loop | Persistent SQLite with file-level MD5 hashing. Change 1 file in 3,000 → <500ms rebuild. Graph survives restarts, watch mode, commit hooks all practical | Merkle-tree incremental parsing within a session. But in-memory by default — full re-index on every server restart unless `--persist` is used. Persistence is opt-in, not default | **Codegraph wins.** Persistence-by-default vs. persistence-as-afterthought. An "always-current" graph that vanishes on restart isn't always current | -| 2 | **Native speed, universal reach** — dual engine (Rust + WASM) | Native napi-rs with rayon parallelism + automatic WASM fallback. `npm install` on any platform | Pure Rust with rayon parallelism. Browser WASM build available (~3 MB). 8 install methods (Homebrew, Scoop, Cargo, npm, Nix, AUR, shell script, source) | **Tie.** Both achieve native speed with WASM fallback. narsil-mcp has more install methods; codegraph has simpler auto-detection | -| 3 | **Confidence over noise** — scored results | 6-level import resolution with 0.0-1.0 confidence on every edge. False-positive filtering. Graph quality score. Node role classification | No confidence scoring on edges. Results are binary (found/not found). 147 security rules with severity levels, but no structural confidence scoring | **Codegraph wins.** Confidence-scored edges vs. binary results. This is fundamental to codegraph's value proposition | -| 4 | **Zero-cost core, LLM-enhanced when you choose** | Full pipeline local, zero API keys. Optional embeddings with user's LLM provider | Core parsing/search local. 
Neural search requires API keys (Voyage AI/OpenAI) or heavy ONNX build (+20 MB). Type inference and security scanning are local | **Codegraph wins.** Both are local-first, but narsil-mcp's neural search requires paid API keys by default (local ONNX is a non-default feature flag) | -| 5 | **Functional CLI, embeddable API** | 35+ CLI commands + 18-tool MCP server + full programmatic JS API + `--json` on every command | No standalone CLI — MCP-only interface. 90 MCP tools. No programmatic library API for embedding in other applications | **Codegraph wins.** Codegraph serves three interfaces (CLI + MCP + API). narsil-mcp is MCP-only — unusable without an MCP client. No CI pipeline integration, no `--json` CLI, no embeddable library | -| 6 | **One registry, one schema, no magic** | `LANGUAGE_REGISTRY` — add a language in <100 lines, 2 files. Uniform extraction across all languages | tree-sitter for all 32 languages with language-specific extractors. Adding a language requires Rust code + tree-sitter grammar. Uniform parser, but heavier per-language investment | **Codegraph wins.** Both use tree-sitter uniformly, but codegraph's JS extractors are dramatically simpler to write than narsil-mcp's Rust extractors | -| 7 | **Security-conscious defaults** — multi-repo opt-in | Single-repo MCP default. `apiKeyCommand` for secrets. `--multi-repo` opt-in | Multi-repo by default (`list_repos`, `discover_repos` always exposed). `--remote` flag enables cloning external repos. No credential isolation model | **Codegraph wins.** Single-repo default vs. multi-repo default. narsil-mcp's `discover_repos` and `add_remote_repo` tools are exposed without opt-in | -| 8 | **Honest about what we're not** | Code intelligence engine. 
Not an app, not a coding tool, not an agent | "Comprehensive code intelligence" — tries to be everything: search engine, security scanner, type checker, SBOM generator, license auditor, knowledge graph, visualization server | **Codegraph wins.** Codegraph has a clear boundary. narsil-mcp's 90-tool surface area spans security, compliance, visualization, type checking, and more — a breadth that risks being shallow everywhere | +| 1 | **The graph is always current** — rebuild on every commit/save/agent loop | File-level MD5 hashing, SQLite persistence. Change 1 file → <500ms rebuild. Watch mode, commit hooks, agent loops all practical | In-memory by default. `--watch` flag for auto-reindex. `--persist` for disk saves. Indexing is fast (2.1s for 50K symbols) but full re-index, not incremental | **Codegraph wins.** Narsil is fast but re-indexes everything. Codegraph only re-parses changed files — orders of magnitude faster for single-file changes in large repos | +| 2 | **Native speed, universal reach** — dual engine (Rust + WASM) | Native napi-rs with rayon parallelism + automatic WASM fallback. `npm install` on any platform | Pure Rust binary. Prebuilt for macOS/Linux/Windows. Also has WASM build (~3 MB) for browsers | **Tie.** Different approaches, both effective. Narsil is a single binary; codegraph is an npm package with native addon. Both have WASM stories | +| 3 | **Confidence over noise** — scored results | 6-level import resolution with 0.0-1.0 confidence on every edge. Graph quality score. Relevance-ranked search | BM25 ranking on search. No confidence scores on call graph edges. No graph quality metric | **Codegraph wins.** Every edge has a trust score; narsil's call graph edges are unscored | +| 4 | **Zero-cost core, LLM-enhanced when you choose** | Full pipeline local, zero API keys. Optional embeddings with user's LLM provider | Core is local. 
Neural search requires `--neural` flag + API key (Voyage AI/OpenAI) or local ONNX model | **Tie.** Both are local-first with optional AI enhancement. Narsil offers more backend choices (Voyage AI, OpenAI, ONNX); codegraph uses HuggingFace Transformers locally | +| 5 | **Functional CLI, embeddable API** | 35+ CLI commands + 18-tool MCP server + full programmatic JS API | MCP-first with 90 tools. `narsil-mcp config/tools` management commands but no standalone query CLI. No programmatic library API | **Codegraph wins.** Full CLI experience + embeddable API. Narsil is MCP-only for queries — useless without an MCP client | +| 6 | **One registry, one schema, no magic** | `LANGUAGE_REGISTRY` — add a language in <100 lines, 2 files | Tree-sitter for all 32 languages. Unified parser, but extractors are in compiled Rust — harder to contribute | **Codegraph wins slightly.** Both use tree-sitter uniformly. Codegraph's JS extractors are more accessible to contributors than narsil's compiled Rust | +| 7 | **Security-conscious defaults** — multi-repo opt-in | Single-repo MCP default. `apiKeyCommand` for secrets. `--multi-repo` opt-in | Multi-repo by default (`--repos` accepts multiple paths). `discover_repos` auto-finds repos. No sandboxing concept | **Codegraph wins.** Single-repo isolation by default vs. multi-repo by default | +| 8 | **Honest about what we're not** | Code intelligence engine. Not an app, not a coding tool, not an agent | Code intelligence MCP server. Also not an agent — but the open-core model adds commercial cloud features (narsil-cloud) | **Tie.** Both are honest about scope. Narsil's commercial layer is a legitimate business model | -**Score: Codegraph 7, narsil-mcp 0, Tie 1** — against codegraph's own principles, codegraph wins on every differentiating dimension. This is expected: the principles were designed around codegraph's value proposition. The feature comparison below examines where narsil-mcp's breadth creates genuine advantages. 
+**Score: Codegraph 4, Narsil 0, Tie 4** — codegraph wins on its own principles but the gap is much smaller than vs. Joern. Narsil is the closest philosophical competitor. --- @@ -48,213 +48,228 @@ Codegraph's foundation document defines the problem as: *"Fast local analysis wi ### A. Parsing & Language Support -| Feature | Codegraph | narsil-mcp | Best Approach | +| Feature | Codegraph | Narsil-MCP | Best Approach | |---------|-----------|------------|---------------| -| **Parser technology** | tree-sitter (WASM + native Rust) | tree-sitter (native Rust) | **Tie** — same underlying technology | -| **JavaScript** | Full extraction (functions, classes, methods, imports, exports, call sites) | Symbol extraction + call graph + type inference | **Tie** — both strong | -| **TypeScript** | First-class TS + TSX support | First-class TS support + type inference | **Tie** | -| **Python** | tree-sitter extraction | tree-sitter extraction + type inference | **narsil-mcp** — type inference adds value | -| **Go** | tree-sitter (structs, interfaces, methods) | tree-sitter extraction | **Tie** | -| **Rust** | tree-sitter (functions, structs, traits, enums, impls) | tree-sitter extraction (home language — most mature) | **narsil-mcp** — as a Rust project, Rust parsing is likely most battle-tested | +| **Parser technology** | tree-sitter (WASM + native Rust) | tree-sitter (compiled Rust) | **Tie** — same parser, different build strategies | +| **JavaScript/TypeScript/TSX** | First-class, separate grammars | Supported (JS + TS) | **Codegraph** — explicit TSX support | +| **Python** | tree-sitter | tree-sitter | **Tie** | +| **Go** | tree-sitter | tree-sitter | **Tie** | +| **Rust** | tree-sitter | tree-sitter | **Tie** | | **Java** | tree-sitter | tree-sitter | **Tie** | | **C/C++** | tree-sitter | tree-sitter | **Tie** | | **C#** | tree-sitter | tree-sitter | **Tie** | | **PHP** | tree-sitter | tree-sitter | **Tie** | | **Ruby** | tree-sitter | tree-sitter | **Tie** | -| 
**Terraform/HCL** | tree-sitter | Not supported | **Codegraph** | -| **Kotlin** | Not supported | tree-sitter | **narsil-mcp** | -| **Swift** | Not supported | tree-sitter | **narsil-mcp** | -| **Scala** | Not supported | tree-sitter | **narsil-mcp** | -| **Haskell** | Not supported | tree-sitter | **narsil-mcp** | -| **Elixir/Erlang** | Not supported | tree-sitter | **narsil-mcp** | -| **Dart** | Not supported | tree-sitter | **narsil-mcp** | -| **Zig** | Not supported | tree-sitter | **narsil-mcp** | -| **Lua, Julia, R, Perl, Clojure, Elm, Fortran, PowerShell, Nix, Groovy, Bash, Verilog/SystemVerilog** | Not supported | tree-sitter (14 additional languages) | **narsil-mcp** | -| **Language count** | 11 source languages | 32 source languages | **narsil-mcp** (32 vs 11) | -| **Adding a new language** | 1 registry entry + 1 JS extractor (<100 lines, 2 files) | Rust extractor module + tree-sitter grammar integration | **Codegraph** — dramatically lower barrier to contribution | -| **Incremental parsing** | File-level MD5 hash tracking in SQLite — persists across restarts | Merkle-tree file hashing in memory — lost on restart unless `--persist` | **Codegraph** — persistent by default vs. opt-in persistence | -| **Type inference** | Not available | Python, JavaScript, TypeScript (basic inference from assignments and returns) | **narsil-mcp** | - -**Summary:** narsil-mcp supports 3x more languages (32 vs 11) and adds type inference for dynamic languages. Codegraph is easier to extend (JS extractors vs. Rust modules) and has persistent incremental parsing by default. For codegraph's core audience (JS/TS/Python/Go web developers), both tools cover the essential languages. narsil-mcp's long tail (Fortran, Verilog, Elm, etc.) serves niche use cases. 
+| **Terraform/HCL** | tree-sitter | Not listed | **Codegraph** | +| **Kotlin** | Not supported | tree-sitter | **Narsil** | +| **Swift** | Not supported | tree-sitter | **Narsil** | +| **Scala** | Not supported | tree-sitter | **Narsil** | +| **Lua** | Not supported | tree-sitter | **Narsil** | +| **Haskell** | Not supported | tree-sitter | **Narsil** | +| **Elixir/Erlang** | Not supported | tree-sitter | **Narsil** | +| **Dart** | Not supported | tree-sitter | **Narsil** | +| **Julia/R/Perl** | Not supported | tree-sitter | **Narsil** | +| **Zig** | Not supported | tree-sitter | **Narsil** | +| **Verilog/SystemVerilog** | Not supported | tree-sitter | **Narsil** | +| **Fortran/PowerShell/Nix** | Not supported | tree-sitter | **Narsil** | +| **Bash** | Not supported | tree-sitter | **Narsil** | +| **Language count** | 11 | 32 | **Narsil** (3x more languages) | +| **Adding a new language** | 1 registry entry + 1 JS extractor (<100 lines, 2 files) | Rust code + recompile binary | **Codegraph** — dramatically lower barrier for contributors | +| **Incremental parsing** | File-level hash tracking — only changed files re-parsed | Full re-index (fast but complete) | **Codegraph** — orders of magnitude faster for single-file changes | +| **Callback pattern extraction** | Commander `.command().action()`, Express routes, event handlers | Not documented | **Codegraph** — framework-aware symbol extraction | + +**Summary:** Narsil covers 3x more languages (32 vs 11) using the same parser technology (tree-sitter). Codegraph has better incremental parsing, easier extensibility, and unique framework callback extraction. For codegraph's target users (JS/TS/Python/Go developers), codegraph's coverage is sufficient. Narsil's breadth matters for polyglot enterprises. --- ### B. 
Graph Model & Analysis Depth -| Feature | Codegraph | narsil-mcp | Best Approach | +| Feature | Codegraph | Narsil-MCP | Best Approach | |---------|-----------|------------|---------------| -| **Graph type** | Structural dependency graph (symbols + edges in SQLite) | RDF knowledge graph (Oxigraph) + in-memory symbol/call maps | **Codegraph** for queryability and persistence; **narsil-mcp** for semantic web interop | -| **Storage engine** | SQLite (always persistent, portable, universally readable) | In-memory DashMap + optional Oxigraph + optional Tantivy index | **Codegraph** — SQLite is a proven, inspectable, portable format | -| **Persistence model** | Always persistent (SQLite file) | In-memory by default; `--persist` for disk; lost on restart without it | **Codegraph** — persistence shouldn't be opt-in for a "graph" tool | -| **Node types** | 10 kinds: `function`, `method`, `class`, `interface`, `type`, `struct`, `enum`, `trait`, `record`, `module` | Language-specific symbols (functions, classes, structs, traits, modules, etc.) — count varies by language | **Tie** — similar symbol extraction granularity | -| **Edge types** | `calls`, `imports` — both with confidence scores (0.0-1.0) | `calls`, `imports` — binary (present/absent), no confidence scoring | **Codegraph** — scored edges vs. binary edges | -| **Import resolution** | 6-level priority system with confidence scoring (import-aware → same-file → directory → parent → global → method hierarchy) | Basic import graph extraction from tree-sitter AST | **Codegraph** — sophisticated multi-level resolution vs. 
AST-level extraction | -| **Call graph** | Import-aware resolution with qualified call filtering and confidence scoring | Call graph analysis with `--call-graph` flag (callers, callees, call paths, hotspots) | **Codegraph** for precision (confidence scoring); **narsil-mcp** for completeness (dedicated call-graph mode) | -| **Control flow graph** | Not available | CFG extraction with `get_control_flow` tool | **narsil-mcp** | -| **Data flow analysis** | Not available | Reaching definitions, dead stores, uninitialized variables via `get_data_flow` tools | **narsil-mcp** | -| **Taint analysis** | Not available | Source-to-sink taint tracking (SQL injection, XSS, command injection, path traversal) | **narsil-mcp** | -| **Dead code detection** | `roles --role dead` — unreferenced non-exported symbols | `find_dead_code` via control flow analysis | **Codegraph** for structural dead code; **narsil-mcp** for unreachable-code-path detection | -| **Complexity metrics** | Cognitive, cyclomatic, Halstead, MI, nesting depth per function | `get_complexity` (cyclomatic only, requires `--call-graph`) | **Codegraph** — 5 metrics vs. 1, always available vs. 
flag-gated | -| **Node role classification** | Auto-tags every symbol: `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` based on fan-in/fan-out | Not available | **Codegraph** | +| **Graph type** | Structural dependency graph (symbols + edges) in SQLite | In-memory symbol/file caches (DashMap) + optional RDF knowledge graph | **Codegraph** for persistence; **Narsil** for RDF expressiveness | +| **Node types** | 10 kinds: `function`, `method`, `class`, `interface`, `type`, `struct`, `enum`, `trait`, `record`, `module` | Functions, classes, methods, variables, imports, exports + more | **Narsil** — more granular | +| **Edge types** | `calls`, `imports` (with confidence scores) | Calls, imports, data flow, control flow, type relationships | **Narsil** — fundamentally more edge types | +| **Call graph** | Import-aware resolution with 6-level confidence scoring, qualified call filtering | `get_call_graph`, `get_callers`, `get_callees`, `find_call_path` | **Codegraph** for precision (confidence scoring); **Narsil** for completeness | +| **Control flow graph** | Not available | `get_control_flow` — basic blocks + branch conditions | **Narsil** | +| **Data flow analysis** | `flows_to`/`returns`/`mutates` edges (BACKLOG ID 14, recently shipped) | `get_data_flow`, `get_reaching_definitions`, `find_uninitialized`, `find_dead_stores` | **Narsil** — more mature with 4 dedicated tools | +| **Type inference** | Not available | `infer_types`, `check_type_errors` for Python/JS/TS | **Narsil** | +| **Dead code detection** | `roles --role dead` — unreferenced non-exported symbols | `find_dead_code` — unreachable code paths via CFG | **Both** — complementary approaches (structural vs. 
control-flow) | +| **Complexity metrics** | Cognitive, cyclomatic, Halstead, MI, nesting depth per function | Cyclomatic complexity only | **Codegraph** — 5 metrics vs 1 | +| **Node role classification** | Auto-tags: `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` | Not available | **Codegraph** | | **Community detection** | Louvain algorithm with drift analysis | Not available | **Codegraph** | -| **Impact analysis** | `fn-impact` (function-level), `diff-impact` (git-aware), `impact` (file-level) — all with transitive closure | Not available as a dedicated capability | **Codegraph** — first-class impact analysis is a major differentiator | -| **Shortest path** | `path ` — BFS between any two symbols | `find_call_path` — path between functions in call graph | **Tie** — similar capability | -| **SPARQL queries** | Not available | Full SPARQL query support over RDF graph (requires `--graph` feature flag) | **narsil-mcp** — powerful for semantic web integration | -| **Code Context Graph (CCG)** | Not available | Four-layer CCG standard with manifest, architecture, index, and full detail layers | **narsil-mcp** — novel approach to publishing code intelligence | +| **Impact analysis** | `fn-impact`, `diff-impact` (git-aware), `impact` (file-level) | Not purpose-built | **Codegraph** — first-class impact commands | +| **Shortest path** | `path ` — BFS between symbols | `find_call_path` — between functions | **Tie** | +| **SPARQL / Knowledge graph** | Not available | RDF graph via Oxigraph, SPARQL queries, predefined templates | **Narsil** — unique capability | +| **Code Context Graph (CCG)** | Not available | 4-layer hierarchical context (L0-L3) with JSON-LD/N-Quads export | **Narsil** — unique capability | -**Summary:** Codegraph's graph is deeper where it matters for developers: confidence-scored edges, multi-level import resolution, role classification, community detection, and purpose-built impact analysis. 
narsil-mcp goes wider: CFG, DFG, taint analysis, SPARQL, and CCG. Codegraph's SQLite persistence is a fundamental advantage — narsil-mcp's in-memory default means the "graph" evaporates on restart. +**Summary:** Narsil has broader analysis (CFG, dataflow, type inference, SPARQL, CCG). Codegraph is deeper on developer-facing metrics (5 complexity metrics, node roles, community detection, Louvain drift) and has unique impact analysis commands. Narsil's knowledge graph and CCG layering are genuinely novel features with no codegraph equivalent. --- -### C. Query Language & Interface +### C. Search & Retrieval -| Feature | Codegraph | narsil-mcp | Best Approach | +| Feature | Codegraph | Narsil-MCP | Best Approach | |---------|-----------|------------|---------------| -| **Primary interface** | CLI (35+ commands) + MCP (18 tools) + JS API | MCP only (90 tools) | **Codegraph** — three interfaces vs. one | -| **Standalone CLI** | Yes — full-featured CLI with `--help`, flags, pipe-friendly output | No — MCP-only, requires an MCP client to use | **Codegraph** — usable without any AI agent | -| **MCP tool count** | 18 purpose-built tools | 90 tools (26-75 active depending on preset) | **narsil-mcp** for breadth; **Codegraph** for token efficiency | -| **Token overhead** | 18 tools ≈ ~3,600 tokens for tool schemas | 90 tools ≈ ~12,000 tokens (full preset). Acknowledged problem — Forgemax gateway created to mitigate | **Codegraph** — 3.3x less token overhead. 
narsil-mcp's own solution (Forgemax) validates the problem | -| **Compound commands** | `context` (source + deps + callers + tests in 1 call), `explain` (structural summary), `audit` (explain + impact + health) | No compound tools — each tool returns one thing | **Codegraph** — compound commands reduce agent round-trips by 50-80% | -| **Preset system** | Not needed (18 tools is manageable) | `minimal` (26 tools), `balanced` (51), `full` (75+), `security-focused` — category-level enable/disable | **narsil-mcp** — good solution to the breadth problem, but the problem exists because of the breadth | -| **Tool filtering** | `buildToolList(multiRepo)` — single-repo vs. multi-repo | Per-category enable/disable, individual tool overrides, `max_tool_count` | **narsil-mcp** for granularity; **Codegraph** for simplicity | -| **JSON output** | `--json` flag on every CLI command | MCP responses are always structured JSON | **Tie** | -| **Programmatic API** | Full JS API: `import { buildGraph, queryNameData } from '@optave/codegraph'` | No library API — MCP-only | **Codegraph** — embeddable in VS Code extensions, CI pipelines, custom tools | -| **Batch queries** | `batch` command for multi-target dispatch | Not available as a single call | **Codegraph** | -| **SPARQL query language** | Not available | Full SPARQL over RDF graph | **narsil-mcp** — expressive for semantic queries | -| **Visualization** | DOT, Mermaid, JSON export | Embedded web frontend with interactive graph views (call, import, symbol, CFG) — requires `--features frontend` + `--http` | **narsil-mcp** for interactive visualization; **Codegraph** for text-based export | - -**Summary:** Codegraph serves three audiences (CLI users, MCP agents, API consumers). narsil-mcp serves one (MCP agents) but with 5x more tools. The 90-tool overhead is significant enough that narsil-mcp's creator built a separate project (Forgemax) to work around it. Codegraph's compound commands achieve more with fewer round-trips. 
+| **Keyword search** | BM25 via SQLite FTS5 | BM25 via Tantivy | **Tie** — different engines, same algorithm | +| **Semantic search** | HuggingFace Transformers (local, ~500 MB model) | TF-IDF (local) or neural (Voyage AI/OpenAI/ONNX) | **Narsil** — more backend choices | +| **Hybrid search** | BM25 + semantic with Reciprocal Rank Fusion | BM25 + TF-IDF hybrid | **Codegraph** — RRF fusion with full embeddings is higher quality | +| **Code similarity** | Not available | `find_similar_code`, `find_similar_to_symbol` | **Narsil** | +| **Semantic clone detection** | Not available | `find_semantic_clones` (Type-3/4 clones) | **Narsil** | +| **AST-aware chunking** | Not available | `get_chunks`, `get_chunk_stats` — respects AST boundaries | **Narsil** | +| **Symbol search** | `where` with name, kind, file, role filters | `find_symbols`, `workspace_symbol_search`, `find_references`, `find_symbol_usages` | **Narsil** — more search modes | +| **Export map** | `list-functions` with filters | `get_export_map` — all exported symbols per module | **Tie** — different interfaces, similar data | +| **Search latency** | Depends on FTS5/embedding model | <1μs exact, 16μs fuzzy, 80μs BM25, 130μs TF-IDF, 151μs hybrid | **Narsil** — published sub-millisecond benchmarks | + +**Summary:** Narsil has more search tools (similarity, clone detection, AST chunking) and more embedding backends. Codegraph has higher-quality hybrid search (RRF with full transformer embeddings vs. TF-IDF). For AI agent context preparation, narsil's AST-aware chunking is a notable gap. --- -### D. Performance & Resource Usage +### D. 
Security Analysis -| Feature | Codegraph | narsil-mcp | Best Approach | +| Feature | Codegraph | Narsil-MCP | Best Approach | |---------|-----------|------------|---------------| -| **Cold index (small project, ~50 files)** | <2 seconds | ~220ms (self-benchmark: 53 files in 220ms) | **narsil-mcp** — pure Rust is faster for cold indexing | -| **Cold index (medium project, ~3,000 files)** | 5-15 seconds | ~2.1 seconds (rust-analyzer: 2,847 files in 2.1s) | **narsil-mcp** — native Rust advantage | -| **Cold index (large project, ~80,000 files)** | 30-120 seconds (native Rust engine) | ~45 seconds (Linux kernel: 78K files in 45s) | **narsil-mcp** — but both are fast enough for practical use | -| **Incremental rebuild (1 file changed)** | <500ms (persistent — survives restarts) | Fast within session; full re-index on restart without `--persist` | **Codegraph** — persistent incremental is what matters for "always current" | -| **Memory usage (small project)** | <100 MB | ~50 MB (self-benchmark) | **narsil-mcp** — leaner for small projects | -| **Memory usage (large project)** | 300 MB - 1 GB | ~2.1 GB (Linux kernel benchmark) | **Codegraph** — SQLite offloads to disk; narsil-mcp holds everything in memory | -| **Startup time** | <100ms (Node.js) | Not benchmarked (Rust binary — likely <50ms) | **Tie** — both fast | -| **Parse throughput** | Not benchmarked at this granularity | 1.98 GiB/s (278 KB Rust file in 131μs) | **narsil-mcp** — impressive raw throughput | -| **Search latency (exact match)** | SQL query (<1ms typical) | 483 nanoseconds (in-memory) | **narsil-mcp** — in-memory wins on raw latency | -| **Search latency (fuzzy)** | SQL LIKE queries | 16.5μs fuzzy, 80μs BM25 full-text, 151μs hybrid | **narsil-mcp** — Tantivy is optimized for search | -| **Storage format** | SQLite file (compact, portable, inspectable with standard tools) | In-memory data structures + optional Tantivy index + optional Oxigraph store | **Codegraph** — universally readable format vs. 
opaque in-memory state | -| **Disk usage** | <10 MB for medium projects | Minimal (in-memory by default); Tantivy/Oxigraph indexes when persisted | **Tie** — both lightweight on disk | -| **Watch mode** | Built-in `watch` command for live incremental rebuilds | `--watch` flag for auto-reindex on file changes | **Tie** — both support it | -| **Background indexing** | Not available (fast enough to block) | MCP server starts before indexing completes; tools available progressively | **narsil-mcp** — useful for very large repos | - -**Summary:** narsil-mcp is faster at cold indexing (pure Rust advantage) and raw search (in-memory Tantivy). Codegraph wins on what matters for iterative development: persistent incremental rebuilds that survive restarts. A tool that's 10x faster at cold indexing but re-indexes from scratch on every restart is slower in practice than one that rebuilds incrementally from a persistent store. +| **Taint analysis** | Not available | `trace_taint`, `get_taint_sources`, `get_typed_taint_flow` | **Narsil** | +| **Vulnerability scanning** | Not available | `scan_security` with 147 built-in YAML rules | **Narsil** | +| **OWASP Top 10** | Not available | `check_owasp_top10` — dedicated compliance check | **Narsil** | +| **CWE Top 25** | Not available | `check_cwe_top25` — dedicated compliance check | **Narsil** | +| **Secret scanning** | Not available | Rules in `secrets.yaml` | **Narsil** | +| **SBOM generation** | Not available | `generate_sbom` — Software Bill of Materials | **Narsil** | +| **License compliance** | Not available | `check_licenses` | **Narsil** | +| **Dependency vulnerabilities** | Not available | `check_dependencies` — CVE checking | **Narsil** | +| **Vulnerability explanation** | Not available | `explain_vulnerability`, `suggest_fix` | **Narsil** | +| **Crypto misuse detection** | Not available | Rules in `crypto.yaml` | **Narsil** | +| **IaC security** | Not available | Rules in `iac.yaml` | **Narsil** | +| **Language-specific 
rules** | Not available | Rust, Elixir, Go, Java, C#, Kotlin, Bash rule files | **Narsil** | + +**Summary:** Narsil dominates security analysis completely with 147 rules across 12+ rule files. Codegraph has zero security features today — by design (FOUNDATION.md P8). OWASP pattern detection is on the roadmap as lightweight AST-based checks (BACKLOG ID 7), not taint analysis. --- -### E. Installation & Deployment +### E. Query Language & Interface -| Feature | Codegraph | narsil-mcp | Best Approach | +| Feature | Codegraph | Narsil-MCP | Best Approach | |---------|-----------|------------|---------------| -| **Primary install** | `npm install @optave/codegraph` | 8 methods: Homebrew, Scoop, Cargo, npm, Nix, AUR, shell script, source | **narsil-mcp** for platform coverage; **Codegraph** for simplicity | -| **Runtime dependency** | Node.js >= 20 | None (static Rust binary) | **narsil-mcp** — zero runtime dependencies | -| **npm install** | Yes (first-party) | Yes (`npm install -g narsil-mcp`) | **Tie** | -| **Platform binaries** | Auto-resolved per platform (`@optave/codegraph-{platform}-{arch}`) | Pre-built for major platforms via GitHub releases + package managers | **Tie** | -| **Binary size** | ~50 MB (with WASM grammars) | ~30-50 MB (varies by feature flags) | **Tie** | -| **Feature flags** | None — all features included | 6 compile-time flags (`native`, `graph`, `frontend`, `neural`, `neural-onnx`, `wasm`) + 6 runtime flags (`--git`, `--graph`, `--neural`, `--call-graph`, `--lsp`, `--remote`) | **Codegraph** — everything works out of the box vs. 
feature flag maze | -| **Configuration** | `.codegraphrc.json` + env vars + `apiKeyCommand` | `.narsil.yaml` + `~/.config/narsil-mcp/config.yaml` + env vars + CLI flags | **Tie** — similar layered config | -| **Offline capability** | Full functionality offline | Core functionality offline; neural search requires API keys (unless ONNX build) | **Codegraph** — fully offline by default | -| **Docker** | Not needed | Not needed | **Tie** | -| **Browser WASM** | WASM grammars for parsing (not a full browser build) | Full browser-compatible WASM build (~3 MB) via npm `@narsil-mcp/wasm` | **narsil-mcp** — browser deployment is unique | - -**Summary:** narsil-mcp has more installation options and zero runtime dependencies (static Rust binary). Codegraph is simpler — no feature flags, no compile-time decisions, everything works on `npm install`. narsil-mcp's feature flag system means the "90 tools" headline requires specific build flags + runtime flags to achieve. +| **Primary interface** | Full CLI with 35+ commands + MCP server | MCP server (primary) + config management CLI | **Codegraph** — usable without MCP client | +| **Standalone CLI queries** | `where`, `fn`, `explain`, `context`, `deps`, `impact`, `map`, etc. 
| Not available — all queries via MCP tools | **Codegraph** — narsil requires an MCP client for any query | +| **MCP tools count** | 21 purpose-built tools | 90 tools across 14 categories | **Narsil** — 4x more tools | +| **Compound queries** | `context` (source + deps + callers + tests), `explain`, `audit` | No compound tools — each tool is atomic | **Codegraph** — purpose-built for agent token efficiency | +| **Batch queries** | `batch` command for multi-target dispatch | No batch mechanism | **Codegraph** | +| **JSON output** | `--json` flag on every command | MCP JSON responses | **Tie** | +| **NDJSON streaming** | `--ndjson` with `--limit`/`--offset` on ~14 commands | `--streaming` flag for large results | **Tie** | +| **Pagination** | Universal `limit`/`offset` on all 21 MCP tools with per-tool defaults | Not documented | **Codegraph** | +| **SPARQL queries** | Not available | `sparql_query`, predefined templates | **Narsil** — unique expressiveness | +| **Configuration presets** | Not available | Minimal (~26 tools), Balanced (~51), Full (75+), Security-focused | **Narsil** — manages token cost per preset | +| **Visualization** | DOT, Mermaid, JSON export | Built-in web UI (Cytoscape.js) with interactive graphs | **Narsil** — interactive browser visualization | +| **Programmatic API** | Full JS API: `import { buildGraph, queryNameData } from '@optave/codegraph'` | No library API | **Codegraph** — embeddable in JS/TS projects | + +**Summary:** Codegraph is more accessible (full CLI + API + MCP). Narsil has more MCP tools (90 vs 21) but no standalone query interface — completely dependent on MCP clients. Codegraph's compound commands (`context`, `explain`, `audit`) reduce agent round-trips; narsil requires multiple atomic tool calls for equivalent context. Narsil's configuration presets are a smart approach to managing MCP tool token costs. --- -### F. AI Agent & MCP Integration +### F. 
Performance & Resource Usage -| Feature | Codegraph | narsil-mcp | Best Approach | +| Feature | Codegraph | Narsil-MCP | Best Approach | |---------|-----------|------------|---------------| -| **MCP server** | First-party, 18 tools, single-repo default | First-party, 90 tools (26-75 active by preset) | **Codegraph** for efficiency; **narsil-mcp** for breadth | -| **Token overhead** | ~3,600 tokens (18 tools) | ~4,700-12,000 tokens (26-75 tools by preset) | **Codegraph** — 1.3-3.3x less overhead | -| **Token overhead mitigation** | Not needed | Forgemax gateway collapses 90 tools → 2 tools (~1,100 tokens) | **narsil-mcp** has the problem; Forgemax is an acknowledgment, not a solution | -| **Compound commands** | `context`, `explain`, `audit` — multi-faceted answers in 1 call | Each tool returns one thing — agents must orchestrate multiple calls | **Codegraph** — fewer round-trips, less agent complexity | -| **Single-repo isolation** | Default — `--multi-repo` opt-in | Multi-repo default — `list_repos` and `discover_repos` always available | **Codegraph** — security-conscious default | -| **Multi-repo support** | Registry-based, opt-in via `--multi-repo` or `--repos` | Built-in with `list_repos`, `discover_repos`, `add_remote_repo` | **narsil-mcp** for multi-repo out of the box; **Codegraph** for security | -| **Remote repository support** | Not available | `--remote` flag enables cloning and analyzing external repos | **narsil-mcp** — unique feature | -| **Structured JSON output** | Every command supports `--json` | All MCP responses are structured JSON | **Tie** | -| **Pagination** | Built-in pagination helpers with configurable limits | Not documented | **Codegraph** | -| **Semantic search** | `search` command with optional embeddings (user's LLM provider) | `semantic_search`, `neural_search`, `hybrid_search` with Voyage AI/OpenAI/ONNX backends | **narsil-mcp** for search variety; **Codegraph** for bring-your-own-provider | -| **AST-aware chunking** | Not 
available | `get_chunks` — AST-boundary-aware code chunking for embedding | **narsil-mcp** — useful for RAG pipelines | -| **Programmatic embedding** | Full JS API: `import { buildGraph } from '@optave/codegraph'` | No library API | **Codegraph** — embeddable in custom tooling | - -**Summary:** Codegraph is optimized for the AI agent interaction model: fewer tools, compound commands, less token overhead, security-conscious defaults. narsil-mcp offers more tools but at a significant token cost — a cost its creator acknowledged by building Forgemax. For token-constrained AI agents (which is all of them), codegraph's approach is more practical. +| **Cold build (small, ~50 files)** | <2 seconds | ~220ms | **Narsil** (faster cold start) | +| **Cold build (medium, ~3,000 files)** | 5-15 seconds | ~2 seconds (50K symbols) | **Narsil** (faster cold start) | +| **Incremental rebuild (1 file changed)** | <500ms | Full re-index | **Codegraph** (100-1,000x faster for incremental) | +| **Memory usage** | <100 MB typical (SQLite-backed) | In-memory — grows with codebase size | **Codegraph** — predictable, bounded by SQLite | +| **Persistence** | SQLite by default — always persisted | In-memory by default. 
`--persist` opt-in | **Codegraph** — survives restarts without flag | +| **Startup time** | <100ms (Node.js, reads existing DB) | Index from scratch unless persisted | **Codegraph** — always has a warm DB | +| **Storage format** | SQLite file (compact, portable, universally readable) | Custom binary format (Tantivy + DashMap serialization) | **Codegraph** — SQLite is universally inspectable | +| **Symbol lookup** | SQL query on indexed column | <1μs (DashMap in-memory) | **Narsil** — in-memory is faster for hot lookups | +| **Search latency** | FTS5/embedding dependent | 80μs BM25, 130μs TF-IDF | **Narsil** — published sub-ms benchmarks | +| **Binary size** | ~50 MB (with WASM grammars) | ~30 MB (native feature set) | **Narsil** (smaller) | +| **Watch mode** | Built-in `watch` command | `--watch` flag | **Tie** | +| **Commit hook viability** | Yes — <500ms incremental rebuilds | Possible but re-indexes fully | **Codegraph** — incremental makes hooks invisible | +| **CI pipeline viability** | `check --staged` returns exit code 0/1 | No CI-specific tooling | **Codegraph** | + +**Summary:** Narsil is faster for cold starts and hot lookups (pure Rust + in-memory). Codegraph is vastly faster for incremental workflows — the 1-file-changed scenario that defines developer loops, commit hooks, and agent iterations. Codegraph's SQLite persistence means no re-indexing on restart; narsil defaults to in-memory and loses state. --- -### G. Security Analysis +### G. 
Installation & Deployment -| Feature | Codegraph | narsil-mcp | Best Approach | +| Feature | Codegraph | Narsil-MCP | Best Approach | |---------|-----------|------------|---------------| -| **Taint analysis** | Not available | Source-to-sink tracking (SQL injection, XSS, command injection, path traversal) | **narsil-mcp** | -| **OWASP Top 10** | Not available | `check_owasp_top10` tool with detection rules | **narsil-mcp** | -| **CWE Top 25** | Not available | `check_cwe_top25` tool with detection rules | **narsil-mcp** | -| **Security rules engine** | Not available | 147 bundled rules with language-specific rule sets (Rust: 18, Elixir: 18, Go, Java, C#, Kotlin, Bash, IaC) | **narsil-mcp** | -| **Custom security rules** | Not available | `--ruleset` flag for loading custom rules | **narsil-mcp** | -| **Vulnerability explanation** | Not available | `explain_vulnerability` and `suggest_fix` tools | **narsil-mcp** | -| **SBOM generation** | Not available | CycloneDX, SPDX, JSON formats via `generate_sbom` | **narsil-mcp** | -| **Dependency vulnerability checking** | Not available | OSV database checking via `check_dependencies` | **narsil-mcp** | -| **License compliance** | Not available | `check_licenses` tool | **narsil-mcp** | -| **Secrets detection** | Not available | API keys, passwords, tokens in security rules | **narsil-mcp** | -| **Crypto weakness detection** | Not available | Weak algorithms, hardcoded keys detection | **narsil-mcp** | -| **Security summary** | Not available | `get_security_summary` — aggregated security posture | **narsil-mcp** | - -**Summary:** narsil-mcp dominates security analysis completely. Codegraph has no security features today. This is by design — FOUNDATION.md Principle 8 says "we are not a security tool." narsil-mcp's 147-rule engine with OWASP/CWE coverage is impressive, though the depth of its taint analysis (tree-sitter-based, no type system) should be evaluated against dedicated SAST tools. 
+| **Install method** | `npm install @optave/codegraph` | brew, scoop, cargo, npm, AUR, nix, install scripts | **Narsil** — more package managers | +| **Runtime dependency** | Node.js >= 20 | None (single binary) | **Narsil** — zero runtime deps | +| **Docker** | Not required | Not required | **Tie** | +| **Platform binaries** | npm auto-resolves `@optave/codegraph-{platform}-{arch}` | Prebuilt for macOS/Linux/Windows | **Tie** | +| **Browser build** | Not available | WASM package `@narsil-mcp/wasm` (~3 MB) | **Narsil** | +| **Configuration** | `.codegraphrc.json` + env vars + `apiKeyCommand` | `.narsil.yaml` + env vars + presets + interactive wizard | **Narsil** — more options including wizard | +| **Config management** | Manual file editing | `narsil-mcp config init/show/validate` | **Narsil** — built-in config tooling | +| **Editor integration** | Claude Code MCP config | Pre-built configs for Claude Code, Cursor, VS Code, Zed, JetBrains | **Narsil** — more pre-built editor configs | +| **Uninstall** | `npm uninstall` | Package manager dependent | **Tie** | + +**Summary:** Narsil is easier to install (single binary, more package managers, no Node.js required) and has better editor integration configs. Codegraph's npm-based install is simpler for Node.js developers but requires Node.js. Narsil's interactive config wizard and preset system lower the barrier to entry. --- -### H. Developer Productivity Features +### H. 
AI Agent & MCP Integration -| Feature | Codegraph | narsil-mcp | Best Approach | +| Feature | Codegraph | Narsil-MCP | Best Approach | |---------|-----------|------------|---------------| -| **Impact analysis (function-level)** | `fn-impact ` — transitive callers + downstream impact with scored edges | Not available | **Codegraph** | -| **Impact analysis (git-aware)** | `diff-impact --staged` / `diff-impact main` — shows what functions break from git changes | Not available | **Codegraph** | -| **CI gate** | `check --staged` — exit code 0/1 (cycles, complexity, blast radius, boundaries) | Not available (MCP-only, no CI interface) | **Codegraph** | -| **Manifesto rules engine** | `manifesto` — configurable warn/fail thresholds for code health | Not available | **Codegraph** | -| **Architecture boundaries** | `boundaries` — onion architecture preset, custom boundary rules | Not available | **Codegraph** | -| **Complexity metrics** | `complexity` — cognitive, cyclomatic, Halstead, MI, nesting depth per function | `get_complexity` — cyclomatic only (requires `--call-graph`) | **Codegraph** — 5 metrics vs. 1 | -| **Code health / structure** | `structure` — directory hierarchy with cohesion scores + per-file metrics | `get_project_structure` — file tree only | **Codegraph** — structural analysis vs. file listing | -| **Hotspot detection** | `hotspots` — files/dirs with extreme fan-in/fan-out/density | `get_function_hotspots` — most-called functions (requires `--call-graph`) | **Codegraph** — multi-dimensional hotspots vs. single-metric | -| **Co-change analysis** | `co-change` — git history analysis for files that change together | Not available | **Codegraph** | +| **MCP tools** | 21 purpose-built tools | 90 tools across 14 categories | **Narsil** (4x more tools) | +| **Token efficiency** | `context`/`explain`/`audit` compound commands reduce round-trips 50-80% | Atomic tools only. 
Forgemax integration collapses 90 → 2 tools (~1,000 vs ~12,000 tokens) | **Codegraph** natively; **Narsil** via Forgemax | +| **Tool token cost** | ~4,000 tokens for 21 tool definitions | ~12,000 tokens for full set. Presets: Minimal ~4,600, Balanced ~8,900 | **Codegraph** — lower base cost. Narsil presets help | +| **Pagination** | Universal `limit`/`offset` on all tools with per-tool defaults, hard cap 1,000 | `--streaming` for large results | **Codegraph** — structured pagination metadata | +| **Multi-repo support** | Registry-based, opt-in via `--multi-repo` or `--repos` | Multi-repo by default, `discover_repos` auto-detection | **Narsil** for convenience; **Codegraph** for security | +| **Single-repo isolation** | Default — tools have no `repo` property unless `--multi-repo` | Not default — multi-repo access is always available | **Codegraph** — security-conscious default | +| **Programmatic embedding** | Full JS API for VS Code extensions, CI pipelines, other MCP servers | No library API | **Codegraph** | +| **CCG context layers** | Not available | L0-L3 hierarchical context for progressive disclosure | **Narsil** — novel approach to context management | +| **Remote repo indexing** | Not available | `add_remote_repo` clones and indexes GitHub repos | **Narsil** | + +**Summary:** Narsil has 4x more MCP tools but higher token overhead. Codegraph's compound commands are more token-efficient per query. Narsil's CCG layering and configuration presets are innovative approaches to managing AI agent context budgets. Codegraph's programmatic API enables embedding scenarios narsil cannot serve. + +--- + +### I. 
| **Impact analysis (function-level)** | `fn-impact <fn>` — transitive callers + downstream | Not purpose-built | **Codegraph** |
similar capability | -| **Git integration** | `diff-impact` (git-aware impact analysis), `co-change` (history analysis) | 9 git tools: blame, history, hotspots, contributors, diffs, symbol history (requires `--git`) | **narsil-mcp** for git data exposure; **Codegraph** for git-aware analysis | -| **Execution flow tracing** | `flow` — traces from entry points through callees to leaves | Not available | **Codegraph** | -| **Module overview** | `map` — high-level module map with most-connected nodes | Not available | **Codegraph** | -| **Export formats** | DOT, Mermaid, JSON | RDF/N-Quads, JSON-LD, CCG layers | **Codegraph** for developer formats; **narsil-mcp** for semantic web formats | - -**Summary:** Codegraph has 15+ purpose-built developer productivity commands that narsil-mcp lacks entirely. Impact analysis, CI gates, manifesto rules, architecture boundaries, co-change analysis, triage — these are codegraph's core value proposition. narsil-mcp exposes raw data (git blame, file history) but doesn't synthesize it into actionable intelligence. 
**Summary:** Codegraph has 15+ purpose-built developer productivity commands that Narsil lacks (impact analysis, manifesto, triage, boundaries, co-change, branch-compare, audit, structure, CODEOWNERS). Narsil has richer git integration tools (blame, contributors, symbol history) and interactive visualization. For the "what breaks if I change this?" workflow, Codegraph is the clear choice.
Ecosystem & Community -| Feature | Codegraph | narsil-mcp | Best Approach | +| Feature | Codegraph | Narsil-MCP | Best Approach | |---------|-----------|------------|---------------| -| **GitHub stars** | New project (growing) | ~120 | **narsil-mcp** — slightly more visible | -| **Contributors** | Small team | 3 (postrv, ask4fusora, Cognitohazard) | **Tie** — both small teams | -| **Age** | 2026 | December 2024 (~15 months) | **Tie** — both young | -| **Release cadence** | As needed | 10+ releases in 2 months (v1.1.4 → v1.6.1) | **narsil-mcp** — rapid iteration | -| **Tests** | vitest suite with integration, parser, and search tests | 1,763+ passing tests | **narsil-mcp** — impressive test count for a young project | -| **Documentation** | CLAUDE.md + CLI `--help` + programmatic API docs | README + inline comments. No dedicated docs site | **Codegraph** — more structured, though both could improve | -| **Companion projects** | None | Forgemax (MCP gateway), CCG standard/registry | **narsil-mcp** — broader ecosystem vision | -| **Language** | JavaScript (ES modules) + optional Rust native addon | Pure Rust (56K SLoC) | **narsil-mcp** — type-safe, memory-safe codebase | -| **License** | Apache-2.0 | Apache-2.0 / MIT (dual) | **narsil-mcp** — dual license is more permissive | -| **npm package** | `@optave/codegraph` | `narsil-mcp` + `@narsil-mcp/wasm` | **Tie** | -| **Commercial backing** | Optave AI Solutions Inc. | None (solo project) | **Codegraph** — company backing provides stability | - -**Summary:** Both are young, small-team projects. narsil-mcp iterates rapidly (10+ releases in 2 months) with impressive test coverage. Codegraph has commercial backing (Optave). narsil-mcp's companion projects (Forgemax, CCG standard) show ambition, but the 3-contributor base is a bus-factor risk. 
+| **GitHub stars** | Growing | 120 | **Narsil** (slightly) | +| **License** | Apache-2.0 | Apache-2.0 OR MIT (dual) | **Narsil** — dual license is more permissive | +| **Release cadence** | As needed | Regular (v1.6.1 latest, Feb 2026) | **Tie** | +| **Test suite** | Vitest | 1,763+ tests + criterion benchmarks | **Narsil** — more tests, published benchmarks | +| **Documentation** | CLAUDE.md + CLI `--help` | narsilmcp.com + README + editor configs | **Narsil** — dedicated docs site | +| **Commercial backing** | Optave AI Solutions Inc. | Open-core model (narsil-cloud private repo) | **Both** — different business models | +| **Integration ecosystem** | MCP + programmatic API | Forgemax, Ralph, Claude Code plugin | **Narsil** — more third-party integrations | +| **Browser story** | Not available | WASM package for browser-based analysis | **Narsil** | +| **CCG standard** | Not available | Code Context Graph — a proposed standard for AI code context | **Narsil** — potential industry standard | + +**Summary:** Narsil has a more developed ecosystem (docs site, editor configs, third-party integrations, browser build, CCG standard). Both are commercially backed. Narsil's open-core model (commercial cloud features in private repo) is a viable business approach. --- @@ -262,93 +277,139 @@ Codegraph's foundation document defines the problem as: *"Fast local analysis wi ### Choose Codegraph when: -1. **You need the graph to survive restarts** — codegraph's SQLite persistence is always-on. narsil-mcp loses its index on restart unless you opt into `--persist`. -2. **You're building CI/CD pipelines** — `check --staged` returns exit code 0/1 in seconds. narsil-mcp has no CLI, no CI interface, no exit codes. -3. **Token overhead matters** — 18 tools (~3,600 tokens) vs. 26-75 tools (~4,700-12,000 tokens). In agent loops where every token counts, codegraph is 1.3-3.3x more efficient. -4. **You need impact analysis** — "what breaks if I change this?" is codegraph's core question. 
`fn-impact`, `diff-impact`, `audit` — none of these exist in narsil-mcp. -5. **You want scored, confidence-ranked results** — every edge has a 0.0-1.0 confidence score. narsil-mcp returns binary found/not-found. -6. **You need compound answers** — `context` returns source + deps + callers + tests in one call. narsil-mcp requires 4+ separate tool invocations. -7. **You want to embed in other tools** — codegraph has a full JS API for VS Code extensions, CI pipelines, and custom tooling. narsil-mcp is MCP-only. -8. **You need code health governance** — manifesto rules, architecture boundaries, complexity thresholds, triage queues. narsil-mcp has none of this. - -### Choose narsil-mcp when: - -1. **You need security scanning** — taint analysis, OWASP Top 10, CWE Top 25, SBOM generation, license compliance. Codegraph has zero security features. -2. **You work with many languages** — 32 languages vs. 11. If your codebase includes Kotlin, Swift, Scala, Haskell, Elixir, Dart, or Zig, narsil-mcp covers them. -3. **You need CFG/DFG analysis** — control flow graphs, data flow analysis, reaching definitions, dead stores. Codegraph's structural graph doesn't capture these. -4. **You want semantic search with neural embeddings** — narsil-mcp has Voyage AI, OpenAI, and local ONNX backends with BM25 hybrid search. Codegraph's semantic search is simpler. -5. **You need SPARQL/RDF integration** — for knowledge graph queries, semantic web interop, or CCG standard compliance. -6. **You want browser-based code intelligence** — narsil-mcp has a 3 MB WASM build and an embedded web frontend with interactive graph visualization. -7. **You need type inference** — basic type inference for Python, JavaScript, and TypeScript adds value for dynamic language analysis. -8. **You want maximum tool variety** — 90 tools covering search, navigation, security, git, LSP, remote repos, visualization, and more. +1. 
**You need the graph to stay current in tight feedback loops** — commit hooks, watch mode, AI agent loops. Codegraph's incremental <500ms rebuilds vs. Narsil's full re-index.
**You're building an MCP-first agent pipeline** — 90 tools cover nearly every code analysis need. One server, one config. +8. **You want a browser-based analysis tool** — narsil's WASM build runs analysis in the browser. +9. **You need SPARQL/RDF knowledge graph** — unique capability for semantic code querying. +10. **You need code similarity / clone detection** — `find_similar_code`, `find_semantic_clones`. Codegraph has no similarity tools. ### Use both together when: -- **Security + productivity pipeline**: Codegraph for structural intelligence in agent loops (impact analysis, CI gates, code health), narsil-mcp for security scanning (taint analysis, OWASP/CWE checks, SBOM). -- **Multi-language monorepo**: Codegraph for core languages (JS/TS/Python/Go) with deep graph intelligence, narsil-mcp for additional languages (Kotlin, Swift, Scala) with broad coverage. -- **Agent + CI workflow**: narsil-mcp for real-time agent exploration (90 tools via MCP), codegraph for CI gates and governance (`check --staged`, `manifesto`, `boundaries`). +- **CI pipeline**: Codegraph for fast structural checks on every commit (`check --staged`), narsil for periodic security scans. +- **AI agent workflow**: Codegraph's compound commands for fast structural context; narsil's security tools for vulnerability assessment. +- **Pre-commit + periodic audit**: Codegraph in commit hooks (fast, incremental), narsil for weekly security/compliance reports. 
--- -## Gap Analysis: What Codegraph Could Learn from narsil-mcp +## Key Metrics Summary -### Worth adopting (adapted to codegraph's model) +| Metric | Codegraph | Narsil-MCP | Winner | +|--------|-----------|------------|--------| +| Incremental rebuild speed | <500ms | N/A (full re-index) | Codegraph | +| Cold build speed | Seconds | Sub-seconds to seconds | Narsil | +| Memory usage | <100 MB typical | Grows with codebase (in-memory) | Codegraph | +| Install complexity | `npm install` (requires Node.js) | Single binary (brew/scoop/cargo) | Narsil | +| Analysis depth (structural) | High (impact, complexity, roles) | High (CFG, DFG, type inference) | Tie | +| Analysis depth (security) | None | Best in class (147 rules, taint) | Narsil | +| AI agent integration | 21-tool MCP + compound commands | 90-tool MCP + presets + CCG | Narsil for breadth; Codegraph for efficiency | +| Developer productivity | 15+ purpose-built commands | Git tools only | Codegraph | +| Language support | 11 | 32 | Narsil | +| Standalone CLI | Full CLI experience | Config/tools management only | Codegraph | +| Programmatic API | Full JS API | None | Codegraph | +| Community & maturity | New | Newer (Dec 2025), growing fast | Tie | +| CI/CD readiness | Yes (`check --staged`) | No CI tooling | Codegraph | +| Visualization | DOT/Mermaid/JSON export | Interactive Cytoscape.js web UI | Narsil | +| Search backends | FTS5 + HuggingFace local | Tantivy + TF-IDF + Voyage/OpenAI/ONNX | Narsil | -| narsil-mcp Feature | Adaptation for Codegraph | FOUNDATION.md Alignment | Effort | Priority | -|---------------------|--------------------------|------------------------|--------|----------| -| **More languages** | Add Kotlin, Swift, Scala, Dart via tree-sitter — same registry pattern. 
Prioritize by user demand | Principle 6 (one registry) — perfect fit, each language is 1 entry + 1 extractor | Low per language | High — closes the gap from 11 to 15+ without changing architecture | -| **Preset/filtering system** | Allow `.codegraphrc.json` to specify which MCP tools to expose per project. Useful as tool count grows | Principle 7 (security-conscious defaults) — fine-grained control | Low | Medium — not urgent at 18 tools, but good to have before reaching 30+ | -| **BM25 full-text search** | Add Tantivy-like full-text search alongside semantic search for zero-config code search without embeddings | Principle 4 (zero-cost core) — no API keys needed | Medium | Medium — improves search without requiring LLM setup | -| **AST-aware chunking** | Export AST-boundary-aware code chunks for RAG pipelines via programmatic API | Principle 5 (embeddable API) — enhances API for downstream consumers | Medium | Medium — useful for RAG integration | -| **Background indexing** | Allow MCP server to start before indexing completes, exposing tools progressively | Principle 1 (always current) — reduces perceived build time for large repos | Medium | Low — codegraph's builds are fast enough that this rarely matters | -| **Interactive visualization** | Browser-based graph explorer (call graph, import graph, community map) via `export --format html` | Principle 5 (functional CLI) — extends output formats | High | Medium — already on roadmap | +**Final score against FOUNDATION.md principles: Codegraph 4, Narsil 0, Tie 4.** +Narsil competes much more closely on codegraph's principles than Joern does. The gap is in incremental rebuilds (P1), confidence scoring (P3), CLI + API (P5), and single-repo isolation (P7). -### Not worth adopting (violates FOUNDATION.md or marginal value) +--- -| narsil-mcp Feature | Why Not | -|---------------------|---------| -| **90 MCP tools** | Breadth-over-depth approach creates token overhead that narsil-mcp itself had to solve with Forgemax. 
Codegraph's compound commands are the right answer — more value per tool, not more tools | -| **RDF/SPARQL/CCG** | Solves a different problem (semantic web interop, not developer productivity). Would add complexity without serving codegraph's target users. If CCG gains adoption, implement as an export format, not a core graph model | -| **Taint analysis** | Requires CFG/DFG infrastructure we don't have. Adding it would slow builds (violating Principle 1) and expand scope (violating Principle 8). Dedicated SAST tools do this better | -| **In-memory graph model** | narsil-mcp's in-memory approach is faster for cold indexing but fundamentally incompatible with Principle 1 (always current). SQLite persistence is non-negotiable | -| **Type inference** | Tree-sitter-based type inference for dynamic languages is inherently limited. Better to invest in confidence scoring and LLM-enhanced analysis (Principle 4) than build a partial type system | -| **Forgemax gateway** | Solves a problem we don't have. 18 tools at ~3,600 tokens doesn't need a gateway. If we grow beyond 30 tools, presets are the simpler answer | -| **Feature flags (compile-time)** | Codegraph's "everything works out of the box" is a feature. Requiring users to choose build variants (graph? neural? frontend?) adds friction that violates Principle 2 (universal reach) | -| **MCP-only interface** | Limiting. Codegraph's three-interface approach (CLI + MCP + API) serves developers, agents, and CI pipelines. Removing the CLI would lose two audiences | +## Narsil-Inspired Feature Candidates ---- +Features extracted from **all comparison sections** above, assessed using the [BACKLOG.md](../../docs/roadmap/BACKLOG.md) tier and grading system. See the [Scoring Guide](../../docs/roadmap/BACKLOG.md#scoring-guide) for column definitions. 
-## Competitive Positioning Statement +### Tier 1 — Zero-dep + Foundation-aligned (build these first) -> **narsil-mcp is the widest code intelligence MCP server** — 90 tools, 32 languages, security scanning, SPARQL, neural search, browser WASM. It's an impressive feat of engineering for a 15-month-old solo project. -> -> **But width isn't depth.** narsil-mcp's graph vanishes on restart unless you opt into persistence. Its 90 tools cost 3.3x more tokens than codegraph's 18 — a problem its creator acknowledged by building an entire separate project (Forgemax) to work around it. Its security scanning is tree-sitter-based, not compiler-grade. Its MCP-only interface means no CI integration, no standalone CLI, no embeddable library. -> -> **Codegraph occupies a fundamentally different position:** always-current structural intelligence with persistent incremental builds, confidence-scored edges, and purpose-built compound commands. Where narsil-mcp answers "here's everything about your code," codegraph answers "here's what breaks if you change this function" — and answers it with scored confidence, in under 500ms, from a graph that never needs rebuilding from scratch. -> -> For AI agents that need fast, reliable, token-efficient code intelligence in iterative development loops, codegraph is the better tool. For agents that need broad coverage across 32 languages with security scanning, narsil-mcp fills gaps codegraph intentionally doesn't. They can coexist — codegraph for the inner loop, narsil-mcp for the outer loop. +Non-breaking, ordered by problem-fit: ---- +| ID | Title | Description | Source | Category | Benefit | Zero-dep | Foundation-aligned | Problem-fit (1-5) | Breaking | +|----|-------|-------------|--------|----------|---------|----------|-------------------|-------------------|----------| +| N1 | MCP tool presets | Configurable MCP tool subsets (minimal/balanced/full/custom) that control which tools are registered. 
| N5 | Remote repo indexing | Allow `codegraph build <url>` to clone and index a remote repository. Useful for comparing dependencies, upstream libraries, or reviewing PRs on forks. Inspired by narsil's `add_remote_repo`.
| H | Developer Experience | Agents can analyze dependencies and upstream repos without manual cloning — enables cross-repo context gathering in one command | ✓ | ✓ | 3 | No | +| N6 | Configuration wizard | Interactive `codegraph init` that detects project structure, suggests `.codegraphrc.json` settings, and auto-configures MCP for the user's editor. Inspired by narsil's `config init` wizard and pre-built editor configs. | G | Developer Experience | Reduces setup friction — new users get a working config in seconds instead of reading docs | ✓ | ✓ | 2 | No | +| N7 | Kotlin language support | Add tree-sitter-kotlin to `LANGUAGE_REGISTRY`. 1 registry entry + 1 extractor. Narsil covers 32 languages; Kotlin is the highest-value gap for codegraph's target audience (Android/KMP). | A | Parsing | Extends coverage to Android/KMP — closes the most impactful language gap vs. narsil | ✓ | ✓ | 2 | No | +| N8 | Swift language support | Add tree-sitter-swift to `LANGUAGE_REGISTRY`. 1 registry entry + 1 extractor. Narsil covers Swift; codegraph does not. | A | Parsing | Extends coverage to Apple/iOS — closes a visible language gap | ✓ | ✓ | 2 | No | +| N9 | Bash language support | Add tree-sitter-bash to `LANGUAGE_REGISTRY`. 1 registry entry + 1 extractor. Bash scripts are ubiquitous in CI/CD and developer tooling. | A | Parsing | Covers CI scripts, Dockerfiles, and developer tooling — commonly co-located with source code | ✓ | ✓ | 2 | No | +| N10 | Scala language support | Add tree-sitter-scala to `LANGUAGE_REGISTRY`. 1 registry entry + 1 extractor. Relevant for JVM ecosystem coverage. 
| A | Parsing | Closes language gap for JVM polyglot codebases | ✓ | ✓ | 2 | No | -## Key Metrics Summary +Breaking (penalized to end of tier): -| Metric | Codegraph | narsil-mcp | Winner | -|--------|-----------|------------|--------| -| Persistent incremental builds | Yes (SQLite, always-on) | In-memory; opt-in `--persist` | Codegraph | -| Cold indexing speed | Seconds | Sub-seconds to seconds | narsil-mcp | -| Memory usage (large repos) | 300 MB - 1 GB (SQLite offload) | 2+ GB (in-memory) | Codegraph | -| MCP token overhead | ~3,600 tokens (18 tools) | ~4,700-12,000 tokens (26-75 tools) | Codegraph | -| Language support | 11 | 32 | narsil-mcp | -| Security analysis | None | Taint + OWASP + CWE + SBOM | narsil-mcp | -| Confidence scoring | 0.0-1.0 on every edge | None | Codegraph | -| Developer productivity commands | 35+ built-in | ~5 relevant (complexity, hotspots, dead code) | Codegraph | -| CI/CD integration | `check --staged` (exit code 0/1) | None (MCP-only) | Codegraph | -| Programmatic API | Full JS API | None | Codegraph | -| Standalone CLI | 35+ commands | None | Codegraph | -| Impact analysis | fn-impact, diff-impact, audit | None | Codegraph | -| Search capabilities | SQL + semantic | BM25 + TF-IDF + neural + hybrid | narsil-mcp | -| Interactive visualization | Export only (DOT/Mermaid) | Embedded web frontend | narsil-mcp | -| Community maturity | Company-backed, small team | 3 contributors, 120 stars | Tie | - -**Final score against FOUNDATION.md principles: Codegraph 7, narsil-mcp 0, Tie 1.** -narsil-mcp competes on breadth (more languages, more tools, more analysis types) rather than on the principles codegraph was built around. Its strengths — security scanning, language count, search variety — are real but orthogonal to codegraph's core value proposition of always-current, confidence-scored, developer-focused structural intelligence. 
| N11 | Export map per module | Dedicated `exports <module>` command listing all exported symbols with types, roles, and consumers. Inspired by narsil's `get_export_map`. Currently inferable from `explain` but not first-class. | B | Navigation | Agents quickly understand a module's public API surface without reading source — useful for import resolution and interface discovery | ✓ | ✓ | 3 | Yes |
| C | Search | Users who already pay for an LLM provider get better embeddings at no extra cost — and local ONNX gives a lighter alternative to the 500MB transformer model | ✗ | ✓ | 3 | No | + +### Tier 3 — Not foundation-aligned (needs deliberate exception) + +| ID | Title | Description | Source | Category | Benefit | Zero-dep | Foundation-aligned | Problem-fit (1-5) | Breaking | +|----|-------|-------------|--------|----------|---------|----------|-------------------|-------------------|----------| +| N14 | OWASP/CWE pattern detection | Lightweight AST-based security scanning using YAML rule files. Not taint analysis — pattern matching on AST nodes (e.g. `eval()`, hardcoded secrets, SQL string concatenation). Inspired by narsil's 147-rule security engine. Already on roadmap (BACKLOG ID 7). | D | Security | Catches low-hanging security issues during `diff-impact`; agents flag risky patterns before they're committed | ✓ | ✗ | 1 | No | +| N15 | SBOM generation | Generate a Software Bill of Materials from `package.json`/`requirements.txt`/`go.mod`. Lightweight — parse manifest files already in scope. Inspired by narsil's `generate_sbom`. | D | Security | Supply chain visibility without external tools — useful for compliance audits | ✓ | ✗ | 1 | No | + +### Not adopted (violates FOUNDATION.md) + +These narsil-mcp features were evaluated and deliberately excluded: + +| Narsil Feature | Section | Why Not | +|----------------|---------|---------| +| **Taint analysis** | D | Requires control-flow and data-dependence infrastructure. Would 10-100x build time, violating P1. Narsil's tree-sitter-based taint is impressive but trades performance for depth | +| **Type inference engine** | B | Requires language-specific type solvers beyond tree-sitter AST. Violates P6 (one registry, no magic). Lightweight type annotation extraction (Joern-inspired J2) is the pragmatic alternative | +| **SPARQL / RDF knowledge graph** | B, E | Requires Oxigraph dependency. 
SQLite + existing query commands serve our use case. RDF/SPARQL is overkill for structural code intelligence — powerful but orthogonal to our goals | +| **Code Context Graph (CCG) standard** | B, H | Interesting concept but tightly coupled to narsil's architecture and commercial model. Our MCP pagination + compound commands solve the progressive-disclosure problem differently | +| **In-memory-first architecture** | F | Violates P1 (graph must survive restarts to stay always-current). SQLite persistence is a deliberate choice — narsil's opt-in persistence means state loss on every restart by default | +| **90-tool MCP surface** | E, H | More tools = more token overhead per agent session. Our 21 purpose-built tools + compound commands are more token-efficient. Narsil compensates with presets; we compensate with fewer, smarter tools | +| **Browser WASM build** | G, J | Different product category. We're a CLI/MCP engine, not a browser tool (P8). Narsil's WASM build is a legitimate capability, but building a browser runtime is outside our scope | +| **Forgemax-style tool collapsing** | H | Collapses 90 tools to 2 (`search`/`execute`). We don't need this because we already have ~21 tools — small enough that collapsing adds complexity without meaningful savings | +| **LSP integration** | B | Requires running language servers alongside codegraph. Violates zero-dependency goal. Tree-sitter + confidence scoring is our approach; LSP is a different architectural bet | +| **License compliance scanning** | D | Tangential to code intelligence. Better served by dedicated tools (FOSSA, Snyk, etc.) 
| + +### Cross-references to existing BACKLOG items + +These narsil-inspired capabilities are already tracked in [BACKLOG.md](../../docs/roadmap/BACKLOG.md): + +| BACKLOG ID | Title | Narsil Equivalent | Relationship | +|------------|-------|-------------------|--------------| +| 7 | OWASP/CWE pattern detection | `scan_security` with 147 rules | Lightweight AST-based alternative to narsil's full rule engine. N14 above. Already Tier 3 | +| 8 | Optional LLM provider integration | `--neural-backend api\|onnx` | Multiple embedding providers. N13 above. Already Tier 2 | +| 10 | Interactive HTML visualization | Built-in Cytoscape.js frontend | Browser-based interactive graph. N12 above. Already Tier 3 | +| 14 | Dataflow analysis | `get_data_flow`, `get_reaching_definitions` | Lightweight def-use chains. Narsil has 4 dedicated dataflow tools. Already Tier 1 Breaking | + +### Cross-references to Joern-inspired candidates + +Some features identified in this analysis overlap with [Joern-inspired candidates](./joern.md#joern-inspired-feature-candidates): + +| Joern ID | Title | Narsil Equivalent | Note | +|----------|-------|-------------------|------| +| J4 | Kotlin language support | Narsil's 32-language coverage | Same feature, dual motivation. Listed here as N7 | +| J5 | Swift language support | Narsil's 32-language coverage | Same feature, dual motivation. 
Listed here as N8 | +| J8 | Intraprocedural CFG | `get_control_flow` | Narsil has it; validates priority of J8 | +| J9 | Stored queryable AST | AST-aware chunking + pattern matching | N2 (chunking) is a lighter alternative; J9 is the deeper version | From a4e4ab89372a71f60b0482cc525de50a69e1ce1e Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:22:32 -0700 Subject: [PATCH 19/30] feat: expand node types with parameter, property, constant kinds (Phase 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add sub-declaration node extraction to all 9 WASM language extractors, enabling structural queries like "which functions take a Request param?" or "which classes have a userId field?" without reading source code. Schema: migration v11 adds nullable parent_id column with indexes. Builder: insertNode links children to parent via parent_id FK. Extractors: JS/TS, Python, Go, Rust, Java, C#, Ruby, PHP, HCL now emit children arrays for parameters, properties, and constants. Queries: new childrenData() function, children in contextData output. CLI: new `children` command, EVERY_SYMBOL_KIND validation on --kind. MCP: new `symbol_children` tool, extended kind enum on all kind fields. Constants: CORE_SYMBOL_KINDS (10), EXTENDED_SYMBOL_KINDS (3), EVERY_SYMBOL_KIND (13). ALL_SYMBOL_KINDS preserved for backward compat. Native Rust engine: Definition struct gains children field but actual extraction is deferred to Phase 2 — WASM fallback handles new kinds. 
Impact: 63 functions changed, 62 affected --- crates/codegraph-core/src/types.rs | 2 + src/builder.js | 23 +- src/cli.js | 72 ++-- src/db.js | 23 ++ src/extractors/csharp.js | 65 +++- src/extractors/go.js | 67 +++- src/extractors/hcl.js | 22 ++ src/extractors/java.js | 62 ++- src/extractors/javascript.js | 142 +++++++ src/extractors/php.js | 79 ++++ src/extractors/python.js | 134 +++++++ src/extractors/ruby.js | 89 +++++ src/extractors/rust.js | 72 +++- src/index.js | 4 + src/mcp.js | 40 +- src/parser.js | 8 + src/queries.js | 109 +++++- tests/integration/build-parity.test.js | 7 +- tests/parsers/csharp.test.js | 2 +- tests/parsers/extended-kinds.test.js | 504 +++++++++++++++++++++++++ tests/unit/mcp.test.js | 16 + 21 files changed, 1501 insertions(+), 41 deletions(-) create mode 100644 tests/parsers/extended-kinds.test.js diff --git a/crates/codegraph-core/src/types.rs b/crates/codegraph-core/src/types.rs index f6593ebc..ed299f0c 100644 --- a/crates/codegraph-core/src/types.rs +++ b/crates/codegraph-core/src/types.rs @@ -65,6 +65,8 @@ pub struct Definition { #[napi(ts_type = "string[] | undefined")] pub decorators: Option>, pub complexity: Option, + #[napi(ts_type = "Definition[] | undefined")] + pub children: Option>, } #[napi(object)] diff --git a/src/builder.js b/src/builder.js index a9ae11d4..7a916647 100644 --- a/src/builder.js +++ b/src/builder.js @@ -543,7 +543,7 @@ export async function buildGraph(rootDir, opts = {}) { } const insertNode = db.prepare( - 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line, parent_id) VALUES (?, ?, ?, ?, ?, ?)', ); const getNodeId = db.prepare( 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? 
AND line = ?', @@ -597,12 +597,27 @@ export async function buildGraph(rootDir, opts = {}) { for (const [relPath, symbols] of allSymbols) { fileSymbols.set(relPath, symbols); - insertNode.run(relPath, 'file', relPath, 0, null); + insertNode.run(relPath, 'file', relPath, 0, null, null); for (const def of symbols.definitions) { - insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); + insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null, null); + if (def.children?.length) { + const parentRow = getNodeId.get(def.name, def.kind, relPath, def.line); + if (parentRow) { + for (const child of def.children) { + insertNode.run( + child.name, + child.kind, + relPath, + child.line, + child.endLine || null, + parentRow.id, + ); + } + } + } } for (const exp of symbols.exports) { - insertNode.run(exp.name, exp.kind, relPath, exp.line, null); + insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null); } // Update file hash with real mtime+size for incremental builds diff --git a/src/cli.js b/src/cli.js index ddd853aa..391d2274 100644 --- a/src/cli.js +++ b/src/cli.js @@ -20,9 +20,10 @@ import { exportDOT, exportJSON, exportMermaid } from './export.js'; import { setVerbose } from './logger.js'; import { printNdjson } from './paginate.js'; import { - ALL_SYMBOL_KINDS, + children, context, diffImpact, + EVERY_SYMBOL_KIND, explain, fileDeps, fnDeps, @@ -122,8 +123,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } if (opts.path) { @@ -231,8 +232,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } fnImpact(name, opts.db, { @@ -263,8 +264,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } context(name, opts.db, { @@ -281,6 +282,31 @@ program }); }); +program + .command('children ') + .description('List parameters, properties, and constants of a symbol') + .option('-d, --db ', 'Path to graph.db') + .option('-f, --file ', 'Scope search to symbols in this file (partial match)') + .option('-k, --kind ', 'Filter to a specific symbol kind') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .action((name, opts) => { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + process.exit(1); + } + children(name, opts.db, { + file: opts.file, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? 
parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + }); + }); + program .command('explain ') .description('Structural summary of a file or function (no LLM needed)') @@ -314,8 +340,8 @@ program .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') .action((target, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } audit(target, opts.db, { @@ -917,8 +943,8 @@ program console.error('Provide a function/entry point name or use --list to see all entry points.'); process.exit(1); } - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { flow } = await import('./flow.js'); @@ -950,8 +976,8 @@ program .option('--impact', 'Show data-dependent blast radius') .option('--depth ', 'Max traversal depth', '5') .action(async (name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { dataflow } = await import('./dataflow.js'); @@ -988,8 +1014,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action(async (target, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { complexity } = await import('./complexity.js'); @@ -1021,8 +1047,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action(async (opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { manifesto } = await import('./manifesto.js'); @@ -1083,8 +1109,8 @@ program .option('--ndjson', 'Newline-delimited JSON output') .option('--weights ', 'Custom weights JSON (e.g. \'{"fanIn":1,"complexity":0}\')') .action(async (opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } if (opts.role && !VALID_ROLES.includes(opts.role)) { @@ -1246,8 +1272,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .action(async (command, positionalTargets, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } @@ -1310,8 +1336,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .action(async (positionalTargets, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } diff --git a/src/db.js b/src/db.js index f3f55fa4..9f40d7cc 100644 --- a/src/db.js +++ b/src/db.js @@ -165,6 +165,14 @@ export const MIGRATIONS = [ CREATE INDEX IF NOT EXISTS idx_dataflow_source_kind ON dataflow(source_id, kind); `, }, + { + version: 11, + up: ` + ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id); + CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id); + CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); + `, + }, ]; export function getBuildMeta(db, key) { @@ -286,6 +294,21 @@ export function initSchema(db) { } catch { /* already exists */ } + try { + db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); + } catch { + /* already exists */ + } } export function findDbPath(customPath) { diff --git a/src/extractors/csharp.js b/src/extractors/csharp.js index 5af523f3..43231d1e 100644 --- a/src/extractors/csharp.js +++ b/src/extractors/csharp.js @@ -33,11 +33,13 @@ export function extractCSharpSymbols(tree, _filePath) { case 'class_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractCSharpClassFields(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, }); extractCSharpBaseTypes(node, nameNode.text, classes); } @@ -47,11 +49,13 @@ export function extractCSharpSymbols(tree, _filePath) { case 'struct_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const structChildren = extractCSharpClassFields(node); definitions.push({ name: nameNode.text, kind: 'struct', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: structChildren.length > 0 ? structChildren : undefined, }); extractCSharpBaseTypes(node, nameNode.text, classes); } @@ -105,11 +109,13 @@ export function extractCSharpSymbols(tree, _filePath) { case 'enum_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const enumChildren = extractCSharpEnumMembers(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, }); } break; @@ -120,11 +126,13 @@ export function extractCSharpSymbols(tree, _filePath) { if (nameNode) { const parentType = findCSharpParentType(node); const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -135,11 +143,13 @@ export function extractCSharpSymbols(tree, _filePath) { if (nameNode) { const parentType = findCSharpParentType(node); const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; @@ -152,7 +162,7 @@ export function extractCSharpSymbols(tree, _filePath) { const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; definitions.push({ name: fullName, - kind: 'method', + kind: 'property', line: node.startPosition.row + 1, endLine: nodeEndLine(node), }); @@ -220,6 +230,59 @@ export function extractCSharpSymbols(tree, _filePath) { return { definitions, calls, imports, classes, exports }; } +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractCSharpParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param || param.type !== 'parameter') continue; + const nameNode = param.childForFieldName('name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + return params; +} + +function extractCSharpClassFields(classNode) { + const fields = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'declaration_list'); + if (!body) return fields; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'field_declaration') continue; + const varDecl = findChild(member, 'variable_declaration'); + if (!varDecl) continue; + for (let j = 0; j < varDecl.childCount; j++) { + const child = varDecl.child(j); + if (!child || child.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: member.startPosition.row + 1 }); + } + } + } + return fields; +} + +function extractCSharpEnumMembers(enumNode) { + const constants = []; + const body = + enumNode.childForFieldName('body') || findChild(enumNode, 'enum_member_declaration_list'); + if (!body) return constants; + for (let i = 0; i < 
body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_member_declaration') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return constants; +} + function extractCSharpBaseTypes(node, className, classes) { const baseList = node.childForFieldName('bases'); if (!baseList) return; diff --git a/src/extractors/go.js b/src/extractors/go.js index 8b943012..a3a50158 100644 --- a/src/extractors/go.js +++ b/src/extractors/go.js @@ -1,4 +1,4 @@ -import { nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Go files. @@ -15,11 +15,13 @@ export function extractGoSymbols(tree, _filePath) { case 'function_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const params = extractGoParameters(node.childForFieldName('parameters')); definitions.push({ name: nameNode.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -46,11 +48,13 @@ export function extractGoSymbols(tree, _filePath) { } } const fullName = receiverType ? `${receiverType}.${nameNode.text}` : nameNode.text; + const params = extractGoParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -64,11 +68,13 @@ export function extractGoSymbols(tree, _filePath) { const typeNode = spec.childForFieldName('type'); if (nameNode && typeNode) { if (typeNode.type === 'struct_type') { + const fields = extractStructFields(typeNode); definitions.push({ name: nameNode.text, kind: 'struct', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: fields.length > 0 ? 
fields : undefined, }); } else if (typeNode.type === 'interface_type') { definitions.push({ @@ -145,6 +151,23 @@ export function extractGoSymbols(tree, _filePath) { break; } + case 'const_declaration': { + for (let i = 0; i < node.childCount; i++) { + const spec = node.child(i); + if (!spec || spec.type !== 'const_spec') continue; + const constName = spec.childForFieldName('name'); + if (constName) { + definitions.push({ + name: constName.text, + kind: 'constant', + line: spec.startPosition.row + 1, + endLine: spec.endPosition.row + 1, + }); + } + } + break; + } + case 'call_expression': { const fn = node.childForFieldName('function'); if (fn) { @@ -170,3 +193,45 @@ export function extractGoSymbols(tree, _filePath) { walkGoNode(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractGoParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param || param.type !== 'parameter_declaration') continue; + // A parameter_declaration may have multiple identifiers (e.g., `a, b int`) + for (let j = 0; j < param.childCount; j++) { + const child = param.child(j); + if (child && child.type === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractStructFields(structTypeNode) { + const fields = []; + const fieldList = findChild(structTypeNode, 'field_declaration_list'); + if (!fieldList) return fields; + for (let i = 0; i < fieldList.childCount; i++) { + const field = fieldList.child(i); + if (!field || field.type !== 'field_declaration') continue; + const nameNode = field.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: field.startPosition.row + 1 }); + } else { + // 
Struct fields may have multiple names or use first identifier child + for (let j = 0; j < field.childCount; j++) { + const child = field.child(j); + if (child && child.type === 'field_identifier') { + fields.push({ name: child.text, kind: 'property', line: field.startPosition.row + 1 }); + } + } + } + } + return fields; +} diff --git a/src/extractors/hcl.js b/src/extractors/hcl.js index 4df5af4d..aba022a5 100644 --- a/src/extractors/hcl.js +++ b/src/extractors/hcl.js @@ -36,11 +36,33 @@ export function extractHCLSymbols(tree, _filePath) { } if (name) { + // Extract attributes as property children for variable/output blocks + let blockChildren; + if (blockType === 'variable' || blockType === 'output') { + blockChildren = []; + const body = children.find((c) => c.type === 'body'); + if (body) { + for (let j = 0; j < body.childCount; j++) { + const attr = body.child(j); + if (attr && attr.type === 'attribute') { + const key = attr.childForFieldName('key') || attr.child(0); + if (key) { + blockChildren.push({ + name: key.text, + kind: 'property', + line: attr.startPosition.row + 1, + }); + } + } + } + } + } definitions.push({ name, kind: blockType, line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: blockChildren?.length > 0 ? blockChildren : undefined, }); } diff --git a/src/extractors/java.js b/src/extractors/java.js index 87f10d39..bfa24571 100644 --- a/src/extractors/java.js +++ b/src/extractors/java.js @@ -1,4 +1,4 @@ -import { nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Java files. @@ -31,11 +31,13 @@ export function extractJavaSymbols(tree, _filePath) { case 'class_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractClassFields(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? 
classChildren : undefined, }); const superclass = node.childForFieldName('superclass'); @@ -139,11 +141,13 @@ export function extractJavaSymbols(tree, _filePath) { case 'enum_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const enumChildren = extractEnumConstants(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, }); } break; @@ -154,11 +158,13 @@ export function extractJavaSymbols(tree, _filePath) { if (nameNode) { const parentClass = findJavaParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -169,11 +175,13 @@ export function extractJavaSymbols(tree, _filePath) { if (nameNode) { const parentClass = findJavaParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; @@ -228,3 +236,55 @@ export function extractJavaSymbols(tree, _filePath) { walkJavaNode(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractJavaParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param) continue; + if (param.type === 'formal_parameter' || param.type === 'spread_parameter') { + const nameNode = param.childForFieldName('name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractClassFields(classNode) { + const fields = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'class_body'); + if (!body) return fields; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'field_declaration') continue; + for (let j = 0; j < member.childCount; j++) { + const child = member.child(j); + if (!child || child.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: member.startPosition.row + 1 }); + } + } + } + return fields; +} + +function extractEnumConstants(enumNode) { + const constants = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_body'); + if (!body) return constants; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_constant') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return constants; +} diff --git 
a/src/extractors/javascript.js b/src/extractors/javascript.js index 57ba0392..c4a0d3bf 100644 --- a/src/extractors/javascript.js +++ b/src/extractors/javascript.js @@ -28,31 +28,37 @@ function extractSymbolsQuery(tree, query) { if (c.fn_node) { // function_declaration + const fnChildren = extractParameters(c.fn_node); definitions.push({ name: c.fn_name.text, kind: 'function', line: c.fn_node.startPosition.row + 1, endLine: nodeEndLine(c.fn_node), + children: fnChildren.length > 0 ? fnChildren : undefined, }); } else if (c.varfn_name) { // variable_declarator with arrow_function / function_expression const declNode = c.varfn_name.parent?.parent; const line = declNode ? declNode.startPosition.row + 1 : c.varfn_name.startPosition.row + 1; + const varFnChildren = extractParameters(c.varfn_value); definitions.push({ name: c.varfn_name.text, kind: 'function', line, endLine: nodeEndLine(c.varfn_value), + children: varFnChildren.length > 0 ? varFnChildren : undefined, }); } else if (c.cls_node) { // class_declaration const className = c.cls_name.text; const startLine = c.cls_node.startPosition.row + 1; + const clsChildren = extractClassProperties(c.cls_node); definitions.push({ name: className, kind: 'class', line: startLine, endLine: nodeEndLine(c.cls_node), + children: clsChildren.length > 0 ? clsChildren : undefined, }); const heritage = c.cls_node.childForFieldName('heritage') || findChild(c.cls_node, 'class_heritage'); @@ -69,11 +75,13 @@ function extractSymbolsQuery(tree, query) { const methName = c.meth_name.text; const parentClass = findParentClass(c.meth_node); const fullName = parentClass ? `${parentClass}.${methName}` : methName; + const methChildren = extractParameters(c.meth_node); definitions.push({ name: fullName, kind: 'method', line: c.meth_node.startPosition.row + 1, endLine: nodeEndLine(c.meth_node), + children: methChildren.length > 0 ? 
methChildren : undefined, }); } else if (c.iface_node) { // interface_declaration (TS/TSX only) @@ -231,11 +239,13 @@ function extractSymbolsWalk(tree) { case 'function_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const fnChildren = extractParameters(node); definitions.push({ name: nameNode.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: fnChildren.length > 0 ? fnChildren : undefined, }); } break; @@ -246,11 +256,13 @@ function extractSymbolsWalk(tree) { if (nameNode) { const className = nameNode.text; const startLine = node.startPosition.row + 1; + const clsChildren = extractClassProperties(node); definitions.push({ name: className, kind: 'class', line: startLine, endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? clsChildren : undefined, }); const heritage = node.childForFieldName('heritage') || findChild(node, 'class_heritage'); if (heritage) { @@ -272,11 +284,13 @@ function extractSymbolsWalk(tree) { if (nameNode) { const parentClass = findParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const methChildren = extractParameters(node); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: methChildren.length > 0 ? 
methChildren : undefined, }); } break; @@ -317,6 +331,7 @@ function extractSymbolsWalk(tree) { case 'lexical_declaration': case 'variable_declaration': { + const isConst = node.text.startsWith('const '); for (let i = 0; i < node.childCount; i++) { const declarator = node.child(i); if (declarator && declarator.type === 'variable_declarator') { @@ -329,15 +344,59 @@ function extractSymbolsWalk(tree) { valType === 'function_expression' || valType === 'function' ) { + const varFnChildren = extractParameters(valueN); definitions.push({ name: nameN.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(valueN), + children: varFnChildren.length > 0 ? varFnChildren : undefined, }); + } else if (isConst && nameN.type === 'identifier' && isConstantValue(valueN)) { + definitions.push({ + name: nameN.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } else if (isConst && nameN && nameN.type === 'identifier' && !valueN) { + // const with no value (shouldn't happen but be safe) + } + } + } + break; + } + + case 'enum_declaration': { + // TypeScript enum + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const enumChildren = []; + const body = node.childForFieldName('body') || findChild(node, 'enum_body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member) continue; + if (member.type === 'enum_assignment' || member.type === 'property_identifier') { + const mName = member.childForFieldName('name') || member.child(0); + if (mName) { + enumChildren.push({ + name: mName.text, + kind: 'constant', + line: member.startPosition.row + 1, + }); + } } } } + definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? 
enumChildren : undefined, + }); } break; } @@ -471,6 +530,89 @@ function extractSymbolsWalk(tree) { return { definitions, calls, imports, classes, exports }; } +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractParameters(node) { + const params = []; + const paramsNode = node.childForFieldName('parameters') || findChild(node, 'formal_parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const child = paramsNode.child(i); + if (!child) continue; + const t = child.type; + if (t === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } else if ( + t === 'required_parameter' || + t === 'optional_parameter' || + t === 'assignment_pattern' + ) { + const nameNode = + child.childForFieldName('pattern') || child.childForFieldName('left') || child.child(0); + if ( + nameNode && + (nameNode.type === 'identifier' || + nameNode.type === 'shorthand_property_identifier_pattern') + ) { + params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } else if (t === 'rest_pattern' || t === 'rest_element') { + const nameNode = child.child(1) || child.childForFieldName('name'); + if (nameNode && nameNode.type === 'identifier') { + params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractClassProperties(classNode) { + const props = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'class_body'); + if (!body) return props; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child) continue; + if ( + child.type === 'field_definition' || + child.type === 'public_field_definition' || + child.type === 'property_definition' + ) { + const nameNode = + child.childForFieldName('name') || child.childForFieldName('property') || child.child(0); + if ( + nameNode && 
+ (nameNode.type === 'property_identifier' || + nameNode.type === 'identifier' || + nameNode.type === 'private_property_identifier') + ) { + props.push({ name: nameNode.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + } + return props; +} + +function isConstantValue(valueNode) { + if (!valueNode) return false; + const t = valueNode.type; + return ( + t === 'number' || + t === 'string' || + t === 'template_string' || + t === 'true' || + t === 'false' || + t === 'null' || + t === 'undefined' || + t === 'array' || + t === 'object' || + t === 'regex' || + t === 'unary_expression' || + t === 'binary_expression' || + t === 'new_expression' + ); +} + // ── Shared helpers ────────────────────────────────────────────────────────── function extractInterfaceMethods(bodyNode, interfaceName, definitions) { diff --git a/src/extractors/php.js b/src/extractors/php.js index 95b44570..d2b4f09d 100644 --- a/src/extractors/php.js +++ b/src/extractors/php.js @@ -1,5 +1,76 @@ import { findChild, nodeEndLine } from './helpers.js'; +function extractPhpParameters(fnNode) { + const params = []; + const paramsNode = + fnNode.childForFieldName('parameters') || findChild(fnNode, 'formal_parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const param = paramsNode.child(i); + if (!param) continue; + if (param.type === 'simple_parameter' || param.type === 'variadic_parameter') { + const nameNode = param.childForFieldName('name') || findChild(param, 'variable_name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractPhpClassChildren(classNode) { + const children = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'declaration_list'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member) continue; + if (member.type === 
'property_declaration') { + for (let j = 0; j < member.childCount; j++) { + const el = member.child(j); + if (!el || el.type !== 'property_element') continue; + const varNode = findChild(el, 'variable_name'); + if (varNode) { + children.push({ + name: varNode.text, + kind: 'property', + line: member.startPosition.row + 1, + }); + } + } + } else if (member.type === 'const_declaration') { + for (let j = 0; j < member.childCount; j++) { + const el = member.child(j); + if (!el || el.type !== 'const_element') continue; + const nameNode = el.childForFieldName('name') || findChild(el, 'name'); + if (nameNode) { + children.push({ + name: nameNode.text, + kind: 'constant', + line: member.startPosition.row + 1, + }); + } + } + } + } + return children; +} + +function extractPhpEnumCases(enumNode) { + const children = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_declaration_list'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_case') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + children.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return children; +} + /** * Extract symbols from PHP files. */ @@ -31,11 +102,13 @@ export function extractPHPSymbols(tree, _filePath) { case 'function_definition': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const params = extractPhpParameters(node); definitions.push({ name: nameNode.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; @@ -44,11 +117,13 @@ export function extractPHPSymbols(tree, _filePath) { case 'class_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractPhpClassChildren(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? classChildren : undefined, }); // Check base clause (extends) @@ -132,11 +207,13 @@ export function extractPHPSymbols(tree, _filePath) { case 'enum_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const enumChildren = extractPhpEnumCases(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, }); } break; @@ -147,11 +224,13 @@ export function extractPHPSymbols(tree, _filePath) { if (nameNode) { const parentClass = findPHPParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractPhpParameters(node); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; diff --git a/src/extractors/python.js b/src/extractors/python.js index 832232f0..6542aab7 100644 --- a/src/extractors/python.js +++ b/src/extractors/python.js @@ -22,12 +22,14 @@ export function extractPythonSymbols(tree, _filePath) { const parentClass = findPythonParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; const kind = parentClass ? 'method' : 'function'; + const fnChildren = extractPythonParameters(node); definitions.push({ name: fullName, kind, line: node.startPosition.row + 1, endLine: nodeEndLine(node), decorators, + children: fnChildren.length > 0 ? 
fnChildren : undefined, }); } break; @@ -36,11 +38,13 @@ export function extractPythonSymbols(tree, _filePath) { case 'class_definition': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const clsChildren = extractPythonClassProperties(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? clsChildren : undefined, }); const superclasses = node.childForFieldName('superclasses') || findChild(node, 'argument_list'); @@ -108,6 +112,24 @@ export function extractPythonSymbols(tree, _filePath) { break; } + case 'expression_statement': { + // Module-level UPPER_CASE assignments → constants + if (node.parent && node.parent.type === 'module') { + const assignment = findChild(node, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && /^[A-Z_][A-Z0-9_]*$/.test(left.text)) { + definitions.push({ + name: left.text, + kind: 'constant', + line: node.startPosition.row + 1, + }); + } + } + } + break; + } + case 'import_from_statement': { let source = ''; const names = []; @@ -133,6 +155,118 @@ export function extractPythonSymbols(tree, _filePath) { for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i)); } + function extractPythonParameters(fnNode) { + const params = []; + const paramsNode = fnNode.childForFieldName('parameters') || findChild(fnNode, 'parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const child = paramsNode.child(i); + if (!child) continue; + const t = child.type; + if (t === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } else if ( + t === 'typed_parameter' || + t === 'default_parameter' || + t === 'typed_default_parameter' + ) { + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode && nameNode.type === 
'identifier') { + params.push({ + name: nameNode.text, + kind: 'parameter', + line: child.startPosition.row + 1, + }); + } + } else if (t === 'list_splat_pattern' || t === 'dictionary_splat_pattern') { + // *args, **kwargs + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'identifier') { + params.push({ name: inner.text, kind: 'parameter', line: child.startPosition.row + 1 }); + break; + } + } + } + } + return params; + } + + function extractPythonClassProperties(classNode) { + const props = []; + const seen = new Set(); + const body = classNode.childForFieldName('body') || findChild(classNode, 'block'); + if (!body) return props; + + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child) continue; + + // Direct class attribute assignments: x = 5 + if (child.type === 'expression_statement') { + const assignment = findChild(child, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && !seen.has(left.text)) { + seen.add(left.text); + props.push({ name: left.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + } + + // __init__ method: self.x = ... 
assignments + if (child.type === 'function_definition') { + const fnName = child.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = child.childForFieldName('body') || findChild(child, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); + } + } + } + + // decorated __init__ + if (child.type === 'decorated_definition') { + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'function_definition') { + const fnName = inner.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = inner.childForFieldName('body') || findChild(inner, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); + } + } + } + } + } + } + return props; + } + + function walkInitBody(bodyNode, seen, props) { + for (let i = 0; i < bodyNode.childCount; i++) { + const stmt = bodyNode.child(i); + if (!stmt || stmt.type !== 'expression_statement') continue; + const assignment = findChild(stmt, 'assignment'); + if (!assignment) continue; + const left = assignment.childForFieldName('left'); + if (!left || left.type !== 'attribute') continue; + const obj = left.childForFieldName('object'); + const attr = left.childForFieldName('attribute'); + if ( + obj && + obj.text === 'self' && + attr && + attr.type === 'identifier' && + !seen.has(attr.text) + ) { + seen.add(attr.text); + props.push({ name: attr.text, kind: 'property', line: stmt.startPosition.row + 1 }); + } + } + } + function findPythonParentClass(node) { let current = node.parent; while (current) { diff --git a/src/extractors/ruby.js b/src/extractors/ruby.js index 73b3f0d4..400d410d 100644 --- a/src/extractors/ruby.js +++ b/src/extractors/ruby.js @@ -31,11 +31,13 @@ export function extractRubySymbols(tree, _filePath) { case 'class': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractRubyClassChildren(node); definitions.push({ name: nameNode.text, 
kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? classChildren : undefined, }); const superclass = node.childForFieldName('superclass'); if (superclass) { @@ -73,11 +75,13 @@ export function extractRubySymbols(tree, _filePath) { case 'module': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const moduleChildren = extractRubyBodyConstants(node); definitions.push({ name: nameNode.text, kind: 'module', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: moduleChildren.length > 0 ? moduleChildren : undefined, }); } break; @@ -88,11 +92,13 @@ export function extractRubySymbols(tree, _filePath) { if (nameNode) { const parentClass = findRubyParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -103,16 +109,34 @@ export function extractRubySymbols(tree, _filePath) { if (nameNode) { const parentClass = findRubyParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); definitions.push({ name: fullName, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; } + case 'assignment': { + // Top-level constant assignments (parent is program) + if (node.parent && node.parent.type === 'program') { + const left = node.childForFieldName('left'); + if (left && left.type === 'constant') { + definitions.push({ + name: left.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } + break; + } + case 'call': { const methodNode = node.childForFieldName('method'); if (methodNode) { @@ -186,3 +210,68 @@ export function extractRubySymbols(tree, _filePath) { walkRubyNode(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Child extraction helpers ──────────────────────────────────────────────── + +const RUBY_PARAM_TYPES = new Set([ + 'identifier', + 'optional_parameter', + 'splat_parameter', + 'hash_splat_parameter', + 'block_parameter', + 'keyword_parameter', +]); + +function extractRubyParameters(methodNode) { + const params = []; + const paramList = + methodNode.childForFieldName('parameters') || findChild(methodNode, 'method_parameters'); + if (!paramList) return params; + for (let i = 0; i < paramList.childCount; i++) { + const param = paramList.child(i); + if (!param || !RUBY_PARAM_TYPES.has(param.type)) continue; + let name; + if (param.type === 'identifier') { + name = param.text; + } else { + // Compound parameter types have an identifier child for the name + const id = findChild(param, 'identifier'); + name = id ? 
id.text : param.text; + } + params.push({ name, kind: 'parameter', line: param.startPosition.row + 1 }); + } + return params; +} + +function extractRubyBodyConstants(containerNode) { + const children = []; + const body = containerNode.childForFieldName('body') || findChild(containerNode, 'body'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child || child.type !== 'assignment') continue; + const left = child.childForFieldName('left'); + if (left && left.type === 'constant') { + children.push({ name: left.text, kind: 'constant', line: child.startPosition.row + 1 }); + } + } + return children; +} + +function extractRubyClassChildren(classNode) { + const children = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'body'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child || child.type !== 'assignment') continue; + const left = child.childForFieldName('left'); + if (!left) continue; + if (left.type === 'instance_variable') { + children.push({ name: left.text, kind: 'property', line: child.startPosition.row + 1 }); + } else if (left.type === 'constant') { + children.push({ name: left.text, kind: 'constant', line: child.startPosition.row + 1 }); + } + } + return children; +} diff --git a/src/extractors/rust.js b/src/extractors/rust.js index 5a8d6225..2a013481 100644 --- a/src/extractors/rust.js +++ b/src/extractors/rust.js @@ -1,4 +1,4 @@ -import { nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Rust files. @@ -30,11 +30,13 @@ export function extractRustSymbols(tree, _filePath) { const implType = findCurrentImpl(node); const fullName = implType ? `${implType}.${nameNode.text}` : nameNode.text; const kind = implType ? 
'method' : 'function'; + const params = extractRustParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind, line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -43,11 +45,13 @@ export function extractRustSymbols(tree, _filePath) { case 'struct_item': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const fields = extractStructFields(node); definitions.push({ name: nameNode.text, kind: 'struct', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: fields.length > 0 ? fields : undefined, }); } break; @@ -56,11 +60,26 @@ export function extractRustSymbols(tree, _filePath) { case 'enum_item': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const variants = extractEnumVariants(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: variants.length > 0 ? 
variants : undefined, + }); + } + break; + } + + case 'const_item': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), }); } break; @@ -170,6 +189,57 @@ export function extractRustSymbols(tree, _filePath) { return { definitions, calls, imports, classes, exports }; } +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractRustParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param) continue; + if (param.type === 'self_parameter') { + params.push({ name: 'self', kind: 'parameter', line: param.startPosition.row + 1 }); + } else if (param.type === 'parameter') { + const pattern = param.childForFieldName('pattern'); + if (pattern) { + params.push({ name: pattern.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractStructFields(structNode) { + const fields = []; + const fieldList = + structNode.childForFieldName('body') || findChild(structNode, 'field_declaration_list'); + if (!fieldList) return fields; + for (let i = 0; i < fieldList.childCount; i++) { + const field = fieldList.child(i); + if (!field || field.type !== 'field_declaration') continue; + const nameNode = field.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: field.startPosition.row + 1 }); + } + } + return fields; +} + +function extractEnumVariants(enumNode) { + const variants = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_variant_list'); + if (!body) return variants; + for (let i = 0; i < body.childCount; i++) { + const variant = body.child(i); + if (!variant || variant.type !== 'enum_variant') continue; + const nameNode = 
variant.childForFieldName('name'); + if (nameNode) { + variants.push({ name: nameNode.text, kind: 'constant', line: variant.startPosition.row + 1 }); + } + } + return variants; +} + function extractRustUsePath(node) { if (!node) return []; diff --git a/src/index.js b/src/index.js index 03be6853..973d2475 100644 --- a/src/index.js +++ b/src/index.js @@ -107,9 +107,13 @@ export { getActiveEngine, parseFileAuto, parseFilesAuto } from './parser.js'; // Query functions (data-returning) export { ALL_SYMBOL_KINDS, + CORE_SYMBOL_KINDS, + childrenData, contextData, diffImpactData, diffImpactMermaid, + EVERY_SYMBOL_KIND, + EXTENDED_SYMBOL_KINDS, explainData, FALSE_POSITIVE_CALLER_THRESHOLD, FALSE_POSITIVE_NAMES, diff --git a/src/mcp.js b/src/mcp.js index 405b09c2..d02cdf29 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -9,7 +9,7 @@ import { createRequire } from 'node:module'; import { findCycles } from './cycles.js'; import { findDbPath } from './db.js'; import { MCP_DEFAULTS, MCP_MAX_LIMIT } from './paginate.js'; -import { ALL_SYMBOL_KINDS, diffImpactMermaid, VALID_ROLES } from './queries.js'; +import { diffImpactMermaid, EVERY_SYMBOL_KIND, VALID_ROLES } from './queries.js'; const REPO_PROP = { repo: { @@ -47,7 +47,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind', }, to: { type: 'string', description: 'Target symbol for path mode (required in path mode)' }, @@ -129,7 +129,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, @@ -157,7 +157,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter to a specific symbol kind', }, no_source: { @@ -176,6 +176,22 @@ const BASE_TOOLS = [ required: ['name'], }, }, + { + name: 
'symbol_children', + description: + 'List sub-declaration children of a symbol: parameters, properties, constants. Answers "what fields does this class have?" without reading source.', + inputSchema: { + type: 'object', + properties: { + name: { type: 'string', description: 'Function/method/class name (partial match)' }, + file: { type: 'string', description: 'Scope to file (partial match)' }, + kind: { type: 'string', enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + required: ['name'], + }, + }, { name: 'explain', description: @@ -394,7 +410,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, @@ -560,7 +576,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, @@ -639,7 +655,7 @@ const BASE_TOOLS = [ }, depth: { type: 'number', description: 'Max depth for impact mode', default: 5 }, file: { type: 'string', description: 'Scope to file (partial match)' }, - kind: { type: 'string', enum: ALL_SYMBOL_KINDS, description: 'Filter by symbol kind' }, + kind: { type: 'string', enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind' }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, ...PAGINATION_PROPS, }, @@ -740,6 +756,7 @@ export async function startMCPServer(customDbPath, options = {}) { fnImpactData, pathData, contextData, + childrenData, explainData, whereData, diffImpactData, @@ -864,6 +881,15 @@ export async function startMCPServer(customDbPath, options = {}) { offset: args.offset ?? 
0, }); break; + case 'symbol_children': + result = childrenData(args.name, dbPath, { + file: args.file, + kind: args.kind, + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.context, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); + break; case 'explain': result = explainData(args.target, dbPath, { noTests: args.no_tests, diff --git a/src/parser.js b/src/parser.js index f70e67c2..54eb0820 100644 --- a/src/parser.js +++ b/src/parser.js @@ -142,6 +142,14 @@ function normalizeNativeSymbols(result) { maintainabilityIndex: d.complexity.maintainabilityIndex ?? null, } : null, + children: d.children?.length + ? d.children.map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.endLine ?? c.end_line ?? null, + })) + : undefined, })), calls: (result.calls || []).map((c) => ({ name: c.name, diff --git a/src/queries.js b/src/queries.js index e8874364..dc1fb1ad 100644 --- a/src/queries.js +++ b/src/queries.js @@ -59,7 +59,9 @@ export const FALSE_POSITIVE_NAMES = new Set([ export const FALSE_POSITIVE_CALLER_THRESHOLD = 20; const FUNCTION_KINDS = ['function', 'method', 'class']; -export const ALL_SYMBOL_KINDS = [ + +// Original 10 kinds — used as default query scope +export const CORE_SYMBOL_KINDS = [ 'function', 'method', 'class', @@ -72,6 +74,21 @@ export const ALL_SYMBOL_KINDS = [ 'module', ]; +// Sub-declaration kinds (Phase 1) +export const EXTENDED_SYMBOL_KINDS = [ + 'parameter', + 'property', + 'constant', + // Phase 2 (reserved, not yet extracted): + // 'constructor', 'namespace', 'decorator', 'getter', 'setter', +]; + +// Full set for --kind validation and MCP enum +export const EVERY_SYMBOL_KIND = [...CORE_SYMBOL_KINDS, ...EXTENDED_SYMBOL_KINDS]; + +// Backward compat: ALL_SYMBOL_KINDS stays as the core 10 +export const ALL_SYMBOL_KINDS = CORE_SYMBOL_KINDS; + export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; /** @@ -190,6 +207,12 @@ export function kindIcon(kind) { return 'I'; case 'type': 
return 'T'; + case 'parameter': + return 'p'; + case 'property': + return '.'; + case 'constant': + return 'C'; default: return '-'; } @@ -2224,6 +2247,17 @@ export function contextData(name, customDbPath, opts = {}) { /* table may not exist */ } + // Children (parameters, properties, constants) + let nodeChildren = []; + try { + nodeChildren = db + .prepare('SELECT name, kind, line, end_line FROM nodes WHERE parent_id = ? ORDER BY line') + .all(node.id) + .map((c) => ({ name: c.name, kind: c.kind, line: c.line, endLine: c.end_line || null })); + } catch { + /* parent_id column may not exist */ + } + return { name: node.name, kind: node.kind, @@ -2234,6 +2268,7 @@ export function contextData(name, customDbPath, opts = {}) { source, signature, complexity: complexityMetrics, + children: nodeChildren.length > 0 ? nodeChildren : undefined, callees, callers, relatedTests, @@ -2273,6 +2308,15 @@ export function context(name, customDbPath, opts = {}) { console.log(); } + // Children + if (r.children && r.children.length > 0) { + console.log(`## Children (${r.children.length})`); + for (const c of r.children) { + console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); + } + console.log(); + } + // Complexity if (r.complexity) { const cx = r.complexity; @@ -2345,6 +2389,69 @@ export function context(name, customDbPath, opts = {}) { } } +// ─── childrenData ─────────────────────────────────────────────────────── + +export function childrenData(name, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + + const nodes = findMatchingNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + if (nodes.length === 0) { + db.close(); + return { name, results: [] }; + } + + const results = nodes.map((node) => { + let children; + try { + children = db + .prepare('SELECT name, kind, line, end_line FROM nodes WHERE parent_id = ? 
ORDER BY line') + .all(node.id); + } catch { + children = []; + } + if (noTests) children = children.filter((c) => !isTestFile(c.file || node.file)); + return { + name: node.name, + kind: node.kind, + file: node.file, + line: node.line, + children: children.map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.end_line || null, + })), + }; + }); + + db.close(); + const base = { name, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); +} + +export function children(name, customDbPath, opts = {}) { + const data = childrenData(name, customDbPath, opts); + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + if (data.results.length === 0) { + console.log(`No symbol matching "${name}"`); + return; + } + for (const r of data.results) { + console.log(`\n${kindIcon(r.kind)} ${r.name} ${r.file}:${r.line}`); + if (r.children.length === 0) { + console.log(' (no children)'); + } else { + for (const c of r.children) { + console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); + } + } + } +} + // ─── explainData ──────────────────────────────────────────────────────── function isFileLikeTarget(target) { diff --git a/tests/integration/build-parity.test.js b/tests/integration/build-parity.test.js index 94097e7f..5651a61b 100644 --- a/tests/integration/build-parity.test.js +++ b/tests/integration/build-parity.test.js @@ -76,9 +76,14 @@ describeOrSkip('Build parity: native vs WASM', () => { }); it('produces identical nodes', () => { + // Filter out extended kinds (parameter, property, constant) — WASM extracts + // these as children but native engine defers child extraction for now. 
+ const EXTENDED = new Set(['parameter', 'property', 'constant']); + const filterCore = (nodes) => nodes.filter((n) => !EXTENDED.has(n.kind)); + const wasmGraph = readGraph(path.join(wasmDir, '.codegraph', 'graph.db')); const nativeGraph = readGraph(path.join(nativeDir, '.codegraph', 'graph.db')); - expect(nativeGraph.nodes).toEqual(wasmGraph.nodes); + expect(filterCore(nativeGraph.nodes)).toEqual(filterCore(wasmGraph.nodes)); }); it('produces identical edges', () => { diff --git a/tests/parsers/csharp.test.js b/tests/parsers/csharp.test.js index f49913d2..e8031262 100644 --- a/tests/parsers/csharp.test.js +++ b/tests/parsers/csharp.test.js @@ -108,7 +108,7 @@ public class Foo {}`); public string Name { get; set; } }`); expect(symbols.definitions).toContainEqual( - expect.objectContaining({ name: 'User.Name', kind: 'method' }), + expect.objectContaining({ name: 'User.Name', kind: 'property' }), ); }); }); diff --git a/tests/parsers/extended-kinds.test.js b/tests/parsers/extended-kinds.test.js new file mode 100644 index 00000000..266ac44a --- /dev/null +++ b/tests/parsers/extended-kinds.test.js @@ -0,0 +1,504 @@ +/** + * Extended kind extraction tests (parameters, properties, constants). + * + * Validates that each language extractor populates the `children` array + * on definitions with parameter, property, and constant entries. 
+ */ +import { beforeAll, describe, expect, it } from 'vitest'; +import { + createParsers, + extractCSharpSymbols, + extractGoSymbols, + extractJavaSymbols, + extractPHPSymbols, + extractPythonSymbols, + extractRubySymbols, + extractRustSymbols, + extractSymbols, +} from '../../src/parser.js'; + +// ── JavaScript ────────────────────────────────────────────────────────────── + +describe('JavaScript extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseJS(code) { + const parser = parsers.get('javascript'); + const tree = parser.parse(code); + return extractSymbols(tree, 'test.js'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function declarations', () => { + const symbols = parseJS('function greet(name, age) { }'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]), + ); + }); + + it('extracts parameters from arrow functions', () => { + const symbols = parseJS('const add = (a, b) => a + b;'); + const add = symbols.definitions.find((d) => d.name === 'add'); + expect(add).toBeDefined(); + expect(add.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'a', kind: 'parameter' }), + expect.objectContaining({ name: 'b', kind: 'parameter' }), + ]), + ); + }); + + it('extracts parameters from class methods', () => { + const symbols = parseJS('class Foo { bar(x, y) {} }'); + const bar = symbols.definitions.find((d) => d.name === 'Foo.bar'); + expect(bar).toBeDefined(); + expect(bar.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'parameter' }), + expect.objectContaining({ name: 'y', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', 
() => { + it('extracts class field properties', () => { + const symbols = parseJS('class User { name; age; greet() {} }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts constant definitions from const declarations', () => { + const symbols = parseJS('const MAX = 100;'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MAX', kind: 'constant' }), + ); + }); + }); +}); + +// ── Python ────────────────────────────────────────────────────────────────── + +describe('Python extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parsePython(code) { + const parser = parsers.get('python'); + if (!parser) throw new Error('Python parser not available'); + const tree = parser.parse(code); + return extractPythonSymbols(tree, 'test.py'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function definitions', () => { + const symbols = parsePython('def greet(name, age=30):\n pass'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts properties from __init__ self assignments', () => { + const symbols = parsePython( + ['class User:', ' def __init__(self, x, y):', ' self.x = x', ' self.y = y'].join( + '\n', + ), + ); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + 
expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'property' }), + expect.objectContaining({ name: 'y', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts module-level UPPER_CASE constants', () => { + const symbols = parsePython('MAX_RETRIES = 3'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MAX_RETRIES', kind: 'constant' }), + ); + }); + }); +}); + +// ── Go ────────────────────────────────────────────────────────────────────── + +describe('Go extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseGo(code) { + const parser = parsers.get('go'); + if (!parser) throw new Error('Go parser not available'); + const tree = parser.parse(code); + return extractGoSymbols(tree, 'test.go'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function declarations', () => { + const symbols = parseGo('package main\nfunc add(a int, b int) int { return a + b }'); + const add = symbols.definitions.find((d) => d.name === 'add'); + expect(add).toBeDefined(); + expect(add.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'a', kind: 'parameter' }), + expect.objectContaining({ name: 'b', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts struct fields as properties', () => { + const symbols = parseGo('package main\ntype User struct {\n Name string\n Age int\n}'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Name', kind: 'property' }), + expect.objectContaining({ name: 'Age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts const declarations', () => { + const symbols = 
parseGo('package main\nconst MaxRetries = 3'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MaxRetries', kind: 'constant' }), + ); + }); + }); +}); + +// ── Rust ───────────────────────────────────────────────────────────────────── + +describe('Rust extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseRust(code) { + const parser = parsers.get('rust'); + if (!parser) throw new Error('Rust parser not available'); + const tree = parser.parse(code); + return extractRustSymbols(tree, 'test.rs'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function declarations', () => { + const symbols = parseRust('fn add(a: i32, b: i32) -> i32 { a + b }'); + const add = symbols.definitions.find((d) => d.name === 'add'); + expect(add).toBeDefined(); + expect(add.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'a', kind: 'parameter' }), + expect.objectContaining({ name: 'b', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts struct fields as properties', () => { + const symbols = parseRust('struct User { name: String, age: u32 }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts const item declarations', () => { + const symbols = parseRust('const MAX: i32 = 100;'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MAX', kind: 'constant' }), + ); + }); + + it('extracts enum variants as constant children', () => { + const symbols = parseRust('enum Color { Red, Green, Blue }'); + const color = symbols.definitions.find((d) 
=> d.name === 'Color'); + expect(color).toBeDefined(); + expect(color.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Red', kind: 'constant' }), + expect.objectContaining({ name: 'Green', kind: 'constant' }), + expect.objectContaining({ name: 'Blue', kind: 'constant' }), + ]), + ); + }); + }); +}); + +// ── Java ───────────────────────────────────────────────────────────────────── + +describe('Java extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseJava(code) { + const parser = parsers.get('java'); + if (!parser) throw new Error('Java parser not available'); + const tree = parser.parse(code); + return extractJavaSymbols(tree, 'Test.java'); + } + + describe('parameter extraction', () => { + it('extracts method parameters', () => { + const symbols = parseJava('class Foo { void bar(int x, String y) {} }'); + const bar = symbols.definitions.find((d) => d.name === 'Foo.bar'); + expect(bar).toBeDefined(); + expect(bar.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'parameter' }), + expect.objectContaining({ name: 'y', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts class field declarations as properties', () => { + const symbols = parseJava('class User { String name; int age; }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts enum constants as children', () => { + const symbols = parseJava('enum Status { ACTIVE, INACTIVE }'); + const status = symbols.definitions.find((d) => d.name === 'Status'); + expect(status).toBeDefined(); + 
expect(status.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'ACTIVE', kind: 'constant' }), + expect.objectContaining({ name: 'INACTIVE', kind: 'constant' }), + ]), + ); + }); + }); +}); + +// ── C# ────────────────────────────────────────────────────────────────────── + +describe('C# extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseCSharp(code) { + const parser = parsers.get('csharp'); + if (!parser) throw new Error('C# parser not available'); + const tree = parser.parse(code); + return extractCSharpSymbols(tree, 'Test.cs'); + } + + describe('parameter extraction', () => { + it('extracts method parameters', () => { + const symbols = parseCSharp('class Foo { void Bar(int x, string y) {} }'); + const bar = symbols.definitions.find((d) => d.name === 'Foo.Bar'); + expect(bar).toBeDefined(); + expect(bar.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'parameter' }), + expect.objectContaining({ name: 'y', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts class field declarations as properties', () => { + const symbols = parseCSharp('class User { string Name; int Age; }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Name', kind: 'property' }), + expect.objectContaining({ name: 'Age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts enum member declarations as constants', () => { + const symbols = parseCSharp('enum Status { Active, Inactive }'); + const status = symbols.definitions.find((d) => d.name === 'Status'); + expect(status).toBeDefined(); + expect(status.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Active', kind: 'constant' 
}), + expect.objectContaining({ name: 'Inactive', kind: 'constant' }), + ]), + ); + }); + }); +}); + +// ── Ruby ───────────────────────────────────────────────────────────────────── + +describe('Ruby extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseRuby(code) { + const parser = parsers.get('ruby'); + if (!parser) throw new Error('Ruby parser not available'); + const tree = parser.parse(code); + return extractRubySymbols(tree, 'test.rb'); + } + + describe('parameter extraction', () => { + it('extracts method parameters', () => { + const symbols = parseRuby('def greet(name, age)\nend'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts instance variable assignments as properties', () => { + const symbols = parseRuby('class User\n @name = nil\nend'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([expect.objectContaining({ name: '@name', kind: 'property' })]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts class-level constant assignments', () => { + const symbols = parseRuby('class Foo\n MAX = 100\nend'); + const foo = symbols.definitions.find((d) => d.name === 'Foo'); + expect(foo).toBeDefined(); + expect(foo.children).toEqual( + expect.arrayContaining([expect.objectContaining({ name: 'MAX', kind: 'constant' })]), + ); + }); + }); +}); + +// ── PHP ────────────────────────────────────────────────────────────────────── + +describe('PHP extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + 
}); + + function parsePHP(code) { + const parser = parsers.get('php'); + if (!parser) throw new Error('PHP parser not available'); + const tree = parser.parse(code); + return extractPHPSymbols(tree, 'test.php'); + } + + describe('parameter extraction', () => { + it('extracts function parameters', () => { + const symbols = parsePHP(' d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: '$name', kind: 'parameter' }), + expect.objectContaining({ name: '$age', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts class property declarations', () => { + const symbols = parsePHP(' d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: '$name', kind: 'property' }), + expect.objectContaining({ name: '$age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts enum case declarations as constants', () => { + const symbols = parsePHP(' d.name === 'Status'); + expect(status).toBeDefined(); + expect(status.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Active', kind: 'constant' }), + expect.objectContaining({ name: 'Inactive', kind: 'constant' }), + ]), + ); + }); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index fc610c4b..3b38f590 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -16,6 +16,7 @@ const ALL_TOOL_NAMES = [ 'module_map', 'fn_impact', 'context', + 'symbol_children', 'explain', 'where', 'diff_impact', @@ -249,6 +250,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(() => ({ name: 'test', results: [] })), fnImpactData: vi.fn(() => ({ name: 'test', results: [] })), contextData: vi.fn(() => ({ name: 'test', results: [] })), + childrenData: vi.fn(() => ({ name: 'test', results: [] })), 
explainData: vi.fn(() => ({ target: 'test', kind: 'function', results: [] })), whereData: vi.fn(() => ({ target: 'test', mode: 'symbol', results: [] })), diffImpactData: vi.fn(() => ({ changedFiles: 0, affectedFunctions: [] })), @@ -312,6 +314,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: fnDepsMock, fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -371,6 +374,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: fnImpactMock, contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -427,6 +431,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: diffImpactMock, @@ -486,6 +491,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -546,6 +552,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: fnDepsMock, fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -604,6 +611,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -656,6 +664,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -710,6 +719,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: fnDepsMock, fnImpactData: vi.fn(), contextData: vi.fn(), + 
childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -774,6 +784,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -831,6 +842,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -879,6 +891,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -927,6 +940,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -975,6 +989,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), @@ -1024,6 +1039,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), From cec075ab563a1771ab6517fc70de1e42d41430db Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:03:10 -0700 Subject: [PATCH 20/30] =?UTF-8?q?feat:=20add=20expanded=20edge=20types=20?= =?UTF-8?q?=E2=80=94=20contains,=20parameter=5Fof,=20receiver=20(Phase=202?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build file→definition and parent→child contains edges, parameter_of inverse edges, and receiver 
edges for method-call dispatch. Add CORE_EDGE_KINDS, STRUCTURAL_EDGE_KINDS, EVERY_EDGE_KIND constants. Exclude structural edges from moduleMapData coupling counts. Scope directory contains-edge cleanup to preserve symbol-level edges. Impact: 3 functions changed, 22 affected --- src/builder.js | 62 ++++++++++++++---- src/index.js | 3 + src/mcp.js | 4 +- src/queries.js | 24 ++++++- src/structure.js | 5 +- tests/integration/build-parity.test.js | 25 +++++++- tests/integration/queries.test.js | 87 +++++++++++++++++++++++++- 7 files changed, 187 insertions(+), 23 deletions(-) diff --git a/src/builder.js b/src/builder.js index 7a916647..79fd9d47 100644 --- a/src/builder.js +++ b/src/builder.js @@ -598,20 +598,32 @@ export async function buildGraph(rootDir, opts = {}) { fileSymbols.set(relPath, symbols); insertNode.run(relPath, 'file', relPath, 0, null, null); + const fileRow = getNodeId.get(relPath, 'file', relPath, 0); for (const def of symbols.definitions) { insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null, null); - if (def.children?.length) { - const parentRow = getNodeId.get(def.name, def.kind, relPath, def.line); - if (parentRow) { - for (const child of def.children) { - insertNode.run( - child.name, - child.kind, - relPath, - child.line, - child.endLine || null, - parentRow.id, - ); + const defRow = getNodeId.get(def.name, def.kind, relPath, def.line); + // File → top-level definition contains edge + if (fileRow && defRow) { + insertEdge.run(fileRow.id, defRow.id, 'contains', 1.0, 0); + } + if (def.children?.length && defRow) { + for (const child of def.children) { + insertNode.run( + child.name, + child.kind, + relPath, + child.line, + child.endLine || null, + defRow.id, + ); + // Parent → child contains edge + const childRow = getNodeId.get(child.name, child.kind, relPath, child.line); + if (childRow) { + insertEdge.run(defRow.id, childRow.id, 'contains', 1.0, 0); + // Parameter → parent parameter_of edge (inverse direction) + if 
(child.kind === 'parameter') { + insertEdge.run(childRow.id, defRow.id, 'parameter_of', 1.0, 0); + } } } } @@ -797,7 +809,7 @@ export async function buildGraph(rootDir, opts = {}) { // N+1 optimization: pre-load all nodes into a lookup map for edge building const allNodes = db .prepare( - `SELECT id, name, kind, file FROM nodes WHERE kind IN ('function','method','class','interface')`, + `SELECT id, name, kind, file FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`, ) .all(); const nodesByName = new Map(); @@ -956,6 +968,30 @@ export async function buildGraph(rootDir, opts = {}) { edgeCount++; } } + + // Receiver edge: caller → receiver type node + if ( + call.receiver && + !BUILTIN_RECEIVERS.has(call.receiver) && + call.receiver !== 'this' && + call.receiver !== 'self' && + call.receiver !== 'super' + ) { + const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); + // Same-file first, then global + const samefile = nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; + const candidates = samefile.length > 0 ? 
samefile : nodesByName.get(call.receiver) || []; + const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); + if (receiverNodes.length > 0 && caller) { + const recvTarget = receiverNodes[0]; + const recvKey = `recv|${caller.id}|${recvTarget.id}`; + if (!seenCallEdges.has(recvKey)) { + seenCallEdges.add(recvKey); + insertEdge.run(caller.id, recvTarget.id, 'receiver', 0.7, 0); + edgeCount++; + } + } + } } // Class extends edges (use pre-loaded maps instead of inline DB queries) diff --git a/src/index.js b/src/index.js index 973d2475..6774d54b 100644 --- a/src/index.js +++ b/src/index.js @@ -107,11 +107,13 @@ export { getActiveEngine, parseFileAuto, parseFilesAuto } from './parser.js'; // Query functions (data-returning) export { ALL_SYMBOL_KINDS, + CORE_EDGE_KINDS, CORE_SYMBOL_KINDS, childrenData, contextData, diffImpactData, diffImpactMermaid, + EVERY_EDGE_KIND, EVERY_SYMBOL_KIND, EXTENDED_SYMBOL_KINDS, explainData, @@ -130,6 +132,7 @@ export { pathData, queryNameData, rolesData, + STRUCTURAL_EDGE_KINDS, statsData, VALID_ROLES, whereData, diff --git a/src/mcp.js b/src/mcp.js index d02cdf29..cd0b8808 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -9,7 +9,7 @@ import { createRequire } from 'node:module'; import { findCycles } from './cycles.js'; import { findDbPath } from './db.js'; import { MCP_DEFAULTS, MCP_MAX_LIMIT } from './paginate.js'; -import { diffImpactMermaid, EVERY_SYMBOL_KIND, VALID_ROLES } from './queries.js'; +import { diffImpactMermaid, EVERY_EDGE_KIND, EVERY_SYMBOL_KIND, VALID_ROLES } from './queries.js'; const REPO_PROP = { repo: { @@ -53,7 +53,7 @@ const BASE_TOOLS = [ to: { type: 'string', description: 'Target symbol for path mode (required in path mode)' }, edge_kinds: { type: 'array', - items: { type: 'string' }, + items: { type: 'string', enum: EVERY_EDGE_KIND }, description: 'Edge kinds to follow in path mode (default: ["calls"])', }, reverse: { diff --git a/src/queries.js b/src/queries.js index dc1fb1ad..6d094108 100644 --- 
a/src/queries.js +++ b/src/queries.js @@ -89,6 +89,24 @@ export const EVERY_SYMBOL_KIND = [...CORE_SYMBOL_KINDS, ...EXTENDED_SYMBOL_KINDS // Backward compat: ALL_SYMBOL_KINDS stays as the core 10 export const ALL_SYMBOL_KINDS = CORE_SYMBOL_KINDS; +// ── Edge kind constants ───────────────────────────────────────────── +// Core edge kinds — coupling and dependency relationships +export const CORE_EDGE_KINDS = [ + 'imports', + 'imports-type', + 'reexports', + 'calls', + 'extends', + 'implements', + 'contains', +]; + +// Structural edge kinds — parent/child and type relationships +export const STRUCTURAL_EDGE_KINDS = ['parameter_of', 'receiver']; + +// Full set for MCP enum and validation +export const EVERY_EDGE_KIND = [...CORE_EDGE_KINDS, ...STRUCTURAL_EDGE_KINDS]; + export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; /** @@ -348,12 +366,12 @@ export function moduleMapData(customDbPath, limit = 20, opts = {}) { const nodes = db .prepare(` SELECT n.*, - (SELECT COUNT(*) FROM edges WHERE source_id = n.id AND kind != 'contains') as out_edges, - (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind != 'contains') as in_edges + (SELECT COUNT(*) FROM edges WHERE source_id = n.id AND kind NOT IN ('contains', 'parameter_of', 'receiver')) as out_edges, + (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind NOT IN ('contains', 'parameter_of', 'receiver')) as in_edges FROM nodes n WHERE n.kind = 'file' ${testFilter} - ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind != 'contains') DESC + ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind NOT IN ('contains', 'parameter_of', 'receiver')) DESC LIMIT ? 
`) .all(limit); diff --git a/src/structure.js b/src/structure.js index a4c28f41..6169795d 100644 --- a/src/structure.js +++ b/src/structure.js @@ -34,8 +34,11 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director `); // Clean previous directory nodes/edges (idempotent rebuild) + // Scope contains-edge delete to directory-sourced edges only, + // preserving symbol-level contains edges (file→def, class→method, etc.) db.exec(` - DELETE FROM edges WHERE kind = 'contains'; + DELETE FROM edges WHERE kind = 'contains' + AND source_id IN (SELECT id FROM nodes WHERE kind = 'directory'); DELETE FROM node_metrics; DELETE FROM nodes WHERE kind = 'directory'; `); diff --git a/tests/integration/build-parity.test.js b/tests/integration/build-parity.test.js index 5651a61b..7811f6df 100644 --- a/tests/integration/build-parity.test.js +++ b/tests/integration/build-parity.test.js @@ -87,8 +87,27 @@ describeOrSkip('Build parity: native vs WASM', () => { }); it('produces identical edges', () => { - const wasmGraph = readGraph(path.join(wasmDir, '.codegraph', 'graph.db')); - const nativeGraph = readGraph(path.join(nativeDir, '.codegraph', 'graph.db')); - expect(nativeGraph.edges).toEqual(wasmGraph.edges); + // Filter out edges involving extended-kind nodes (parameter, property, constant) + // — WASM extracts children but native engine defers child extraction for now. 
+ function readCoreEdges(dbPath) { + const db = new Database(dbPath, { readonly: true }); + const edges = db + .prepare(` + SELECT n1.name AS source_name, n2.name AS target_name, e.kind + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.kind NOT IN ('parameter', 'property', 'constant') + AND n2.kind NOT IN ('parameter', 'property', 'constant') + ORDER BY n1.name, n2.name, e.kind + `) + .all(); + db.close(); + return edges; + } + + const wasmEdges = readCoreEdges(path.join(wasmDir, '.codegraph', 'graph.db')); + const nativeEdges = readCoreEdges(path.join(nativeDir, '.codegraph', 'graph.db')); + expect(nativeEdges).toEqual(wasmEdges); }); }); diff --git a/tests/integration/queries.test.js b/tests/integration/queries.test.js index 0bb3b7dc..af288060 100644 --- a/tests/integration/queries.test.js +++ b/tests/integration/queries.test.js @@ -103,6 +103,24 @@ beforeAll(() => { // Low-confidence call edge for quality tests insertEdge(db, formatResponse, validateToken, 'calls', 0.3); + // ── Phase 2: expanded node/edge types ────────────────────────────── + // Class with method and property children + const userService = insertNode(db, 'UserService', 'class', 'auth.js', 40); + const getUser = insertNode(db, 'UserService.getUser', 'method', 'auth.js', 42); + const dbConn = insertNode(db, 'dbConn', 'property', 'auth.js', 41); + const userId = insertNode(db, 'userId', 'parameter', 'auth.js', 10); + + // Symbol-level contains edges (file → class, class → method/property) + insertEdge(db, fAuth, userService, 'contains'); + insertEdge(db, userService, getUser, 'contains'); + insertEdge(db, userService, dbConn, 'contains'); + + // parameter_of edge (parameter → owning function) + insertEdge(db, userId, authenticate, 'parameter_of'); + + // receiver edge (caller → receiver type) + insertEdge(db, handleRoute, userService, 'receiver', 0.7); + // File hashes (for fileHash exposure) for (const f of ['auth.js', 'middleware.js', 
'routes.js', 'utils.js', 'auth.test.js']) { db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run( @@ -448,7 +466,7 @@ describe('explainData', () => { const r = data.results[0]; expect(r.file).toBe('auth.js'); - expect(r.symbolCount).toBe(2); + expect(r.symbolCount).toBe(6); // Both authenticate and validateToken are called from middleware.js expect(r.publicApi.map((s) => s.name)).toContain('authenticate'); expect(r.publicApi.map((s) => s.name)).toContain('validateToken'); @@ -661,6 +679,73 @@ describe('noTests filtering', () => { }); }); +// ─── Expanded edge types (Phase 2) ───────────────────────────────────── + +describe('expanded edge types', () => { + test('statsData counts new edge kinds', () => { + const data = statsData(dbPath); + expect(data.edges.byKind.contains).toBeGreaterThanOrEqual(3); + expect(data.edges.byKind.parameter_of).toBeGreaterThanOrEqual(1); + expect(data.edges.byKind.receiver).toBeGreaterThanOrEqual(1); + }); + + test('moduleMapData excludes structural edges from coupling', () => { + const data = moduleMapData(dbPath); + // auth.js has contains, parameter_of, receiver edges but they should + // not inflate coupling counts — only imports/calls/etc. 
count + const authNode = data.topNodes.find((n) => n.file === 'auth.js'); + expect(authNode).toBeDefined(); + // in_edges should not include contains/parameter_of/receiver + // auth.js is imported by middleware.js and auth.test.js → in_edges = 2 + expect(authNode.inEdges).toBe(2); + }); + + test('queryNameData returns new edge kinds in callers/callees', () => { + // authenticate has a parameter_of edge from userId + const authData = queryNameData('authenticate', dbPath); + const fn = authData.results.find((r) => r.kind === 'function' && r.name === 'authenticate'); + expect(fn).toBeDefined(); + const paramCaller = fn.callers.find((c) => c.edgeKind === 'parameter_of'); + expect(paramCaller).toBeDefined(); + expect(paramCaller.name).toBe('userId'); + + // UserService has contains callees (method and property) + const usData = queryNameData('UserService', dbPath); + const cls = usData.results.find((r) => r.kind === 'class' && r.name === 'UserService'); + expect(cls).toBeDefined(); + const containsCallees = cls.callees.filter((c) => c.edgeKind === 'contains'); + expect(containsCallees.length).toBeGreaterThanOrEqual(2); + const names = containsCallees.map((c) => c.name); + expect(names).toContain('UserService.getUser'); + expect(names).toContain('dbConn'); + + // UserService has a receiver caller (handleRoute) + const receiverCaller = cls.callers.find((c) => c.edgeKind === 'receiver'); + expect(receiverCaller).toBeDefined(); + expect(receiverCaller.name).toBe('handleRoute'); + }); + + test('pathData traverses contains edges', () => { + const data = pathData('UserService', 'UserService.getUser', dbPath, { + edgeKinds: ['contains'], + }); + expect(data.found).toBe(true); + expect(data.hops).toBe(1); + expect(data.path[0].name).toBe('UserService'); + expect(data.path[1].name).toBe('UserService.getUser'); + expect(data.path[1].edgeKind).toBe('contains'); + }); + + test('pathData traverses receiver edges', () => { + const data = pathData('handleRoute', 'UserService', dbPath, 
{ + edgeKinds: ['receiver'], + }); + expect(data.found).toBe(true); + expect(data.hops).toBe(1); + expect(data.path[1].edgeKind).toBe('receiver'); + }); +}); + // ─── Stable symbol schema conformance ────────────────────────────────── const STABLE_FIELDS = ['name', 'kind', 'file', 'line', 'endLine', 'role', 'fileHash']; From f8f045c8127d194a95117f613d5fcd9de7c22610 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:06:12 -0700 Subject: [PATCH 21/30] chore: add pre-commit diff-impact hook (#271) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add show-diff-impact.sh that automatically runs `codegraph diff-impact --staged -T` before git commit commands. The hook injects blast radius info as additionalContext — informational only, never blocks commits. --- .claude/hooks/show-diff-impact.sh | 70 +++++++++++++++++++++++++++++++ .claude/settings.json | 5 +++ 2 files changed, 75 insertions(+) create mode 100644 .claude/hooks/show-diff-impact.sh diff --git a/.claude/hooks/show-diff-impact.sh b/.claude/hooks/show-diff-impact.sh new file mode 100644 index 00000000..e3c583f7 --- /dev/null +++ b/.claude/hooks/show-diff-impact.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# show-diff-impact.sh — PreToolUse hook for Bash (git commit) +# Runs `codegraph diff-impact --staged -T` before commits and injects +# the impact summary as additionalContext. Informational only — never blocks. + +set -euo pipefail + +INPUT=$(cat) + +# Extract the command from tool_input JSON +COMMAND=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.command||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$COMMAND" ]; then + exit 0 +fi + +# Only trigger on git commit commands +if ! 
echo "$COMMAND" | grep -qE '(^|\s|&&\s*)git\s+commit\b'; then + exit 0 +fi + +# Guard: codegraph DB must exist +WORK_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) || WORK_ROOT="${CLAUDE_PROJECT_DIR:-.}" +if [ ! -f "$WORK_ROOT/.codegraph/graph.db" ]; then + exit 0 +fi + +# Guard: must have staged changes +STAGED=$(git diff --cached --name-only 2>/dev/null) || true +if [ -z "$STAGED" ]; then + exit 0 +fi + +# Run diff-impact and capture output +IMPACT=$(node "$WORK_ROOT/src/cli.js" diff-impact --staged -T 2>/dev/null) || true + +if [ -z "$IMPACT" ]; then + exit 0 +fi + +# Escape for JSON embedding +ESCAPED=$(printf '%s' "$IMPACT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>process.stdout.write(JSON.stringify(d))); +" 2>/dev/null) || true + +if [ -z "$ESCAPED" ]; then + exit 0 +fi + +# Inject as additionalContext — never block +node -e " + console.log(JSON.stringify({ + hookSpecificOutput: { + hookEventName: 'PreToolUse', + permissionDecision: 'allow', + additionalContext: '[codegraph diff-impact] Pre-commit blast radius:\\n' + JSON.parse(process.argv[1]) + } + })); +" "$ESCAPED" 2>/dev/null || true + +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index 9d7e609b..4ffe2530 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -13,6 +13,11 @@ "type": "command", "command": "bash \"$CLAUDE_PROJECT_DIR/.claude/hooks/guard-git.sh\"", "timeout": 10 + }, + { + "type": "command", + "command": "bash \"$CLAUDE_PROJECT_DIR/.claude/hooks/show-diff-impact.sh\"", + "timeout": 15 } ] }, From 115fefaee82914d4aff1e9dd39fe733b952b2807 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:15:53 -0700 Subject: [PATCH 22/30] feat(export): add GraphML, GraphSON, Neo4j CSV and interactive viewer (#268) * feat(export): add GraphML, GraphSON, Neo4j CSV formats and interactive HTML viewer Add three new export formats for graph database interoperability: - 
GraphML (XML standard) with file-level and function-level modes - GraphSON (TinkerPop v3) for Gremlin/JanusGraph compatibility - Neo4j CSV (bulk import) with separate nodes/relationships files Add interactive HTML viewer (`codegraph plot`) powered by vis-network: - Hierarchical, force, and radial layouts with physics toggle - Node coloring by kind or role, search/filter, legend panel - Configurable via .plotDotCfg JSON file Update CLI export command, MCP export_graph tool, and programmatic API to support all six formats. Impact: 12 functions changed, 6 affected * feat(plot): add drill-down, clustering, complexity overlays, and detail panel Evolve the plot command from a static viewer into an interactive exploration tool with rich data overlays and navigation. Data preparation: - Extract prepareGraphData() with complexity, fan-in/fan-out, Louvain community detection, directory derivation, and risk flag computation - Seed strategies: all (default), top-fanin, entry Interactive features: - Detail sidebar: metrics, callers/callees lists, risk badges - Drill-down: click-to-expand / double-click-to-collapse neighbors - Clustering: community and directory grouping via vis-network API - Color by: kind, role, community, complexity (MI-based borders) - Size by: uniform, fan-in, fan-out, complexity - Risk overlay: dead-code (dashed), high-blast-radius (shadow), low-MI CLI options: - --cluster, --overlay, --seed, --seed-count, --size-by, --color-by Tests expanded from 7 to 21 covering all new data enrichment, seed strategies, risk flags, UI elements, and config backward compatibility. Impact: 5 functions changed, 3 affected * fix(test): update MCP export_graph enum to include new formats The previous commit added graphml, graphson, and neo4j export formats to the MCP tool definition but did not update the test assertion. 
* style: format mcp test after enum update * fix(security): escape config values in HTML template to prevent XSS Use JSON.stringify() for cfg.layout.direction, effectiveColorBy, and cfg.clusterBy when interpolated into inline JavaScript. Replace shell exec() with execFile() for browser-open to avoid path injection. Impact: 1 functions changed, 1 affected --- src/cli.js | 111 ++++- src/export.js | 305 ++++++++++++ src/index.js | 13 +- src/mcp.js | 29 +- src/viewer.js | 948 +++++++++++++++++++++++++++++++++++++ tests/graph/export.test.js | 205 +++++++- tests/graph/viewer.test.js | 360 ++++++++++++++ tests/unit/mcp.test.js | 9 +- 8 files changed, 1969 insertions(+), 11 deletions(-) create mode 100644 src/viewer.js create mode 100644 tests/graph/viewer.test.js diff --git a/src/cli.js b/src/cli.js index ddd853aa..d3b36f74 100644 --- a/src/cli.js +++ b/src/cli.js @@ -16,7 +16,14 @@ import { MODELS, search, } from './embedder.js'; -import { exportDOT, exportJSON, exportMermaid } from './export.js'; +import { + exportDOT, + exportGraphML, + exportGraphSON, + exportJSON, + exportMermaid, + exportNeo4jCSV, +} from './export.js'; import { setVerbose } from './logger.js'; import { printNdjson } from './paginate.js'; import { @@ -413,9 +420,13 @@ program program .command('export') - .description('Export dependency graph as DOT (Graphviz), Mermaid, or JSON') + .description('Export dependency graph as DOT, Mermaid, JSON, GraphML, GraphSON, or Neo4j CSV') .option('-d, --db ', 'Path to graph.db') - .option('-f, --format ', 'Output format: dot, mermaid, json', 'dot') + .option( + '-f, --format ', + 'Output format: dot, mermaid, json, graphml, graphson, neo4j', + 'dot', + ) .option('--functions', 'Function-level graph instead of file-level') .option('-T, --no-tests', 'Exclude test/spec files') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') @@ -439,6 +450,25 @@ program case 'json': output = JSON.stringify(exportJSON(db, exportOpts), null, 2); 
break; + case 'graphml': + output = exportGraphML(db, exportOpts); + break; + case 'graphson': + output = JSON.stringify(exportGraphSON(db, exportOpts), null, 2); + break; + case 'neo4j': { + const csv = exportNeo4jCSV(db, exportOpts); + if (opts.output) { + const base = opts.output.replace(/\.[^.]+$/, '') || opts.output; + fs.writeFileSync(`${base}-nodes.csv`, csv.nodes, 'utf-8'); + fs.writeFileSync(`${base}-relationships.csv`, csv.relationships, 'utf-8'); + db.close(); + console.log(`Exported to ${base}-nodes.csv and ${base}-relationships.csv`); + return; + } + output = `--- nodes.csv ---\n${csv.nodes}\n\n--- relationships.csv ---\n${csv.relationships}`; + break; + } default: output = exportDOT(db, exportOpts); break; @@ -454,6 +484,81 @@ program } }); +program + .command('plot') + .description('Generate an interactive HTML dependency graph viewer') + .option('-d, --db ', 'Path to graph.db') + .option('--functions', 'Function-level graph instead of file-level') + .option('-T, --no-tests', 'Exclude test/spec files') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('--min-confidence ', 'Minimum edge confidence threshold (default: 0.5)', '0.5') + .option('-o, --output ', 'Write HTML to file') + .option('-c, --config ', 'Path to .plotDotCfg config file') + .option('--no-open', 'Do not open in browser') + .option('--cluster ', 'Cluster nodes: none | community | directory') + .option('--overlay ', 'Comma-separated overlays: complexity,risk') + .option('--seed ', 'Seed strategy: all | top-fanin | entry') + .option('--seed-count ', 'Number of seed nodes (default: 30)') + .option('--size-by ', 'Size nodes by: uniform | fan-in | fan-out | complexity') + .option('--color-by ', 'Color nodes by: kind | role | community | complexity') + .action(async (opts) => { + const { generatePlotHTML, loadPlotConfig } = await import('./viewer.js'); + const os = await import('node:os'); + const db = openReadonlyOrFail(opts.db); + + let 
plotCfg; + if (opts.config) { + try { + plotCfg = JSON.parse(fs.readFileSync(opts.config, 'utf-8')); + } catch (e) { + console.error(`Failed to load config: ${e.message}`); + db.close(); + process.exitCode = 1; + return; + } + } else { + plotCfg = loadPlotConfig(process.cwd()); + } + + // Merge CLI flags into config + if (opts.cluster) plotCfg.clusterBy = opts.cluster; + if (opts.colorBy) plotCfg.colorBy = opts.colorBy; + if (opts.sizeBy) plotCfg.sizeBy = opts.sizeBy; + if (opts.seed) plotCfg.seedStrategy = opts.seed; + if (opts.seedCount) plotCfg.seedCount = parseInt(opts.seedCount, 10); + if (opts.overlay) { + const parts = opts.overlay.split(',').map((s) => s.trim()); + if (!plotCfg.overlays) plotCfg.overlays = {}; + if (parts.includes('complexity')) plotCfg.overlays.complexity = true; + if (parts.includes('risk')) plotCfg.overlays.risk = true; + } + + const html = generatePlotHTML(db, { + fileLevel: !opts.functions, + noTests: resolveNoTests(opts), + minConfidence: parseFloat(opts.minConfidence), + config: plotCfg, + }); + db.close(); + + const outPath = opts.output || path.join(os.tmpdir(), `codegraph-plot-${Date.now()}.html`); + fs.writeFileSync(outPath, html, 'utf-8'); + console.log(`Plot written to ${outPath}`); + + if (opts.open !== false) { + const { execFile } = await import('node:child_process'); + const args = + process.platform === 'win32' + ? ['cmd', ['/c', 'start', '', outPath]] + : process.platform === 'darwin' + ? ['open', [outPath]] + : ['xdg-open', [outPath]]; + execFile(args[0], args[1], (err) => { + if (err) console.error('Could not open browser:', err.message); + }); + } + }); + program .command('cycles') .description('Detect circular dependencies in the codebase') diff --git a/src/export.js b/src/export.js index e13ca5ef..e7687daa 100644 --- a/src/export.js +++ b/src/export.js @@ -4,6 +4,25 @@ import { isTestFile } from './queries.js'; const DEFAULT_MIN_CONFIDENCE = 0.5; +/** Escape special XML characters. 
*/ +function escapeXml(s) { + return String(s) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); +} + +/** RFC 4180 CSV field escaping — quote fields containing commas, quotes, or newlines. */ +function escapeCsv(s) { + const str = String(s); + if (str.includes(',') || str.includes('"') || str.includes('\n') || str.includes('\r')) { + return `"${str.replace(/"/g, '""')}"`; + } + return str; +} + /** * Export the dependency graph in DOT (Graphviz) format. */ @@ -374,3 +393,289 @@ export function exportJSON(db, opts = {}) { const base = { nodes, edges }; return paginateResult(base, 'edges', { limit: opts.limit, offset: opts.offset }); } + +/** + * Export the dependency graph in GraphML (XML) format. + */ +export function exportGraphML(db, opts = {}) { + const fileLevel = opts.fileLevel !== false; + const noTests = opts.noTests || false; + const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE; + const edgeLimit = opts.limit; + + const lines = [ + '', + '', + ]; + + if (fileLevel) { + lines.push(' '); + lines.push(' '); + lines.push(' '); + lines.push(' '); + + let edges = db + .prepare(` + SELECT DISTINCT n1.file AS source, n2.file AS target + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls') + AND e.confidence >= ? 
+ `) + .all(minConf); + if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target)); + if (edgeLimit && edges.length > edgeLimit) edges = edges.slice(0, edgeLimit); + + const files = new Set(); + for (const { source, target } of edges) { + files.add(source); + files.add(target); + } + + const fileIds = new Map(); + let nIdx = 0; + for (const f of files) { + const id = `n${nIdx++}`; + fileIds.set(f, id); + lines.push(` `); + lines.push(` ${escapeXml(path.basename(f))}`); + lines.push(` ${escapeXml(f)}`); + lines.push(' '); + } + + let eIdx = 0; + for (const { source, target } of edges) { + lines.push( + ` `, + ); + lines.push(' imports'); + lines.push(' '); + } + } else { + lines.push(' '); + lines.push(' '); + lines.push(' '); + lines.push(' '); + lines.push(' '); + lines.push(' '); + lines.push(' '); + lines.push(' '); + + let edges = db + .prepare(` + SELECT n1.id AS source_id, n1.name AS source_name, n1.kind AS source_kind, + n1.file AS source_file, n1.line AS source_line, n1.role AS source_role, + n2.id AS target_id, n2.name AS target_name, n2.kind AS target_kind, + n2.file AS target_file, n2.line AS target_line, n2.role AS target_role, + e.kind AS edge_kind, e.confidence + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') + AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') + AND e.kind = 'calls' + AND e.confidence >= ? 
+ `) + .all(minConf); + if (noTests) + edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file)); + if (edgeLimit && edges.length > edgeLimit) edges = edges.slice(0, edgeLimit); + + const emittedNodes = new Set(); + function emitNode(id, name, kind, file, line, role) { + if (emittedNodes.has(id)) return; + emittedNodes.add(id); + lines.push(` `); + lines.push(` ${escapeXml(name)}`); + lines.push(` ${escapeXml(kind)}`); + lines.push(` ${escapeXml(file)}`); + lines.push(` ${line}`); + if (role) lines.push(` ${escapeXml(role)}`); + lines.push(' '); + } + + let eIdx = 0; + for (const e of edges) { + emitNode( + e.source_id, + e.source_name, + e.source_kind, + e.source_file, + e.source_line, + e.source_role, + ); + emitNode( + e.target_id, + e.target_name, + e.target_kind, + e.target_file, + e.target_line, + e.target_role, + ); + lines.push(` `); + lines.push(` ${escapeXml(e.edge_kind)}`); + lines.push(` ${e.confidence}`); + lines.push(' '); + } + } + + lines.push(' '); + lines.push(''); + return lines.join('\n'); +} + +/** + * Export the dependency graph in TinkerPop GraphSON v3 format. + */ +export function exportGraphSON(db, opts = {}) { + const noTests = opts.noTests || false; + const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE; + + let nodes = db + .prepare(` + SELECT id, name, kind, file, line, role FROM nodes + WHERE kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'file') + `) + .all(); + if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); + + let edges = db + .prepare(` + SELECT e.rowid AS id, n1.id AS outV, n2.id AS inV, e.kind, e.confidence + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE e.confidence >= ? 
+ `) + .all(minConf); + if (noTests) { + const nodeIds = new Set(nodes.map((n) => n.id)); + edges = edges.filter((e) => nodeIds.has(e.outV) && nodeIds.has(e.inV)); + } + + const vertices = nodes.map((n) => ({ + id: n.id, + label: n.kind, + properties: { + name: [{ id: 0, value: n.name }], + file: [{ id: 0, value: n.file }], + ...(n.line != null ? { line: [{ id: 0, value: n.line }] } : {}), + ...(n.role ? { role: [{ id: 0, value: n.role }] } : {}), + }, + })); + + const gEdges = edges.map((e) => ({ + id: e.id, + label: e.kind, + inV: e.inV, + outV: e.outV, + properties: { + confidence: e.confidence, + }, + })); + + const base = { vertices, edges: gEdges }; + return paginateResult(base, 'edges', { limit: opts.limit, offset: opts.offset }); +} + +/** + * Export the dependency graph as Neo4j bulk-import CSV files. + * Returns { nodes: string, relationships: string }. + */ +export function exportNeo4jCSV(db, opts = {}) { + const fileLevel = opts.fileLevel !== false; + const noTests = opts.noTests || false; + const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE; + const edgeLimit = opts.limit; + + if (fileLevel) { + let edges = db + .prepare(` + SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind, e.confidence + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls') + AND e.confidence >= ? 
+ `) + .all(minConf); + if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target)); + if (edgeLimit && edges.length > edgeLimit) edges = edges.slice(0, edgeLimit); + + const files = new Map(); + let idx = 0; + for (const { source, target } of edges) { + if (!files.has(source)) files.set(source, idx++); + if (!files.has(target)) files.set(target, idx++); + } + + const nodeLines = ['nodeId:ID,name,file:string,:LABEL']; + for (const [file, id] of files) { + nodeLines.push(`${id},${escapeCsv(path.basename(file))},${escapeCsv(file)},File`); + } + + const relLines = [':START_ID,:END_ID,:TYPE,confidence:float']; + for (const e of edges) { + const edgeType = e.kind.toUpperCase().replace(/-/g, '_'); + relLines.push(`${files.get(e.source)},${files.get(e.target)},${edgeType},${e.confidence}`); + } + + return { nodes: nodeLines.join('\n'), relationships: relLines.join('\n') }; + } + + let edges = db + .prepare(` + SELECT n1.id AS source_id, n1.name AS source_name, n1.kind AS source_kind, + n1.file AS source_file, n1.line AS source_line, n1.role AS source_role, + n2.id AS target_id, n2.name AS target_name, n2.kind AS target_kind, + n2.file AS target_file, n2.line AS target_line, n2.role AS target_role, + e.kind AS edge_kind, e.confidence + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') + AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') + AND e.kind = 'calls' + AND e.confidence >= ? 
+ `) + .all(minConf); + if (noTests) + edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file)); + if (edgeLimit && edges.length > edgeLimit) edges = edges.slice(0, edgeLimit); + + const emitted = new Set(); + const nodeLines = ['nodeId:ID,name,kind,file:string,line:int,role,:LABEL']; + function emitNode(id, name, kind, file, line, role) { + if (emitted.has(id)) return; + emitted.add(id); + const label = kind.charAt(0).toUpperCase() + kind.slice(1); + nodeLines.push( + `${id},${escapeCsv(name)},${escapeCsv(kind)},${escapeCsv(file)},${line},${escapeCsv(role || '')},${label}`, + ); + } + + const relLines = [':START_ID,:END_ID,:TYPE,confidence:float']; + for (const e of edges) { + emitNode( + e.source_id, + e.source_name, + e.source_kind, + e.source_file, + e.source_line, + e.source_role, + ); + emitNode( + e.target_id, + e.target_name, + e.target_kind, + e.target_file, + e.target_line, + e.target_role, + ); + const edgeType = e.edge_kind.toUpperCase().replace(/-/g, '_'); + relLines.push(`${e.source_id},${e.target_id},${edgeType},${e.confidence}`); + } + + return { nodes: nodeLines.join('\n'), relationships: relLines.join('\n') }; +} diff --git a/src/index.js b/src/index.js index 03be6853..7f0e5246 100644 --- a/src/index.js +++ b/src/index.js @@ -87,8 +87,15 @@ export { search, searchData, } from './embedder.js'; -// Export (DOT/Mermaid/JSON) -export { exportDOT, exportJSON, exportMermaid } from './export.js'; +// Export (DOT/Mermaid/JSON/GraphML/GraphSON/Neo4j CSV) +export { + exportDOT, + exportGraphML, + exportGraphSON, + exportJSON, + exportMermaid, + exportNeo4jCSV, +} from './export.js'; // Execution flow tracing export { entryPointType, flowData, listEntryPointsData } from './flow.js'; // Logger @@ -164,5 +171,7 @@ export { } from './structure.js'; // Triage — composite risk audit export { triage, triageData } from './triage.js'; +// Interactive HTML viewer +export { generatePlotHTML, loadPlotConfig } from './viewer.js'; // Watch 
mode export { watchProject } from './watcher.js'; diff --git a/src/mcp.js b/src/mcp.js index 405b09c2..1f0b9451 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -251,13 +251,14 @@ const BASE_TOOLS = [ }, { name: 'export_graph', - description: 'Export the dependency graph in DOT (Graphviz), Mermaid, or JSON format', + description: + 'Export the dependency graph in DOT, Mermaid, JSON, GraphML, GraphSON, or Neo4j CSV format', inputSchema: { type: 'object', properties: { format: { type: 'string', - enum: ['dot', 'mermaid', 'json'], + enum: ['dot', 'mermaid', 'json', 'graphml', 'graphson', 'neo4j'], description: 'Export format', }, file_level: { @@ -956,7 +957,14 @@ export async function startMCPServer(customDbPath, options = {}) { break; } case 'export_graph': { - const { exportDOT, exportMermaid, exportJSON } = await import('./export.js'); + const { + exportDOT, + exportGraphML, + exportGraphSON, + exportJSON, + exportMermaid, + exportNeo4jCSV, + } = await import('./export.js'); const db = new Database(findDbPath(dbPath), { readonly: true }); const fileLevel = args.file_level !== false; const exportLimit = args.limit @@ -975,13 +983,26 @@ export async function startMCPServer(customDbPath, options = {}) { offset: args.offset ?? 0, }); break; + case 'graphml': + result = exportGraphML(db, { fileLevel, limit: exportLimit }); + break; + case 'graphson': + result = exportGraphSON(db, { + fileLevel, + limit: exportLimit, + offset: args.offset ?? 0, + }); + break; + case 'neo4j': + result = exportNeo4jCSV(db, { fileLevel, limit: exportLimit }); + break; default: db.close(); return { content: [ { type: 'text', - text: `Unknown format: ${args.format}. Use dot, mermaid, or json.`, + text: `Unknown format: ${args.format}. 
Use dot, mermaid, json, graphml, graphson, or neo4j.`, }, ], isError: true, diff --git a/src/viewer.js b/src/viewer.js new file mode 100644 index 00000000..c0c4243d --- /dev/null +++ b/src/viewer.js @@ -0,0 +1,948 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import Graph from 'graphology'; +import louvain from 'graphology-communities-louvain'; +import { isTestFile } from './queries.js'; + +const DEFAULT_MIN_CONFIDENCE = 0.5; + +const DEFAULT_NODE_COLORS = { + function: '#4CAF50', + method: '#66BB6A', + class: '#2196F3', + interface: '#42A5F5', + type: '#7E57C2', + struct: '#FF7043', + enum: '#FFA726', + trait: '#26A69A', + record: '#EC407A', + module: '#78909C', + file: '#90A4AE', +}; + +const DEFAULT_ROLE_COLORS = { + entry: '#e8f5e9', + core: '#e3f2fd', + utility: '#f5f5f5', + dead: '#ffebee', + leaf: '#fffde7', +}; + +const COMMUNITY_COLORS = [ + '#4CAF50', + '#2196F3', + '#FF9800', + '#9C27B0', + '#F44336', + '#00BCD4', + '#CDDC39', + '#E91E63', + '#3F51B5', + '#FF5722', + '#009688', + '#795548', +]; + +const DEFAULT_CONFIG = { + layout: { algorithm: 'hierarchical', direction: 'LR' }, + physics: { enabled: true, nodeDistance: 150 }, + nodeColors: DEFAULT_NODE_COLORS, + roleColors: DEFAULT_ROLE_COLORS, + colorBy: 'kind', + edgeStyle: { color: '#666', smooth: true }, + filter: { kinds: null, roles: null, files: null }, + title: 'Codegraph', + seedStrategy: 'all', + seedCount: 30, + clusterBy: 'none', + sizeBy: 'uniform', + overlays: { complexity: false, risk: false }, + riskThresholds: { highBlastRadius: 10, lowMI: 40 }, +}; + +/** + * Load .plotDotCfg or .plotDotCfg.json from given directory. + * Returns merged config with defaults. 
+ */ +export function loadPlotConfig(dir) { + for (const name of ['.plotDotCfg', '.plotDotCfg.json']) { + const p = path.join(dir, name); + if (fs.existsSync(p)) { + try { + const raw = JSON.parse(fs.readFileSync(p, 'utf-8')); + return { + ...DEFAULT_CONFIG, + ...raw, + layout: { ...DEFAULT_CONFIG.layout, ...(raw.layout || {}) }, + physics: { ...DEFAULT_CONFIG.physics, ...(raw.physics || {}) }, + nodeColors: { + ...DEFAULT_CONFIG.nodeColors, + ...(raw.nodeColors || {}), + }, + roleColors: { + ...DEFAULT_CONFIG.roleColors, + ...(raw.roleColors || {}), + }, + edgeStyle: { + ...DEFAULT_CONFIG.edgeStyle, + ...(raw.edgeStyle || {}), + }, + filter: { ...DEFAULT_CONFIG.filter, ...(raw.filter || {}) }, + overlays: { + ...DEFAULT_CONFIG.overlays, + ...(raw.overlays || {}), + }, + riskThresholds: { + ...DEFAULT_CONFIG.riskThresholds, + ...(raw.riskThresholds || {}), + }, + }; + } catch { + // Invalid JSON — use defaults + } + } + } + return { ...DEFAULT_CONFIG }; +} + +// ─── Data Preparation ───────────────────────────────────────────────── + +/** + * Prepare enriched graph data for the HTML viewer. + */ +export function prepareGraphData(db, opts = {}) { + const fileLevel = opts.fileLevel !== false; + const noTests = opts.noTests || false; + const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE; + const cfg = opts.config || DEFAULT_CONFIG; + + return fileLevel + ? 
prepareFileLevelData(db, noTests, minConf, cfg) + : prepareFunctionLevelData(db, noTests, minConf, cfg); +} + +function prepareFunctionLevelData(db, noTests, minConf, cfg) { + let edges = db + .prepare( + ` + SELECT n1.id AS source_id, n1.name AS source_name, n1.kind AS source_kind, + n1.file AS source_file, n1.line AS source_line, n1.role AS source_role, + n2.id AS target_id, n2.name AS target_name, n2.kind AS target_kind, + n2.file AS target_file, n2.line AS target_line, n2.role AS target_role, + e.kind AS edge_kind + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') + AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') + AND e.kind = 'calls' + AND e.confidence >= ? + `, + ) + .all(minConf); + if (noTests) + edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file)); + + if (cfg.filter.kinds) { + const kinds = new Set(cfg.filter.kinds); + edges = edges.filter((e) => kinds.has(e.source_kind) && kinds.has(e.target_kind)); + } + if (cfg.filter.files) { + const patterns = cfg.filter.files; + edges = edges.filter( + (e) => + patterns.some((p) => e.source_file.includes(p)) && + patterns.some((p) => e.target_file.includes(p)), + ); + } + + const nodeMap = new Map(); + for (const e of edges) { + if (!nodeMap.has(e.source_id)) { + nodeMap.set(e.source_id, { + id: e.source_id, + name: e.source_name, + kind: e.source_kind, + file: e.source_file, + line: e.source_line, + role: e.source_role, + }); + } + if (!nodeMap.has(e.target_id)) { + nodeMap.set(e.target_id, { + id: e.target_id, + name: e.target_name, + kind: e.target_kind, + file: e.target_file, + line: e.target_line, + role: e.target_role, + }); + } + } + + if (cfg.filter.roles) { + const roles = new Set(cfg.filter.roles); + for (const [id, n] of nodeMap) { + if 
(!roles.has(n.role)) nodeMap.delete(id); + } + const nodeIds = new Set(nodeMap.keys()); + edges = edges.filter((e) => nodeIds.has(e.source_id) && nodeIds.has(e.target_id)); + } + + // Complexity data + const complexityMap = new Map(); + try { + const rows = db + .prepare( + 'SELECT node_id, cognitive, cyclomatic, max_nesting, maintainability_index FROM function_complexity', + ) + .all(); + for (const r of rows) { + complexityMap.set(r.node_id, { + cognitive: r.cognitive, + cyclomatic: r.cyclomatic, + maintainabilityIndex: r.maintainability_index, + }); + } + } catch { + // table may not exist in old DBs + } + + // Fan-in / fan-out + const fanInMap = new Map(); + const fanOutMap = new Map(); + const fanInRows = db + .prepare( + "SELECT target_id AS node_id, COUNT(*) AS fan_in FROM edges WHERE kind = 'calls' GROUP BY target_id", + ) + .all(); + for (const r of fanInRows) fanInMap.set(r.node_id, r.fan_in); + + const fanOutRows = db + .prepare( + "SELECT source_id AS node_id, COUNT(*) AS fan_out FROM edges WHERE kind = 'calls' GROUP BY source_id", + ) + .all(); + for (const r of fanOutRows) fanOutMap.set(r.node_id, r.fan_out); + + // Communities (Louvain) + const communityMap = new Map(); + if (nodeMap.size > 0) { + try { + const graph = new Graph({ type: 'undirected' }); + for (const [id] of nodeMap) graph.addNode(String(id)); + for (const e of edges) { + const src = String(e.source_id); + const tgt = String(e.target_id); + if (src !== tgt && !graph.hasEdge(src, tgt)) graph.addEdge(src, tgt); + } + const communities = louvain(graph); + for (const [nid, cid] of Object.entries(communities)) communityMap.set(Number(nid), cid); + } catch { + // louvain can fail on disconnected graphs + } + } + + // Build enriched nodes + const visNodes = [...nodeMap.values()].map((n) => { + const cx = complexityMap.get(n.id) || null; + const fanIn = fanInMap.get(n.id) || 0; + const fanOut = fanOutMap.get(n.id) || 0; + const community = communityMap.get(n.id) ?? 
null; + const directory = path.dirname(n.file); + const risk = []; + if (n.role === 'dead') risk.push('dead-code'); + if (fanIn >= (cfg.riskThresholds?.highBlastRadius ?? 10)) risk.push('high-blast-radius'); + if (cx && cx.maintainabilityIndex < (cfg.riskThresholds?.lowMI ?? 40)) risk.push('low-mi'); + + const color = + cfg.colorBy === 'role' && n.role + ? cfg.roleColors[n.role] || DEFAULT_ROLE_COLORS[n.role] || '#ccc' + : cfg.colorBy === 'community' && community !== null + ? COMMUNITY_COLORS[community % COMMUNITY_COLORS.length] + : cfg.nodeColors[n.kind] || DEFAULT_NODE_COLORS[n.kind] || '#ccc'; + + return { + id: n.id, + label: n.name, + title: `${n.file}:${n.line} (${n.kind}${n.role ? `, ${n.role}` : ''})`, + color, + kind: n.kind, + role: n.role || '', + file: n.file, + line: n.line, + community, + cognitive: cx?.cognitive ?? null, + cyclomatic: cx?.cyclomatic ?? null, + maintainabilityIndex: cx?.maintainabilityIndex ?? null, + fanIn, + fanOut, + directory, + risk, + }; + }); + + const visEdges = edges.map((e, i) => ({ + id: `e${i}`, + from: e.source_id, + to: e.target_id, + })); + + // Seed strategy + let seedNodeIds; + if (cfg.seedStrategy === 'top-fanin') { + const sorted = [...visNodes].sort((a, b) => b.fanIn - a.fanIn); + seedNodeIds = sorted.slice(0, cfg.seedCount || 30).map((n) => n.id); + } else if (cfg.seedStrategy === 'entry') { + seedNodeIds = visNodes.filter((n) => n.role === 'entry').map((n) => n.id); + } else { + seedNodeIds = visNodes.map((n) => n.id); + } + + return { nodes: visNodes, edges: visEdges, seedNodeIds }; +} + +function prepareFileLevelData(db, noTests, minConf, cfg) { + let edges = db + .prepare( + ` + SELECT DISTINCT n1.file AS source, n2.file AS target + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls') + AND e.confidence >= ? 
+ `, + ) + .all(minConf); + if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target)); + + const files = new Set(); + for (const { source, target } of edges) { + files.add(source); + files.add(target); + } + + const fileIds = new Map(); + let idx = 0; + for (const f of files) fileIds.set(f, idx++); + + // Fan-in/fan-out + const fanInCount = new Map(); + const fanOutCount = new Map(); + for (const { source, target } of edges) { + fanOutCount.set(source, (fanOutCount.get(source) || 0) + 1); + fanInCount.set(target, (fanInCount.get(target) || 0) + 1); + } + + // Communities + const communityMap = new Map(); + if (files.size > 0) { + try { + const graph = new Graph({ type: 'undirected' }); + for (const f of files) graph.addNode(f); + for (const { source, target } of edges) { + if (source !== target && !graph.hasEdge(source, target)) graph.addEdge(source, target); + } + const communities = louvain(graph); + for (const [file, cid] of Object.entries(communities)) communityMap.set(file, cid); + } catch { + // ignore + } + } + + const visNodes = [...files].map((f) => { + const id = fileIds.get(f); + const community = communityMap.get(f) ?? null; + const fanIn = fanInCount.get(f) || 0; + const fanOut = fanOutCount.get(f) || 0; + const directory = path.dirname(f); + const color = + cfg.colorBy === 'community' && community !== null + ? 
COMMUNITY_COLORS[community % COMMUNITY_COLORS.length] + : cfg.nodeColors.file || DEFAULT_NODE_COLORS.file; + + return { + id, + label: path.basename(f), + title: f, + color, + kind: 'file', + role: '', + file: f, + line: 0, + community, + cognitive: null, + cyclomatic: null, + maintainabilityIndex: null, + fanIn, + fanOut, + directory, + risk: [], + }; + }); + + const visEdges = edges.map(({ source, target }, i) => ({ + id: `e${i}`, + from: fileIds.get(source), + to: fileIds.get(target), + })); + + let seedNodeIds; + if (cfg.seedStrategy === 'top-fanin') { + const sorted = [...visNodes].sort((a, b) => b.fanIn - a.fanIn); + seedNodeIds = sorted.slice(0, cfg.seedCount || 30).map((n) => n.id); + } else if (cfg.seedStrategy === 'entry') { + seedNodeIds = visNodes.map((n) => n.id); + } else { + seedNodeIds = visNodes.map((n) => n.id); + } + + return { nodes: visNodes, edges: visEdges, seedNodeIds }; +} + +// ─── HTML Generation ────────────────────────────────────────────────── + +/** + * Generate a self-contained interactive HTML file with vis-network. + */ +export function generatePlotHTML(db, opts = {}) { + const cfg = opts.config || DEFAULT_CONFIG; + const data = prepareGraphData(db, opts); + const layoutOpts = buildLayoutOptions(cfg); + const title = cfg.title || 'Codegraph'; + + // Resolve effective colorBy (overlays.complexity overrides) + const effectiveColorBy = + cfg.overlays?.complexity && cfg.colorBy === 'kind' ? 'complexity' : cfg.colorBy || 'kind'; + const effectiveRisk = cfg.overlays?.risk || false; + + return ` + + + + +${escapeHtml(title)} + + + + +
+ + + + + + + +
+
+
+
+ × +
+
+
+
+ + +`; +} + +// ─── Internal Helpers ───────────────────────────────────────────────── + +function escapeHtml(s) { + return String(s) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"'); +} + +function buildLayoutOptions(cfg) { + const opts = { + nodes: { + shape: 'box', + font: { face: 'monospace', size: 12 }, + }, + edges: { + arrows: 'to', + color: cfg.edgeStyle.color || '#666', + smooth: cfg.edgeStyle.smooth !== false, + }, + physics: { + enabled: cfg.physics.enabled !== false, + barnesHut: { + gravitationalConstant: -3000, + springLength: cfg.physics.nodeDistance || 150, + }, + }, + interaction: { + tooltipDelay: 200, + hover: true, + }, + }; + + if (cfg.layout.algorithm === 'hierarchical') { + opts.layout = { + hierarchical: { + enabled: true, + direction: cfg.layout.direction || 'LR', + sortMethod: 'directed', + nodeSpacing: cfg.physics.nodeDistance || 150, + }, + }; + } + + return opts; +} diff --git a/tests/graph/export.test.js b/tests/graph/export.test.js index ac89b91a..3a12970e 100644 --- a/tests/graph/export.test.js +++ b/tests/graph/export.test.js @@ -5,7 +5,14 @@ import Database from 'better-sqlite3'; import { describe, expect, it } from 'vitest'; import { initSchema } from '../../src/db.js'; -import { exportDOT, exportJSON, exportMermaid } from '../../src/export.js'; +import { + exportDOT, + exportGraphML, + exportGraphSON, + exportJSON, + exportMermaid, + exportNeo4jCSV, +} from '../../src/export.js'; function createTestDb() { const db = new Database(':memory:'); @@ -252,3 +259,199 @@ describe('exportJSON', () => { db.close(); }); }); + +describe('exportGraphML', () => { + it('generates valid XML wrapper with graphml element', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const xml = exportGraphML(db); + expect(xml).toContain(''); + expect(xml).toContain(''); + db.close(); + }); + 
+ it('declares key elements for node and edge attributes', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const xml = exportGraphML(db); + expect(xml).toContain(' { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const xml = exportGraphML(db); + expect(xml).toContain(' { + const db = createTestDb(); + const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fnB = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fnA, fnB, 'calls'); + + const xml = exportGraphML(db, { fileLevel: false }); + expect(xml).toContain('doWork'); + expect(xml).toContain('helper'); + expect(xml).toContain('attr.name="kind"'); + expect(xml).toContain('attr.name="line"'); + db.close(); + }); + + it('produces valid output for empty graph', () => { + const db = createTestDb(); + const xml = exportGraphML(db); + expect(xml).toContain(''); + expect(xml).toContain(''); + db.close(); + }); + + it('escapes XML special characters', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/
.js', 'file', 'src/.js', 0); + const b = insertNode(db, 'src/b&c.js', 'file', 'src/b&c.js', 0); + insertEdge(db, a, b, 'imports'); + + const xml = exportGraphML(db); + expect(xml).toContain('<a>'); + expect(xml).toContain('b&c'); + expect(xml).not.toContain(''); + db.close(); + }); +}); + +describe('exportGraphSON', () => { + it('returns TinkerPop structure with vertices and edges', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const data = exportGraphSON(db); + expect(data).toHaveProperty('vertices'); + expect(data).toHaveProperty('edges'); + expect(data.vertices.length).toBeGreaterThanOrEqual(2); + db.close(); + }); + + it('uses multi-valued property format', () => { + const db = createTestDb(); + const fn = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fn2 = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fn, fn2, 'calls'); + + const data = exportGraphSON(db); + const vertex = data.vertices.find((v) => v.properties.name[0].value === 'doWork'); + expect(vertex).toBeDefined(); + expect(vertex.properties.name).toEqual([{ id: 0, value: 'doWork' }]); + expect(vertex.label).toBe('function'); + db.close(); + }); + + it('has inV and outV on edges', () => { + const db = createTestDb(); + const fn = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fn2 = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fn, fn2, 'calls'); + + const data = exportGraphSON(db); + expect(data.edges.length).toBeGreaterThanOrEqual(1); + const edge = data.edges[0]; + expect(edge).toHaveProperty('inV'); + expect(edge).toHaveProperty('outV'); + expect(edge).toHaveProperty('label'); + expect(edge).toHaveProperty('properties'); + db.close(); + }); + + it('includes confidence in edge properties', () => { + const db = createTestDb(); + const fn = insertNode(db, 'doWork', 
'function', 'src/a.js', 5); + const fn2 = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fn, fn2, 'calls'); + + const data = exportGraphSON(db); + const edge = data.edges[0]; + expect(edge.properties).toHaveProperty('confidence'); + expect(edge.properties.confidence).toBe(1.0); + db.close(); + }); +}); + +describe('exportNeo4jCSV', () => { + it('returns object with nodes and relationships strings', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const csv = exportNeo4jCSV(db); + expect(csv).toHaveProperty('nodes'); + expect(csv).toHaveProperty('relationships'); + expect(typeof csv.nodes).toBe('string'); + expect(typeof csv.relationships).toBe('string'); + db.close(); + }); + + it('has correct CSV headers for file-level', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const csv = exportNeo4jCSV(db); + expect(csv.nodes.split('\n')[0]).toBe('nodeId:ID,name,file:string,:LABEL'); + expect(csv.relationships.split('\n')[0]).toBe(':START_ID,:END_ID,:TYPE,confidence:float'); + db.close(); + }); + + it('capitalizes kind to Label for function-level', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fnB = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fnA, fnB, 'calls'); + + const csv = exportNeo4jCSV(db, { fileLevel: false }); + expect(csv.nodes).toContain(',Function'); + db.close(); + }); + + it('uppercases edge type and replaces hyphens', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports-type'); + + const csv = 
exportNeo4jCSV(db); + expect(csv.relationships).toContain('IMPORTS_TYPE'); + db.close(); + }); + + it('has correct function-level CSV headers', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fnB = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fnA, fnB, 'calls'); + + const csv = exportNeo4jCSV(db, { fileLevel: false }); + expect(csv.nodes.split('\n')[0]).toBe('nodeId:ID,name,kind,file:string,line:int,role,:LABEL'); + db.close(); + }); +}); diff --git a/tests/graph/viewer.test.js b/tests/graph/viewer.test.js new file mode 100644 index 00000000..0ace2b01 --- /dev/null +++ b/tests/graph/viewer.test.js @@ -0,0 +1,360 @@ +/** + * Interactive HTML viewer tests. + */ + +import Database from 'better-sqlite3'; +import { describe, expect, it } from 'vitest'; +import { initSchema } from '../../src/db.js'; +import { generatePlotHTML, loadPlotConfig, prepareGraphData } from '../../src/viewer.js'; + +function createTestDb() { + const db = new Database(':memory:'); + db.pragma('journal_mode = WAL'); + initSchema(db); + return db; +} + +function insertNode(db, name, kind, file, line, role) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line, role) VALUES (?, ?, ?, ?, ?)') + .run(name, kind, file, line, role || null).lastInsertRowid; +} + +function insertEdge(db, sourceId, targetId, kind) { + db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, 1.0, 0)', + ).run(sourceId, targetId, kind); +} + +function insertComplexity(db, nodeId, cognitive, cyclomatic, mi) { + db.prepare( + 'INSERT INTO function_complexity (node_id, cognitive, cyclomatic, max_nesting, maintainability_index) VALUES (?, ?, ?, 2, ?)', + ).run(nodeId, cognitive, cyclomatic, mi); +} + +describe('generatePlotHTML', () => { + it('returns a valid HTML document', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = 
insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const html = generatePlotHTML(db); + expect(html).toContain(''); + expect(html).toContain(''); + db.close(); + }); + + it('embeds graph data as JSON', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const html = generatePlotHTML(db); + expect(html).toContain('var allNodes ='); + expect(html).toContain('var allEdges ='); + expect(html).toContain('a.js'); + expect(html).toContain('b.js'); + db.close(); + }); + + it('includes vis-network CDN script', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain('vis-network'); + expect(html).toContain('unpkg.com'); + db.close(); + }); + + it('applies custom config title', () => { + const db = createTestDb(); + const html = generatePlotHTML(db, { + config: { + title: 'My Custom Graph', + layout: { algorithm: 'hierarchical', direction: 'LR' }, + physics: { enabled: true, nodeDistance: 150 }, + nodeColors: {}, + roleColors: {}, + colorBy: 'kind', + edgeStyle: { color: '#666', smooth: true }, + filter: { kinds: null, roles: null, files: null }, + seedStrategy: 'all', + seedCount: 30, + clusterBy: 'none', + sizeBy: 'uniform', + overlays: { complexity: false, risk: false }, + riskThresholds: { highBlastRadius: 10, lowMI: 40 }, + }, + }); + expect(html).toContain('My Custom Graph'); + db.close(); + }); + + it('handles empty graph without error', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain(''); + expect(html).toContain('var allNodes = []'); + expect(html).toContain('var allEdges = []'); + db.close(); + }); + + it('supports function-level mode', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fnB = insertNode(db, 'helper', 'function', 
'src/b.js', 10); + insertEdge(db, fnA, fnB, 'calls'); + + const html = generatePlotHTML(db, { fileLevel: false }); + expect(html).toContain('doWork'); + expect(html).toContain('helper'); + db.close(); + }); + + it('includes detail panel elements', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain('id="detail"'); + expect(html).toContain('id="detailContent"'); + expect(html).toContain('id="detailClose"'); + db.close(); + }); + + it('includes new control elements', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain('id="colorBySelect"'); + expect(html).toContain('id="sizeBySelect"'); + expect(html).toContain('id="clusterBySelect"'); + expect(html).toContain('id="riskToggle"'); + db.close(); + }); +}); + +describe('prepareGraphData', () => { + it('embeds complexity data into function-level nodes', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fnB = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fnA, fnB, 'calls'); + insertComplexity(db, fnA, 8, 5, 72.3); + insertComplexity(db, fnB, 2, 1, 95.0); + + const data = prepareGraphData(db, { fileLevel: false }); + const nodeA = data.nodes.find((n) => n.label === 'doWork'); + const nodeB = data.nodes.find((n) => n.label === 'helper'); + + expect(nodeA.cognitive).toBe(8); + expect(nodeA.cyclomatic).toBe(5); + expect(nodeA.maintainabilityIndex).toBeCloseTo(72.3, 1); + expect(nodeB.cognitive).toBe(2); + expect(nodeB.cyclomatic).toBe(1); + expect(nodeB.maintainabilityIndex).toBeCloseTo(95.0, 1); + db.close(); + }); + + it('computes fan-in and fan-out', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'caller1', 'function', 'src/a.js', 1); + const fnB = insertNode(db, 'caller2', 'function', 'src/a.js', 10); + const fnC = insertNode(db, 'target', 'function', 'src/b.js', 1); + insertEdge(db, fnA, fnC, 'calls'); + insertEdge(db, fnB, 
fnC, 'calls'); + + const data = prepareGraphData(db, { fileLevel: false }); + const target = data.nodes.find((n) => n.label === 'target'); + const caller1 = data.nodes.find((n) => n.label === 'caller1'); + + expect(target.fanIn).toBe(2); + expect(caller1.fanOut).toBe(1); + db.close(); + }); + + it('assigns community IDs via Louvain', () => { + const db = createTestDb(); + // Create two clusters of nodes + const a1 = insertNode(db, 'a1', 'function', 'src/a.js', 1); + const a2 = insertNode(db, 'a2', 'function', 'src/a.js', 10); + const b1 = insertNode(db, 'b1', 'function', 'src/b.js', 1); + const b2 = insertNode(db, 'b2', 'function', 'src/b.js', 10); + insertEdge(db, a1, a2, 'calls'); + insertEdge(db, a2, a1, 'calls'); + insertEdge(db, b1, b2, 'calls'); + insertEdge(db, b2, b1, 'calls'); + // One cross-cluster edge + insertEdge(db, a1, b1, 'calls'); + + const data = prepareGraphData(db, { fileLevel: false }); + for (const n of data.nodes) { + expect(n.community).not.toBeNull(); + expect(typeof n.community).toBe('number'); + } + db.close(); + }); + + it('flags dead-code nodes as risk', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'alive', 'function', 'src/a.js', 1, 'core'); + const fnB = insertNode(db, 'dead', 'function', 'src/b.js', 1, 'dead'); + insertEdge(db, fnA, fnB, 'calls'); + + const data = prepareGraphData(db, { fileLevel: false }); + const deadNode = data.nodes.find((n) => n.label === 'dead'); + expect(deadNode.risk).toContain('dead-code'); + + const aliveNode = data.nodes.find((n) => n.label === 'alive'); + expect(aliveNode.risk).not.toContain('dead-code'); + db.close(); + }); + + it('flags high-blast-radius nodes', () => { + const db = createTestDb(); + const target = insertNode(db, 'popular', 'function', 'src/a.js', 1); + // Create 10 callers to exceed default threshold + for (let i = 0; i < 10; i++) { + const caller = insertNode(db, `caller${i}`, 'function', 'src/c.js', i + 1); + insertEdge(db, caller, target, 'calls'); + } + + const 
data = prepareGraphData(db, { fileLevel: false }); + const popularNode = data.nodes.find((n) => n.label === 'popular'); + expect(popularNode.risk).toContain('high-blast-radius'); + expect(popularNode.fanIn).toBe(10); + db.close(); + }); + + it('flags low-mi nodes', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'messy', 'function', 'src/a.js', 1); + const fnB = insertNode(db, 'clean', 'function', 'src/b.js', 1); + insertEdge(db, fnA, fnB, 'calls'); + insertComplexity(db, fnA, 30, 20, 25.0); // MI < 40 + insertComplexity(db, fnB, 2, 1, 90.0); // MI >= 40 + + const data = prepareGraphData(db, { fileLevel: false }); + const messy = data.nodes.find((n) => n.label === 'messy'); + const clean = data.nodes.find((n) => n.label === 'clean'); + expect(messy.risk).toContain('low-mi'); + expect(clean.risk).not.toContain('low-mi'); + db.close(); + }); + + it('seed strategy top-fanin limits seed count', () => { + const db = createTestDb(); + const nodes = []; + for (let i = 0; i < 5; i++) { + nodes.push(insertNode(db, `fn${i}`, 'function', 'src/a.js', i + 1)); + } + // fn0 calls all others → they all get fan-in + for (let i = 1; i < 5; i++) { + insertEdge(db, nodes[0], nodes[i], 'calls'); + } + + const data = prepareGraphData(db, { + fileLevel: false, + config: { + seedStrategy: 'top-fanin', + seedCount: 2, + colorBy: 'kind', + nodeColors: {}, + roleColors: {}, + filter: { kinds: null, roles: null, files: null }, + edgeStyle: { color: '#666', smooth: true }, + riskThresholds: { highBlastRadius: 10, lowMI: 40 }, + overlays: {}, + }, + }); + expect(data.seedNodeIds).toHaveLength(2); + db.close(); + }); + + it('seed strategy entry selects only entry nodes', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'entryFn', 'function', 'src/a.js', 1, 'entry'); + const fnB = insertNode(db, 'coreFn', 'function', 'src/b.js', 1, 'core'); + insertEdge(db, fnA, fnB, 'calls'); + + const data = prepareGraphData(db, { + fileLevel: false, + config: { + 
seedStrategy: 'entry', + seedCount: 30, + colorBy: 'kind', + nodeColors: {}, + roleColors: {}, + filter: { kinds: null, roles: null, files: null }, + edgeStyle: { color: '#666', smooth: true }, + riskThresholds: { highBlastRadius: 10, lowMI: 40 }, + overlays: {}, + }, + }); + expect(data.seedNodeIds).toHaveLength(1); + expect(data.seedNodeIds[0]).toBe(Number(fnA)); + db.close(); + }); + + it('seed strategy all (default) includes all nodes', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'fn1', 'function', 'src/a.js', 1); + const fnB = insertNode(db, 'fn2', 'function', 'src/b.js', 1); + insertEdge(db, fnA, fnB, 'calls'); + + const data = prepareGraphData(db, { fileLevel: false }); + expect(data.seedNodeIds).toHaveLength(data.nodes.length); + db.close(); + }); + + it('handles empty complexity table gracefully', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fnB = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fnA, fnB, 'calls'); + + const data = prepareGraphData(db, { fileLevel: false }); + const nodeA = data.nodes.find((n) => n.label === 'doWork'); + expect(nodeA.cognitive).toBeNull(); + expect(nodeA.cyclomatic).toBeNull(); + expect(nodeA.maintainabilityIndex).toBeNull(); + db.close(); + }); + + it('includes directory field derived from file path', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'doWork', 'function', 'src/lib/a.js', 5); + const fnB = insertNode(db, 'helper', 'function', 'src/utils/b.js', 10); + insertEdge(db, fnA, fnB, 'calls'); + + const data = prepareGraphData(db, { fileLevel: false }); + const nodeA = data.nodes.find((n) => n.label === 'doWork'); + const nodeB = data.nodes.find((n) => n.label === 'helper'); + expect(nodeA.directory).toContain('lib'); + expect(nodeB.directory).toContain('utils'); + db.close(); + }); +}); + +describe('loadPlotConfig', () => { + it('returns default config when no config file exists', () => { 
+ const cfg = loadPlotConfig('/nonexistent/path'); + expect(cfg).toHaveProperty('layout'); + expect(cfg).toHaveProperty('physics'); + expect(cfg).toHaveProperty('nodeColors'); + expect(cfg.layout.algorithm).toBe('hierarchical'); + expect(cfg.title).toBe('Codegraph'); + }); + + it('includes new config fields with defaults', () => { + const cfg = loadPlotConfig('/nonexistent/path'); + expect(cfg.seedStrategy).toBe('all'); + expect(cfg.seedCount).toBe(30); + expect(cfg.clusterBy).toBe('none'); + expect(cfg.sizeBy).toBe('uniform'); + expect(cfg.overlays).toEqual({ complexity: false, risk: false }); + expect(cfg.riskThresholds).toEqual({ + highBlastRadius: 10, + lowMI: 40, + }); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index fc610c4b..305848b5 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -143,7 +143,14 @@ describe('TOOLS', () => { it('export_graph requires format parameter with enum', () => { const eg = TOOLS.find((t) => t.name === 'export_graph'); expect(eg.inputSchema.required).toContain('format'); - expect(eg.inputSchema.properties.format.enum).toEqual(['dot', 'mermaid', 'json']); + expect(eg.inputSchema.properties.format.enum).toEqual([ + 'dot', + 'mermaid', + 'json', + 'graphml', + 'graphson', + 'neo4j', + ]); expect(eg.inputSchema.properties).toHaveProperty('file_level'); }); From 7fe020695260fdc8ed43c30467415ff03caeb628 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:16:34 -0700 Subject: [PATCH 23/30] docs: add check-readme hook to guides (#272) * feat(export): add GraphML, GraphSON, Neo4j CSV formats and interactive HTML viewer Add three new export formats for graph database interoperability: - GraphML (XML standard) with file-level and function-level modes - GraphSON (TinkerPop v3) for Gremlin/JanusGraph compatibility - Neo4j CSV (bulk import) with separate nodes/relationships files Add interactive HTML viewer (`codegraph plot`) powered by 
vis-network: - Hierarchical, force, and radial layouts with physics toggle - Node coloring by kind or role, search/filter, legend panel - Configurable via .plotDotCfg JSON file Update CLI export command, MCP export_graph tool, and programmatic API to support all six formats. Impact: 12 functions changed, 6 affected * feat(plot): add drill-down, clustering, complexity overlays, and detail panel Evolve the plot command from a static viewer into an interactive exploration tool with rich data overlays and navigation. Data preparation: - Extract prepareGraphData() with complexity, fan-in/fan-out, Louvain community detection, directory derivation, and risk flag computation - Seed strategies: all (default), top-fanin, entry Interactive features: - Detail sidebar: metrics, callers/callees lists, risk badges - Drill-down: click-to-expand / double-click-to-collapse neighbors - Clustering: community and directory grouping via vis-network API - Color by: kind, role, community, complexity (MI-based borders) - Size by: uniform, fan-in, fan-out, complexity - Risk overlay: dead-code (dashed), high-blast-radius (shadow), low-MI CLI options: - --cluster, --overlay, --seed, --seed-count, --size-by, --color-by Tests expanded from 7 to 21 covering all new data enrichment, seed strategies, risk flags, UI elements, and config backward compatibility. Impact: 5 functions changed, 3 affected * fix(test): update MCP export_graph enum to include new formats The previous commit added graphml, graphson, and neo4j export formats to the MCP tool definition but did not update the test assertion. * style: format mcp test after enum update * fix(security): escape config values in HTML template to prevent XSS Use JSON.stringify() for cfg.layout.direction, effectiveColorBy, and cfg.clusterBy when interpolated into inline JavaScript. Replace shell exec() with execFile() for browser-open to avoid path injection. 
Impact: 1 functions changed, 1 affected * docs: add check-readme hook to recommended practices and guides Document the new check-readme.sh hook across all three doc locations: recommended-practices.md, ai-agent-guide.md, and the hooks example README. Adds settings.json examples, hook behavior descriptions, and customization entries. --- docs/examples/claude-code-hooks/README.md | 7 +++++++ docs/guides/ai-agent-guide.md | 13 +++++++++++++ docs/guides/recommended-practices.md | 13 +++++++++++++ 3 files changed, 33 insertions(+) diff --git a/docs/examples/claude-code-hooks/README.md b/docs/examples/claude-code-hooks/README.md index 6b432710..6afcb18e 100644 --- a/docs/examples/claude-code-hooks/README.md +++ b/docs/examples/claude-code-hooks/README.md @@ -29,6 +29,12 @@ echo ".claude/codegraph-checked.log" >> .gitignore | `update-graph.sh` | PostToolUse on Edit/Write | Runs `codegraph build` incrementally after each source file edit to keep the graph fresh | | `post-git-ops.sh` | PostToolUse on Bash | Detects `git rebase/revert/cherry-pick/merge/pull` and rebuilds the graph, logs changed files, and resets the remind tracker | +### Doc hygiene hooks + +| Hook | Trigger | What it does | +|------|---------|-------------| +| `check-readme.sh` | PreToolUse on Bash | Blocks `git commit` when source files are staged but `README.md`, `CLAUDE.md`, or `ROADMAP.md` aren't — prompts the agent to review whether docs need updating | + ### Parallel session safety hooks (recommended for multi-agent workflows) | Hook | Trigger | What it does | @@ -62,6 +68,7 @@ Without this fix, `CLAUDE_PROJECT_DIR` (which always points to the main project - **Solo developer:** `enrich-context.sh` + `update-graph.sh` + `post-git-ops.sh` - **With reminders:** Add `remind-codegraph.sh` +- **Doc hygiene:** Add `check-readme.sh` to catch source commits that may need doc updates - **Multi-agent / worktrees:** Add `guard-git.sh` + `track-edits.sh` + `track-moves.sh` **Branch name validation:** The 
`guard-git.sh` in this repo's `.claude/hooks/` validates branch names against conventional prefixes (`feat/`, `fix/`, etc.). The example version omits this — add your own validation if needed. diff --git a/docs/guides/ai-agent-guide.md b/docs/guides/ai-agent-guide.md index 23548b54..575ff12a 100644 --- a/docs/guides/ai-agent-guide.md +++ b/docs/guides/ai-agent-guide.md @@ -659,6 +659,7 @@ Hooks automate codegraph integration so the agent gets structural context withou | `enrich-context.sh` | PreToolUse (Read, Grep) | Injects dependency info before file reads | | `remind-codegraph.sh` | PreToolUse (Edit, Write) | Reminds agent to check context/impact before editing | | `update-graph.sh` | PostToolUse (Edit, Write) | Rebuilds graph after code changes | +| `check-readme.sh` | PreToolUse (Bash) | Blocks commits when source changes may need doc updates | | `guard-git.sh` | PreToolUse (Bash) | Blocks dangerous git ops, validates commits | | `track-edits.sh` | PostToolUse (Edit, Write) | Logs edits for commit validation | @@ -703,6 +704,14 @@ Before editing, always: (1) where , (2) explain src/parser.js, **Result:** The graph stays current as the agent edits code. Subsequent `context`, `fn-impact`, and `diff-impact` calls reflect the latest changes. +### `check-readme.sh` — Enforce doc updates alongside source changes + +**Trigger:** Before any Bash command (PreToolUse). + +**What it does:** Intercepts `git commit` commands and checks whether source files are staged (anything under `src/`, `cli.js`, `constants.js`, `parser.js`, `package.json`, or `grammars/`). If so, it verifies that `README.md`, `CLAUDE.md`, and `ROADMAP.md` are also staged. Missing docs trigger a `deny` decision listing which files weren't staged and what to review in each — language support tables, architecture docs, feature lists, roadmap phases, etc. + +**Allows:** Commits that only touch non-source files (tests, docs, config) pass through without checks. 
Commits where all three docs are staged also pass through. + ### `guard-git.sh` — Prevent unsafe git operations **Trigger:** Before any Bash command. @@ -749,6 +758,10 @@ Add to `.claude/settings.json`: { "matcher": "Bash", "hooks": [ + { + "type": "command", + "command": "bash .claude/hooks/check-readme.sh" + }, { "type": "command", "command": "bash .claude/hooks/guard-git.sh" diff --git a/docs/guides/recommended-practices.md b/docs/guides/recommended-practices.md index 85001593..705bbd62 100644 --- a/docs/guides/recommended-practices.md +++ b/docs/guides/recommended-practices.md @@ -227,6 +227,16 @@ You can configure [Claude Code hooks](https://docs.anthropic.com/en/docs/claude- { "hooks": { "PreToolUse": [ + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": "bash \"$CLAUDE_PROJECT_DIR/.claude/hooks/check-readme.sh\"", + "timeout": 10 + } + ] + }, { "matcher": "Read|Grep", "hooks": [ @@ -288,6 +298,8 @@ You can configure [Claude Code hooks](https://docs.anthropic.com/en/docs/claude- > } > ``` +**Doc check hook** (PreToolUse on Bash): when Claude runs `git commit` with source files staged (anything under `src/`, `cli.js`, `constants.js`, `parser.js`, `package.json`, or `grammars/`), the hook checks whether `README.md`, `CLAUDE.md`, and `ROADMAP.md` are also staged. If any are missing, it blocks the commit with a `deny` decision listing which docs weren't staged and what to review in each (language support tables, architecture docs, roadmap phases, etc.). Non-source-only commits (tests, docs, config) pass through without checks. + **Edit reminder hook** (PreToolUse on Edit/Write): before the agent writes code, a reminder is injected via `additionalContext` prompting it to check `where`, `explain`, `context`, and `fn-impact` first. Only fires once per file per session (tracks in `.claude/codegraph-checked.log`, gitignored). Non-blocking — it nudges but never prevents the edit. Skips non-source files like `.md`, `.json`, `.yml`. 
**Graph update hook** (PostToolUse on Edit/Write): keeps the graph incrementally updated after each file edit. Only changed files are re-parsed. @@ -301,6 +313,7 @@ You can configure [Claude Code hooks](https://docs.anthropic.com/en/docs/claude- - `remind-codegraph.sh` — pre-edit reminder to check context/impact - `update-graph.sh` — incremental graph updates after edits - `post-git-ops.sh` — graph rebuild + edit tracking after rebase/revert/merge +- `check-readme.sh` — blocks commits when source changes may require doc updates - `guard-git.sh` — blocks dangerous git commands + validates commits - `track-edits.sh` — logs edited files for commit validation - `track-moves.sh` — logs file moves/copies for commit validation From af936712fc6ad1f06a6243ba7944a5da630cfc2a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:18:24 -0700 Subject: [PATCH 24/30] feat: exports command + scoped rebuild for parallel agents (#269) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: add competitive deep-dive for Joern and reorganize competitive folder Move COMPETITIVE_ANALYSIS.md into generated/competitive/ and add a comprehensive feature-by-feature comparison against joernio/joern (our #1-ranked competitor). Covers parsing, graph model, query language, performance, installation, AI/MCP integration, security analysis, developer productivity, and ecosystem across 100+ individual features. Update FOUNDATION.md reference to the new path. * fix: update broken links to moved COMPETITIVE_ANALYSIS.md README.md and docs/roadmap/BACKLOG.md still referenced the old path at generated/COMPETITIVE_ANALYSIS.md after the file was moved to generated/competitive/COMPETITIVE_ANALYSIS.md in #260. * docs: add Joern-inspired feature candidates with BACKLOG-style grading Append a new "Joern-Inspired Feature Candidates" section to the Joern competitive deep-dive. 
Lists 11 actionable features extracted from Parsing & Language Support, Graph Model & Analysis Depth, and Query Language & Interface sections — assessed with the same tier/grading system used in BACKLOG.md (zero-dep, foundation-aligned, problem-fit, breaking). Tier 1 non-breaking: call-chain slicing, type-informed resolution, error-tolerant parsing, regex filtering, Kotlin, Swift, script execution. Tier 1 breaking: expanded node/edge types, intraprocedural CFG, stored AST. Not adopted: 9 features with FOUNDATION.md reasoning. Cross-references BACKLOG IDs 14 and 7. * docs: add competitive deep-dive for Narsil-MCP with feature candidates Comprehensive comparison across 10 dimensions: parsing (32 vs 11 languages), graph model (CFG/DFG/type inference vs complexity/roles/ communities), search (similarity/chunking vs RRF hybrid), security (147 rules vs none), queries (90 tools vs 21 + compound commands), performance (cold start vs incremental), install, MCP integration, developer productivity, and ecosystem. Feature candidates section covers all comparison sections: - Tier 1 non-breaking (10): MCP presets, AST chunking, code similarity, git blame/symbol history, remote repo indexing, config wizard, Kotlin, Swift, Bash, Scala language support - Tier 1 breaking (1): export map per module - Tier 2 (2): interactive HTML viz, multiple embedding backends - Tier 3 (2): OWASP patterns, SBOM generation - Not adopted (10): taint, type inference, SPARQL/RDF, CCG, in-memory arch, 90-tool surface, browser WASM, Forgemax, LSP, license scanning - Cross-references to BACKLOG IDs 7, 8, 10, 14 and Joern candidates J4, J5, J8, J9 * feat: add dedicated `exports ` command with per-symbol consumers Implements feature N11 from the Narsil competitive analysis. The new command provides a focused export map showing which symbols a file exports and who calls each one, filling the gap between `explain` (public/internal split without consumers) and `where --file` (just export names). 
Adds exportsData/fileExports to queries.js, CLI command, MCP tool, batch support, programmatic API, and integration tests. Impact: 7 functions changed, 15 affected * feat: add scoped rebuild for parallel agent rollback Extract purgeFilesFromGraph() from the inline deletion cascade in buildGraph() for reuse. Add opts.scope and opts.noReverseDeps to buildGraph() so agents can surgically rebuild only their changed files without nuking other agents' graph state. - `--scope ` on `build` skips collectFiles/getChangedFiles - `--no-reverse-deps` skips reverse-dep cascade (safe when exports unchanged) - New `scoped_rebuild` MCP tool for multi-agent orchestration - purgeFilesFromGraph exported from programmatic API - Unit tests for purge function, integration tests for scoped rebuild - Documented agent-level rollback workflow in titan-paradigm.md Impact: 3 functions changed, 20 affected * fix: remove leaked scoped_rebuild changes from another session Reverts purgeFilesFromGraph export, --scope/--no-reverse-deps CLI options, scoped_rebuild MCP tool+handler, and test list entry that were accidentally included from a concurrent session's dirty worktree. Impact: 2 functions changed, 1 affected * fix: remove stale scoped-rebuild docs from titan-paradigm The scoped_rebuild feature (--scope, --no-reverse-deps CLI options and scoped_rebuild MCP tool) was removed in 651ddb2 but the documentation in titan-paradigm.md still referenced it. Addresses Greptile review feedback on PR #269. 
--- src/batch.js | 2 + src/builder.js | 224 ++++++++++++++--------- src/cli.js | 21 +++ src/index.js | 2 + src/mcp.js | 22 +++ src/paginate.js | 1 + src/queries.js | 160 ++++++++++++++++ tests/integration/queries.test.js | 78 ++++++++ tests/integration/scoped-rebuild.test.js | 174 ++++++++++++++++++ tests/unit/mcp.test.js | 22 +++ tests/unit/purge-files.test.js | 184 +++++++++++++++++++ 11 files changed, 808 insertions(+), 82 deletions(-) create mode 100644 tests/integration/scoped-rebuild.test.js create mode 100644 tests/unit/purge-files.test.js diff --git a/src/batch.js b/src/batch.js index 2a703a3c..17494dc0 100644 --- a/src/batch.js +++ b/src/batch.js @@ -11,6 +11,7 @@ import { flowData } from './flow.js'; import { contextData, explainData, + exportsData, fileDepsData, fnDepsData, fnImpactData, @@ -34,6 +35,7 @@ export const BATCH_COMMANDS = { query: { fn: fnDepsData, sig: 'name' }, impact: { fn: impactAnalysisData, sig: 'file' }, deps: { fn: fileDepsData, sig: 'file' }, + exports: { fn: exportsData, sig: 'file' }, flow: { fn: flowData, sig: 'name' }, dataflow: { fn: dataflowData, sig: 'name' }, complexity: { fn: complexityData, sig: 'dbOnly' }, diff --git a/src/builder.js b/src/builder.js index a9ae11d4..24021f55 100644 --- a/src/builder.js +++ b/src/builder.js @@ -338,6 +338,76 @@ function getChangedFiles(db, allFiles, rootDir) { return { changed, removed, isFullBuild: false }; } +/** + * Purge all graph data for the specified files. + * Deletes: embeddings → edges (in+out) → node_metrics → function_complexity → dataflow → nodes. + * Handles missing tables gracefully (embeddings, complexity, dataflow may not exist in older DBs). 
+ * + * @param {import('better-sqlite3').Database} db - Open writable database + * @param {string[]} files - Relative file paths to purge + * @param {object} [options] + * @param {boolean} [options.purgeHashes=true] - Also delete file_hashes entries + */ +export function purgeFilesFromGraph(db, files, options = {}) { + const { purgeHashes = true } = options; + if (!files || files.length === 0) return; + + // Check if embeddings table exists + let hasEmbeddings = false; + try { + db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); + hasEmbeddings = true; + } catch { + /* table doesn't exist */ + } + + const deleteEmbeddingsForFile = hasEmbeddings + ? db.prepare('DELETE FROM embeddings WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)') + : null; + const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?'); + const deleteEdgesForFile = db.prepare(` + DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) + OR target_id IN (SELECT id FROM nodes WHERE file = @f) + `); + const deleteMetricsForFile = db.prepare( + 'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)', + ); + let deleteComplexityForFile; + try { + deleteComplexityForFile = db.prepare( + 'DELETE FROM function_complexity WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)', + ); + } catch { + deleteComplexityForFile = null; + } + let deleteDataflowForFile; + try { + deleteDataflowForFile = db.prepare( + 'DELETE FROM dataflow WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) 
OR target_id IN (SELECT id FROM nodes WHERE file = ?)', + ); + } catch { + deleteDataflowForFile = null; + } + let deleteHashForFile; + if (purgeHashes) { + try { + deleteHashForFile = db.prepare('DELETE FROM file_hashes WHERE file = ?'); + } catch { + deleteHashForFile = null; + } + } + + for (const relPath of files) { + deleteEmbeddingsForFile?.run(relPath); + deleteEdgesForFile.run({ f: relPath }); + deleteMetricsForFile.run(relPath); + deleteComplexityForFile?.run(relPath); + deleteDataflowForFile?.run(relPath, relPath); + deleteNodesForFile.run(relPath); + if (purgeHashes) deleteHashForFile?.run(relPath); + } +} + export async function buildGraph(rootDir, opts = {}) { const dbPath = path.join(rootDir, '.codegraph', 'graph.db'); const db = openDb(dbPath); @@ -384,19 +454,46 @@ export async function buildGraph(rootDir, opts = {}) { ); } - const collected = collectFiles(rootDir, [], config, new Set()); - const files = collected.files; - const discoveredDirs = collected.directories; - info(`Found ${files.length} files to parse`); - - // Check for incremental build - const { changed, removed, isFullBuild } = incremental - ? 
getChangedFiles(db, files, rootDir) - : { changed: files.map((f) => ({ file: f })), removed: [], isFullBuild: true }; - - // Separate metadata-only updates (mtime/size self-heal) from real changes - const parseChanges = changed.filter((c) => !c.metadataOnly); - const metadataUpdates = changed.filter((c) => c.metadataOnly); + // ── Scoped rebuild: rebuild only specified files ────────────────── + let files, discoveredDirs, parseChanges, metadataUpdates, removed, isFullBuild; + + if (opts.scope) { + const scopedFiles = opts.scope.map((f) => normalizePath(f)); + const existing = []; + const missing = []; + for (const rel of scopedFiles) { + const abs = path.join(rootDir, rel); + if (fs.existsSync(abs)) { + existing.push({ file: abs, relPath: rel }); + } else { + missing.push(rel); + } + } + files = existing.map((e) => e.file); + // Derive discoveredDirs from scoped files' parent directories + discoveredDirs = new Set(existing.map((e) => path.dirname(e.file))); + parseChanges = existing; + metadataUpdates = []; + removed = missing; + isFullBuild = false; + info(`Scoped rebuild: ${existing.length} files to rebuild, ${missing.length} to purge`); + } else { + const collected = collectFiles(rootDir, [], config, new Set()); + files = collected.files; + discoveredDirs = collected.directories; + info(`Found ${files.length} files to parse`); + + // Check for incremental build + const increResult = incremental + ? 
getChangedFiles(db, files, rootDir) + : { changed: files.map((f) => ({ file: f })), removed: [], isFullBuild: true }; + removed = increResult.removed; + isFullBuild = increResult.isFullBuild; + + // Separate metadata-only updates (mtime/size self-heal) from real changes + parseChanges = increResult.changed.filter((c) => !c.metadataOnly); + metadataUpdates = increResult.changed.filter((c) => c.metadataOnly); + } if (!isFullBuild && parseChanges.length === 0 && removed.length === 0) { // Still update metadata for self-healing even when no real changes @@ -446,29 +543,33 @@ export async function buildGraph(rootDir, opts = {}) { // Find files with edges pointing TO changed/removed files. // Their nodes stay intact (preserving IDs), but outgoing edges are // deleted so they can be rebuilt during the edge-building pass. - const changedRelPaths = new Set(); - for (const item of parseChanges) { - changedRelPaths.add(item.relPath || normalizePath(path.relative(rootDir, item.file))); - } - for (const relPath of removed) { - changedRelPaths.add(relPath); - } - + // When opts.noReverseDeps is true (e.g. agent rollback to same version), + // skip this cascade — the agent knows exports didn't change. const reverseDeps = new Set(); - if (changedRelPaths.size > 0) { - const findReverseDeps = db.prepare(` - SELECT DISTINCT n_src.file FROM edges e - JOIN nodes n_src ON e.source_id = n_src.id - JOIN nodes n_tgt ON e.target_id = n_tgt.id - WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file AND n_src.kind != 'directory' - `); - for (const relPath of changedRelPaths) { - for (const row of findReverseDeps.all(relPath)) { - if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { - // Verify the file still exists on disk - const absPath = path.join(rootDir, row.file); - if (fs.existsSync(absPath)) { - reverseDeps.add(row.file); + if (!opts.noReverseDeps) { + const changedRelPaths = new Set(); + for (const item of parseChanges) { + changedRelPaths.add(item.relPath || normalizePath(path.relative(rootDir, item.file))); + } + for (const relPath of removed) { + changedRelPaths.add(relPath); + } + + if (changedRelPaths.size > 0) { + const findReverseDeps = db.prepare(` + SELECT DISTINCT n_src.file FROM edges e + JOIN nodes n_src ON e.source_id = n_src.id + JOIN nodes n_tgt ON e.target_id = n_tgt.id + WHERE n_tgt.file = ? AND n_src.file != n_tgt.file AND n_src.kind != 'directory' + `); + for (const relPath of changedRelPaths) { + for (const row of findReverseDeps.all(relPath)) { + if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { + // Verify the file still exists on disk + const absPath = path.join(rootDir, row.file); + if (fs.existsSync(absPath)) { + reverseDeps.add(row.file); + } } } } @@ -482,57 +583,16 @@ export async function buildGraph(rootDir, opts = {}) { debug(`Changed files: ${parseChanges.map((c) => c.relPath).join(', ')}`); if (removed.length > 0) debug(`Removed files: ${removed.join(', ')}`); // Remove embeddings/metrics/edges/nodes for changed and removed files - // Embeddings must be deleted BEFORE nodes (we need node IDs to find them) - const deleteEmbeddingsForFile = hasEmbeddings - ? 
db.prepare('DELETE FROM embeddings WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)') - : null; - const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?'); - const deleteEdgesForFile = db.prepare(` - DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) - OR target_id IN (SELECT id FROM nodes WHERE file = @f) - `); - const deleteOutgoingEdgesForFile = db.prepare( - 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', - ); - const deleteMetricsForFile = db.prepare( - 'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)', + const changePaths = parseChanges.map( + (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), ); - let deleteComplexityForFile; - try { - deleteComplexityForFile = db.prepare( - 'DELETE FROM function_complexity WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)', - ); - } catch { - deleteComplexityForFile = null; - } - let deleteDataflowForFile; - try { - deleteDataflowForFile = db.prepare( - 'DELETE FROM dataflow WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) 
OR target_id IN (SELECT id FROM nodes WHERE file = ?)', - ); - } catch { - deleteDataflowForFile = null; - } - for (const relPath of removed) { - deleteEmbeddingsForFile?.run(relPath); - deleteEdgesForFile.run({ f: relPath }); - deleteMetricsForFile.run(relPath); - deleteComplexityForFile?.run(relPath); - deleteDataflowForFile?.run(relPath, relPath); - deleteNodesForFile.run(relPath); - } - for (const item of parseChanges) { - const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file)); - deleteEmbeddingsForFile?.run(relPath); - deleteEdgesForFile.run({ f: relPath }); - deleteMetricsForFile.run(relPath); - deleteComplexityForFile?.run(relPath); - deleteDataflowForFile?.run(relPath, relPath); - deleteNodesForFile.run(relPath); - } + purgeFilesFromGraph(db, [...removed, ...changePaths], { purgeHashes: false }); // Process reverse deps: delete only outgoing edges (nodes/IDs preserved) // then add them to the parse list so they participate in edge building + const deleteOutgoingEdgesForFile = db.prepare( + 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', + ); for (const relPath of reverseDeps) { deleteOutgoingEdgesForFile.run(relPath); } diff --git a/src/cli.js b/src/cli.js index d3b36f74..81e14dc5 100644 --- a/src/cli.js +++ b/src/cli.js @@ -32,6 +32,7 @@ import { diffImpact, explain, fileDeps, + fileExports, fnDeps, fnImpact, impactAnalysis, @@ -224,6 +225,26 @@ program }); }); +program + .command('exports ') + .description('Show exported symbols with per-symbol consumers (who calls each export)') + .option('-d, --db ', 'Path to graph.db') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .option('--ndjson', 'Newline-delimited JSON output') + .action((file, opts) => { + 
fileExports(file, opts.db, { + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + ndjson: opts.ndjson, + }); + }); + program .command('fn-impact ') .description('Function-level impact: what functions break if this one changes') diff --git a/src/index.js b/src/index.js index 7f0e5246..ea76dacc 100644 --- a/src/index.js +++ b/src/index.js @@ -118,9 +118,11 @@ export { diffImpactData, diffImpactMermaid, explainData, + exportsData, FALSE_POSITIVE_CALLER_THRESHOLD, FALSE_POSITIVE_NAMES, fileDepsData, + fileExports, fnDepsData, fnImpactData, impactAnalysisData, diff --git a/src/mcp.js b/src/mcp.js index 1f0b9451..78a20c6b 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -82,6 +82,20 @@ const BASE_TOOLS = [ required: ['file'], }, }, + { + name: 'file_exports', + description: + 'Show exported symbols of a file with per-symbol consumers — who calls each export and from where', + inputSchema: { + type: 'object', + properties: { + file: { type: 'string', description: 'File path (partial match supported)' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + required: ['file'], + }, + }, { name: 'impact_analysis', description: 'Show files affected by changes to a given file (transitive)', @@ -741,6 +755,7 @@ export async function startMCPServer(customDbPath, options = {}) { fnImpactData, pathData, contextData, + exportsData, explainData, whereData, diffImpactData, @@ -826,6 +841,13 @@ export async function startMCPServer(customDbPath, options = {}) { offset: args.offset ?? 0, }); break; + case 'file_exports': + result = exportsData(args.file, dbPath, { + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.file_exports, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, + }); + break; case 'impact_analysis': result = impactAnalysisData(args.file, dbPath, { noTests: args.no_tests, diff --git a/src/paginate.js b/src/paginate.js index 8802b65a..79bfaa27 100644 --- a/src/paginate.js +++ b/src/paginate.js @@ -18,6 +18,7 @@ export const MCP_DEFAULTS = { context: 5, explain: 10, file_deps: 20, + file_exports: 20, diff_impact: 30, impact_analysis: 20, semantic_search: 20, diff --git a/src/queries.js b/src/queries.js index 5ee87b0c..7fb28d9c 100644 --- a/src/queries.js +++ b/src/queries.js @@ -3006,6 +3006,166 @@ export function roles(customDbPath, opts = {}) { } } +// ─── exportsData ───────────────────────────────────────────────────── + +function exportsFileImpl(db, target, noTests, getFileLines) { + const fileNodes = db + .prepare(`SELECT * FROM nodes WHERE file LIKE ? AND kind = 'file'`) + .all(`%${target}%`); + if (fileNodes.length === 0) return []; + + return fileNodes.map((fn) => { + const symbols = db + .prepare(`SELECT * FROM nodes WHERE file = ? AND kind != 'file' ORDER BY line`) + .all(fn.file); + + // IDs of symbols that have incoming calls from other files (exported) + const exportedIds = new Set( + db + .prepare( + `SELECT DISTINCT e.target_id FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE target.file = ? AND caller.file != ? AND e.kind = 'calls'`, + ) + .all(fn.file, fn.file) + .map((r) => r.target_id), + ); + + const exported = symbols.filter((s) => exportedIds.has(s.id)); + const internalCount = symbols.length - exported.length; + + const results = exported.map((s) => { + const fileLines = getFileLines(fn.file); + + let consumers = db + .prepare( + `SELECT n.name, n.file, n.line FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind = 'calls'`, + ) + .all(s.id); + if (noTests) consumers = consumers.filter((c) => !isTestFile(c.file)); + + return { + name: s.name, + kind: s.kind, + line: s.line, + endLine: s.end_line ?? 
null, + role: s.role || null, + signature: fileLines ? extractSignature(fileLines, s.line) : null, + summary: fileLines ? extractSummary(fileLines, s.line) : null, + consumers: consumers.map((c) => ({ name: c.name, file: c.file, line: c.line })), + consumerCount: consumers.length, + }; + }); + + // Reexport edges from this file node + const reexports = db + .prepare( + `SELECT n.file FROM edges e JOIN nodes n ON e.target_id = n.id + WHERE e.source_id = ? AND e.kind = 'reexports'`, + ) + .all(fn.id) + .map((r) => ({ file: r.file })); + + return { + file: fn.file, + results, + reexports, + totalExported: exported.length, + totalInternal: internalCount, + }; + }); +} + +export function exportsData(file, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + + const dbFilePath = findDbPath(customDbPath); + const repoRoot = path.resolve(path.dirname(dbFilePath), '..'); + + const fileCache = new Map(); + function getFileLines(file) { + if (fileCache.has(file)) return fileCache.get(file); + try { + const absPath = safePath(repoRoot, file); + if (!absPath) { + fileCache.set(file, null); + return null; + } + const lines = fs.readFileSync(absPath, 'utf-8').split('\n'); + fileCache.set(file, lines); + return lines; + } catch { + fileCache.set(file, null); + return null; + } + } + + const fileResults = exportsFileImpl(db, file, noTests, getFileLines); + db.close(); + + if (fileResults.length === 0) { + return paginateResult( + { file, results: [], reexports: [], totalExported: 0, totalInternal: 0 }, + 'results', + { limit: opts.limit, offset: opts.offset }, + ); + } + + // For single-file match return flat; for multi-match return first (like explainData) + const first = fileResults[0]; + const base = { + file: first.file, + results: first.results, + reexports: first.reexports, + totalExported: first.totalExported, + totalInternal: first.totalInternal, + }; + return paginateResult(base, 'results', { limit: 
opts.limit, offset: opts.offset }); +} + +export function fileExports(file, customDbPath, opts = {}) { + const data = exportsData(file, customDbPath, opts); + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + if (data.results.length === 0) { + console.log(`No exported symbols found for "${file}". Run "codegraph build" first.`); + return; + } + + console.log( + `\n# ${data.file} — ${data.totalExported} exported, ${data.totalInternal} internal\n`, + ); + + for (const sym of data.results) { + const icon = kindIcon(sym.kind); + const sig = sym.signature?.params ? `(${sym.signature.params})` : ''; + const role = sym.role ? ` [${sym.role}]` : ''; + console.log(` ${icon} ${sym.name}${sig}${role} :${sym.line}`); + if (sym.consumers.length === 0) { + console.log(' (no consumers)'); + } else { + for (const c of sym.consumers) { + console.log(` <- ${c.name} (${c.file}:${c.line})`); + } + } + } + + if (data.reexports.length > 0) { + console.log(`\n Re-exports: ${data.reexports.map((r) => r.file).join(', ')}`); + } + console.log(); +} + export function fnImpact(name, customDbPath, opts = {}) { const data = fnImpactData(name, customDbPath, opts); if (opts.ndjson) { diff --git a/tests/integration/queries.test.js b/tests/integration/queries.test.js index 0bb3b7dc..e991991c 100644 --- a/tests/integration/queries.test.js +++ b/tests/integration/queries.test.js @@ -28,6 +28,7 @@ import { initSchema } from '../../src/db.js'; import { diffImpactData, explainData, + exportsData, fileDepsData, fnDepsData, fnImpactData, @@ -734,3 +735,80 @@ describe('stable symbol schema', () => { expect(fn.fileHash).toBe('hash_auth_js'); }); }); + +// ─── exportsData ────────────────────────────────────────────────────── + +describe('exportsData', () => { + test('returns exported symbols with consumers for auth.js', () => { + const data = exportsData('auth.js', dbPath); + expect(data.file).toBe('auth.js'); + 
expect(data.totalExported).toBeGreaterThanOrEqual(2); + + const names = data.results.map((r) => r.name); + expect(names).toContain('authenticate'); + expect(names).toContain('validateToken'); + }); + + test('consumers include cross-file callers', () => { + const data = exportsData('auth.js', dbPath); + const auth = data.results.find((r) => r.name === 'authenticate'); + expect(auth).toBeDefined(); + const consumerNames = auth.consumers.map((c) => c.name); + // authMiddleware calls authenticate from middleware.js (cross-file) + expect(consumerNames).toContain('authMiddleware'); + }); + + test('noTests filters test file consumers', () => { + const all = exportsData('auth.js', dbPath); + const filtered = exportsData('auth.js', dbPath, { noTests: true }); + + const allAuth = all.results.find((r) => r.name === 'authenticate'); + const filteredAuth = filtered.results.find((r) => r.name === 'authenticate'); + + const allConsumers = allAuth.consumers.map((c) => c.name); + const filteredConsumers = filteredAuth.consumers.map((c) => c.name); + + // testAuthenticate should be in unfiltered consumers + expect(allConsumers).toContain('testAuthenticate'); + // testAuthenticate should be excluded with noTests + expect(filteredConsumers).not.toContain('testAuthenticate'); + }); + + test('returns empty results for unknown file', () => { + const data = exportsData('nonexistent.js', dbPath); + expect(data.results).toHaveLength(0); + expect(data.totalExported).toBe(0); + expect(data.totalInternal).toBe(0); + }); + + test('reexports field is present', () => { + const data = exportsData('auth.js', dbPath); + expect(data).toHaveProperty('reexports'); + expect(Array.isArray(data.reexports)).toBe(true); + }); + + test('pagination limits results', () => { + const data = exportsData('auth.js', dbPath, { limit: 1, offset: 0 }); + expect(data.results).toHaveLength(1); + expect(data._pagination).toBeDefined(); + expect(data._pagination.total).toBeGreaterThanOrEqual(2); + 
expect(data._pagination.hasMore).toBe(true); + }); + + test('result shape has expected fields', () => { + const data = exportsData('auth.js', dbPath); + expect(data.results.length).toBeGreaterThan(0); + const sym = data.results[0]; + expect(sym).toHaveProperty('name'); + expect(sym).toHaveProperty('kind'); + expect(sym).toHaveProperty('line'); + expect(sym).toHaveProperty('consumers'); + expect(sym).toHaveProperty('consumerCount'); + expect(sym).toHaveProperty('role'); + expect(sym).toHaveProperty('signature'); + expect(sym).toHaveProperty('summary'); + expect(sym).toHaveProperty('endLine'); + expect(Array.isArray(sym.consumers)).toBe(true); + expect(typeof sym.consumerCount).toBe('number'); + }); +}); diff --git a/tests/integration/scoped-rebuild.test.js b/tests/integration/scoped-rebuild.test.js new file mode 100644 index 00000000..fd4d8a12 --- /dev/null +++ b/tests/integration/scoped-rebuild.test.js @@ -0,0 +1,174 @@ +/** + * Integration tests for scoped rebuild (opts.scope + opts.noReverseDeps). + * + * Uses the sample-project fixture (math.js, utils.js, index.js) to build + * a real graph, then verifies that scoped rebuilds surgically update only + * targeted files while leaving everything else intact. 
+ */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { buildGraph } from '../../src/builder.js'; + +const FIXTURE_DIR = path.join(import.meta.dirname, '..', 'fixtures', 'sample-project'); + +let tmpDir; + +function copyFixture() { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-scoped-')); + for (const file of fs.readdirSync(FIXTURE_DIR)) { + fs.copyFileSync(path.join(FIXTURE_DIR, file), path.join(dir, file)); + } + return dir; +} + +function openDb(dir) { + const Database = require('better-sqlite3'); + return new Database(path.join(dir, '.codegraph', 'graph.db'), { readonly: true }); +} + +function nodeCount(db, file) { + return db.prepare('SELECT COUNT(*) as c FROM nodes WHERE file = ?').get(file).c; +} + +function edgeCount(db) { + return db.prepare('SELECT COUNT(*) as c FROM edges').get().c; +} + +beforeAll(async () => { + tmpDir = copyFixture(); + // Build the initial full graph + await buildGraph(tmpDir, { incremental: false }); +}); + +afterAll(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +describe('scoped rebuild', () => { + test('scoped rebuild updates only targeted file, preserves others', async () => { + const db1 = openDb(tmpDir); + const mathNodesBefore = nodeCount(db1, 'math.js'); + const utilsNodesBefore = nodeCount(db1, 'utils.js'); + const indexNodesBefore = nodeCount(db1, 'index.js'); + db1.close(); + + expect(mathNodesBefore).toBeGreaterThan(0); + expect(utilsNodesBefore).toBeGreaterThan(0); + + // Scoped rebuild only math.js (no content change — should re-parse same result) + await buildGraph(tmpDir, { scope: ['math.js'] }); + + const db2 = openDb(tmpDir); + const mathNodesAfter = nodeCount(db2, 'math.js'); + const utilsNodesAfter = nodeCount(db2, 'utils.js'); + const indexNodesAfter = nodeCount(db2, 'index.js'); + db2.close(); + + // math.js should be rebuilt with same node 
count + expect(mathNodesAfter).toBe(mathNodesBefore); + // utils.js and index.js should be untouched + expect(utilsNodesAfter).toBe(utilsNodesBefore); + expect(indexNodesAfter).toBe(indexNodesBefore); + }); + + test('scoped rebuild with deleted file purges it from graph', async () => { + // Create a temporary extra file, build it in, then delete and scope-rebuild + const extraPath = path.join(tmpDir, 'extra.js'); + fs.writeFileSync(extraPath, 'function extra() { return 1; }\nmodule.exports = { extra };\n'); + + // Full rebuild to pick up the new file + await buildGraph(tmpDir, { incremental: false }); + + const db1 = openDb(tmpDir); + const extraBefore = nodeCount(db1, 'extra.js'); + const mathBefore = nodeCount(db1, 'math.js'); + db1.close(); + expect(extraBefore).toBeGreaterThan(0); + + // Delete the file and scope-rebuild it + fs.unlinkSync(extraPath); + await buildGraph(tmpDir, { scope: ['extra.js'] }); + + const db2 = openDb(tmpDir); + const extraAfter = nodeCount(db2, 'extra.js'); + const mathAfter = nodeCount(db2, 'math.js'); + db2.close(); + + // extra.js should be completely purged + expect(extraAfter).toBe(0); + // math.js should be untouched + expect(mathAfter).toBe(mathBefore); + }); + + test('reverse-dep cascade rebuilds importers edges', async () => { + // Full rebuild to get clean state + await buildGraph(tmpDir, { incremental: false }); + + const db1 = openDb(tmpDir); + const edgesBefore = edgeCount(db1); + db1.close(); + + // Scoped rebuild of math.js with default (reverse deps enabled) + // utils.js and index.js import math.js, so their edges should be rebuilt + await buildGraph(tmpDir, { scope: ['math.js'] }); + + const db2 = openDb(tmpDir); + const edgesAfter = edgeCount(db2); + db2.close(); + + // Edge count should be comparable (rebuilt edges for math.js + reverse deps) + expect(edgesAfter).toBeGreaterThan(0); + // Should not lose edges dramatically + expect(edgesAfter).toBeGreaterThanOrEqual(edgesBefore - 2); + }); + + test('noReverseDeps: 
true skips the cascade', async () => { + // Full rebuild to get clean state + await buildGraph(tmpDir, { incremental: false }); + + // Scoped rebuild with noReverseDeps — only math.js edges are rebuilt + await buildGraph(tmpDir, { scope: ['math.js'], noReverseDeps: true }); + + const db2 = openDb(tmpDir); + const edgesAfter = edgeCount(db2); + const mathNodes = nodeCount(db2, 'math.js'); + const utilsNodes = nodeCount(db2, 'utils.js'); + db2.close(); + + // math.js and utils.js should still have nodes + expect(mathNodes).toBeGreaterThan(0); + expect(utilsNodes).toBeGreaterThan(0); + // With noReverseDeps, we may lose some edges because importers weren't rebuilt + // but the graph should still be valid + expect(edgesAfter).toBeGreaterThan(0); + }); + + test('multiple files in scope', async () => { + // Full rebuild to get clean state + await buildGraph(tmpDir, { incremental: false }); + + const db1 = openDb(tmpDir); + const mathBefore = nodeCount(db1, 'math.js'); + const utilsBefore = nodeCount(db1, 'utils.js'); + const indexBefore = nodeCount(db1, 'index.js'); + db1.close(); + + // Scope both math.js and utils.js + await buildGraph(tmpDir, { scope: ['math.js', 'utils.js'] }); + + const db2 = openDb(tmpDir); + const mathAfter = nodeCount(db2, 'math.js'); + const utilsAfter = nodeCount(db2, 'utils.js'); + const indexAfter = nodeCount(db2, 'index.js'); + db2.close(); + + // Both scoped files should be rebuilt with same counts + expect(mathAfter).toBe(mathBefore); + expect(utilsAfter).toBe(utilsBefore); + // index.js untouched + expect(indexAfter).toBe(indexBefore); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 305848b5..4d27259f 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -11,6 +11,7 @@ import { buildToolList, TOOLS } from '../../src/mcp.js'; const ALL_TOOL_NAMES = [ 'query', 'file_deps', + 'file_exports', 'impact_analysis', 'find_cycles', 'module_map', @@ -257,6 +258,13 @@ describe('startMCPServer handler 
dispatch', () => { fnImpactData: vi.fn(() => ({ name: 'test', results: [] })), contextData: vi.fn(() => ({ name: 'test', results: [] })), explainData: vi.fn(() => ({ target: 'test', kind: 'function', results: [] })), + exportsData: vi.fn(() => ({ + file: 'test', + results: [], + reexports: [], + totalExported: 0, + totalInternal: 0, + })), whereData: vi.fn(() => ({ target: 'test', mode: 'symbol', results: [] })), diffImpactData: vi.fn(() => ({ changedFiles: 0, affectedFunctions: [] })), listFunctionsData: vi.fn(() => ({ count: 0, functions: [] })), @@ -320,6 +328,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -379,6 +388,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: fnImpactMock, contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -435,6 +445,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: diffImpactMock, listFunctionsData: vi.fn(), @@ -494,6 +505,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: listFnMock, @@ -554,6 +566,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -612,6 +625,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -664,6 
+678,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -718,6 +733,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -782,6 +798,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -839,6 +856,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -887,6 +905,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -935,6 +954,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -983,6 +1003,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), @@ -1032,6 +1053,7 @@ describe('startMCPServer handler dispatch', () => { fnImpactData: vi.fn(), contextData: vi.fn(), explainData: vi.fn(), + exportsData: vi.fn(), whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), diff --git a/tests/unit/purge-files.test.js b/tests/unit/purge-files.test.js new file mode 100644 index 
00000000..9702899a --- /dev/null +++ b/tests/unit/purge-files.test.js @@ -0,0 +1,184 @@ +/** + * Unit tests for purgeFilesFromGraph() — the extracted deletion cascade. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterEach, describe, expect, test } from 'vitest'; +import { purgeFilesFromGraph } from '../../src/builder.js'; +import { initSchema } from '../../src/db.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertEdge(db, sourceId, targetId, kind, confidence = 1.0) { + db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, 0)', + ).run(sourceId, targetId, kind, confidence); +} + +// ─── Fixture ─────────────────────────────────────────────────────────── + +// Track open DBs for cleanup (Windows locks DB files) +let openDbs = []; + +afterEach(() => { + for (const db of openDbs) { + try { + db.close(); + } catch { + /* already closed */ + } + } + openDbs = []; +}); + +function makeDb() { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-purge-')); + const dbPath = path.join(tmpDir, 'graph.db'); + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + openDbs.push(db); + return db; +} + +function seedGraph(db) { + // Two files: auth.js and utils.js + const fAuth = insertNode(db, 'auth.js', 'file', 'auth.js', 0); + const fUtils = insertNode(db, 'utils.js', 'file', 'utils.js', 0); + const authenticate = insertNode(db, 'authenticate', 'function', 'auth.js', 10); + const validate = insertNode(db, 'validateToken', 'function', 'auth.js', 25); + const format = insertNode(db, 'formatResponse', 'function', 'utils.js', 5); + + insertEdge(db, 
authenticate, validate, 'calls'); + insertEdge(db, fAuth, fUtils, 'imports'); + + // node_metrics (columns: node_id, fan_in, fan_out, etc.) + db.prepare('INSERT INTO node_metrics (node_id, fan_in) VALUES (?, ?)').run(fAuth, 2); + db.prepare('INSERT INTO node_metrics (node_id, fan_in) VALUES (?, ?)').run(fUtils, 1); + + // file_hashes + try { + db.prepare( + 'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, 0, 0)', + ).run('auth.js', 'abc123'); + db.prepare( + 'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, 0, 0)', + ).run('utils.js', 'def456'); + } catch { + /* table may not exist in very old schemas */ + } + + return { fAuth, fUtils, authenticate, validate, format }; +} + +// ─── Tests ───────────────────────────────────────────────────────────── + +describe('purgeFilesFromGraph', () => { + test('purges nodes/edges/metrics for specified files, leaves others untouched', () => { + const db = makeDb(); + seedGraph(db); + + // Purge only auth.js + purgeFilesFromGraph(db, ['auth.js']); + + // auth.js nodes should be gone + const authNodes = db.prepare("SELECT * FROM nodes WHERE file = 'auth.js'").all(); + expect(authNodes).toHaveLength(0); + + // utils.js nodes should remain + const utilsNodes = db.prepare("SELECT * FROM nodes WHERE file = 'utils.js'").all(); + expect(utilsNodes.length).toBeGreaterThan(0); + + // Edges involving auth.js nodes should be gone + const edges = db.prepare('SELECT * FROM edges').all(); + // The only remaining nodes are from utils.js, so no edges should reference auth.js nodes + for (const edge of edges) { + const src = db.prepare('SELECT file FROM nodes WHERE id = ?').get(edge.source_id); + const tgt = db.prepare('SELECT file FROM nodes WHERE id = ?').get(edge.target_id); + if (src) expect(src.file).not.toBe('auth.js'); + if (tgt) expect(tgt.file).not.toBe('auth.js'); + } + + // Metrics for auth.js file node should be gone (we inserted metrics for file node IDs) + // Since auth.js nodes 
are deleted, their metrics should also be gone + const remainingMetrics = db.prepare('SELECT * FROM node_metrics').all(); + // Only the utils.js file node metric should remain + expect(remainingMetrics).toHaveLength(1); + + // file_hashes for auth.js should be gone (purgeHashes defaults to true) + const authHash = db.prepare("SELECT * FROM file_hashes WHERE file = 'auth.js'").all(); + expect(authHash).toHaveLength(0); + + // utils.js hash should remain + const utilsHash = db.prepare("SELECT * FROM file_hashes WHERE file = 'utils.js'").all(); + expect(utilsHash).toHaveLength(1); + }); + + test('respects purgeHashes: false', () => { + const db = makeDb(); + seedGraph(db); + + purgeFilesFromGraph(db, ['auth.js'], { purgeHashes: false }); + + // Nodes should be gone + const authNodes = db.prepare("SELECT * FROM nodes WHERE file = 'auth.js'").all(); + expect(authNodes).toHaveLength(0); + + // But file_hashes should remain + const authHash = db.prepare("SELECT * FROM file_hashes WHERE file = 'auth.js'").all(); + expect(authHash).toHaveLength(1); + }); + + test('handles missing optional tables gracefully', () => { + const db = makeDb(); + seedGraph(db); + + // Drop optional tables to simulate pre-migration DB + try { + db.exec('DROP TABLE IF EXISTS function_complexity'); + } catch { + /* ignore */ + } + try { + db.exec('DROP TABLE IF EXISTS dataflow'); + } catch { + /* ignore */ + } + + // Should not throw + expect(() => purgeFilesFromGraph(db, ['auth.js'])).not.toThrow(); + + const authNodes = db.prepare("SELECT * FROM nodes WHERE file = 'auth.js'").all(); + expect(authNodes).toHaveLength(0); + }); + + test('no-ops on empty file list', () => { + const db = makeDb(); + seedGraph(db); + + const beforeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c; + purgeFilesFromGraph(db, []); + const afterCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c; + expect(afterCount).toBe(beforeCount); + }); + + test('no-ops on null/undefined file list', () => { + 
const db = makeDb(); + seedGraph(db); + + const beforeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c; + purgeFilesFromGraph(db, null); + purgeFilesFromGraph(db, undefined); + const afterCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c; + expect(afterCount).toBe(beforeCount); + }); +}); From ef3ae44385e531ace007985c43929406b1e54285 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:24:04 -0700 Subject: [PATCH 25/30] feat: add intraprocedural control flow graph (CFG) construction Add opt-in CFG analysis that builds basic-block control flow graphs from tree-sitter AST for individual functions. Enables complexity-aware impact analysis and opens the path to dataflow (def-use chains). - DB migration v12: cfg_blocks + cfg_edges tables - New src/cfg.js module: CFG_RULES, buildFunctionCFG, buildCFGData, cfgData, cfgToDOT, cfgToMermaid, cfg CLI printer - Builder integration: --cfg flag triggers CFG after complexity pass - CLI: `cfg ` command with --format text/dot/mermaid, -j, --ndjson - MCP: cfg tool with name, format, file, kind, pagination props - Exports findFunctionNode from complexity.js for reuse - 24 unit tests + 11 integration tests (35 total) Phase 1: JS/TS/TSX only. Handles if/else, for/while/do-while, switch, try/catch/finally, break/continue (with labels), return/throw. 
Impact: 27 functions changed, 36 affected --- src/builder.js | 13 + src/cfg.js | 1035 +++++++++++++++++++++++++++++++++ src/cli.js | 39 +- src/complexity.js | 2 +- src/db.js | 31 + src/index.js | 11 + src/mcp.js | 38 ++ tests/integration/cfg.test.js | 199 +++++++ tests/unit/cfg.test.js | 457 +++++++++++++++ tests/unit/mcp.test.js | 1 + 10 files changed, 1824 insertions(+), 2 deletions(-) create mode 100644 src/cfg.js create mode 100644 tests/integration/cfg.test.js create mode 100644 tests/unit/cfg.test.js diff --git a/src/builder.js b/src/builder.js index 79fd9d47..6ceec39e 100644 --- a/src/builder.js +++ b/src/builder.js @@ -1139,6 +1139,18 @@ export async function buildGraph(rootDir, opts = {}) { } _t.complexityMs = performance.now() - _t.complexity0; + // Opt-in CFG analysis (--cfg) + if (opts.cfg) { + _t.cfg0 = performance.now(); + try { + const { buildCFGData } = await import('./cfg.js'); + await buildCFGData(db, allSymbols, rootDir, engineOpts); + } catch (err) { + debug(`CFG analysis failed: ${err.message}`); + } + _t.cfgMs = performance.now() - _t.cfg0; + } + // Opt-in dataflow analysis (--dataflow) if (opts.dataflow) { _t.dataflow0 = performance.now(); @@ -1241,6 +1253,7 @@ export async function buildGraph(rootDir, opts = {}) { structureMs: +_t.structureMs.toFixed(1), rolesMs: +_t.rolesMs.toFixed(1), complexityMs: +_t.complexityMs.toFixed(1), + ...(_t.cfgMs != null && { cfgMs: +_t.cfgMs.toFixed(1) }), }, }; } diff --git a/src/cfg.js b/src/cfg.js new file mode 100644 index 00000000..0e6e49be --- /dev/null +++ b/src/cfg.js @@ -0,0 +1,1035 @@ +/** + * Intraprocedural Control Flow Graph (CFG) construction from tree-sitter AST. + * + * Builds basic-block CFGs for individual functions, stored in cfg_blocks + cfg_edges tables. + * Opt-in via `build --cfg`. JS/TS/TSX only for Phase 1. 
+ */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { COMPLEXITY_RULES } from './complexity.js'; +import { openReadonlyOrFail } from './db.js'; +import { info } from './logger.js'; +import { paginateResult, printNdjson } from './paginate.js'; +import { LANGUAGE_REGISTRY } from './parser.js'; +import { isTestFile } from './queries.js'; + +// ─── CFG Node Type Rules (extends COMPLEXITY_RULES) ────────────────────── + +const JS_TS_CFG = { + ifNode: 'if_statement', + elseClause: 'else_clause', + forNodes: new Set(['for_statement', 'for_in_statement']), + whileNode: 'while_statement', + doNode: 'do_statement', + switchNode: 'switch_statement', + caseNode: 'switch_case', + defaultNode: 'switch_default', + tryNode: 'try_statement', + catchNode: 'catch_clause', + finallyNode: 'finally_clause', + returnNode: 'return_statement', + throwNode: 'throw_statement', + breakNode: 'break_statement', + continueNode: 'continue_statement', + blockNode: 'statement_block', + labeledNode: 'labeled_statement', + functionNodes: new Set([ + 'function_declaration', + 'function_expression', + 'arrow_function', + 'method_definition', + 'generator_function', + 'generator_function_declaration', + ]), +}; + +export const CFG_RULES = new Map([ + ['javascript', JS_TS_CFG], + ['typescript', JS_TS_CFG], + ['tsx', JS_TS_CFG], +]); + +// Language IDs that support CFG (Phase 1: JS/TS/TSX only) +const CFG_LANG_IDS = new Set(['javascript', 'typescript', 'tsx']); + +// JS/TS extensions +const CFG_EXTENSIONS = new Set(); +for (const entry of LANGUAGE_REGISTRY) { + if (CFG_LANG_IDS.has(entry.id)) { + for (const ext of entry.extensions) CFG_EXTENSIONS.add(ext); + } +} + +// ─── Core Algorithm: AST → CFG ────────────────────────────────────────── + +/** + * Build a control flow graph for a single function AST node. 
+ * + * @param {object} functionNode - tree-sitter function AST node + * @param {string} langId - language identifier (javascript, typescript, tsx) + * @returns {{ blocks: object[], edges: object[] }} - CFG blocks and edges + */ +export function buildFunctionCFG(functionNode, langId) { + const rules = CFG_RULES.get(langId); + if (!rules) return { blocks: [], edges: [] }; + + const blocks = []; + const edges = []; + let nextIndex = 0; + + function makeBlock(type, startLine = null, endLine = null, label = null) { + const block = { + index: nextIndex++, + type, + startLine, + endLine, + label, + }; + blocks.push(block); + return block; + } + + function addEdge(source, target, kind) { + edges.push({ + sourceIndex: source.index, + targetIndex: target.index, + kind, + }); + } + + const entryBlock = makeBlock('entry'); + const exitBlock = makeBlock('exit'); + + // Loop context stack for break/continue resolution + const loopStack = []; + + // Label map for labeled break/continue + const labelMap = new Map(); + + /** + * Get the body node of a function (handles arrow functions with expression bodies). + */ + function getFunctionBody(fnNode) { + const body = fnNode.childForFieldName('body'); + if (!body) return null; + return body; + } + + /** + * Get statement children from a block or statement list. + */ + function getStatements(node) { + if (!node) return []; + // statement_block: get named children + if (node.type === rules.blockNode) { + const stmts = []; + for (let i = 0; i < node.namedChildCount; i++) { + stmts.push(node.namedChild(i)); + } + return stmts; + } + // Single statement (e.g., arrow fn with expression body, or unbraced if body) + return [node]; + } + + /** + * Process a list of statements, creating blocks and edges. + * Returns the last "current" block after processing, or null if all paths terminated. 
+ */ + function processStatements(stmts, currentBlock) { + let cur = currentBlock; + + for (const stmt of stmts) { + if (!cur) { + // Dead code after return/break/continue/throw — skip remaining + break; + } + cur = processStatement(stmt, cur); + } + + return cur; + } + + /** + * Process a single statement, returns the new current block or null if terminated. + */ + function processStatement(stmt, currentBlock) { + if (!stmt || !currentBlock) return currentBlock; + + const type = stmt.type; + + // Labeled statement: register label then process inner statement + if (type === rules.labeledNode) { + const labelNode = stmt.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + const body = stmt.childForFieldName('body'); + if (body && labelName) { + // Will be filled when we encounter the loop + const labelCtx = { headerBlock: null, exitBlock: null }; + labelMap.set(labelName, labelCtx); + const result = processStatement(body, currentBlock); + labelMap.delete(labelName); + return result; + } + return currentBlock; + } + + // If statement + if (type === rules.ifNode) { + return processIf(stmt, currentBlock); + } + + // For / for-in loops + if (rules.forNodes.has(type)) { + return processForLoop(stmt, currentBlock); + } + + // While loop + if (type === rules.whileNode) { + return processWhileLoop(stmt, currentBlock); + } + + // Do-while loop + if (type === rules.doNode) { + return processDoWhileLoop(stmt, currentBlock); + } + + // Switch statement + if (type === rules.switchNode) { + return processSwitch(stmt, currentBlock); + } + + // Try/catch/finally + if (type === rules.tryNode) { + return processTryCatch(stmt, currentBlock); + } + + // Return statement + if (type === rules.returnNode) { + currentBlock.endLine = stmt.startPosition.row + 1; + addEdge(currentBlock, exitBlock, 'return'); + return null; // path terminated + } + + // Throw statement + if (type === rules.throwNode) { + currentBlock.endLine = stmt.startPosition.row + 1; + 
addEdge(currentBlock, exitBlock, 'exception'); + return null; // path terminated + } + + // Break statement + if (type === rules.breakNode) { + const labelNode = stmt.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + + let target = null; + if (labelName && labelMap.has(labelName)) { + target = labelMap.get(labelName).exitBlock; + } else if (loopStack.length > 0) { + target = loopStack[loopStack.length - 1].exitBlock; + } + + if (target) { + currentBlock.endLine = stmt.startPosition.row + 1; + addEdge(currentBlock, target, 'break'); + return null; // path terminated + } + // break outside loop (switch case) — just continue + return currentBlock; + } + + // Continue statement + if (type === rules.continueNode) { + const labelNode = stmt.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + + let target = null; + if (labelName && labelMap.has(labelName)) { + target = labelMap.get(labelName).headerBlock; + } else if (loopStack.length > 0) { + target = loopStack[loopStack.length - 1].headerBlock; + } + + if (target) { + currentBlock.endLine = stmt.startPosition.row + 1; + addEdge(currentBlock, target, 'continue'); + return null; // path terminated + } + return currentBlock; + } + + // Regular statement — extend current block + if (!currentBlock.startLine) { + currentBlock.startLine = stmt.startPosition.row + 1; + } + currentBlock.endLine = stmt.endPosition.row + 1; + return currentBlock; + } + + /** + * Process an if/else-if/else chain. 
+ */ + function processIf(ifStmt, currentBlock) { + // Terminate current block at condition + currentBlock.endLine = ifStmt.startPosition.row + 1; + + const condBlock = makeBlock( + 'condition', + ifStmt.startPosition.row + 1, + ifStmt.startPosition.row + 1, + 'if', + ); + addEdge(currentBlock, condBlock, 'fallthrough'); + + const joinBlock = makeBlock('body'); + + // True branch (consequent) + const consequent = ifStmt.childForFieldName('consequence'); + const trueBlock = makeBlock('branch_true', null, null, 'then'); + addEdge(condBlock, trueBlock, 'branch_true'); + const trueStmts = getStatements(consequent); + const trueEnd = processStatements(trueStmts, trueBlock); + if (trueEnd) { + addEdge(trueEnd, joinBlock, 'fallthrough'); + } + + // False branch (alternative / else / else-if) + const alternative = ifStmt.childForFieldName('alternative'); + if (alternative) { + if (alternative.type === rules.elseClause) { + // else clause — may contain another if (else-if) or a block + const elseChildren = []; + for (let i = 0; i < alternative.namedChildCount; i++) { + elseChildren.push(alternative.namedChild(i)); + } + if (elseChildren.length === 1 && elseChildren[0].type === rules.ifNode) { + // else-if: recurse + const falseBlock = makeBlock('branch_false', null, null, 'else-if'); + addEdge(condBlock, falseBlock, 'branch_false'); + const elseIfEnd = processIf(elseChildren[0], falseBlock); + if (elseIfEnd) { + addEdge(elseIfEnd, joinBlock, 'fallthrough'); + } + } else { + // else block + const falseBlock = makeBlock('branch_false', null, null, 'else'); + addEdge(condBlock, falseBlock, 'branch_false'); + const falseEnd = processStatements(elseChildren, falseBlock); + if (falseEnd) { + addEdge(falseEnd, joinBlock, 'fallthrough'); + } + } + } + } else { + // No else: condition-false goes directly to join + addEdge(condBlock, joinBlock, 'branch_false'); + } + + return joinBlock; + } + + /** + * Process a for/for-in loop. 
+ */ + function processForLoop(forStmt, currentBlock) { + const headerBlock = makeBlock( + 'loop_header', + forStmt.startPosition.row + 1, + forStmt.startPosition.row + 1, + 'for', + ); + addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = makeBlock('body'); + + // Register loop context + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + loopStack.push(loopCtx); + + // Update label map if this is inside a labeled statement + for (const [, ctx] of labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = headerBlock; + ctx.exitBlock = loopExitBlock; + } + } + + // Loop body + const body = forStmt.childForFieldName('body'); + const bodyBlock = makeBlock('loop_body'); + addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getStatements(body); + const bodyEnd = processStatements(bodyStmts, bodyBlock); + + if (bodyEnd) { + addEdge(bodyEnd, headerBlock, 'loop_back'); + } + + // Loop exit + addEdge(headerBlock, loopExitBlock, 'loop_exit'); + + loopStack.pop(); + return loopExitBlock; + } + + /** + * Process a while loop. 
+ */ + function processWhileLoop(whileStmt, currentBlock) { + const headerBlock = makeBlock( + 'loop_header', + whileStmt.startPosition.row + 1, + whileStmt.startPosition.row + 1, + 'while', + ); + addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = makeBlock('body'); + + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + loopStack.push(loopCtx); + + for (const [, ctx] of labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = headerBlock; + ctx.exitBlock = loopExitBlock; + } + } + + const body = whileStmt.childForFieldName('body'); + const bodyBlock = makeBlock('loop_body'); + addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getStatements(body); + const bodyEnd = processStatements(bodyStmts, bodyBlock); + + if (bodyEnd) { + addEdge(bodyEnd, headerBlock, 'loop_back'); + } + + addEdge(headerBlock, loopExitBlock, 'loop_exit'); + + loopStack.pop(); + return loopExitBlock; + } + + /** + * Process a do-while loop. + */ + function processDoWhileLoop(doStmt, currentBlock) { + const bodyBlock = makeBlock('loop_body', doStmt.startPosition.row + 1, null, 'do'); + addEdge(currentBlock, bodyBlock, 'fallthrough'); + + const condBlock = makeBlock('loop_header', null, null, 'do-while'); + const loopExitBlock = makeBlock('body'); + + const loopCtx = { headerBlock: condBlock, exitBlock: loopExitBlock }; + loopStack.push(loopCtx); + + for (const [, ctx] of labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = condBlock; + ctx.exitBlock = loopExitBlock; + } + } + + const body = doStmt.childForFieldName('body'); + const bodyStmts = getStatements(body); + const bodyEnd = processStatements(bodyStmts, bodyBlock); + + if (bodyEnd) { + addEdge(bodyEnd, condBlock, 'fallthrough'); + } + + // Condition: loop_back or exit + addEdge(condBlock, bodyBlock, 'loop_back'); + addEdge(condBlock, loopExitBlock, 'loop_exit'); + + loopStack.pop(); + return loopExitBlock; + } + + /** + * Process a switch statement. 
+ */ + function processSwitch(switchStmt, currentBlock) { + currentBlock.endLine = switchStmt.startPosition.row + 1; + + const switchHeader = makeBlock( + 'condition', + switchStmt.startPosition.row + 1, + switchStmt.startPosition.row + 1, + 'switch', + ); + addEdge(currentBlock, switchHeader, 'fallthrough'); + + const joinBlock = makeBlock('body'); + + // Switch acts like a break target for contained break statements + const switchCtx = { headerBlock: switchHeader, exitBlock: joinBlock }; + loopStack.push(switchCtx); + + // Collect case clauses from the switch body + const switchBody = switchStmt.childForFieldName('body'); + if (switchBody) { + let hasDefault = false; + for (let i = 0; i < switchBody.namedChildCount; i++) { + const caseClause = switchBody.namedChild(i); + const isDefault = + caseClause.type === rules.defaultNode || + (caseClause.type === rules.caseNode && !caseClause.childForFieldName('value')); + + const caseLabel = isDefault ? 'default' : 'case'; + const caseBlock = makeBlock( + isDefault ? 'case' : 'case', + caseClause.startPosition.row + 1, + null, + caseLabel, + ); + addEdge(switchHeader, caseBlock, isDefault ? 'branch_false' : 'branch_true'); + if (isDefault) hasDefault = true; + + // Process case body statements + const caseStmts = []; + for (let j = 0; j < caseClause.namedChildCount; j++) { + const child = caseClause.namedChild(j); + // Skip the case value expression + if (child.type !== 'identifier' && child.type !== 'string' && child.type !== 'number') { + caseStmts.push(child); + } + } + + const caseEnd = processStatements(caseStmts, caseBlock); + if (caseEnd) { + // Fall-through to join (or next case, but we simplify to join) + addEdge(caseEnd, joinBlock, 'fallthrough'); + } + } + + // If no default case, switch header can skip to join + if (!hasDefault) { + addEdge(switchHeader, joinBlock, 'branch_false'); + } + } + + loopStack.pop(); + return joinBlock; + } + + /** + * Process try/catch/finally. 
+ */ + function processTryCatch(tryStmt, currentBlock) { + currentBlock.endLine = tryStmt.startPosition.row + 1; + + const joinBlock = makeBlock('body'); + + // Try body + const tryBody = tryStmt.childForFieldName('body'); + const tryBlock = makeBlock('body', tryBody ? tryBody.startPosition.row + 1 : null, null, 'try'); + addEdge(currentBlock, tryBlock, 'fallthrough'); + + const tryStmts = getStatements(tryBody); + const tryEnd = processStatements(tryStmts, tryBlock); + + // Catch handler + let catchHandler = null; + let finallyHandler = null; + for (let i = 0; i < tryStmt.namedChildCount; i++) { + const child = tryStmt.namedChild(i); + if (child.type === rules.catchNode) catchHandler = child; + if (child.type === rules.finallyNode) finallyHandler = child; + } + + if (catchHandler) { + const catchBlock = makeBlock('catch', catchHandler.startPosition.row + 1, null, 'catch'); + // Exception edge from try to catch + addEdge(tryBlock, catchBlock, 'exception'); + + const catchBody = catchHandler.childForFieldName('body'); + const catchStmts = getStatements(catchBody); + const catchEnd = processStatements(catchStmts, catchBlock); + + if (finallyHandler) { + const finallyBlock = makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) addEdge(tryEnd, finallyBlock, 'fallthrough'); + if (catchEnd) addEdge(catchEnd, finallyBlock, 'fallthrough'); + + const finallyBody = finallyHandler.childForFieldName('body'); + const finallyStmts = getStatements(finallyBody); + const finallyEnd = processStatements(finallyStmts, finallyBlock); + if (finallyEnd) addEdge(finallyEnd, joinBlock, 'fallthrough'); + } else { + if (tryEnd) addEdge(tryEnd, joinBlock, 'fallthrough'); + if (catchEnd) addEdge(catchEnd, joinBlock, 'fallthrough'); + } + } else if (finallyHandler) { + const finallyBlock = makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) addEdge(tryEnd, finallyBlock, 'fallthrough'); + + 
const finallyBody = finallyHandler.childForFieldName('body'); + const finallyStmts = getStatements(finallyBody); + const finallyEnd = processStatements(finallyStmts, finallyBlock); + if (finallyEnd) addEdge(finallyEnd, joinBlock, 'fallthrough'); + } else { + if (tryEnd) addEdge(tryEnd, joinBlock, 'fallthrough'); + } + + return joinBlock; + } + + // ── Main entry point ────────────────────────────────────────────────── + + const body = getFunctionBody(functionNode); + if (!body) { + // Empty function or expression body + addEdge(entryBlock, exitBlock, 'fallthrough'); + return { blocks, edges }; + } + + const stmts = getStatements(body); + if (stmts.length === 0) { + addEdge(entryBlock, exitBlock, 'fallthrough'); + return { blocks, edges }; + } + + const firstBlock = makeBlock('body'); + addEdge(entryBlock, firstBlock, 'fallthrough'); + + const lastBlock = processStatements(stmts, firstBlock); + if (lastBlock) { + addEdge(lastBlock, exitBlock, 'fallthrough'); + } + + return { blocks, edges }; +} + +// ─── Build-Time: Compute CFG for Changed Files ───────────────────────── + +/** + * Build CFG data for all function/method definitions and persist to DB. 
+ * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [_engineOpts] - engine options (unused; always uses WASM for AST) + */ +export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { + // Lazily init WASM parsers if needed + let parsers = null; + let extToLang = null; + let needsFallback = false; + + for (const [relPath, symbols] of fileSymbols) { + if (!symbols._tree) { + const ext = path.extname(relPath).toLowerCase(); + if (CFG_EXTENSIONS.has(ext)) { + needsFallback = true; + break; + } + } + } + + if (needsFallback) { + const { createParsers } = await import('./parser.js'); + parsers = await createParsers(); + extToLang = new Map(); + for (const entry of LANGUAGE_REGISTRY) { + for (const ext of entry.extensions) { + extToLang.set(ext, entry.id); + } + } + } + + let getParserFn = null; + if (parsers) { + const mod = await import('./parser.js'); + getParserFn = mod.getParser; + } + + const { findFunctionNode } = await import('./complexity.js'); + + const insertBlock = db.prepare( + `INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) + VALUES (?, ?, ?, ?, ?, ?)`, + ); + const insertEdge = db.prepare( + `INSERT INTO cfg_edges (function_node_id, source_block_id, target_block_id, kind) + VALUES (?, ?, ?, ?)`, + ); + const deleteBlocks = db.prepare('DELETE FROM cfg_blocks WHERE function_node_id = ?'); + const deleteEdges = db.prepare('DELETE FROM cfg_edges WHERE function_node_id = ?'); + const getNodeId = db.prepare( + "SELECT id FROM nodes WHERE name = ? AND kind IN ('function','method') AND file = ? 
AND line = ?", + ); + + let analyzed = 0; + + const tx = db.transaction(() => { + for (const [relPath, symbols] of fileSymbols) { + const ext = path.extname(relPath).toLowerCase(); + if (!CFG_EXTENSIONS.has(ext)) continue; + + let tree = symbols._tree; + let langId = symbols._langId; + + // WASM fallback if no cached tree + if (!tree) { + if (!extToLang || !getParserFn) continue; + langId = extToLang.get(ext); + if (!langId || !CFG_LANG_IDS.has(langId)) continue; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch { + continue; + } + + const parser = getParserFn(parsers, absPath); + if (!parser) continue; + + try { + tree = parser.parse(code); + } catch { + continue; + } + } + + if (!langId) { + langId = extToLang ? extToLang.get(ext) : null; + if (!langId) continue; + } + + const cfgRules = CFG_RULES.get(langId); + if (!cfgRules) continue; + + const complexityRules = COMPLEXITY_RULES.get(langId); + if (!complexityRules) continue; + + for (const def of symbols.definitions) { + if (def.kind !== 'function' && def.kind !== 'method') continue; + if (!def.line) continue; + + const row = getNodeId.get(def.name, relPath, def.line); + if (!row) continue; + + const funcNode = findFunctionNode(tree.rootNode, def.line, def.endLine, complexityRules); + if (!funcNode) continue; + + const cfg = buildFunctionCFG(funcNode, langId); + if (cfg.blocks.length === 0) continue; + + // Clear old CFG data for this function + deleteEdges.run(row.id); + deleteBlocks.run(row.id); + + // Insert blocks and build index→dbId mapping + const blockDbIds = new Map(); + for (const block of cfg.blocks) { + const result = insertBlock.run( + row.id, + block.index, + block.type, + block.startLine, + block.endLine, + block.label, + ); + blockDbIds.set(block.index, result.lastInsertRowid); + } + + // Insert edges + for (const edge of cfg.edges) { + const sourceDbId = blockDbIds.get(edge.sourceIndex); + const targetDbId = 
blockDbIds.get(edge.targetIndex); + if (sourceDbId && targetDbId) { + insertEdge.run(row.id, sourceDbId, targetDbId, edge.kind); + } + } + + analyzed++; + } + + // Don't release _tree here — complexity/dataflow may still need it + } + }); + + tx(); + + if (analyzed > 0) { + info(`CFG: ${analyzed} functions analyzed`); + } +} + +// ─── Query-Time Functions ─────────────────────────────────────────────── + +function hasCfgTables(db) { + try { + db.prepare('SELECT 1 FROM cfg_blocks LIMIT 0').get(); + return true; + } catch { + return false; + } +} + +function findNodes(db, name, opts = {}) { + const kinds = opts.kind ? [opts.kind] : ['function', 'method']; + const placeholders = kinds.map(() => '?').join(', '); + const params = [`%${name}%`, ...kinds]; + + let fileCondition = ''; + if (opts.file) { + fileCondition = ' AND n.file LIKE ?'; + params.push(`%${opts.file}%`); + } + + const rows = db + .prepare( + `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line + FROM nodes n + WHERE n.name LIKE ? AND n.kind IN (${placeholders})${fileCondition}`, + ) + .all(...params); + + return opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; +} + +/** + * Load CFG data for a function from the database. + * + * @param {string} name - Function name (partial match) + * @param {string} [customDbPath] - Path to graph.db + * @param {object} [opts] - Options + * @returns {{ function: object, blocks: object[], edges: object[], summary: object }} + */ +export function cfgData(name, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + + if (!hasCfgTables(db)) { + db.close(); + return { + name, + results: [], + warning: 'No CFG data found. 
Run `codegraph build --cfg` first.', + }; + } + + const nodes = findNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + if (nodes.length === 0) { + db.close(); + return { name, results: [] }; + } + + const blockStmt = db.prepare( + `SELECT id, block_index, block_type, start_line, end_line, label + FROM cfg_blocks WHERE function_node_id = ? + ORDER BY block_index`, + ); + const edgeStmt = db.prepare( + `SELECT e.kind, + sb.block_index AS source_index, sb.block_type AS source_type, + tb.block_index AS target_index, tb.block_type AS target_type + FROM cfg_edges e + JOIN cfg_blocks sb ON e.source_block_id = sb.id + JOIN cfg_blocks tb ON e.target_block_id = tb.id + WHERE e.function_node_id = ? + ORDER BY sb.block_index, tb.block_index`, + ); + + const results = nodes.map((node) => { + const cfgBlocks = blockStmt.all(node.id); + const cfgEdges = edgeStmt.all(node.id); + + return { + name: node.name, + kind: node.kind, + file: node.file, + line: node.line, + blocks: cfgBlocks.map((b) => ({ + index: b.block_index, + type: b.block_type, + startLine: b.start_line, + endLine: b.end_line, + label: b.label, + })), + edges: cfgEdges.map((e) => ({ + source: e.source_index, + sourceType: e.source_type, + target: e.target_index, + targetType: e.target_type, + kind: e.kind, + })), + summary: { + blockCount: cfgBlocks.length, + edgeCount: cfgEdges.length, + }, + }; + }); + + db.close(); + return paginateResult({ name, results }, 'results', opts); +} + +// ─── Export Formats ───────────────────────────────────────────────────── + +/** + * Convert CFG data to DOT format for Graphviz rendering. + */ +export function cfgToDOT(cfgResult) { + const lines = []; + + for (const r of cfgResult.results) { + lines.push(`digraph "${r.name}" {`); + lines.push(' rankdir=TB;'); + lines.push(' node [shape=box, fontname="monospace", fontsize=10];'); + + for (const block of r.blocks) { + const label = blockLabel(block); + const shape = block.type === 'entry' || block.type === 'exit' ? 
'ellipse' : 'box'; + const style = + block.type === 'condition' || block.type === 'loop_header' + ? ', style=filled, fillcolor="#ffffcc"' + : ''; + lines.push(` B${block.index} [label="${label}", shape=${shape}${style}];`); + } + + for (const edge of r.edges) { + const style = edgeStyle(edge.kind); + lines.push(` B${edge.source} -> B${edge.target} [label="${edge.kind}"${style}];`); + } + + lines.push('}'); + } + + return lines.join('\n'); +} + +/** + * Convert CFG data to Mermaid format. + */ +export function cfgToMermaid(cfgResult) { + const lines = []; + + for (const r of cfgResult.results) { + lines.push(`graph TD`); + lines.push(` subgraph "${r.name}"`); + + for (const block of r.blocks) { + const label = blockLabel(block); + if (block.type === 'entry' || block.type === 'exit') { + lines.push(` B${block.index}(["${label}"])`); + } else if (block.type === 'condition' || block.type === 'loop_header') { + lines.push(` B${block.index}{"${label}"}`); + } else { + lines.push(` B${block.index}["${label}"]`); + } + } + + for (const edge of r.edges) { + const label = edge.kind; + lines.push(` B${edge.source} -->|${label}| B${edge.target}`); + } + + lines.push(' end'); + } + + return lines.join('\n'); +} + +function blockLabel(block) { + const loc = + block.startLine && block.endLine + ? ` L${block.startLine}${block.endLine !== block.startLine ? `-${block.endLine}` : ''}` + : ''; + const label = block.label ? 
` (${block.label})` : ''; + return `${block.type}${label}${loc}`; +} + +function edgeStyle(kind) { + if (kind === 'exception') return ', color=red, fontcolor=red'; + if (kind === 'branch_true') return ', color=green, fontcolor=green'; + if (kind === 'branch_false') return ', color=red, fontcolor=red'; + if (kind === 'loop_back') return ', style=dashed, color=blue'; + if (kind === 'loop_exit') return ', color=orange'; + if (kind === 'return') return ', color=purple'; + if (kind === 'break') return ', color=orange, style=dashed'; + if (kind === 'continue') return ', color=blue, style=dashed'; + return ''; +} + +// ─── CLI Printer ──────────────────────────────────────────────────────── + +/** + * CLI display for cfg command. + */ +export function cfg(name, customDbPath, opts = {}) { + const data = cfgData(name, customDbPath, opts); + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + if (opts.ndjson) { + printNdjson(data.results); + return; + } + + if (data.warning) { + console.log(`\u26A0 ${data.warning}`); + return; + } + if (data.results.length === 0) { + console.log(`No symbols matching "${name}".`); + return; + } + + const format = opts.format || 'text'; + if (format === 'dot') { + console.log(cfgToDOT(data)); + return; + } + if (format === 'mermaid') { + console.log(cfgToMermaid(data)); + return; + } + + // Text format + for (const r of data.results) { + console.log(`\n${r.kind} ${r.name} (${r.file}:${r.line})`); + console.log('\u2500'.repeat(60)); + console.log(` Blocks: ${r.summary.blockCount} Edges: ${r.summary.edgeCount}`); + + if (r.blocks.length > 0) { + console.log('\n Blocks:'); + for (const b of r.blocks) { + const loc = b.startLine + ? ` L${b.startLine}${b.endLine && b.endLine !== b.startLine ? `-${b.endLine}` : ''}` + : ''; + const label = b.label ? 
` (${b.label})` : ''; + console.log(` [${b.index}] ${b.type}${label}${loc}`); + } + } + + if (r.edges.length > 0) { + console.log('\n Edges:'); + for (const e of r.edges) { + console.log(` B${e.source} \u2192 B${e.target} [${e.kind}]`); + } + } + } +} diff --git a/src/cli.js b/src/cli.js index 391d2274..737ce4ae 100644 --- a/src/cli.js +++ b/src/cli.js @@ -98,10 +98,16 @@ program .description('Parse repo and build graph in .codegraph/graph.db') .option('--no-incremental', 'Force full rebuild (ignore file hashes)') .option('--dataflow', 'Extract data flow edges (flows_to, returns, mutates)') + .option('--cfg', 'Build intraprocedural control flow graphs') .action(async (dir, opts) => { const root = path.resolve(dir || '.'); const engine = program.opts().engine; - await buildGraph(root, { incremental: opts.incremental, engine, dataflow: opts.dataflow }); + await buildGraph(root, { + incremental: opts.incremental, + engine, + dataflow: opts.dataflow, + cfg: opts.cfg, + }); }); program @@ -994,6 +1000,37 @@ program }); }); +program + .command('cfg ') + .description('Show control flow graph for a function') + .option('-d, --db ', 'Path to graph.db') + .option('--format ', 'Output format: text, dot, mermaid', 'text') + .option('-f, --file ', 'Scope to file (partial match)') + .option('-k, --kind ', 'Filter by symbol kind') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .option('--ndjson', 'Newline-delimited JSON output') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .action(async (name, opts) => { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + process.exit(1); + } + const { cfg } = await import('./cfg.js'); + cfg(name, opts.db, { + format: opts.format, + file: opts.file, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + ndjson: opts.ndjson, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + }); + }); + program .command('complexity [target]') .description('Show per-function complexity metrics (cognitive, cyclomatic, nesting depth, MI)') diff --git a/src/complexity.js b/src/complexity.js index f97cb616..132ccb25 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -1574,7 +1574,7 @@ export function computeAllMetrics(functionNode, langId) { /** * Find the function body node in a parse tree that matches a given line range. */ -function findFunctionNode(rootNode, startLine, _endLine, rules) { +export function findFunctionNode(rootNode, startLine, _endLine, rules) { // tree-sitter lines are 0-indexed const targetStart = startLine - 1; diff --git a/src/db.js b/src/db.js index 9f40d7cc..ff31fd39 100644 --- a/src/db.js +++ b/src/db.js @@ -173,6 +173,37 @@ export const MIGRATIONS = [ CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); `, }, + { + version: 12, + up: ` + CREATE TABLE IF NOT EXISTS cfg_blocks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + block_index INTEGER NOT NULL, + block_type TEXT NOT NULL, + start_line INTEGER, + end_line INTEGER, + label TEXT, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + UNIQUE(function_node_id, block_index) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_blocks_fn ON cfg_blocks(function_node_id); + + CREATE TABLE IF NOT EXISTS cfg_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + source_block_id INTEGER NOT NULL, + target_block_id INTEGER NOT NULL, + kind TEXT NOT NULL, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + FOREIGN 
KEY(source_block_id) REFERENCES cfg_blocks(id), + FOREIGN KEY(target_block_id) REFERENCES cfg_blocks(id) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_fn ON cfg_edges(function_node_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_src ON cfg_edges(source_block_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); + `, + }, ]; export function getBuildMeta(db, key) { diff --git a/src/index.js b/src/index.js index 6774d54b..8d44699a 100644 --- a/src/index.js +++ b/src/index.js @@ -22,6 +22,16 @@ export { evaluateBoundaries, PRESETS, validateBoundaryConfig } from './boundarie export { branchCompareData, branchCompareMermaid } from './branch-compare.js'; // Graph building export { buildGraph, collectFiles, loadPathAliases, resolveImportPath } from './builder.js'; +// Control flow graph (intraprocedural) +export { + buildCFGData, + buildFunctionCFG, + CFG_RULES, + cfg, + cfgData, + cfgToDOT, + cfgToMermaid, +} from './cfg.js'; // Check (CI validation predicates) export { check, checkData } from './check.js'; // Co-change analysis @@ -44,6 +54,7 @@ export { computeHalsteadMetrics, computeLOCMetrics, computeMaintainabilityIndex, + findFunctionNode, HALSTEAD_RULES, iterComplexity, } from './complexity.js'; diff --git a/src/mcp.js b/src/mcp.js index cd0b8808..81cb1b16 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -641,6 +641,26 @@ const BASE_TOOLS = [ required: ['base', 'target'], }, }, + { + name: 'cfg', + description: 'Show intraprocedural control flow graph for a function. 
Requires build --cfg.', + inputSchema: { + type: 'object', + properties: { + name: { type: 'string', description: 'Function/method name (partial match)' }, + format: { + type: 'string', + enum: ['json', 'dot', 'mermaid'], + description: 'Output format (default: json)', + }, + file: { type: 'string', description: 'Scope to file (partial match)' }, + kind: { type: 'string', enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + required: ['name'], + }, + }, { name: 'dataflow', description: 'Show data flow edges or data-dependent blast radius. Requires build --dataflow.', @@ -1192,6 +1212,24 @@ export async function startMCPServer(customDbPath, options = {}) { result = args.format === 'mermaid' ? branchCompareMermaid(bcData) : bcData; break; } + case 'cfg': { + const { cfgData, cfgToDOT, cfgToMermaid } = await import('./cfg.js'); + const cfgResult = cfgData(args.name, dbPath, { + file: args.file, + kind: args.kind, + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.query, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); + if (args.format === 'dot') { + result = { text: cfgToDOT(cfgResult) }; + } else if (args.format === 'mermaid') { + result = { text: cfgToMermaid(cfgResult) }; + } else { + result = cfgResult; + } + break; + } case 'dataflow': { const dfMode = args.mode || 'edges'; if (dfMode === 'impact') { diff --git a/tests/integration/cfg.test.js b/tests/integration/cfg.test.js new file mode 100644 index 00000000..3fdbeab0 --- /dev/null +++ b/tests/integration/cfg.test.js @@ -0,0 +1,199 @@ +/** + * Integration tests for CFG queries. + * + * Uses a hand-crafted in-memory DB with known CFG topology. 
+ */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { cfgData, cfgToDOT, cfgToMermaid } from '../../src/cfg.js'; +import { initSchema } from '../../src/db.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertBlock(db, fnNodeId, blockIndex, blockType, startLine, endLine, label) { + return db + .prepare( + 'INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) VALUES (?, ?, ?, ?, ?, ?)', + ) + .run(fnNodeId, blockIndex, blockType, startLine, endLine, label).lastInsertRowid; +} + +function insertEdge(db, fnNodeId, sourceBlockId, targetBlockId, kind) { + db.prepare( + 'INSERT INTO cfg_edges (function_node_id, source_block_id, target_block_id, kind) VALUES (?, ?, ?, ?)', + ).run(fnNodeId, sourceBlockId, targetBlockId, kind); +} + +// ─── Fixture DB ──────────────────────────────────────────────────────── + +let tmpDir, dbPath; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-cfg-')); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // Insert function nodes + const processId = insertNode(db, 'processItems', 'function', 'src/process.js', 10); + const helperId = insertNode(db, 'helper', 'function', 'src/helper.js', 5); + insertNode(db, 'testFn', 'function', 'tests/process.test.js', 1); + + // CFG for processItems: entry → body → condition → [true, false] → join → exit + const b0 = insertBlock(db, processId, 0, 'entry', null, null, null); + const b1 = 
insertBlock(db, processId, 1, 'exit', null, null, null); + const b2 = insertBlock(db, processId, 2, 'body', 10, 12, null); + const b3 = insertBlock(db, processId, 3, 'condition', 13, 13, 'if'); + const b4 = insertBlock(db, processId, 4, 'branch_true', 14, 15, 'then'); + const b5 = insertBlock(db, processId, 5, 'branch_false', 16, 17, 'else'); + const b6 = insertBlock(db, processId, 6, 'body', 18, 19, null); + + insertEdge(db, processId, b0, b2, 'fallthrough'); + insertEdge(db, processId, b2, b3, 'fallthrough'); + insertEdge(db, processId, b3, b4, 'branch_true'); + insertEdge(db, processId, b3, b5, 'branch_false'); + insertEdge(db, processId, b4, b6, 'fallthrough'); + insertEdge(db, processId, b5, b6, 'fallthrough'); + insertEdge(db, processId, b6, b1, 'fallthrough'); + + // CFG for helper: entry → body → exit (simple) + const h0 = insertBlock(db, helperId, 0, 'entry', null, null, null); + const h1 = insertBlock(db, helperId, 1, 'exit', null, null, null); + const h2 = insertBlock(db, helperId, 2, 'body', 5, 8, null); + + insertEdge(db, helperId, h0, h2, 'fallthrough'); + insertEdge(db, helperId, h2, h1, 'return'); + + db.close(); +}); + +afterAll(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Tests ───────────────────────────────────────────────────────────── + +describe('cfgData', () => { + test('returns CFG blocks and edges for a known function', () => { + const data = cfgData('processItems', dbPath); + expect(data.results.length).toBe(1); + + const r = data.results[0]; + expect(r.name).toBe('processItems'); + expect(r.file).toBe('src/process.js'); + expect(r.summary.blockCount).toBe(7); + expect(r.summary.edgeCount).toBe(7); + expect(r.blocks[0].type).toBe('entry'); + expect(r.blocks[1].type).toBe('exit'); + }); + + test('returns edges with correct kinds', () => { + const data = cfgData('processItems', dbPath); + const r = data.results[0]; + const edgeKinds = r.edges.map((e) => e.kind); + expect(edgeKinds).toContain('branch_true'); 
+ expect(edgeKinds).toContain('branch_false'); + expect(edgeKinds).toContain('fallthrough'); + }); + + test('simple function has return edge', () => { + const data = cfgData('helper', dbPath); + expect(data.results.length).toBe(1); + const r = data.results[0]; + expect(r.summary.blockCount).toBe(3); + expect(r.edges.some((e) => e.kind === 'return')).toBe(true); + }); + + test('returns empty results for non-existent function', () => { + const data = cfgData('nonexistent', dbPath); + expect(data.results.length).toBe(0); + }); + + test('noTests option excludes test file functions', () => { + const data = cfgData('testFn', dbPath, { noTests: true }); + expect(data.results.length).toBe(0); + }); + + test('file filter scopes results', () => { + const data = cfgData('processItems', dbPath, { file: 'helper.js' }); + expect(data.results.length).toBe(0); + + const data2 = cfgData('processItems', dbPath, { file: 'process.js' }); + expect(data2.results.length).toBe(1); + }); +}); + +describe('cfgToDOT', () => { + test('produces valid DOT output', () => { + const data = cfgData('processItems', dbPath); + const dot = cfgToDOT(data); + expect(dot).toContain('digraph'); + expect(dot).toContain('B0'); + expect(dot).toContain('->'); + expect(dot).toContain('branch_true'); + expect(dot).toContain('}'); + }); + + test('entry/exit nodes use ellipse shape', () => { + const data = cfgData('processItems', dbPath); + const dot = cfgToDOT(data); + expect(dot).toMatch(/B0.*shape=ellipse/); + expect(dot).toMatch(/B1.*shape=ellipse/); + }); +}); + +describe('cfgToMermaid', () => { + test('produces valid Mermaid output', () => { + const data = cfgData('processItems', dbPath); + const mermaid = cfgToMermaid(data); + expect(mermaid).toContain('graph TD'); + expect(mermaid).toContain('B0'); + expect(mermaid).toContain('-->'); + expect(mermaid).toContain('branch_true'); + }); + + test('entry/exit use stadium shape', () => { + const data = cfgData('processItems', dbPath); + const mermaid = 
cfgToMermaid(data); + // Stadium shapes use (["..."]) + expect(mermaid).toMatch(/B0\(\[/); + expect(mermaid).toMatch(/B1\(\[/); + }); +}); + +describe('warning when no CFG tables', () => { + test('returns warning when DB has no CFG data', () => { + // Create a bare DB without cfg tables + const bareDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-cfg-bare-')); + fs.mkdirSync(path.join(bareDir, '.codegraph')); + const bareDbPath = path.join(bareDir, '.codegraph', 'graph.db'); + + const db = new Database(bareDbPath); + db.pragma('journal_mode = WAL'); + // Only create nodes table, skip migrations + db.exec(` + CREATE TABLE schema_version (version INTEGER NOT NULL DEFAULT 0); + INSERT INTO schema_version VALUES (8); + CREATE TABLE nodes (id INTEGER PRIMARY KEY, name TEXT, kind TEXT, file TEXT, line INTEGER); + `); + db.close(); + + const data = cfgData('anything', bareDbPath); + expect(data.warning).toMatch(/No CFG data/); + + fs.rmSync(bareDir, { recursive: true, force: true }); + }); +}); diff --git a/tests/unit/cfg.test.js b/tests/unit/cfg.test.js new file mode 100644 index 00000000..99a52471 --- /dev/null +++ b/tests/unit/cfg.test.js @@ -0,0 +1,457 @@ +/** + * Unit tests for src/cfg.js — buildFunctionCFG + * + * Hand-crafted code snippets parsed with tree-sitter to verify + * correct CFG block/edge construction. 
+ */ + +import { beforeAll, describe, expect, it } from 'vitest'; +import { buildFunctionCFG } from '../../src/cfg.js'; +import { COMPLEXITY_RULES } from '../../src/complexity.js'; +import { createParsers } from '../../src/parser.js'; + +let jsParser; + +beforeAll(async () => { + const parsers = await createParsers(); + jsParser = parsers.get('javascript'); +}); + +function parse(code) { + const tree = jsParser.parse(code); + return tree.rootNode; +} + +function getFunctionNode(root) { + const rules = COMPLEXITY_RULES.get('javascript'); + function find(node) { + if (rules.functionNodes.has(node.type)) return node; + for (let i = 0; i < node.childCount; i++) { + const result = find(node.child(i)); + if (result) return result; + } + return null; + } + return find(root); +} + +function buildCFG(code) { + const root = parse(code); + const funcNode = getFunctionNode(root); + if (!funcNode) throw new Error('No function found in code snippet'); + return buildFunctionCFG(funcNode, 'javascript'); +} + +function hasEdge(cfg, sourceIndex, targetIndex, kind) { + return cfg.edges.some( + (e) => e.sourceIndex === sourceIndex && e.targetIndex === targetIndex && e.kind === kind, + ); +} + +function blockByType(cfg, type) { + return cfg.blocks.filter((b) => b.type === type); +} + +// ─── Tests ────────────────────────────────────────────────────────────── + +describe('buildFunctionCFG', () => { + describe('empty / simple functions', () => { + it('empty function: ENTRY → EXIT', () => { + const cfg = buildCFG('function empty() {}'); + expect(cfg.blocks.length).toBeGreaterThanOrEqual(2); + const entry = cfg.blocks.find((b) => b.type === 'entry'); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(entry).toBeDefined(); + expect(exit).toBeDefined(); + expect(hasEdge(cfg, entry.index, exit.index, 'fallthrough')).toBe(true); + }); + + it('simple function with no branching: ENTRY → body → EXIT', () => { + const cfg = buildCFG(` + function simple() { + const a = 1; + const b 
= 2; + return a + b; + } + `); + const entry = cfg.blocks.find((b) => b.type === 'entry'); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(entry).toBeDefined(); + expect(exit).toBeDefined(); + // Should have return edge to exit + expect(cfg.edges.some((e) => e.targetIndex === exit.index && e.kind === 'return')).toBe(true); + }); + + it('function with only statements (no return): body falls through to EXIT', () => { + const cfg = buildCFG(` + function noReturn() { + const x = 1; + console.log(x); + } + `); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(cfg.edges.some((e) => e.targetIndex === exit.index && e.kind === 'fallthrough')).toBe( + true, + ); + }); + }); + + describe('if statements', () => { + it('single if (no else): condition → [true branch, join]', () => { + const cfg = buildCFG(` + function singleIf(x) { + if (x > 0) { + console.log('positive'); + } + return x; + } + `); + const conditions = blockByType(cfg, 'condition'); + expect(conditions.length).toBe(1); + const trueBlocks = blockByType(cfg, 'branch_true'); + expect(trueBlocks.length).toBe(1); + // Condition has branch_true and branch_false edges + const condIdx = conditions[0].index; + expect(cfg.edges.some((e) => e.sourceIndex === condIdx && e.kind === 'branch_true')).toBe( + true, + ); + expect(cfg.edges.some((e) => e.sourceIndex === condIdx && e.kind === 'branch_false')).toBe( + true, + ); + }); + + it('if/else: condition → [true, false] → join', () => { + const cfg = buildCFG(` + function ifElse(x) { + if (x > 0) { + return 'positive'; + } else { + return 'non-positive'; + } + } + `); + const conditions = blockByType(cfg, 'condition'); + expect(conditions.length).toBe(1); + const trueBlocks = blockByType(cfg, 'branch_true'); + const falseBlocks = blockByType(cfg, 'branch_false'); + expect(trueBlocks.length).toBe(1); + expect(falseBlocks.length).toBe(1); + }); + + it('if/else-if/else chain', () => { + const cfg = buildCFG(` + function chain(x) { + if (x > 
10) { + return 'big'; + } else if (x > 0) { + return 'small'; + } else { + return 'negative'; + } + } + `); + // Should have at least 2 conditions (if + else-if) + const conditions = blockByType(cfg, 'condition'); + expect(conditions.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('loops', () => { + it('while loop: header → [body → loop_back, exit]', () => { + const cfg = buildCFG(` + function whileLoop(n) { + let i = 0; + while (i < n) { + i++; + } + return i; + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + const bodyBlocks = blockByType(cfg, 'loop_body'); + expect(bodyBlocks.length).toBe(1); + // Header has branch_true to body and loop_exit + const hIdx = headers[0].index; + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'branch_true')).toBe(true); + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'loop_exit')).toBe(true); + // Body has loop_back to header + expect(cfg.edges.some((e) => e.kind === 'loop_back' && e.targetIndex === hIdx)).toBe(true); + }); + + it('for loop: header → [body → loop_back, exit]', () => { + const cfg = buildCFG(` + function forLoop() { + for (let i = 0; i < 10; i++) { + console.log(i); + } + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + expect(headers[0].label).toBe('for'); + expect(cfg.edges.some((e) => e.kind === 'loop_back')).toBe(true); + expect(cfg.edges.some((e) => e.kind === 'loop_exit')).toBe(true); + }); + + it('for-in loop', () => { + const cfg = buildCFG(` + function forIn(obj) { + for (const key in obj) { + console.log(key); + } + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + expect(cfg.edges.some((e) => e.kind === 'loop_back')).toBe(true); + }); + + it('do-while loop: body → condition → [loop_back, exit]', () => { + const cfg = buildCFG(` + function doWhile() { + let i = 0; + do { + i++; + } while (i < 10); + return i; + } + `); + 
const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(1); + expect(headers[0].label).toBe('do-while'); + const bodyBlocks = blockByType(cfg, 'loop_body'); + expect(bodyBlocks.length).toBe(1); + // Condition has loop_back to body and loop_exit + const hIdx = headers[0].index; + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'loop_back')).toBe(true); + expect(cfg.edges.some((e) => e.sourceIndex === hIdx && e.kind === 'loop_exit')).toBe(true); + }); + }); + + describe('break and continue', () => { + it('break in loop: terminates → loop exit', () => { + const cfg = buildCFG(` + function withBreak() { + for (let i = 0; i < 10; i++) { + if (i === 5) break; + console.log(i); + } + } + `); + expect(cfg.edges.some((e) => e.kind === 'break')).toBe(true); + }); + + it('continue in loop: terminates → loop header', () => { + const cfg = buildCFG(` + function withContinue() { + for (let i = 0; i < 10; i++) { + if (i % 2 === 0) continue; + console.log(i); + } + } + `); + expect(cfg.edges.some((e) => e.kind === 'continue')).toBe(true); + }); + }); + + describe('switch statement', () => { + it('switch/case: header → each case → join', () => { + const cfg = buildCFG(` + function switchCase(x) { + switch (x) { + case 1: + return 'one'; + case 2: + return 'two'; + default: + return 'other'; + } + } + `); + const conditions = cfg.blocks.filter((b) => b.type === 'condition' && b.label === 'switch'); + expect(conditions.length).toBe(1); + const caseBlocks = blockByType(cfg, 'case'); + expect(caseBlocks.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('try/catch/finally', () => { + it('try/catch: try body → [catch via exception, join]', () => { + const cfg = buildCFG(` + function tryCatch() { + try { + riskyCall(); + } catch (e) { + console.error(e); + } + } + `); + const catchBlocks = blockByType(cfg, 'catch'); + expect(catchBlocks.length).toBe(1); + expect(cfg.edges.some((e) => e.kind === 'exception')).toBe(true); + }); + + 
it('try/catch/finally: try → [catch, finally] → exit', () => { + const cfg = buildCFG(` + function tryCatchFinally() { + try { + riskyCall(); + } catch (e) { + console.error(e); + } finally { + cleanup(); + } + } + `); + const catchBlocks = blockByType(cfg, 'catch'); + const finallyBlocks = blockByType(cfg, 'finally'); + expect(catchBlocks.length).toBe(1); + expect(finallyBlocks.length).toBe(1); + }); + + it('try/finally (no catch)', () => { + const cfg = buildCFG(` + function tryFinally() { + try { + riskyCall(); + } finally { + cleanup(); + } + } + `); + const finallyBlocks = blockByType(cfg, 'finally'); + expect(finallyBlocks.length).toBe(1); + }); + }); + + describe('early return and throw', () => { + it('early return terminates path → EXIT', () => { + const cfg = buildCFG(` + function earlyReturn(x) { + if (x < 0) { + return -1; + } + return x * 2; + } + `); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + const returnEdges = cfg.edges.filter( + (e) => e.targetIndex === exit.index && e.kind === 'return', + ); + // Two returns: the early return and the final return + expect(returnEdges.length).toBe(2); + }); + + it('throw terminates path → EXIT via exception', () => { + const cfg = buildCFG(` + function throwError(x) { + if (x < 0) { + throw new Error('negative'); + } + return x; + } + `); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(cfg.edges.some((e) => e.targetIndex === exit.index && e.kind === 'exception')).toBe( + true, + ); + }); + }); + + describe('nested structures', () => { + it('nested loops with break resolves to correct enclosing loop', () => { + const cfg = buildCFG(` + function nested() { + for (let i = 0; i < 10; i++) { + for (let j = 0; j < 10; j++) { + if (j === 5) break; + } + } + } + `); + const headers = blockByType(cfg, 'loop_header'); + expect(headers.length).toBe(2); + expect(cfg.edges.some((e) => e.kind === 'break')).toBe(true); + }); + + it('if inside loop', () => { + const cfg = buildCFG(` + function 
ifInLoop() { + for (let i = 0; i < 10; i++) { + if (i > 5) { + console.log('big'); + } else { + console.log('small'); + } + } + } + `); + expect(blockByType(cfg, 'loop_header').length).toBe(1); + expect(blockByType(cfg, 'condition').length).toBe(1); + expect(blockByType(cfg, 'branch_true').length).toBe(1); + expect(blockByType(cfg, 'branch_false').length).toBe(1); + }); + }); + + describe('arrow functions and methods', () => { + it('arrow function with block body', () => { + const cfg = buildCFG(` + const fn = (x) => { + if (x) return 1; + return 0; + }; + `); + expect(cfg.blocks.find((b) => b.type === 'entry')).toBeDefined(); + expect(cfg.blocks.find((b) => b.type === 'exit')).toBeDefined(); + }); + + it('arrow function with expression body: ENTRY → EXIT', () => { + const cfg = buildCFG(` + const fn = (x) => x + 1; + `); + const entry = cfg.blocks.find((b) => b.type === 'entry'); + const exit = cfg.blocks.find((b) => b.type === 'exit'); + expect(entry).toBeDefined(); + expect(exit).toBeDefined(); + // Expression body: entry → body → exit + expect(cfg.blocks.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('block and edge counts', () => { + it('complex function has reasonable block/edge counts', () => { + const cfg = buildCFG(` + function complex(arr) { + if (!arr) return null; + const result = []; + for (const item of arr) { + if (item.skip) continue; + try { + result.push(transform(item)); + } catch (e) { + console.error(e); + } + } + return result; + } + `); + // Should have meaningful structure + expect(cfg.blocks.length).toBeGreaterThan(5); + expect(cfg.edges.length).toBeGreaterThan(5); + // Must have entry and exit + expect(cfg.blocks.find((b) => b.type === 'entry')).toBeDefined(); + expect(cfg.blocks.find((b) => b.type === 'exit')).toBeDefined(); + }); + }); + + describe('unsupported language', () => { + it('returns empty CFG for unsupported language', () => { + const root = parse('function foo() { return 1; }'); + const funcNode = 
getFunctionNode(root); + const cfg = buildFunctionCFG(funcNode, 'haskell'); + expect(cfg.blocks).toEqual([]); + expect(cfg.edges).toEqual([]); + }); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 3b38f590..7d14bffc 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -36,6 +36,7 @@ const ALL_TOOL_NAMES = [ 'batch_query', 'triage', 'branch_compare', + 'cfg', 'dataflow', 'check', 'list_repos', From cf5aaad5be723a6f1c837bb7745e46fb20cb3f92 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:16:16 -0700 Subject: [PATCH 26/30] feat: add stored queryable AST nodes (calls, new, string, regex, throw, await) Persist selected AST nodes in a dedicated ast_nodes SQLite table during build, queryable via CLI (codegraph ast), MCP (ast_query), and programmatic API. - DB migration v13: ast_nodes table with indexes on kind, name, file, parent, and (kind,name) - New src/ast.js module: buildAstNodes (extraction), astQueryData/ astQuery (query), AST_NODE_KINDS constant - Builder integration: full-rebuild deletion, incremental cleanup, always-on post-parse extraction (before complexity to preserve _tree) - CLI: codegraph ast [pattern] with -k, -f, -T, -j, --ndjson, --limit, --offset options - MCP: ast_query tool with pattern, kind, file, no_tests, pagination - JS/TS/TSX Phase 1: full AST walk for new/throw/await/string/regex; all languages get call nodes from symbols.calls - Pattern matching uses SQL GLOB with auto-wrapping for substring search - Parent node resolution via narrowest enclosing definition Impact: 12 functions changed, 26 affected --- src/ast.js | 392 ++++++++++++++++++++++++++++++++ src/builder.js | 21 +- src/cli.js | 29 +++ src/db.js | 21 ++ src/index.js | 2 + src/mcp.js | 34 +++ src/paginate.js | 1 + tests/integration/ast.test.js | 234 +++++++++++++++++++ tests/parsers/ast-nodes.test.js | 185 +++++++++++++++ tests/unit/mcp.test.js | 1 + 10 files changed, 919 
insertions(+), 1 deletion(-) create mode 100644 src/ast.js create mode 100644 tests/integration/ast.test.js create mode 100644 tests/parsers/ast-nodes.test.js diff --git a/src/ast.js b/src/ast.js new file mode 100644 index 00000000..8c349667 --- /dev/null +++ b/src/ast.js @@ -0,0 +1,392 @@ +/** + * Stored queryable AST nodes — build-time extraction + query functions. + * + * Persists selected AST nodes (calls, new, string, regex, throw, await) in the + * `ast_nodes` table during build. Queryable via CLI (`codegraph ast`), MCP + * (`ast_query`), and programmatic API. + */ + +import path from 'node:path'; +import { openReadonlyOrFail } from './db.js'; +import { debug } from './logger.js'; +import { paginateResult, printNdjson } from './paginate.js'; +import { LANGUAGE_REGISTRY } from './parser.js'; + +// ─── Constants ──────────────────────────────────────────────────────── + +export const AST_NODE_KINDS = ['call', 'new', 'string', 'regex', 'throw', 'await']; + +const KIND_ICONS = { + call: '\u0192', // ƒ + new: '\u2295', // ⊕ + string: '"', + regex: '/', + throw: '\u2191', // ↑ + await: '\u22B3', // ⊳ +}; + +/** Max length for the `text` column. */ +const TEXT_MAX = 200; + +/** tree-sitter node types that map to our AST node kinds (JS/TS/TSX). */ +const JS_TS_AST_TYPES = { + new_expression: 'new', + throw_statement: 'throw', + await_expression: 'await', + string: 'string', + template_string: 'string', + regex: 'regex', +}; + +/** Extensions that support full AST walk (new/throw/await/string/regex). */ +const WALK_EXTENSIONS = new Set(); +for (const lang of Object.values(LANGUAGE_REGISTRY)) { + if (['javascript', 'typescript', 'tsx'].includes(lang.id)) { + for (const ext of lang.extensions) WALK_EXTENSIONS.add(ext); + } +} + +// ─── Helpers ────────────────────────────────────────────────────────── + +function truncate(s, max = TEXT_MAX) { + if (!s) return null; + return s.length <= max ? 
s : `${s.slice(0, max - 1)}\u2026`; +} + +/** + * Extract the constructor name from a `new_expression` node. + * Handles `new Foo()`, `new a.Foo()`, `new Foo.Bar()`. + */ +function extractNewName(node) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child.type === 'identifier') return child.text; + if (child.type === 'member_expression') { + // e.g. new a.Foo() → "a.Foo" + return child.text; + } + } + return node.text?.split('(')[0]?.replace('new ', '').trim() || '?'; +} + +/** + * Extract the expression text from a throw/await node. + */ +function extractExpressionText(node) { + // Skip keyword child, take the rest + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child.type !== 'throw' && child.type !== 'await') { + return truncate(child.text); + } + } + return truncate(node.text); +} + +/** + * Extract a meaningful name from throw/await nodes. + * For throw: the constructor or expression type. + * For await: the called function name. + */ +function extractName(kind, node) { + if (kind === 'throw') { + // throw new Error(...) → "Error"; throw x → "x" + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child.type === 'new_expression') return extractNewName(child); + if (child.type === 'call_expression') { + const fn = child.childForFieldName('function'); + return fn ? fn.text : child.text?.split('(')[0] || '?'; + } + if (child.type === 'identifier') return child.text; + } + return truncate(node.text); + } + if (kind === 'await') { + // await fetch(...) → "fetch"; await this.foo() → "this.foo" + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child.type === 'call_expression') { + const fn = child.childForFieldName('function'); + return fn ? 
fn.text : child.text?.split('(')[0] || '?'; + } + if (child.type === 'identifier' || child.type === 'member_expression') { + return child.text; + } + } + return truncate(node.text); + } + return truncate(node.text); +} + +/** + * Find the narrowest enclosing definition for a given line. + */ +function findParentDef(defs, line) { + let best = null; + for (const def of defs) { + if (def.line <= line && (def.endLine == null || def.endLine >= line)) { + if (!best || def.endLine - def.line < best.endLine - best.line) { + best = def; + } + } + } + return best; +} + +// ─── Build ──────────────────────────────────────────────────────────── + +/** + * Extract AST nodes from parsed files and persist to the ast_nodes table. + * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [_engineOpts] - engine options (unused) + */ +export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { + // Ensure table exists (migration may not have run on older DBs) + let insertStmt; + try { + insertStmt = db.prepare( + 'INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) VALUES (?, ?, ?, ?, ?, ?, ?)', + ); + } catch { + debug('ast_nodes table not found — skipping AST extraction'); + return; + } + + const getNodeId = db.prepare( + 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?', + ); + + const tx = db.transaction((rows) => { + for (const r of rows) { + insertStmt.run(r.file, r.line, r.kind, r.name, r.text, r.receiver, r.parentNodeId); + } + }); + + let totalInserted = 0; + + for (const [relPath, symbols] of fileSymbols) { + const rows = []; + const defs = symbols.definitions || []; + + // 1. 
Call nodes from symbols.calls (all languages) + if (symbols.calls) { + for (const call of symbols.calls) { + const parentDef = findParentDef(defs, call.line); + let parentNodeId = null; + if (parentDef) { + const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line); + if (row) parentNodeId = row.id; + } + rows.push({ + file: relPath, + line: call.line, + kind: 'call', + name: call.name, + text: call.dynamic ? `[dynamic] ${call.name}` : null, + receiver: call.receiver || null, + parentNodeId, + }); + } + } + + // 2. AST walk for JS/TS/TSX — extract new, throw, await, string, regex + const ext = path.extname(relPath).toLowerCase(); + if (WALK_EXTENSIONS.has(ext) && symbols._tree) { + const astRows = []; + walkAst(symbols._tree.rootNode, defs, relPath, astRows, getNodeId); + rows.push(...astRows); + } + + if (rows.length > 0) { + tx(rows); + totalInserted += rows.length; + } + } + + debug(`AST extraction: ${totalInserted} nodes stored`); +} + +/** + * Walk a tree-sitter AST and collect new/throw/await/string/regex nodes. 
+ */ +function walkAst(node, defs, relPath, rows, getNodeId) { + const kind = JS_TS_AST_TYPES[node.type]; + if (kind) { + // tree-sitter lines are 0-indexed, our DB uses 1-indexed + const line = node.startPosition.row + 1; + + let name; + let text = null; + + if (kind === 'new') { + name = extractNewName(node); + text = truncate(node.text); + } else if (kind === 'throw') { + name = extractName('throw', node); + text = extractExpressionText(node); + } else if (kind === 'await') { + name = extractName('await', node); + text = extractExpressionText(node); + } else if (kind === 'string') { + // Skip trivial strings (length < 2 after removing quotes) + const content = node.text?.replace(/^['"`]|['"`]$/g, '') || ''; + if (content.length < 2) { + // Still recurse children + for (let i = 0; i < node.childCount; i++) { + walkAst(node.child(i), defs, relPath, rows, getNodeId); + } + return; + } + name = truncate(content, 100); + text = truncate(node.text); + } else if (kind === 'regex') { + name = node.text || '?'; + text = truncate(node.text); + } + + const parentDef = findParentDef(defs, line); + let parentNodeId = null; + if (parentDef) { + const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line); + if (row) parentNodeId = row.id; + } + + rows.push({ + file: relPath, + line, + kind, + name, + text, + receiver: null, + parentNodeId, + }); + + // Don't recurse into the children of matched nodes for new/throw/await + // (we already extracted what we need, and nested strings inside them are noise) + if (kind !== 'string' && kind !== 'regex') return; + } + + for (let i = 0; i < node.childCount; i++) { + walkAst(node.child(i), defs, relPath, rows, getNodeId); + } +} + +// ─── Query ──────────────────────────────────────────────────────────── + +/** + * Query AST nodes — data-returning function. 
+ * + * @param {string} [pattern] - GLOB pattern for node name (auto-wrapped in *..*) + * @param {string} [customDbPath] - path to graph.db + * @param {object} [opts] + * @returns {{ pattern, kind, count, results, _pagination? }} + */ +export function astQueryData(pattern, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const { kind, file, noTests, limit, offset } = opts; + + let where = 'WHERE 1=1'; + const params = []; + + // Pattern matching + if (pattern && pattern !== '*') { + // If user already uses wildcards, use as-is; otherwise wrap in *..* for substring + const globPattern = pattern.includes('*') ? pattern : `*${pattern}*`; + where += ' AND a.name GLOB ?'; + params.push(globPattern); + } + + if (kind) { + where += ' AND a.kind = ?'; + params.push(kind); + } + + if (file) { + where += ' AND a.file LIKE ?'; + params.push(`%${file}%`); + } + + if (noTests) { + where += ` AND a.file NOT LIKE '%.test.%' + AND a.file NOT LIKE '%.spec.%' + AND a.file NOT LIKE '%__test__%' + AND a.file NOT LIKE '%__tests__%' + AND a.file NOT LIKE '%.stories.%'`; + } + + const sql = ` + SELECT a.kind, a.name, a.file, a.line, a.text, a.receiver, a.parent_node_id, + p.name AS parent_name, p.kind AS parent_kind, p.file AS parent_file + FROM ast_nodes a + LEFT JOIN nodes p ON a.parent_node_id = p.id + ${where} + ORDER BY a.file, a.line + `; + + const rows = db.prepare(sql).all(...params); + db.close(); + + const results = rows.map((r) => ({ + kind: r.kind, + name: r.name, + file: r.file, + line: r.line, + text: r.text, + receiver: r.receiver, + parent: r.parent_node_id + ? { name: r.parent_name, kind: r.parent_kind, file: r.parent_file } + : null, + })); + + const data = { + pattern: pattern || '*', + kind: kind || null, + count: results.length, + results, + }; + + return paginateResult(data, 'results', { limit, offset }); +} + +/** + * Query AST nodes — display function (human/json/ndjson output). 
+ */ +export function astQuery(pattern, customDbPath, opts = {}) { + const data = astQueryData(pattern, customDbPath, opts); + + if (opts.ndjson) { + printNdjson(data, 'results'); + return; + } + + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + // Human-readable output + if (data.results.length === 0) { + console.log(`No AST nodes found${pattern ? ` matching "${pattern}"` : ''}.`); + return; + } + + const kindLabel = opts.kind ? ` (kind: ${opts.kind})` : ''; + console.log(`\n${data.count} AST nodes${pattern ? ` matching "${pattern}"` : ''}${kindLabel}:\n`); + + for (const r of data.results) { + const icon = KIND_ICONS[r.kind] || '?'; + const parentInfo = r.parent ? ` (in ${r.parent.name})` : ''; + console.log(` ${icon} ${r.name} -- ${r.file}:${r.line}${parentInfo}`); + } + + if (data._pagination?.hasMore) { + console.log( + `\n ... ${data._pagination.total - data._pagination.offset - data._pagination.returned} more (use --offset ${data._pagination.offset + data._pagination.limit})`, + ); + } + console.log(); +} diff --git a/src/builder.js b/src/builder.js index 6ceec39e..322ac552 100644 --- a/src/builder.js +++ b/src/builder.js @@ -435,7 +435,7 @@ export async function buildGraph(rootDir, opts = {}) { if (isFullBuild) { const deletions = - 'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; + 'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; db.exec( hasEmbeddings ? 
`${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;` @@ -513,12 +513,19 @@ export async function buildGraph(rootDir, opts = {}) { } catch { deleteDataflowForFile = null; } + let deleteAstNodesForFile; + try { + deleteAstNodesForFile = db.prepare('DELETE FROM ast_nodes WHERE file = ?'); + } catch { + deleteAstNodesForFile = null; + } for (const relPath of removed) { deleteEmbeddingsForFile?.run(relPath); deleteEdgesForFile.run({ f: relPath }); deleteMetricsForFile.run(relPath); deleteComplexityForFile?.run(relPath); deleteDataflowForFile?.run(relPath, relPath); + deleteAstNodesForFile?.run(relPath); deleteNodesForFile.run(relPath); } for (const item of parseChanges) { @@ -528,6 +535,7 @@ export async function buildGraph(rootDir, opts = {}) { deleteMetricsForFile.run(relPath); deleteComplexityForFile?.run(relPath); deleteDataflowForFile?.run(relPath, relPath); + deleteAstNodesForFile?.run(relPath); deleteNodesForFile.run(relPath); } @@ -1129,6 +1137,17 @@ export async function buildGraph(rootDir, opts = {}) { } _t.rolesMs = performance.now() - _t.roles0; + // Always-on AST node extraction (calls, new, string, regex, throw, await) + // Must run before complexity which releases _tree references + _t.ast0 = performance.now(); + try { + const { buildAstNodes } = await import('./ast.js'); + await buildAstNodes(db, allSymbols, rootDir, engineOpts); + } catch (err) { + debug(`AST node extraction failed: ${err.message}`); + } + _t.astMs = performance.now() - _t.ast0; + // Compute per-function complexity metrics (cognitive, cyclomatic, nesting) _t.complexity0 = performance.now(); try { diff --git a/src/cli.js b/src/cli.js index 737ce4ae..882c1c2d 100644 --- a/src/cli.js +++ b/src/cli.js @@ -1071,6 +1071,35 @@ program }); }); +program + .command('ast [pattern]') + .description('Search stored AST nodes (calls, new, string, regex, throw, await) by pattern') + .option('-d, --db ', 'Path to graph.db') + .option('-k, --kind ', 
'Filter by AST node kind (call, new, string, regex, throw, await)') + .option('-f, --file ', 'Scope to file (partial match)') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .option('--ndjson', 'Newline-delimited JSON output') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .action(async (pattern, opts) => { + const { AST_NODE_KINDS, astQuery } = await import('./ast.js'); + if (opts.kind && !AST_NODE_KINDS.includes(opts.kind)) { + console.error(`Invalid AST kind "${opts.kind}". Valid: ${AST_NODE_KINDS.join(', ')}`); + process.exit(1); + } + astQuery(pattern, opts.db, { + kind: opts.kind, + file: opts.file, + noTests: resolveNoTests(opts), + json: opts.json, + ndjson: opts.ndjson, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + }); + }); + program .command('manifesto') .description('Evaluate manifesto rules (pass/fail verdicts for code health)') diff --git a/src/db.js b/src/db.js index ff31fd39..3e17327e 100644 --- a/src/db.js +++ b/src/db.js @@ -204,6 +204,27 @@ export const MIGRATIONS = [ CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); `, }, + { + version: 13, + up: ` + CREATE TABLE IF NOT EXISTS ast_nodes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file TEXT NOT NULL, + line INTEGER NOT NULL, + kind TEXT NOT NULL, + name TEXT NOT NULL, + text TEXT, + receiver TEXT, + parent_node_id INTEGER, + FOREIGN KEY(parent_node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_ast_kind ON ast_nodes(kind); + CREATE INDEX IF NOT EXISTS idx_ast_name ON ast_nodes(name); + CREATE INDEX IF NOT EXISTS idx_ast_file ON ast_nodes(file); + CREATE INDEX IF NOT EXISTS idx_ast_parent ON ast_nodes(parent_node_id); + CREATE INDEX IF NOT EXISTS idx_ast_kind_name ON 
ast_nodes(kind, name); + `, + }, ]; export function getBuildMeta(db, key) { diff --git a/src/index.js b/src/index.js index 8d44699a..f4921d8f 100644 --- a/src/index.js +++ b/src/index.js @@ -5,6 +5,8 @@ * import { buildGraph, queryNameData, findCycles, exportDOT } from 'codegraph'; */ +// AST node queries +export { AST_NODE_KINDS, astQuery, astQueryData } from './ast.js'; // Audit (composite report) export { audit, auditData } from './audit.js'; // Batch querying diff --git a/src/mcp.js b/src/mcp.js index 81cb1b16..38cdbfec 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -6,6 +6,7 @@ */ import { createRequire } from 'node:module'; +import { AST_NODE_KINDS } from './ast.js'; import { findCycles } from './cycles.js'; import { findDbPath } from './db.js'; import { MCP_DEFAULTS, MCP_MAX_LIMIT } from './paginate.js'; @@ -703,6 +704,28 @@ const BASE_TOOLS = [ }, }, }, + { + name: 'ast_query', + description: + 'Search stored AST nodes (calls, literals, new, throw, await) by pattern. Requires a prior build.', + inputSchema: { + type: 'object', + properties: { + pattern: { + type: 'string', + description: 'GLOB pattern for node name (auto-wrapped in *..* for substring match)', + }, + kind: { + type: 'string', + enum: AST_NODE_KINDS, + description: 'Filter by AST node kind', + }, + file: { type: 'string', description: 'Scope to file (partial match)' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + }, + }, ]; const LIST_REPOS_TOOL = { @@ -1268,6 +1291,17 @@ export async function startMCPServer(customDbPath, options = {}) { }); break; } + case 'ast_query': { + const { astQueryData } = await import('./ast.js'); + result = astQueryData(args.pattern, dbPath, { + kind: args.kind, + file: args.file, + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.ast_query, MCP_MAX_LIMIT), + offset: args.offset ?? 
0, + }); + break; + } case 'list_repos': { const { listRepos, pruneRegistry } = await import('./registry.js'); pruneRegistry(); diff --git a/src/paginate.js b/src/paginate.js index 8802b65a..5b768993 100644 --- a/src/paginate.js +++ b/src/paginate.js @@ -29,6 +29,7 @@ export const MCP_DEFAULTS = { communities: 20, structure: 30, triage: 20, + ast_query: 50, }; /** Hard cap to prevent abuse via MCP. */ diff --git a/tests/integration/ast.test.js b/tests/integration/ast.test.js new file mode 100644 index 00000000..60cee696 --- /dev/null +++ b/tests/integration/ast.test.js @@ -0,0 +1,234 @@ +/** + * Integration tests for AST node queries. + * + * Uses a hand-crafted in-memory DB with known AST nodes. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { AST_NODE_KINDS, astQueryData } from '../../src/ast.js'; +import { initSchema } from '../../src/db.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertAstNode(db, file, line, kind, name, text, receiver, parentNodeId) { + return db + .prepare( + 'INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) VALUES (?, ?, ?, ?, ?, ?, ?)', + ) + .run(file, line, kind, name, text, receiver, parentNodeId).lastInsertRowid; +} + +// ─── Fixture DB ──────────────────────────────────────────────────────── + +let tmpDir, dbPath; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-ast-')); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // 
Insert function nodes + const processId = insertNode(db, 'processInput', 'function', 'src/utils.js', 10); + const loaderId = insertNode(db, 'loadModule', 'function', 'src/loader.js', 5); + const handlerId = insertNode(db, 'handleRequest', 'function', 'src/handler.js', 20); + const defaultsId = insertNode(db, 'defaults', 'function', 'src/config.js', 1); + const testFnId = insertNode(db, 'testUtils', 'function', 'tests/utils.test.js', 1); + + // Calls + insertAstNode(db, 'src/utils.js', 42, 'call', 'eval', null, null, processId); + insertAstNode(db, 'src/loader.js', 8, 'call', 'require', null, null, loaderId); + insertAstNode(db, 'src/handler.js', 25, 'call', 'console.log', null, 'console', handlerId); + insertAstNode(db, 'src/handler.js', 30, 'call', 'console.error', null, 'console', handlerId); + insertAstNode(db, 'src/utils.js', 50, 'call', 'fetch', null, null, processId); + + // new expressions + insertAstNode(db, 'src/handler.js', 30, 'new', 'Error', 'new Error("bad")', null, handlerId); + insertAstNode(db, 'src/loader.js', 12, 'new', 'Map', 'new Map()', null, loaderId); + + // strings + insertAstNode( + db, + 'src/config.js', + 18, + 'string', + 'password123', + '"password123"', + null, + defaultsId, + ); + insertAstNode( + db, + 'src/config.js', + 19, + 'string', + 'localhost:3000', + '"localhost:3000"', + null, + defaultsId, + ); + + // throw + insertAstNode( + db, + 'src/handler.js', + 35, + 'throw', + 'Error', + 'new Error("not found")', + null, + handlerId, + ); + + // await + insertAstNode(db, 'src/utils.js', 55, 'await', 'fetch', 'fetch(url)', null, processId); + + // regex + insertAstNode(db, 'src/utils.js', 60, 'regex', '/\\d+/g', '/\\d+/g', null, processId); + + // Test file nodes (should be excluded by noTests) + insertAstNode(db, 'tests/utils.test.js', 5, 'call', 'eval', null, null, testFnId); + + db.close(); +}); + +afterAll(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Tests 
───────────────────────────────────────────────────────────── + +describe('AST_NODE_KINDS', () => { + test('exports all expected kinds', () => { + expect(AST_NODE_KINDS).toEqual(['call', 'new', 'string', 'regex', 'throw', 'await']); + }); +}); + +describe('astQueryData', () => { + test('returns all nodes when no pattern given', () => { + const data = astQueryData(undefined, dbPath); + expect(data.count).toBeGreaterThan(0); + expect(data.pattern).toBe('*'); + }); + + test('substring pattern match', () => { + const data = astQueryData('eval', dbPath); + // Should match 'eval' in src/utils.js and tests/utils.test.js + expect(data.results.length).toBeGreaterThanOrEqual(2); + expect(data.results.every((r) => r.name.includes('eval'))).toBe(true); + }); + + test('glob wildcard pattern', () => { + const data = astQueryData('console.*', dbPath); + expect(data.results.length).toBe(2); + expect(data.results.every((r) => r.name.startsWith('console.'))).toBe(true); + }); + + test('exact pattern with star', () => { + const data = astQueryData('*', dbPath); + expect(data.count).toBeGreaterThan(0); + }); + + test('kind filter — call', () => { + const data = astQueryData(undefined, dbPath, { kind: 'call' }); + expect(data.results.every((r) => r.kind === 'call')).toBe(true); + expect(data.results.length).toBeGreaterThanOrEqual(5); + }); + + test('kind filter — string', () => { + const data = astQueryData(undefined, dbPath, { kind: 'string' }); + expect(data.results.every((r) => r.kind === 'string')).toBe(true); + expect(data.results.length).toBe(2); + }); + + test('kind filter — new', () => { + const data = astQueryData(undefined, dbPath, { kind: 'new' }); + expect(data.results.every((r) => r.kind === 'new')).toBe(true); + expect(data.results.length).toBe(2); + }); + + test('kind filter — throw', () => { + const data = astQueryData(undefined, dbPath, { kind: 'throw' }); + expect(data.results.every((r) => r.kind === 'throw')).toBe(true); + expect(data.results.length).toBe(1); + }); + 
+ test('kind filter — await', () => { + const data = astQueryData(undefined, dbPath, { kind: 'await' }); + expect(data.results.every((r) => r.kind === 'await')).toBe(true); + expect(data.results.length).toBe(1); + }); + + test('kind filter — regex', () => { + const data = astQueryData(undefined, dbPath, { kind: 'regex' }); + expect(data.results.every((r) => r.kind === 'regex')).toBe(true); + expect(data.results.length).toBe(1); + }); + + test('file filter', () => { + const data = astQueryData(undefined, dbPath, { file: 'config' }); + expect(data.results.every((r) => r.file.includes('config'))).toBe(true); + expect(data.results.length).toBe(2); + }); + + test('noTests excludes test files', () => { + const withTests = astQueryData('eval', dbPath); + const noTests = astQueryData('eval', dbPath, { noTests: true }); + expect(noTests.results.length).toBeLessThan(withTests.results.length); + expect(noTests.results.every((r) => !r.file.includes('.test.'))).toBe(true); + }); + + test('pagination — limit', () => { + const data = astQueryData(undefined, dbPath, { limit: 3 }); + expect(data.results.length).toBe(3); + expect(data._pagination).toBeDefined(); + expect(data._pagination.total).toBeGreaterThan(3); + expect(data._pagination.hasMore).toBe(true); + }); + + test('pagination — offset', () => { + const page1 = astQueryData(undefined, dbPath, { limit: 3, offset: 0 }); + const page2 = astQueryData(undefined, dbPath, { limit: 3, offset: 3 }); + expect(page1.results[0].name).not.toBe(page2.results[0].name); + }); + + test('parent node resolution', () => { + const data = astQueryData('eval', dbPath, { noTests: true }); + expect(data.results.length).toBe(1); + const r = data.results[0]; + expect(r.parent).toBeDefined(); + expect(r.parent.name).toBe('processInput'); + expect(r.parent.kind).toBe('function'); + }); + + test('receiver field for calls', () => { + const data = astQueryData('console.log', dbPath); + expect(data.results.length).toBe(1); + 
expect(data.results[0].receiver).toBe('console'); + }); + + test('empty results for non-matching pattern', () => { + const data = astQueryData('nonexistent_xyz', dbPath); + expect(data.results.length).toBe(0); + expect(data.count).toBe(0); + }); + + test('combined kind + file filter', () => { + const data = astQueryData(undefined, dbPath, { kind: 'call', file: 'handler' }); + expect(data.results.every((r) => r.kind === 'call' && r.file.includes('handler'))).toBe(true); + expect(data.results.length).toBe(2); + }); +}); diff --git a/tests/parsers/ast-nodes.test.js b/tests/parsers/ast-nodes.test.js new file mode 100644 index 00000000..d9ca53f7 --- /dev/null +++ b/tests/parsers/ast-nodes.test.js @@ -0,0 +1,185 @@ +/** + * Tests for AST node extraction from parsed source code. + * + * Parses JS fixtures through tree-sitter, runs AST extraction via buildAstNodes, + * and verifies the correct nodes are captured in the DB. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { buildAstNodes } from '../../src/ast.js'; +import { initSchema } from '../../src/db.js'; +import { parseFilesAuto } from '../../src/parser.js'; + +// ─── Fixture ────────────────────────────────────────────────────────── + +const FIXTURE_CODE = ` +export function processData(input) { + const result = new Map(); + const pattern = /^[a-z]+$/i; + const greeting = "hello world"; + + if (typeof input === 'string') { + eval(input); + } + + try { + const data = await fetch('/api/data'); + result.set('data', data); + } catch (err) { + throw new Error('fetch failed'); + } + + console.log(result); + return result; +} + +function helper() { + const re = /\\d{3}-\\d{4}/; + const msg = \`template string value\`; + return msg; +} +`; + +// ─── Setup ──────────────────────────────────────────────────────────── + +let tmpDir, dbPath, db; + +beforeAll(async () => 
{ + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-ast-extract-')); + const srcDir = path.join(tmpDir, 'src'); + fs.mkdirSync(srcDir, { recursive: true }); + fs.mkdirSync(path.join(tmpDir, '.codegraph')); + + // Write fixture file + const fixturePath = path.join(srcDir, 'fixture.js'); + fs.writeFileSync(fixturePath, FIXTURE_CODE); + + // Parse fixture using parseFilesAuto (preserves _tree for AST walk) + const allSymbols = await parseFilesAuto([fixturePath], tmpDir, { engine: 'wasm' }); + const symbols = allSymbols.get('src/fixture.js'); + if (!symbols) throw new Error('Failed to parse fixture file'); + + // Create DB and schema + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // Insert nodes for definitions so parent resolution works + const insertNode = db.prepare( + 'INSERT INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + ); + for (const def of symbols.definitions) { + insertNode.run(def.name, def.kind, 'src/fixture.js', def.line, def.endLine); + } + + // Build AST nodes + await buildAstNodes(db, allSymbols, tmpDir); +}); + +afterAll(() => { + if (db) db.close(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── Helpers ────────────────────────────────────────────────────────── + +function queryAstNodes(kind) { + return db.prepare('SELECT * FROM ast_nodes WHERE kind = ? 
ORDER BY line').all(kind); +} + +function queryAllAstNodes() { + return db.prepare('SELECT * FROM ast_nodes ORDER BY line').all(); +} + +// ─── Tests ──────────────────────────────────────────────────────────── + +describe('buildAstNodes — JS extraction', () => { + test('captures call nodes from symbols.calls', () => { + const calls = queryAstNodes('call'); + expect(calls.length).toBeGreaterThanOrEqual(1); + const callNames = calls.map((c) => c.name); + // eval, fetch, console.log should be among calls (depending on parser extraction) + expect(callNames.some((n) => n === 'eval' || n === 'fetch' || n === 'console.log')).toBe(true); + }); + + test('captures new_expression as kind:new', () => { + const nodes = queryAstNodes('new'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + const names = nodes.map((n) => n.name); + expect(names).toContain('Map'); + // Note: `throw new Error(...)` is captured as kind:throw, not kind:new + // The new_expression inside throw is not separately emitted + }); + + test('captures string literals as kind:string', () => { + const nodes = queryAstNodes('string'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + const names = nodes.map((n) => n.name); + // "hello world" should be captured, short strings like 'string' might vary + expect(names.some((n) => n.includes('hello world'))).toBe(true); + }); + + test('skips trivial strings shorter than 2 chars', () => { + const nodes = queryAstNodes('string'); + // No single-char or empty strings should be present + for (const node of nodes) { + expect(node.name.length).toBeGreaterThanOrEqual(2); + } + }); + + test('captures regex as kind:regex', () => { + const nodes = queryAstNodes('regex'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + // At least one regex pattern should be present + expect(nodes.some((n) => n.name.includes('[a-z]') || n.name.includes('\\d'))).toBe(true); + }); + + test('captures throw as kind:throw', () => { + const nodes = queryAstNodes('throw'); + 
expect(nodes.length).toBeGreaterThanOrEqual(1); + // throw new Error('fetch failed') → name should be "Error" + expect(nodes.some((n) => n.name === 'Error')).toBe(true); + }); + + test('captures await as kind:await', () => { + const nodes = queryAstNodes('await'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + // await fetch('/api/data') → name should include "fetch" + expect(nodes.some((n) => n.name.includes('fetch'))).toBe(true); + }); + + test('parent_node_id is resolved for nodes inside functions', () => { + const all = queryAllAstNodes(); + const withParent = all.filter((n) => n.parent_node_id != null); + expect(withParent.length).toBeGreaterThan(0); + + // Verify the parent exists in the nodes table + for (const node of withParent) { + const parent = db.prepare('SELECT * FROM nodes WHERE id = ?').get(node.parent_node_id); + expect(parent).toBeDefined(); + expect(['function', 'method', 'class']).toContain(parent.kind); + } + }); + + test('all inserted nodes have valid kinds', () => { + const all = queryAllAstNodes(); + const validKinds = new Set(['call', 'new', 'string', 'regex', 'throw', 'await']); + for (const node of all) { + expect(validKinds.has(node.kind)).toBe(true); + } + }); + + test('text column is truncated to max length', () => { + const all = queryAllAstNodes(); + for (const node of all) { + if (node.text) { + expect(node.text.length).toBeLessThanOrEqual(201); // 200 + possible ellipsis char + } + } + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 7d14bffc..e0b309f7 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -39,6 +39,7 @@ const ALL_TOOL_NAMES = [ 'cfg', 'dataflow', 'check', + 'ast_query', 'list_repos', ]; From 7a8d4aebacc219e20f7fd4a7056d7a7b050d609f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:17:37 -0700 Subject: [PATCH 27/30] fix: correct misleading comment for break without enclosing loop/switch The comment 
incorrectly suggested this code path handled break inside switch cases. It actually handles break with no enclosing loop/switch context (invalid syntax) as a no-op. Impact: 2 functions changed, 9 affected --- src/cfg.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cfg.js b/src/cfg.js index 0e6e49be..c9f7dd0f 100644 --- a/src/cfg.js +++ b/src/cfg.js @@ -236,7 +236,7 @@ export function buildFunctionCFG(functionNode, langId) { addEdge(currentBlock, target, 'break'); return null; // path terminated } - // break outside loop (switch case) — just continue + // break with no enclosing loop/switch — treat as no-op return currentBlock; } From 31c219f49242dceb624d498f12d53ef6bedf736a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:24:46 -0700 Subject: [PATCH 28/30] docs: fix stale MCP tool references in guides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update tool names and counts to match actual MCP server output: - query_function → query, fn_deps/symbol_path removed (merged into query) - list_entry_points removed (merged into execution_flow) - Add missing tools: ast_query, cfg, dataflow, symbol_children - Fix count: 31 tools (32 in multi-repo mode) --- docs/guides/ai-agent-guide.md | 17 +++++++++-------- docs/guides/recommended-practices.md | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/guides/ai-agent-guide.md b/docs/guides/ai-agent-guide.md index 23548b54..e52d1774 100644 --- a/docs/guides/ai-agent-guide.md +++ b/docs/guides/ai-agent-guide.md @@ -166,7 +166,7 @@ codegraph fn resolve --file resolve.js --depth 5 | | | |---|---| -| **MCP tool** | `fn_deps` | +| **MCP tool** | `query` | | **Key flags** | `--depth ` (default: 3), `-f, --file` (scope to file), `-k, --kind` (filter kind), `-T` (no tests), `-j` (JSON) | | **When to use** | Tracing a call chain — "who calls this and what does it call?" 
| | **Output** | Direct callees, direct callers, transitive callers up to depth N | @@ -242,7 +242,7 @@ codegraph path parseConfig loadFile --max-depth 5 | | | |---|---| -| **MCP tool** | `symbol_path` | +| **MCP tool** | `query` (with `--path`) | | **Key flags** | `--max-depth ` (default: 10), `--kinds ` (default: calls), `--reverse`, `--from-file`, `--to-file`, `-k, --kind`, `-T` (no tests), `-j` (JSON) | | **When to use** | Understanding how two functions are connected through the call chain | | **Output** | Ordered path with edge kinds, hop count, alternate path count | @@ -493,7 +493,7 @@ codegraph query buildGraph | | | |---|---| -| **MCP tool** | `query_function` | +| **MCP tool** | `query` | | **Key flags** | `-T` (no tests), `-j` (JSON) | | **When to use** | Quick one-off lookup (prefer `fn` or `context` for richer data) | @@ -578,15 +578,14 @@ codegraph mcp --repos "myapp,lib" # Restricted repo list | MCP Tool | CLI Equivalent | Description | |----------|---------------|-------------| -| `query_function` | `query ` | Find callers and callees | +| `query` | `query ` | Find callers/callees, or shortest path between two symbols | | `file_deps` | `deps ` | File imports and importers | | `impact_analysis` | `impact ` | Transitive file-level impact | | `find_cycles` | `cycles` | Circular dependency detection | | `module_map` | `map` | Most-connected files overview | -| `fn_deps` | `fn ` | Function-level call chain | | `fn_impact` | `fn-impact ` | Function-level blast radius | -| `symbol_path` | `path ` | Shortest path between two symbols | | `context` | `context ` | Full function context | +| `symbol_children` | `children ` | Sub-declaration children (parameters, properties, constants) | | `explain` | `explain ` | Structural summary | | `where` | `where ` | Symbol definition and usage | | `diff_impact` | `diff-impact [ref]` | Git diff impact analysis | @@ -597,8 +596,7 @@ codegraph mcp --repos "myapp,lib" # Restricted repo list | `hotspots` | `hotspots` | 
Structural hotspot detection | | `node_roles` | `roles` | Node role classification | | `co_changes` | `co-change` | Git co-change analysis | -| `execution_flow` | `flow` | Execution flow tracing | -| `list_entry_points` | `flow --entry-points` | Framework entry point detection | +| `execution_flow` | `flow` | Execution flow tracing and entry point detection | | `complexity` | `complexity` | Per-function complexity metrics | | `communities` | `communities` | Community detection & drift | | `manifesto` | `manifesto` | Rule engine pass/fail | @@ -608,6 +606,9 @@ codegraph mcp --repos "myapp,lib" # Restricted repo list | `triage` | `triage` | Risk-ranked audit queue | | `check` | `check` | CI validation predicates | | `branch_compare` | `branch-compare` | Structural diff between refs | +| `ast_query` | *(MCP only)* | Search stored AST nodes (calls, literals, new, throw, await) | +| `cfg` | *(MCP only)* | Intraprocedural control flow graph for a function | +| `dataflow` | *(MCP only)* | Data flow edges or data-dependent blast radius | | `list_repos` | `registry list` | List registered repos (multi-repo only) | ### Server Modes diff --git a/docs/guides/recommended-practices.md b/docs/guides/recommended-practices.md index 85001593..e40d6626 100644 --- a/docs/guides/recommended-practices.md +++ b/docs/guides/recommended-practices.md @@ -167,7 +167,7 @@ By default, the MCP server runs in **single-repo mode** — the AI agent can onl Enable `--multi-repo` to let the agent query any registered repository, or use `--repos` to restrict access to a specific set of repos. 
-The server exposes 30 tools (31 in multi-repo mode): `query_function`, `file_deps`, `impact_analysis`, `find_cycles`, `module_map`, `fn_deps`, `fn_impact`, `symbol_path`, `context`, `explain`, `where`, `diff_impact`, `semantic_search`, `export_graph`, `list_functions`, `structure`, `hotspots`, `node_roles`, `co_changes`, `execution_flow`, `list_entry_points`, `complexity`, `communities`, `manifesto`, `code_owners`, `audit`, `batch_query`, `triage`, `check`, `branch_compare`, and `list_repos` (multi-repo only). See the [AI Agent Guide MCP reference](./ai-agent-guide.md#mcp-server-reference) for the full tool-to-CLI mapping table. +The server exposes 31 tools (32 in multi-repo mode): `query`, `file_deps`, `impact_analysis`, `find_cycles`, `module_map`, `fn_impact`, `context`, `explain`, `where`, `diff_impact`, `semantic_search`, `export_graph`, `list_functions`, `structure`, `hotspots`, `node_roles`, `co_changes`, `execution_flow`, `complexity`, `communities`, `manifesto`, `code_owners`, `audit`, `batch_query`, `triage`, `check`, `branch_compare`, `ast_query`, `cfg`, `dataflow`, `symbol_children`, and `list_repos` (multi-repo only). See the [AI Agent Guide MCP reference](./ai-agent-guide.md#mcp-server-reference) for the full tool-to-CLI mapping table. ### CLAUDE.md for your project From 628c7ac0f438702b93131d98d61887880ee0b8bf Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:25:55 -0700 Subject: [PATCH 29/30] feat: expand node types with parameter, property, constant kinds (#270) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: expand node types with parameter, property, constant kinds (Phase 1) Add sub-declaration node extraction to all 9 WASM language extractors, enabling structural queries like "which functions take a Request param?" or "which classes have a userId field?" without reading source code. 
Schema: migration v11 adds nullable parent_id column with indexes. Builder: insertNode links children to parent via parent_id FK. Extractors: JS/TS, Python, Go, Rust, Java, C#, Ruby, PHP, HCL now emit children arrays for parameters, properties, and constants. Queries: new childrenData() function, children in contextData output. CLI: new `children` command, EVERY_SYMBOL_KIND validation on --kind. MCP: new `symbol_children` tool, extended kind enum on all kind fields. Constants: CORE_SYMBOL_KINDS (10), EXTENDED_SYMBOL_KINDS (3), EVERY_SYMBOL_KIND (13). ALL_SYMBOL_KINDS preserved for backward compat. Native Rust engine: Definition struct gains children field but actual extraction is deferred to Phase 2 — WASM fallback handles new kinds. Impact: 63 functions changed, 62 affected * feat: add expanded edge types — contains, parameter_of, receiver (Phase 2) Build file→definition and parent→child contains edges, parameter_of inverse edges, and receiver edges for method-call dispatch. Add CORE_EDGE_KINDS, STRUCTURAL_EDGE_KINDS, EVERY_EDGE_KIND constants. Exclude structural edges from moduleMapData coupling counts. Scope directory contains-edge cleanup to preserve symbol-level edges. Impact: 3 functions changed, 22 affected * fix(native): add missing children field to all Rust extractors The Definition struct gained a children field but no extractor was updated to include it, causing 50 compilation errors. Add children: None to every Definition initializer across all 9 language extractors. Also fix unused variable warnings in parser_registry.rs and parallel.rs. 
Impact: 13 functions changed, 10 affected * ci: trigger workflow re-run --- .../codegraph-core/src/extractors/csharp.rs | 9 + crates/codegraph-core/src/extractors/go.rs | 6 + crates/codegraph-core/src/extractors/hcl.rs | 1 + crates/codegraph-core/src/extractors/java.rs | 6 + .../src/extractors/javascript.rs | 10 + crates/codegraph-core/src/extractors/php.rs | 7 + .../codegraph-core/src/extractors/python.rs | 2 + crates/codegraph-core/src/extractors/ruby.rs | 4 + .../src/extractors/rust_lang.rs | 5 + crates/codegraph-core/src/parallel.rs | 2 +- crates/codegraph-core/src/parser_registry.rs | 2 +- crates/codegraph-core/src/types.rs | 2 + src/builder.js | 61 ++- src/cli.js | 72 ++- src/db.js | 23 + src/extractors/csharp.js | 65 ++- src/extractors/go.js | 67 ++- src/extractors/hcl.js | 22 + src/extractors/java.js | 62 ++- src/extractors/javascript.js | 142 +++++ src/extractors/php.js | 79 +++ src/extractors/python.js | 134 +++++ src/extractors/ruby.js | 89 ++++ src/extractors/rust.js | 72 ++- src/index.js | 7 + src/mcp.js | 42 +- src/parser.js | 8 + src/queries.js | 133 ++++- src/structure.js | 5 +- tests/integration/build-parity.test.js | 32 +- tests/integration/queries.test.js | 87 ++- tests/parsers/csharp.test.js | 2 +- tests/parsers/extended-kinds.test.js | 504 ++++++++++++++++++ tests/unit/mcp.test.js | 16 + 34 files changed, 1727 insertions(+), 53 deletions(-) create mode 100644 tests/parsers/extended-kinds.test.js diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index c92b6b6f..9b8ac071 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -43,6 +43,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); extract_csharp_base_types(node, &class_name, source, symbols); } @@ -58,6 +59,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: 
&mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); extract_csharp_base_types(node, &name, source, symbols); } @@ -73,6 +75,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); extract_csharp_base_types(node, &name, source, symbols); } @@ -88,6 +91,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); if let Some(body) = node.child_by_field_name("body") { for i in 0..body.child_count() { @@ -105,6 +109,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(&child)), decorators: None, complexity: compute_all_metrics(&child, source, "c_sharp"), + children: None, }); } } @@ -123,6 +128,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); } } @@ -142,6 +148,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: compute_all_metrics(node, source, "c_sharp"), + children: None, }); } } @@ -161,6 +168,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: compute_all_metrics(node, source, "c_sharp"), + children: None, }); } } @@ -180,6 +188,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: compute_all_metrics(node, source, "c_sharp"), + children: None, }); } } diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index 8d429e87..fee7abc8 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -25,6 
+25,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: compute_all_metrics(node, source, "go"), + children: None, }); } } @@ -61,6 +62,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: compute_all_metrics(node, source, "go"), + children: None, }); } } @@ -84,6 +86,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); } "interface_type" => { @@ -94,6 +97,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); // Extract interface methods for j in 0..type_node.child_count() { @@ -113,6 +117,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(&member)), decorators: None, complexity: None, + children: None, }); } } @@ -127,6 +132,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); } } diff --git a/crates/codegraph-core/src/extractors/hcl.rs b/crates/codegraph-core/src/extractors/hcl.rs index 1cbb539d..ab516418 100644 --- a/crates/codegraph-core/src/extractors/hcl.rs +++ b/crates/codegraph-core/src/extractors/hcl.rs @@ -67,6 +67,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); // Module source imports diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index 829eb6f6..b6161da0 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -42,6 +42,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut 
FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); // Superclass @@ -94,6 +95,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); if let Some(body) = node.child_by_field_name("body") { for i in 0..body.child_count() { @@ -111,6 +113,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(&child)), decorators: None, complexity: compute_all_metrics(&child, source, "java"), + children: None, }); } } @@ -129,6 +132,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); } } @@ -148,6 +152,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: compute_all_metrics(node, source, "java"), + children: None, }); } } @@ -167,6 +172,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: compute_all_metrics(node, source, "java"), + children: None, }); } } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index f6451fe2..30cf6bc6 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -25,6 +25,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: compute_all_metrics(node, source, "javascript"), + children: None, }); } } @@ -39,6 +40,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); // Heritage: extends + implements @@ -81,6 +83,7 @@ fn walk_node(node: &Node, source: &[u8], 
symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: compute_all_metrics(node, source, "javascript"), + children: None, }); } } @@ -95,6 +98,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); // Extract interface methods let body = node @@ -116,6 +120,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(node)), decorators: None, complexity: None, + children: None, }); } } @@ -139,6 +144,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { end_line: Some(end_line(&value_n)), decorators: None, complexity: compute_all_metrics(&value_n, source, "javascript"), + children: None, }); } } @@ -348,6 +354,7 @@ fn extract_interface_methods( end_line: Some(end_line(&child)), decorators: None, complexity: None, + children: None, }); } } @@ -563,6 +570,7 @@ fn extract_callback_definition(call_node: &Node, source: &[u8]) -> Option Option Option Vec { +pub fn parse_files_parallel(file_paths: &[String], _root_dir: &str) -> Vec { file_paths .par_iter() .filter_map(|file_path| { diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index 0fdc766f..2c2c7e9e 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -21,7 +21,7 @@ impl LanguageKind { pub fn from_extension(file_path: &str) -> Option { let path = Path::new(file_path); let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); - let name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + let _name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); // .tsx must come before .ts check if file_path.ends_with(".tsx") { diff --git a/crates/codegraph-core/src/types.rs b/crates/codegraph-core/src/types.rs index f6593ebc..ed299f0c 100644 --- a/crates/codegraph-core/src/types.rs +++ 
b/crates/codegraph-core/src/types.rs @@ -65,6 +65,8 @@ pub struct Definition { #[napi(ts_type = "string[] | undefined")] pub decorators: Option>, pub complexity: Option, + #[napi(ts_type = "Definition[] | undefined")] + pub children: Option>, } #[napi(object)] diff --git a/src/builder.js b/src/builder.js index 24021f55..00d67186 100644 --- a/src/builder.js +++ b/src/builder.js @@ -603,7 +603,7 @@ export async function buildGraph(rootDir, opts = {}) { } const insertNode = db.prepare( - 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line, parent_id) VALUES (?, ?, ?, ?, ?, ?)', ); const getNodeId = db.prepare( 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?', @@ -657,12 +657,39 @@ export async function buildGraph(rootDir, opts = {}) { for (const [relPath, symbols] of allSymbols) { fileSymbols.set(relPath, symbols); - insertNode.run(relPath, 'file', relPath, 0, null); + insertNode.run(relPath, 'file', relPath, 0, null, null); + const fileRow = getNodeId.get(relPath, 'file', relPath, 0); for (const def of symbols.definitions) { - insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); + insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null, null); + const defRow = getNodeId.get(def.name, def.kind, relPath, def.line); + // File → top-level definition contains edge + if (fileRow && defRow) { + insertEdge.run(fileRow.id, defRow.id, 'contains', 1.0, 0); + } + if (def.children?.length && defRow) { + for (const child of def.children) { + insertNode.run( + child.name, + child.kind, + relPath, + child.line, + child.endLine || null, + defRow.id, + ); + // Parent → child contains edge + const childRow = getNodeId.get(child.name, child.kind, relPath, child.line); + if (childRow) { + insertEdge.run(defRow.id, childRow.id, 'contains', 1.0, 0); + // Parameter → parent parameter_of edge (inverse direction) + if 
(child.kind === 'parameter') { + insertEdge.run(childRow.id, defRow.id, 'parameter_of', 1.0, 0); + } + } + } + } } for (const exp of symbols.exports) { - insertNode.run(exp.name, exp.kind, relPath, exp.line, null); + insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null); } // Update file hash with real mtime+size for incremental builds @@ -842,7 +869,7 @@ export async function buildGraph(rootDir, opts = {}) { // N+1 optimization: pre-load all nodes into a lookup map for edge building const allNodes = db .prepare( - `SELECT id, name, kind, file FROM nodes WHERE kind IN ('function','method','class','interface')`, + `SELECT id, name, kind, file FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`, ) .all(); const nodesByName = new Map(); @@ -1001,6 +1028,30 @@ export async function buildGraph(rootDir, opts = {}) { edgeCount++; } } + + // Receiver edge: caller → receiver type node + if ( + call.receiver && + !BUILTIN_RECEIVERS.has(call.receiver) && + call.receiver !== 'this' && + call.receiver !== 'self' && + call.receiver !== 'super' + ) { + const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); + // Same-file first, then global + const samefile = nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; + const candidates = samefile.length > 0 ? 
samefile : nodesByName.get(call.receiver) || []; + const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); + if (receiverNodes.length > 0 && caller) { + const recvTarget = receiverNodes[0]; + const recvKey = `recv|${caller.id}|${recvTarget.id}`; + if (!seenCallEdges.has(recvKey)) { + seenCallEdges.add(recvKey); + insertEdge.run(caller.id, recvTarget.id, 'receiver', 0.7, 0); + edgeCount++; + } + } + } } // Class extends edges (use pre-loaded maps instead of inline DB queries) diff --git a/src/cli.js b/src/cli.js index 81e14dc5..c3081664 100644 --- a/src/cli.js +++ b/src/cli.js @@ -27,9 +27,10 @@ import { import { setVerbose } from './logger.js'; import { printNdjson } from './paginate.js'; import { - ALL_SYMBOL_KINDS, + children, context, diffImpact, + EVERY_SYMBOL_KIND, explain, fileDeps, fileExports, @@ -130,8 +131,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } if (opts.path) { @@ -259,8 +260,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } fnImpact(name, opts.db, { @@ -291,8 +292,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action((name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } context(name, opts.db, { @@ -309,6 +310,31 @@ program }); }); +program + .command('children ') + .description('List parameters, properties, and constants of a symbol') + .option('-d, --db ', 'Path to graph.db') + .option('-f, --file ', 'Scope search to symbols in this file (partial match)') + .option('-k, --kind ', 'Filter to a specific symbol kind') + .option('-T, --no-tests', 'Exclude test/spec files from results') + .option('-j, --json', 'Output as JSON') + .option('--limit ', 'Max results to return') + .option('--offset ', 'Skip N results (default: 0)') + .action((name, opts) => { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + process.exit(1); + } + children(name, opts.db, { + file: opts.file, + kind: opts.kind, + noTests: resolveNoTests(opts), + json: opts.json, + limit: opts.limit ? parseInt(opts.limit, 10) : undefined, + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + }); + }); + program .command('explain ') .description('Structural summary of a file or function (no LLM needed)') @@ -342,8 +368,8 @@ program .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .option('-j, --json', 'Output as JSON') .action((target, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". 
Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } audit(target, opts.db, { @@ -1043,8 +1069,8 @@ program console.error('Provide a function/entry point name or use --list to see all entry points.'); process.exit(1); } - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { flow } = await import('./flow.js'); @@ -1076,8 +1102,8 @@ program .option('--impact', 'Show data-dependent blast radius') .option('--depth ', 'Max traversal depth', '5') .action(async (name, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { dataflow } = await import('./dataflow.js'); @@ -1114,8 +1140,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action(async (target, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { complexity } = await import('./complexity.js'); @@ -1147,8 +1173,8 @@ program .option('--offset ', 'Skip N results (default: 0)') .option('--ndjson', 'Newline-delimited JSON output') .action(async (opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } const { manifesto } = await import('./manifesto.js'); @@ -1209,8 +1235,8 @@ program .option('--ndjson', 'Newline-delimited JSON output') .option('--weights ', 'Custom weights JSON (e.g. \'{"fanIn":1,"complexity":0}\')') .action(async (opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } if (opts.role && !VALID_ROLES.includes(opts.role)) { @@ -1372,8 +1398,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .action(async (command, positionalTargets, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } @@ -1436,8 +1462,8 @@ program .option('-T, --no-tests', 'Exclude test/spec files from results') .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') .action(async (positionalTargets, opts) => { - if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) { - console.error(`Invalid kind "${opts.kind}". Valid: ${ALL_SYMBOL_KINDS.join(', ')}`); + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); process.exit(1); } diff --git a/src/db.js b/src/db.js index f3f55fa4..9f40d7cc 100644 --- a/src/db.js +++ b/src/db.js @@ -165,6 +165,14 @@ export const MIGRATIONS = [ CREATE INDEX IF NOT EXISTS idx_dataflow_source_kind ON dataflow(source_id, kind); `, }, + { + version: 11, + up: ` + ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id); + CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id); + CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); + `, + }, ]; export function getBuildMeta(db, key) { @@ -286,6 +294,21 @@ export function initSchema(db) { } catch { /* already exists */ } + try { + db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); + } catch { + /* already exists */ + } } export function findDbPath(customPath) { diff --git a/src/extractors/csharp.js b/src/extractors/csharp.js index 5af523f3..43231d1e 100644 --- a/src/extractors/csharp.js +++ b/src/extractors/csharp.js @@ -33,11 +33,13 @@ export function extractCSharpSymbols(tree, _filePath) { case 'class_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const 
classChildren = extractCSharpClassFields(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? classChildren : undefined, }); extractCSharpBaseTypes(node, nameNode.text, classes); } @@ -47,11 +49,13 @@ export function extractCSharpSymbols(tree, _filePath) { case 'struct_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const structChildren = extractCSharpClassFields(node); definitions.push({ name: nameNode.text, kind: 'struct', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: structChildren.length > 0 ? structChildren : undefined, }); extractCSharpBaseTypes(node, nameNode.text, classes); } @@ -105,11 +109,13 @@ export function extractCSharpSymbols(tree, _filePath) { case 'enum_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const enumChildren = extractCSharpEnumMembers(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, }); } break; @@ -120,11 +126,13 @@ export function extractCSharpSymbols(tree, _filePath) { if (nameNode) { const parentType = findCSharpParentType(node); const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -135,11 +143,13 @@ export function extractCSharpSymbols(tree, _filePath) { if (nameNode) { const parentType = findCSharpParentType(node); const fullName = parentType ? 
`${parentType}.${nameNode.text}` : nameNode.text; + const params = extractCSharpParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -152,7 +162,7 @@ export function extractCSharpSymbols(tree, _filePath) { const fullName = parentType ? `${parentType}.${nameNode.text}` : nameNode.text; definitions.push({ name: fullName, - kind: 'method', + kind: 'property', line: node.startPosition.row + 1, endLine: nodeEndLine(node), }); @@ -220,6 +230,59 @@ export function extractCSharpSymbols(tree, _filePath) { return { definitions, calls, imports, classes, exports }; } +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractCSharpParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param || param.type !== 'parameter') continue; + const nameNode = param.childForFieldName('name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + return params; +} + +function extractCSharpClassFields(classNode) { + const fields = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'declaration_list'); + if (!body) return fields; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'field_declaration') continue; + const varDecl = findChild(member, 'variable_declaration'); + if (!varDecl) continue; + for (let j = 0; j < varDecl.childCount; j++) { + const child = varDecl.child(j); + if (!child || child.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: member.startPosition.row 
+ 1 }); + } + } + } + return fields; +} + +function extractCSharpEnumMembers(enumNode) { + const constants = []; + const body = + enumNode.childForFieldName('body') || findChild(enumNode, 'enum_member_declaration_list'); + if (!body) return constants; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_member_declaration') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return constants; +} + function extractCSharpBaseTypes(node, className, classes) { const baseList = node.childForFieldName('bases'); if (!baseList) return; diff --git a/src/extractors/go.js b/src/extractors/go.js index 8b943012..a3a50158 100644 --- a/src/extractors/go.js +++ b/src/extractors/go.js @@ -1,4 +1,4 @@ -import { nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Go files. @@ -15,11 +15,13 @@ export function extractGoSymbols(tree, _filePath) { case 'function_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const params = extractGoParameters(node.childForFieldName('parameters')); definitions.push({ name: nameNode.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -46,11 +48,13 @@ export function extractGoSymbols(tree, _filePath) { } } const fullName = receiverType ? `${receiverType}.${nameNode.text}` : nameNode.text; + const params = extractGoParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; @@ -64,11 +68,13 @@ export function extractGoSymbols(tree, _filePath) { const typeNode = spec.childForFieldName('type'); if (nameNode && typeNode) { if (typeNode.type === 'struct_type') { + const fields = extractStructFields(typeNode); definitions.push({ name: nameNode.text, kind: 'struct', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: fields.length > 0 ? fields : undefined, }); } else if (typeNode.type === 'interface_type') { definitions.push({ @@ -145,6 +151,23 @@ export function extractGoSymbols(tree, _filePath) { break; } + case 'const_declaration': { + for (let i = 0; i < node.childCount; i++) { + const spec = node.child(i); + if (!spec || spec.type !== 'const_spec') continue; + const constName = spec.childForFieldName('name'); + if (constName) { + definitions.push({ + name: constName.text, + kind: 'constant', + line: spec.startPosition.row + 1, + endLine: spec.endPosition.row + 1, + }); + } + } + break; + } + case 'call_expression': { const fn = node.childForFieldName('function'); if (fn) { @@ -170,3 +193,45 @@ export function extractGoSymbols(tree, _filePath) { walkGoNode(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractGoParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param || param.type !== 'parameter_declaration') continue; + // A parameter_declaration may have multiple identifiers (e.g., `a, b int`) + for (let j = 0; j < param.childCount; j++) { + const child = param.child(j); + if (child && child.type === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractStructFields(structTypeNode) { + const fields = []; + const fieldList 
= findChild(structTypeNode, 'field_declaration_list'); + if (!fieldList) return fields; + for (let i = 0; i < fieldList.childCount; i++) { + const field = fieldList.child(i); + if (!field || field.type !== 'field_declaration') continue; + const nameNode = field.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: field.startPosition.row + 1 }); + } else { + // Struct fields may have multiple names or use first identifier child + for (let j = 0; j < field.childCount; j++) { + const child = field.child(j); + if (child && child.type === 'field_identifier') { + fields.push({ name: child.text, kind: 'property', line: field.startPosition.row + 1 }); + } + } + } + } + return fields; +} diff --git a/src/extractors/hcl.js b/src/extractors/hcl.js index 4df5af4d..aba022a5 100644 --- a/src/extractors/hcl.js +++ b/src/extractors/hcl.js @@ -36,11 +36,33 @@ export function extractHCLSymbols(tree, _filePath) { } if (name) { + // Extract attributes as property children for variable/output blocks + let blockChildren; + if (blockType === 'variable' || blockType === 'output') { + blockChildren = []; + const body = children.find((c) => c.type === 'body'); + if (body) { + for (let j = 0; j < body.childCount; j++) { + const attr = body.child(j); + if (attr && attr.type === 'attribute') { + const key = attr.childForFieldName('key') || attr.child(0); + if (key) { + blockChildren.push({ + name: key.text, + kind: 'property', + line: attr.startPosition.row + 1, + }); + } + } + } + } + } definitions.push({ name, kind: blockType, line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: blockChildren?.length > 0 ? 
blockChildren : undefined, }); } diff --git a/src/extractors/java.js b/src/extractors/java.js index 87f10d39..bfa24571 100644 --- a/src/extractors/java.js +++ b/src/extractors/java.js @@ -1,4 +1,4 @@ -import { nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Java files. @@ -31,11 +31,13 @@ export function extractJavaSymbols(tree, _filePath) { case 'class_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractClassFields(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? classChildren : undefined, }); const superclass = node.childForFieldName('superclass'); @@ -139,11 +141,13 @@ export function extractJavaSymbols(tree, _filePath) { case 'enum_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const enumChildren = extractEnumConstants(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, }); } break; @@ -154,11 +158,13 @@ export function extractJavaSymbols(tree, _filePath) { if (nameNode) { const parentClass = findJavaParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -169,11 +175,13 @@ export function extractJavaSymbols(tree, _filePath) { if (nameNode) { const parentClass = findJavaParentClass(node); const fullName = parentClass ? 
`${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractJavaParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -228,3 +236,55 @@ export function extractJavaSymbols(tree, _filePath) { walkJavaNode(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractJavaParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param) continue; + if (param.type === 'formal_parameter' || param.type === 'spread_parameter') { + const nameNode = param.childForFieldName('name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractClassFields(classNode) { + const fields = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'class_body'); + if (!body) return fields; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'field_declaration') continue; + for (let j = 0; j < member.childCount; j++) { + const child = member.child(j); + if (!child || child.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: member.startPosition.row + 1 }); + } + } + } + return fields; +} + +function extractEnumConstants(enumNode) { + const constants = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_body'); + if (!body) return constants; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if 
(!member || member.type !== 'enum_constant') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return constants; +} diff --git a/src/extractors/javascript.js b/src/extractors/javascript.js index 57ba0392..c4a0d3bf 100644 --- a/src/extractors/javascript.js +++ b/src/extractors/javascript.js @@ -28,31 +28,37 @@ function extractSymbolsQuery(tree, query) { if (c.fn_node) { // function_declaration + const fnChildren = extractParameters(c.fn_node); definitions.push({ name: c.fn_name.text, kind: 'function', line: c.fn_node.startPosition.row + 1, endLine: nodeEndLine(c.fn_node), + children: fnChildren.length > 0 ? fnChildren : undefined, }); } else if (c.varfn_name) { // variable_declarator with arrow_function / function_expression const declNode = c.varfn_name.parent?.parent; const line = declNode ? declNode.startPosition.row + 1 : c.varfn_name.startPosition.row + 1; + const varFnChildren = extractParameters(c.varfn_value); definitions.push({ name: c.varfn_name.text, kind: 'function', line, endLine: nodeEndLine(c.varfn_value), + children: varFnChildren.length > 0 ? varFnChildren : undefined, }); } else if (c.cls_node) { // class_declaration const className = c.cls_name.text; const startLine = c.cls_node.startPosition.row + 1; + const clsChildren = extractClassProperties(c.cls_node); definitions.push({ name: className, kind: 'class', line: startLine, endLine: nodeEndLine(c.cls_node), + children: clsChildren.length > 0 ? clsChildren : undefined, }); const heritage = c.cls_node.childForFieldName('heritage') || findChild(c.cls_node, 'class_heritage'); @@ -69,11 +75,13 @@ function extractSymbolsQuery(tree, query) { const methName = c.meth_name.text; const parentClass = findParentClass(c.meth_node); const fullName = parentClass ? 
`${parentClass}.${methName}` : methName; + const methChildren = extractParameters(c.meth_node); definitions.push({ name: fullName, kind: 'method', line: c.meth_node.startPosition.row + 1, endLine: nodeEndLine(c.meth_node), + children: methChildren.length > 0 ? methChildren : undefined, }); } else if (c.iface_node) { // interface_declaration (TS/TSX only) @@ -231,11 +239,13 @@ function extractSymbolsWalk(tree) { case 'function_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const fnChildren = extractParameters(node); definitions.push({ name: nameNode.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: fnChildren.length > 0 ? fnChildren : undefined, }); } break; @@ -246,11 +256,13 @@ function extractSymbolsWalk(tree) { if (nameNode) { const className = nameNode.text; const startLine = node.startPosition.row + 1; + const clsChildren = extractClassProperties(node); definitions.push({ name: className, kind: 'class', line: startLine, endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? clsChildren : undefined, }); const heritage = node.childForFieldName('heritage') || findChild(node, 'class_heritage'); if (heritage) { @@ -272,11 +284,13 @@ function extractSymbolsWalk(tree) { if (nameNode) { const parentClass = findParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const methChildren = extractParameters(node); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: methChildren.length > 0 ? 
methChildren : undefined, }); } break; @@ -317,6 +331,7 @@ function extractSymbolsWalk(tree) { case 'lexical_declaration': case 'variable_declaration': { + const isConst = node.text.startsWith('const '); for (let i = 0; i < node.childCount; i++) { const declarator = node.child(i); if (declarator && declarator.type === 'variable_declarator') { @@ -329,15 +344,59 @@ function extractSymbolsWalk(tree) { valType === 'function_expression' || valType === 'function' ) { + const varFnChildren = extractParameters(valueN); definitions.push({ name: nameN.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(valueN), + children: varFnChildren.length > 0 ? varFnChildren : undefined, }); + } else if (isConst && nameN.type === 'identifier' && isConstantValue(valueN)) { + definitions.push({ + name: nameN.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } else if (isConst && nameN && nameN.type === 'identifier' && !valueN) { + // const with no value (shouldn't happen but be safe) + } + } + } + break; + } + + case 'enum_declaration': { + // TypeScript enum + const nameNode = node.childForFieldName('name'); + if (nameNode) { + const enumChildren = []; + const body = node.childForFieldName('body') || findChild(node, 'enum_body'); + if (body) { + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member) continue; + if (member.type === 'enum_assignment' || member.type === 'property_identifier') { + const mName = member.childForFieldName('name') || member.child(0); + if (mName) { + enumChildren.push({ + name: mName.text, + kind: 'constant', + line: member.startPosition.row + 1, + }); + } } } } + definitions.push({ + name: nameNode.text, + kind: 'enum', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? 
enumChildren : undefined, + }); } break; } @@ -471,6 +530,89 @@ function extractSymbolsWalk(tree) { return { definitions, calls, imports, classes, exports }; } +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractParameters(node) { + const params = []; + const paramsNode = node.childForFieldName('parameters') || findChild(node, 'formal_parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const child = paramsNode.child(i); + if (!child) continue; + const t = child.type; + if (t === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } else if ( + t === 'required_parameter' || + t === 'optional_parameter' || + t === 'assignment_pattern' + ) { + const nameNode = + child.childForFieldName('pattern') || child.childForFieldName('left') || child.child(0); + if ( + nameNode && + (nameNode.type === 'identifier' || + nameNode.type === 'shorthand_property_identifier_pattern') + ) { + params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } else if (t === 'rest_pattern' || t === 'rest_element') { + const nameNode = child.child(1) || child.childForFieldName('name'); + if (nameNode && nameNode.type === 'identifier') { + params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractClassProperties(classNode) { + const props = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'class_body'); + if (!body) return props; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child) continue; + if ( + child.type === 'field_definition' || + child.type === 'public_field_definition' || + child.type === 'property_definition' + ) { + const nameNode = + child.childForFieldName('name') || child.childForFieldName('property') || child.child(0); + if ( + nameNode && 
+ (nameNode.type === 'property_identifier' || + nameNode.type === 'identifier' || + nameNode.type === 'private_property_identifier') + ) { + props.push({ name: nameNode.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + } + return props; +} + +function isConstantValue(valueNode) { + if (!valueNode) return false; + const t = valueNode.type; + return ( + t === 'number' || + t === 'string' || + t === 'template_string' || + t === 'true' || + t === 'false' || + t === 'null' || + t === 'undefined' || + t === 'array' || + t === 'object' || + t === 'regex' || + t === 'unary_expression' || + t === 'binary_expression' || + t === 'new_expression' + ); +} + // ── Shared helpers ────────────────────────────────────────────────────────── function extractInterfaceMethods(bodyNode, interfaceName, definitions) { diff --git a/src/extractors/php.js b/src/extractors/php.js index 95b44570..d2b4f09d 100644 --- a/src/extractors/php.js +++ b/src/extractors/php.js @@ -1,5 +1,76 @@ import { findChild, nodeEndLine } from './helpers.js'; +function extractPhpParameters(fnNode) { + const params = []; + const paramsNode = + fnNode.childForFieldName('parameters') || findChild(fnNode, 'formal_parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const param = paramsNode.child(i); + if (!param) continue; + if (param.type === 'simple_parameter' || param.type === 'variadic_parameter') { + const nameNode = param.childForFieldName('name') || findChild(param, 'variable_name'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractPhpClassChildren(classNode) { + const children = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'declaration_list'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member) continue; + if (member.type === 
'property_declaration') { + for (let j = 0; j < member.childCount; j++) { + const el = member.child(j); + if (!el || el.type !== 'property_element') continue; + const varNode = findChild(el, 'variable_name'); + if (varNode) { + children.push({ + name: varNode.text, + kind: 'property', + line: member.startPosition.row + 1, + }); + } + } + } else if (member.type === 'const_declaration') { + for (let j = 0; j < member.childCount; j++) { + const el = member.child(j); + if (!el || el.type !== 'const_element') continue; + const nameNode = el.childForFieldName('name') || findChild(el, 'name'); + if (nameNode) { + children.push({ + name: nameNode.text, + kind: 'constant', + line: member.startPosition.row + 1, + }); + } + } + } + } + return children; +} + +function extractPhpEnumCases(enumNode) { + const children = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_declaration_list'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const member = body.child(i); + if (!member || member.type !== 'enum_case') continue; + const nameNode = member.childForFieldName('name'); + if (nameNode) { + children.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 }); + } + } + return children; +} + /** * Extract symbols from PHP files. */ @@ -31,11 +102,13 @@ export function extractPHPSymbols(tree, _filePath) { case 'function_definition': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const params = extractPhpParameters(node); definitions.push({ name: nameNode.text, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; @@ -44,11 +117,13 @@ export function extractPHPSymbols(tree, _filePath) { case 'class_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractPhpClassChildren(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? classChildren : undefined, }); // Check base clause (extends) @@ -132,11 +207,13 @@ export function extractPHPSymbols(tree, _filePath) { case 'enum_declaration': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const enumChildren = extractPhpEnumCases(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: enumChildren.length > 0 ? enumChildren : undefined, }); } break; @@ -147,11 +224,13 @@ export function extractPHPSymbols(tree, _filePath) { if (nameNode) { const parentClass = findPHPParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractPhpParameters(node); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; diff --git a/src/extractors/python.js b/src/extractors/python.js index 832232f0..6542aab7 100644 --- a/src/extractors/python.js +++ b/src/extractors/python.js @@ -22,12 +22,14 @@ export function extractPythonSymbols(tree, _filePath) { const parentClass = findPythonParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; const kind = parentClass ? 'method' : 'function'; + const fnChildren = extractPythonParameters(node); definitions.push({ name: fullName, kind, line: node.startPosition.row + 1, endLine: nodeEndLine(node), decorators, + children: fnChildren.length > 0 ? 
fnChildren : undefined, }); } break; @@ -36,11 +38,13 @@ export function extractPythonSymbols(tree, _filePath) { case 'class_definition': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const clsChildren = extractPythonClassProperties(node); definitions.push({ name: nameNode.text, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: clsChildren.length > 0 ? clsChildren : undefined, }); const superclasses = node.childForFieldName('superclasses') || findChild(node, 'argument_list'); @@ -108,6 +112,24 @@ export function extractPythonSymbols(tree, _filePath) { break; } + case 'expression_statement': { + // Module-level UPPER_CASE assignments → constants + if (node.parent && node.parent.type === 'module') { + const assignment = findChild(node, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && /^[A-Z_][A-Z0-9_]*$/.test(left.text)) { + definitions.push({ + name: left.text, + kind: 'constant', + line: node.startPosition.row + 1, + }); + } + } + } + break; + } + case 'import_from_statement': { let source = ''; const names = []; @@ -133,6 +155,118 @@ export function extractPythonSymbols(tree, _filePath) { for (let i = 0; i < node.childCount; i++) walkPythonNode(node.child(i)); } + function extractPythonParameters(fnNode) { + const params = []; + const paramsNode = fnNode.childForFieldName('parameters') || findChild(fnNode, 'parameters'); + if (!paramsNode) return params; + for (let i = 0; i < paramsNode.childCount; i++) { + const child = paramsNode.child(i); + if (!child) continue; + const t = child.type; + if (t === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } else if ( + t === 'typed_parameter' || + t === 'default_parameter' || + t === 'typed_default_parameter' + ) { + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode && nameNode.type === 
'identifier') { + params.push({ + name: nameNode.text, + kind: 'parameter', + line: child.startPosition.row + 1, + }); + } + } else if (t === 'list_splat_pattern' || t === 'dictionary_splat_pattern') { + // *args, **kwargs + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'identifier') { + params.push({ name: inner.text, kind: 'parameter', line: child.startPosition.row + 1 }); + break; + } + } + } + } + return params; + } + + function extractPythonClassProperties(classNode) { + const props = []; + const seen = new Set(); + const body = classNode.childForFieldName('body') || findChild(classNode, 'block'); + if (!body) return props; + + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child) continue; + + // Direct class attribute assignments: x = 5 + if (child.type === 'expression_statement') { + const assignment = findChild(child, 'assignment'); + if (assignment) { + const left = assignment.childForFieldName('left'); + if (left && left.type === 'identifier' && !seen.has(left.text)) { + seen.add(left.text); + props.push({ name: left.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + } + + // __init__ method: self.x = ... 
assignments + if (child.type === 'function_definition') { + const fnName = child.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = child.childForFieldName('body') || findChild(child, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); + } + } + } + + // decorated __init__ + if (child.type === 'decorated_definition') { + for (let j = 0; j < child.childCount; j++) { + const inner = child.child(j); + if (inner && inner.type === 'function_definition') { + const fnName = inner.childForFieldName('name'); + if (fnName && fnName.text === '__init__') { + const initBody = inner.childForFieldName('body') || findChild(inner, 'block'); + if (initBody) { + walkInitBody(initBody, seen, props); + } + } + } + } + } + } + return props; + } + + function walkInitBody(bodyNode, seen, props) { + for (let i = 0; i < bodyNode.childCount; i++) { + const stmt = bodyNode.child(i); + if (!stmt || stmt.type !== 'expression_statement') continue; + const assignment = findChild(stmt, 'assignment'); + if (!assignment) continue; + const left = assignment.childForFieldName('left'); + if (!left || left.type !== 'attribute') continue; + const obj = left.childForFieldName('object'); + const attr = left.childForFieldName('attribute'); + if ( + obj && + obj.text === 'self' && + attr && + attr.type === 'identifier' && + !seen.has(attr.text) + ) { + seen.add(attr.text); + props.push({ name: attr.text, kind: 'property', line: stmt.startPosition.row + 1 }); + } + } + } + function findPythonParentClass(node) { let current = node.parent; while (current) { diff --git a/src/extractors/ruby.js b/src/extractors/ruby.js index 73b3f0d4..400d410d 100644 --- a/src/extractors/ruby.js +++ b/src/extractors/ruby.js @@ -31,11 +31,13 @@ export function extractRubySymbols(tree, _filePath) { case 'class': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const classChildren = extractRubyClassChildren(node); definitions.push({ name: nameNode.text, 
kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: classChildren.length > 0 ? classChildren : undefined, }); const superclass = node.childForFieldName('superclass'); if (superclass) { @@ -73,11 +75,13 @@ export function extractRubySymbols(tree, _filePath) { case 'module': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const moduleChildren = extractRubyBodyConstants(node); definitions.push({ name: nameNode.text, kind: 'module', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: moduleChildren.length > 0 ? moduleChildren : undefined, }); } break; @@ -88,11 +92,13 @@ export function extractRubySymbols(tree, _filePath) { if (nameNode) { const parentClass = findRubyParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); definitions.push({ name: fullName, kind: 'method', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -103,16 +109,34 @@ export function extractRubySymbols(tree, _filePath) { if (nameNode) { const parentClass = findRubyParentClass(node); const fullName = parentClass ? `${parentClass}.${nameNode.text}` : nameNode.text; + const params = extractRubyParameters(node); definitions.push({ name: fullName, kind: 'function', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? 
params : undefined, }); } break; } + case 'assignment': { + // Top-level constant assignments (parent is program) + if (node.parent && node.parent.type === 'program') { + const left = node.childForFieldName('left'); + if (left && left.type === 'constant') { + definitions.push({ + name: left.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } + break; + } + case 'call': { const methodNode = node.childForFieldName('method'); if (methodNode) { @@ -186,3 +210,68 @@ export function extractRubySymbols(tree, _filePath) { walkRubyNode(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Child extraction helpers ──────────────────────────────────────────────── + +const RUBY_PARAM_TYPES = new Set([ + 'identifier', + 'optional_parameter', + 'splat_parameter', + 'hash_splat_parameter', + 'block_parameter', + 'keyword_parameter', +]); + +function extractRubyParameters(methodNode) { + const params = []; + const paramList = + methodNode.childForFieldName('parameters') || findChild(methodNode, 'method_parameters'); + if (!paramList) return params; + for (let i = 0; i < paramList.childCount; i++) { + const param = paramList.child(i); + if (!param || !RUBY_PARAM_TYPES.has(param.type)) continue; + let name; + if (param.type === 'identifier') { + name = param.text; + } else { + // Compound parameter types have an identifier child for the name + const id = findChild(param, 'identifier'); + name = id ? 
id.text : param.text; + } + params.push({ name, kind: 'parameter', line: param.startPosition.row + 1 }); + } + return params; +} + +function extractRubyBodyConstants(containerNode) { + const children = []; + const body = containerNode.childForFieldName('body') || findChild(containerNode, 'body'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child || child.type !== 'assignment') continue; + const left = child.childForFieldName('left'); + if (left && left.type === 'constant') { + children.push({ name: left.text, kind: 'constant', line: child.startPosition.row + 1 }); + } + } + return children; +} + +function extractRubyClassChildren(classNode) { + const children = []; + const body = classNode.childForFieldName('body') || findChild(classNode, 'body'); + if (!body) return children; + for (let i = 0; i < body.childCount; i++) { + const child = body.child(i); + if (!child || child.type !== 'assignment') continue; + const left = child.childForFieldName('left'); + if (!left) continue; + if (left.type === 'instance_variable') { + children.push({ name: left.text, kind: 'property', line: child.startPosition.row + 1 }); + } else if (left.type === 'constant') { + children.push({ name: left.text, kind: 'constant', line: child.startPosition.row + 1 }); + } + } + return children; +} diff --git a/src/extractors/rust.js b/src/extractors/rust.js index 5a8d6225..2a013481 100644 --- a/src/extractors/rust.js +++ b/src/extractors/rust.js @@ -1,4 +1,4 @@ -import { nodeEndLine } from './helpers.js'; +import { findChild, nodeEndLine } from './helpers.js'; /** * Extract symbols from Rust files. @@ -30,11 +30,13 @@ export function extractRustSymbols(tree, _filePath) { const implType = findCurrentImpl(node); const fullName = implType ? `${implType}.${nameNode.text}` : nameNode.text; const kind = implType ? 
'method' : 'function'; + const params = extractRustParameters(node.childForFieldName('parameters')); definitions.push({ name: fullName, kind, line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, }); } break; @@ -43,11 +45,13 @@ export function extractRustSymbols(tree, _filePath) { case 'struct_item': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const fields = extractStructFields(node); definitions.push({ name: nameNode.text, kind: 'struct', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: fields.length > 0 ? fields : undefined, }); } break; @@ -56,11 +60,26 @@ export function extractRustSymbols(tree, _filePath) { case 'enum_item': { const nameNode = node.childForFieldName('name'); if (nameNode) { + const variants = extractEnumVariants(node); definitions.push({ name: nameNode.text, kind: 'enum', line: node.startPosition.row + 1, endLine: nodeEndLine(node), + children: variants.length > 0 ? 
variants : undefined, + }); + } + break; + } + + case 'const_item': { + const nameNode = node.childForFieldName('name'); + if (nameNode) { + definitions.push({ + name: nameNode.text, + kind: 'constant', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), }); } break; @@ -170,6 +189,57 @@ export function extractRustSymbols(tree, _filePath) { return { definitions, calls, imports, classes, exports }; } +// ── Child extraction helpers ──────────────────────────────────────────────── + +function extractRustParameters(paramListNode) { + const params = []; + if (!paramListNode) return params; + for (let i = 0; i < paramListNode.childCount; i++) { + const param = paramListNode.child(i); + if (!param) continue; + if (param.type === 'self_parameter') { + params.push({ name: 'self', kind: 'parameter', line: param.startPosition.row + 1 }); + } else if (param.type === 'parameter') { + const pattern = param.childForFieldName('pattern'); + if (pattern) { + params.push({ name: pattern.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + } + return params; +} + +function extractStructFields(structNode) { + const fields = []; + const fieldList = + structNode.childForFieldName('body') || findChild(structNode, 'field_declaration_list'); + if (!fieldList) return fields; + for (let i = 0; i < fieldList.childCount; i++) { + const field = fieldList.child(i); + if (!field || field.type !== 'field_declaration') continue; + const nameNode = field.childForFieldName('name'); + if (nameNode) { + fields.push({ name: nameNode.text, kind: 'property', line: field.startPosition.row + 1 }); + } + } + return fields; +} + +function extractEnumVariants(enumNode) { + const variants = []; + const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_variant_list'); + if (!body) return variants; + for (let i = 0; i < body.childCount; i++) { + const variant = body.child(i); + if (!variant || variant.type !== 'enum_variant') continue; + const nameNode = 
variant.childForFieldName('name'); + if (nameNode) { + variants.push({ name: nameNode.text, kind: 'constant', line: variant.startPosition.row + 1 }); + } + } + return variants; +} + function extractRustUsePath(node) { if (!node) return []; diff --git a/src/index.js b/src/index.js index ea76dacc..27c88762 100644 --- a/src/index.js +++ b/src/index.js @@ -114,9 +114,15 @@ export { getActiveEngine, parseFileAuto, parseFilesAuto } from './parser.js'; // Query functions (data-returning) export { ALL_SYMBOL_KINDS, + CORE_EDGE_KINDS, + CORE_SYMBOL_KINDS, + childrenData, contextData, diffImpactData, diffImpactMermaid, + EVERY_EDGE_KIND, + EVERY_SYMBOL_KIND, + EXTENDED_SYMBOL_KINDS, explainData, exportsData, FALSE_POSITIVE_CALLER_THRESHOLD, @@ -135,6 +141,7 @@ export { pathData, queryNameData, rolesData, + STRUCTURAL_EDGE_KINDS, statsData, VALID_ROLES, whereData, diff --git a/src/mcp.js b/src/mcp.js index 78a20c6b..d48aefec 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -9,7 +9,7 @@ import { createRequire } from 'node:module'; import { findCycles } from './cycles.js'; import { findDbPath } from './db.js'; import { MCP_DEFAULTS, MCP_MAX_LIMIT } from './paginate.js'; -import { ALL_SYMBOL_KINDS, diffImpactMermaid, VALID_ROLES } from './queries.js'; +import { diffImpactMermaid, EVERY_EDGE_KIND, EVERY_SYMBOL_KIND, VALID_ROLES } from './queries.js'; const REPO_PROP = { repo: { @@ -47,13 +47,13 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind', }, to: { type: 'string', description: 'Target symbol for path mode (required in path mode)' }, edge_kinds: { type: 'array', - items: { type: 'string' }, + items: { type: 'string', enum: EVERY_EDGE_KIND }, description: 'Edge kinds to follow in path mode (default: ["calls"])', }, reverse: { @@ -143,7 +143,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter to a specific symbol kind', 
}, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, @@ -171,7 +171,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter to a specific symbol kind', }, no_source: { @@ -190,6 +190,22 @@ const BASE_TOOLS = [ required: ['name'], }, }, + { + name: 'symbol_children', + description: + 'List sub-declaration children of a symbol: parameters, properties, constants. Answers "what fields does this class have?" without reading source.', + inputSchema: { + type: 'object', + properties: { + name: { type: 'string', description: 'Function/method/class name (partial match)' }, + file: { type: 'string', description: 'Scope to file (partial match)' }, + kind: { type: 'string', enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + ...PAGINATION_PROPS, + }, + required: ['name'], + }, + }, { name: 'explain', description: @@ -409,7 +425,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter to a specific symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, @@ -575,7 +591,7 @@ const BASE_TOOLS = [ }, kind: { type: 'string', - enum: ALL_SYMBOL_KINDS, + enum: EVERY_SYMBOL_KIND, description: 'Filter symbol kind', }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, @@ -654,7 +670,7 @@ const BASE_TOOLS = [ }, depth: { type: 'number', description: 'Max depth for impact mode', default: 5 }, file: { type: 'string', description: 'Scope to file (partial match)' }, - kind: { type: 'string', enum: ALL_SYMBOL_KINDS, description: 'Filter by symbol kind' }, + kind: { type: 'string', enum: EVERY_SYMBOL_KIND, description: 'Filter by symbol kind' }, no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, ...PAGINATION_PROPS, }, @@ 
-755,6 +771,7 @@ export async function startMCPServer(customDbPath, options = {}) { fnImpactData, pathData, contextData, + childrenData, exportsData, explainData, whereData, @@ -887,6 +904,15 @@ export async function startMCPServer(customDbPath, options = {}) { offset: args.offset ?? 0, }); break; + case 'symbol_children': + result = childrenData(args.name, dbPath, { + file: args.file, + kind: args.kind, + noTests: args.no_tests, + limit: Math.min(args.limit ?? MCP_DEFAULTS.context, MCP_MAX_LIMIT), + offset: args.offset ?? 0, + }); + break; case 'explain': result = explainData(args.target, dbPath, { noTests: args.no_tests, diff --git a/src/parser.js b/src/parser.js index f70e67c2..54eb0820 100644 --- a/src/parser.js +++ b/src/parser.js @@ -142,6 +142,14 @@ function normalizeNativeSymbols(result) { maintainabilityIndex: d.complexity.maintainabilityIndex ?? null, } : null, + children: d.children?.length + ? d.children.map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.endLine ?? c.end_line ?? 
null, + })) + : undefined, })), calls: (result.calls || []).map((c) => ({ name: c.name, diff --git a/src/queries.js b/src/queries.js index 7fb28d9c..98632618 100644 --- a/src/queries.js +++ b/src/queries.js @@ -59,7 +59,9 @@ export const FALSE_POSITIVE_NAMES = new Set([ export const FALSE_POSITIVE_CALLER_THRESHOLD = 20; const FUNCTION_KINDS = ['function', 'method', 'class']; -export const ALL_SYMBOL_KINDS = [ + +// Original 10 kinds — used as default query scope +export const CORE_SYMBOL_KINDS = [ 'function', 'method', 'class', @@ -72,6 +74,39 @@ export const ALL_SYMBOL_KINDS = [ 'module', ]; +// Sub-declaration kinds (Phase 1) +export const EXTENDED_SYMBOL_KINDS = [ + 'parameter', + 'property', + 'constant', + // Phase 2 (reserved, not yet extracted): + // 'constructor', 'namespace', 'decorator', 'getter', 'setter', +]; + +// Full set for --kind validation and MCP enum +export const EVERY_SYMBOL_KIND = [...CORE_SYMBOL_KINDS, ...EXTENDED_SYMBOL_KINDS]; + +// Backward compat: ALL_SYMBOL_KINDS stays as the core 10 +export const ALL_SYMBOL_KINDS = CORE_SYMBOL_KINDS; + +// ── Edge kind constants ───────────────────────────────────────────── +// Core edge kinds — coupling and dependency relationships +export const CORE_EDGE_KINDS = [ + 'imports', + 'imports-type', + 'reexports', + 'calls', + 'extends', + 'implements', + 'contains', +]; + +// Structural edge kinds — parent/child and type relationships +export const STRUCTURAL_EDGE_KINDS = ['parameter_of', 'receiver']; + +// Full set for MCP enum and validation +export const EVERY_EDGE_KIND = [...CORE_EDGE_KINDS, ...STRUCTURAL_EDGE_KINDS]; + export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; /** @@ -190,6 +225,12 @@ export function kindIcon(kind) { return 'I'; case 'type': return 'T'; + case 'parameter': + return 'p'; + case 'property': + return '.'; + case 'constant': + return 'C'; default: return '-'; } @@ -325,12 +366,12 @@ export function moduleMapData(customDbPath, limit = 20, opts = 
{}) { const nodes = db .prepare(` SELECT n.*, - (SELECT COUNT(*) FROM edges WHERE source_id = n.id AND kind != 'contains') as out_edges, - (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind != 'contains') as in_edges + (SELECT COUNT(*) FROM edges WHERE source_id = n.id AND kind NOT IN ('contains', 'parameter_of', 'receiver')) as out_edges, + (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind NOT IN ('contains', 'parameter_of', 'receiver')) as in_edges FROM nodes n WHERE n.kind = 'file' ${testFilter} - ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind != 'contains') DESC + ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id AND kind NOT IN ('contains', 'parameter_of', 'receiver')) DESC LIMIT ? `) .all(limit); @@ -2224,6 +2265,17 @@ export function contextData(name, customDbPath, opts = {}) { /* table may not exist */ } + // Children (parameters, properties, constants) + let nodeChildren = []; + try { + nodeChildren = db + .prepare('SELECT name, kind, line, end_line FROM nodes WHERE parent_id = ? ORDER BY line') + .all(node.id) + .map((c) => ({ name: c.name, kind: c.kind, line: c.line, endLine: c.end_line || null })); + } catch { + /* parent_id column may not exist */ + } + return { name: node.name, kind: node.kind, @@ -2234,6 +2286,7 @@ export function contextData(name, customDbPath, opts = {}) { source, signature, complexity: complexityMetrics, + children: nodeChildren.length > 0 ? 
nodeChildren : undefined, callees, callers, relatedTests, @@ -2273,6 +2326,15 @@ export function context(name, customDbPath, opts = {}) { console.log(); } + // Children + if (r.children && r.children.length > 0) { + console.log(`## Children (${r.children.length})`); + for (const c of r.children) { + console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); + } + console.log(); + } + // Complexity if (r.complexity) { const cx = r.complexity; @@ -2345,6 +2407,69 @@ export function context(name, customDbPath, opts = {}) { } } +// ─── childrenData ─────────────────────────────────────────────────────── + +export function childrenData(name, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + + const nodes = findMatchingNodes(db, name, { noTests, file: opts.file, kind: opts.kind }); + if (nodes.length === 0) { + db.close(); + return { name, results: [] }; + } + + const results = nodes.map((node) => { + let children; + try { + children = db + .prepare('SELECT name, kind, line, end_line FROM nodes WHERE parent_id = ? 
ORDER BY line') + .all(node.id); + } catch { + children = []; + } + if (noTests) children = children.filter((c) => !isTestFile(c.file || node.file)); + return { + name: node.name, + kind: node.kind, + file: node.file, + line: node.line, + children: children.map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.end_line || null, + })), + }; + }); + + db.close(); + const base = { name, results }; + return paginateResult(base, 'results', { limit: opts.limit, offset: opts.offset }); +} + +export function children(name, customDbPath, opts = {}) { + const data = childrenData(name, customDbPath, opts); + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + if (data.results.length === 0) { + console.log(`No symbol matching "${name}"`); + return; + } + for (const r of data.results) { + console.log(`\n${kindIcon(r.kind)} ${r.name} ${r.file}:${r.line}`); + if (r.children.length === 0) { + console.log(' (no children)'); + } else { + for (const c of r.children) { + console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); + } + } + } +} + // ─── explainData ──────────────────────────────────────────────────────── function isFileLikeTarget(target) { diff --git a/src/structure.js b/src/structure.js index a4c28f41..6169795d 100644 --- a/src/structure.js +++ b/src/structure.js @@ -34,8 +34,11 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director `); // Clean previous directory nodes/edges (idempotent rebuild) + // Scope contains-edge delete to directory-sourced edges only, + // preserving symbol-level contains edges (file→def, class→method, etc.) 
db.exec(` - DELETE FROM edges WHERE kind = 'contains'; + DELETE FROM edges WHERE kind = 'contains' + AND source_id IN (SELECT id FROM nodes WHERE kind = 'directory'); DELETE FROM node_metrics; DELETE FROM nodes WHERE kind = 'directory'; `); diff --git a/tests/integration/build-parity.test.js b/tests/integration/build-parity.test.js index 94097e7f..7811f6df 100644 --- a/tests/integration/build-parity.test.js +++ b/tests/integration/build-parity.test.js @@ -76,14 +76,38 @@ describeOrSkip('Build parity: native vs WASM', () => { }); it('produces identical nodes', () => { + // Filter out extended kinds (parameter, property, constant) — WASM extracts + // these as children but native engine defers child extraction for now. + const EXTENDED = new Set(['parameter', 'property', 'constant']); + const filterCore = (nodes) => nodes.filter((n) => !EXTENDED.has(n.kind)); + const wasmGraph = readGraph(path.join(wasmDir, '.codegraph', 'graph.db')); const nativeGraph = readGraph(path.join(nativeDir, '.codegraph', 'graph.db')); - expect(nativeGraph.nodes).toEqual(wasmGraph.nodes); + expect(filterCore(nativeGraph.nodes)).toEqual(filterCore(wasmGraph.nodes)); }); it('produces identical edges', () => { - const wasmGraph = readGraph(path.join(wasmDir, '.codegraph', 'graph.db')); - const nativeGraph = readGraph(path.join(nativeDir, '.codegraph', 'graph.db')); - expect(nativeGraph.edges).toEqual(wasmGraph.edges); + // Filter out edges involving extended-kind nodes (parameter, property, constant) + // — WASM extracts children but native engine defers child extraction for now. 
+ function readCoreEdges(dbPath) { + const db = new Database(dbPath, { readonly: true }); + const edges = db + .prepare(` + SELECT n1.name AS source_name, n2.name AS target_name, e.kind + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.kind NOT IN ('parameter', 'property', 'constant') + AND n2.kind NOT IN ('parameter', 'property', 'constant') + ORDER BY n1.name, n2.name, e.kind + `) + .all(); + db.close(); + return edges; + } + + const wasmEdges = readCoreEdges(path.join(wasmDir, '.codegraph', 'graph.db')); + const nativeEdges = readCoreEdges(path.join(nativeDir, '.codegraph', 'graph.db')); + expect(nativeEdges).toEqual(wasmEdges); }); }); diff --git a/tests/integration/queries.test.js b/tests/integration/queries.test.js index e991991c..98b9e380 100644 --- a/tests/integration/queries.test.js +++ b/tests/integration/queries.test.js @@ -104,6 +104,24 @@ beforeAll(() => { // Low-confidence call edge for quality tests insertEdge(db, formatResponse, validateToken, 'calls', 0.3); + // ── Phase 2: expanded node/edge types ────────────────────────────── + // Class with method and property children + const userService = insertNode(db, 'UserService', 'class', 'auth.js', 40); + const getUser = insertNode(db, 'UserService.getUser', 'method', 'auth.js', 42); + const dbConn = insertNode(db, 'dbConn', 'property', 'auth.js', 41); + const userId = insertNode(db, 'userId', 'parameter', 'auth.js', 10); + + // Symbol-level contains edges (file → class, class → method/property) + insertEdge(db, fAuth, userService, 'contains'); + insertEdge(db, userService, getUser, 'contains'); + insertEdge(db, userService, dbConn, 'contains'); + + // parameter_of edge (parameter → owning function) + insertEdge(db, userId, authenticate, 'parameter_of'); + + // receiver edge (caller → receiver type) + insertEdge(db, handleRoute, userService, 'receiver', 0.7); + // File hashes (for fileHash exposure) for (const f of ['auth.js', 'middleware.js', 
'routes.js', 'utils.js', 'auth.test.js']) { db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run( @@ -449,7 +467,7 @@ describe('explainData', () => { const r = data.results[0]; expect(r.file).toBe('auth.js'); - expect(r.symbolCount).toBe(2); + expect(r.symbolCount).toBe(6); // Both authenticate and validateToken are called from middleware.js expect(r.publicApi.map((s) => s.name)).toContain('authenticate'); expect(r.publicApi.map((s) => s.name)).toContain('validateToken'); @@ -662,6 +680,73 @@ describe('noTests filtering', () => { }); }); +// ─── Expanded edge types (Phase 2) ───────────────────────────────────── + +describe('expanded edge types', () => { + test('statsData counts new edge kinds', () => { + const data = statsData(dbPath); + expect(data.edges.byKind.contains).toBeGreaterThanOrEqual(3); + expect(data.edges.byKind.parameter_of).toBeGreaterThanOrEqual(1); + expect(data.edges.byKind.receiver).toBeGreaterThanOrEqual(1); + }); + + test('moduleMapData excludes structural edges from coupling', () => { + const data = moduleMapData(dbPath); + // auth.js has contains, parameter_of, receiver edges but they should + // not inflate coupling counts — only imports/calls/etc. 
count + const authNode = data.topNodes.find((n) => n.file === 'auth.js'); + expect(authNode).toBeDefined(); + // in_edges should not include contains/parameter_of/receiver + // auth.js is imported by middleware.js and auth.test.js → in_edges = 2 + expect(authNode.inEdges).toBe(2); + }); + + test('queryNameData returns new edge kinds in callers/callees', () => { + // authenticate has a parameter_of edge from userId + const authData = queryNameData('authenticate', dbPath); + const fn = authData.results.find((r) => r.kind === 'function' && r.name === 'authenticate'); + expect(fn).toBeDefined(); + const paramCaller = fn.callers.find((c) => c.edgeKind === 'parameter_of'); + expect(paramCaller).toBeDefined(); + expect(paramCaller.name).toBe('userId'); + + // UserService has contains callees (method and property) + const usData = queryNameData('UserService', dbPath); + const cls = usData.results.find((r) => r.kind === 'class' && r.name === 'UserService'); + expect(cls).toBeDefined(); + const containsCallees = cls.callees.filter((c) => c.edgeKind === 'contains'); + expect(containsCallees.length).toBeGreaterThanOrEqual(2); + const names = containsCallees.map((c) => c.name); + expect(names).toContain('UserService.getUser'); + expect(names).toContain('dbConn'); + + // UserService has a receiver caller (handleRoute) + const receiverCaller = cls.callers.find((c) => c.edgeKind === 'receiver'); + expect(receiverCaller).toBeDefined(); + expect(receiverCaller.name).toBe('handleRoute'); + }); + + test('pathData traverses contains edges', () => { + const data = pathData('UserService', 'UserService.getUser', dbPath, { + edgeKinds: ['contains'], + }); + expect(data.found).toBe(true); + expect(data.hops).toBe(1); + expect(data.path[0].name).toBe('UserService'); + expect(data.path[1].name).toBe('UserService.getUser'); + expect(data.path[1].edgeKind).toBe('contains'); + }); + + test('pathData traverses receiver edges', () => { + const data = pathData('handleRoute', 'UserService', dbPath, 
{ + edgeKinds: ['receiver'], + }); + expect(data.found).toBe(true); + expect(data.hops).toBe(1); + expect(data.path[1].edgeKind).toBe('receiver'); + }); +}); + // ─── Stable symbol schema conformance ────────────────────────────────── const STABLE_FIELDS = ['name', 'kind', 'file', 'line', 'endLine', 'role', 'fileHash']; diff --git a/tests/parsers/csharp.test.js b/tests/parsers/csharp.test.js index f49913d2..e8031262 100644 --- a/tests/parsers/csharp.test.js +++ b/tests/parsers/csharp.test.js @@ -108,7 +108,7 @@ public class Foo {}`); public string Name { get; set; } }`); expect(symbols.definitions).toContainEqual( - expect.objectContaining({ name: 'User.Name', kind: 'method' }), + expect.objectContaining({ name: 'User.Name', kind: 'property' }), ); }); }); diff --git a/tests/parsers/extended-kinds.test.js b/tests/parsers/extended-kinds.test.js new file mode 100644 index 00000000..266ac44a --- /dev/null +++ b/tests/parsers/extended-kinds.test.js @@ -0,0 +1,504 @@ +/** + * Extended kind extraction tests (parameters, properties, constants). + * + * Validates that each language extractor populates the `children` array + * on definitions with parameter, property, and constant entries. 
+ */ +import { beforeAll, describe, expect, it } from 'vitest'; +import { + createParsers, + extractCSharpSymbols, + extractGoSymbols, + extractJavaSymbols, + extractPHPSymbols, + extractPythonSymbols, + extractRubySymbols, + extractRustSymbols, + extractSymbols, +} from '../../src/parser.js'; + +// ── JavaScript ────────────────────────────────────────────────────────────── + +describe('JavaScript extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseJS(code) { + const parser = parsers.get('javascript'); + const tree = parser.parse(code); + return extractSymbols(tree, 'test.js'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function declarations', () => { + const symbols = parseJS('function greet(name, age) { }'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]), + ); + }); + + it('extracts parameters from arrow functions', () => { + const symbols = parseJS('const add = (a, b) => a + b;'); + const add = symbols.definitions.find((d) => d.name === 'add'); + expect(add).toBeDefined(); + expect(add.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'a', kind: 'parameter' }), + expect.objectContaining({ name: 'b', kind: 'parameter' }), + ]), + ); + }); + + it('extracts parameters from class methods', () => { + const symbols = parseJS('class Foo { bar(x, y) {} }'); + const bar = symbols.definitions.find((d) => d.name === 'Foo.bar'); + expect(bar).toBeDefined(); + expect(bar.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'parameter' }), + expect.objectContaining({ name: 'y', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', 
() => { + it('extracts class field properties', () => { + const symbols = parseJS('class User { name; age; greet() {} }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts constant definitions from const declarations', () => { + const symbols = parseJS('const MAX = 100;'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MAX', kind: 'constant' }), + ); + }); + }); +}); + +// ── Python ────────────────────────────────────────────────────────────────── + +describe('Python extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parsePython(code) { + const parser = parsers.get('python'); + if (!parser) throw new Error('Python parser not available'); + const tree = parser.parse(code); + return extractPythonSymbols(tree, 'test.py'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function definitions', () => { + const symbols = parsePython('def greet(name, age=30):\n pass'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts properties from __init__ self assignments', () => { + const symbols = parsePython( + ['class User:', ' def __init__(self, x, y):', ' self.x = x', ' self.y = y'].join( + '\n', + ), + ); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + 
expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'property' }), + expect.objectContaining({ name: 'y', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts module-level UPPER_CASE constants', () => { + const symbols = parsePython('MAX_RETRIES = 3'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MAX_RETRIES', kind: 'constant' }), + ); + }); + }); +}); + +// ── Go ────────────────────────────────────────────────────────────────────── + +describe('Go extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseGo(code) { + const parser = parsers.get('go'); + if (!parser) throw new Error('Go parser not available'); + const tree = parser.parse(code); + return extractGoSymbols(tree, 'test.go'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function declarations', () => { + const symbols = parseGo('package main\nfunc add(a int, b int) int { return a + b }'); + const add = symbols.definitions.find((d) => d.name === 'add'); + expect(add).toBeDefined(); + expect(add.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'a', kind: 'parameter' }), + expect.objectContaining({ name: 'b', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts struct fields as properties', () => { + const symbols = parseGo('package main\ntype User struct {\n Name string\n Age int\n}'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Name', kind: 'property' }), + expect.objectContaining({ name: 'Age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts const declarations', () => { + const symbols = 
parseGo('package main\nconst MaxRetries = 3'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MaxRetries', kind: 'constant' }), + ); + }); + }); +}); + +// ── Rust ───────────────────────────────────────────────────────────────────── + +describe('Rust extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseRust(code) { + const parser = parsers.get('rust'); + if (!parser) throw new Error('Rust parser not available'); + const tree = parser.parse(code); + return extractRustSymbols(tree, 'test.rs'); + } + + describe('parameter extraction', () => { + it('extracts parameters from function declarations', () => { + const symbols = parseRust('fn add(a: i32, b: i32) -> i32 { a + b }'); + const add = symbols.definitions.find((d) => d.name === 'add'); + expect(add).toBeDefined(); + expect(add.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'a', kind: 'parameter' }), + expect.objectContaining({ name: 'b', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts struct fields as properties', () => { + const symbols = parseRust('struct User { name: String, age: u32 }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts const item declarations', () => { + const symbols = parseRust('const MAX: i32 = 100;'); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MAX', kind: 'constant' }), + ); + }); + + it('extracts enum variants as constant children', () => { + const symbols = parseRust('enum Color { Red, Green, Blue }'); + const color = symbols.definitions.find((d) 
=> d.name === 'Color'); + expect(color).toBeDefined(); + expect(color.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Red', kind: 'constant' }), + expect.objectContaining({ name: 'Green', kind: 'constant' }), + expect.objectContaining({ name: 'Blue', kind: 'constant' }), + ]), + ); + }); + }); +}); + +// ── Java ───────────────────────────────────────────────────────────────────── + +describe('Java extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseJava(code) { + const parser = parsers.get('java'); + if (!parser) throw new Error('Java parser not available'); + const tree = parser.parse(code); + return extractJavaSymbols(tree, 'Test.java'); + } + + describe('parameter extraction', () => { + it('extracts method parameters', () => { + const symbols = parseJava('class Foo { void bar(int x, String y) {} }'); + const bar = symbols.definitions.find((d) => d.name === 'Foo.bar'); + expect(bar).toBeDefined(); + expect(bar.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'parameter' }), + expect.objectContaining({ name: 'y', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts class field declarations as properties', () => { + const symbols = parseJava('class User { String name; int age; }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts enum constants as children', () => { + const symbols = parseJava('enum Status { ACTIVE, INACTIVE }'); + const status = symbols.definitions.find((d) => d.name === 'Status'); + expect(status).toBeDefined(); + 
expect(status.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'ACTIVE', kind: 'constant' }), + expect.objectContaining({ name: 'INACTIVE', kind: 'constant' }), + ]), + ); + }); + }); +}); + +// ── C# ────────────────────────────────────────────────────────────────────── + +describe('C# extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseCSharp(code) { + const parser = parsers.get('csharp'); + if (!parser) throw new Error('C# parser not available'); + const tree = parser.parse(code); + return extractCSharpSymbols(tree, 'Test.cs'); + } + + describe('parameter extraction', () => { + it('extracts method parameters', () => { + const symbols = parseCSharp('class Foo { void Bar(int x, string y) {} }'); + const bar = symbols.definitions.find((d) => d.name === 'Foo.Bar'); + expect(bar).toBeDefined(); + expect(bar.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'x', kind: 'parameter' }), + expect.objectContaining({ name: 'y', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts class field declarations as properties', () => { + const symbols = parseCSharp('class User { string Name; int Age; }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Name', kind: 'property' }), + expect.objectContaining({ name: 'Age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts enum member declarations as constants', () => { + const symbols = parseCSharp('enum Status { Active, Inactive }'); + const status = symbols.definitions.find((d) => d.name === 'Status'); + expect(status).toBeDefined(); + expect(status.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Active', kind: 'constant' 
}), + expect.objectContaining({ name: 'Inactive', kind: 'constant' }), + ]), + ); + }); + }); +}); + +// ── Ruby ───────────────────────────────────────────────────────────────────── + +describe('Ruby extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseRuby(code) { + const parser = parsers.get('ruby'); + if (!parser) throw new Error('Ruby parser not available'); + const tree = parser.parse(code); + return extractRubySymbols(tree, 'test.rb'); + } + + describe('parameter extraction', () => { + it('extracts method parameters', () => { + const symbols = parseRuby('def greet(name, age)\nend'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts instance variable assignments as properties', () => { + const symbols = parseRuby('class User\n @name = nil\nend'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([expect.objectContaining({ name: '@name', kind: 'property' })]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts class-level constant assignments', () => { + const symbols = parseRuby('class Foo\n MAX = 100\nend'); + const foo = symbols.definitions.find((d) => d.name === 'Foo'); + expect(foo).toBeDefined(); + expect(foo.children).toEqual( + expect.arrayContaining([expect.objectContaining({ name: 'MAX', kind: 'constant' })]), + ); + }); + }); +}); + +// ── PHP ────────────────────────────────────────────────────────────────────── + +describe('PHP extended kinds', () => { + let parsers; + + beforeAll(async () => { + parsers = await createParsers(); + 
}); + + function parsePHP(code) { + const parser = parsers.get('php'); + if (!parser) throw new Error('PHP parser not available'); + const tree = parser.parse(code); + return extractPHPSymbols(tree, 'test.php'); + } + + describe('parameter extraction', () => { + it('extracts function parameters', () => { + const symbols = parsePHP('<?php function greet($name, $age) {}'); + const greet = symbols.definitions.find((d) => d.name === 'greet'); + expect(greet).toBeDefined(); + expect(greet.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: '$name', kind: 'parameter' }), + expect.objectContaining({ name: '$age', kind: 'parameter' }), + ]), + ); + }); + }); + + describe('property extraction', () => { + it('extracts class property declarations', () => { + const symbols = parsePHP('<?php class User { public $name; public $age; }'); + const user = symbols.definitions.find((d) => d.name === 'User'); + expect(user).toBeDefined(); + expect(user.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: '$name', kind: 'property' }), + expect.objectContaining({ name: '$age', kind: 'property' }), + ]), + ); + }); + }); + + describe('constant extraction', () => { + it('extracts enum case declarations as constants', () => { + const symbols = parsePHP('<?php enum Status { case Active; case Inactive; }'); + const status = symbols.definitions.find((d) => d.name === 'Status'); + expect(status).toBeDefined(); + expect(status.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Active', kind: 'constant' }), + expect.objectContaining({ name: 'Inactive', kind: 'constant' }), + ]), + ); + }); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 4d27259f..4dc2c43a 100644 --- a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -17,6 +17,7 @@ const ALL_TOOL_NAMES = [ 'module_map', 'fn_impact', 'context', + 'symbol_children', 'explain', 'where', 'diff_impact', @@ -257,6 +258,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(() => ({ name: 'test', results: [] })), fnImpactData: vi.fn(() => ({ name: 'test', results: [] })), contextData: vi.fn(() => ({ name: 'test', results: [] })), + childrenData: vi.fn(() => ({ name: 'test', results: [] })),
explainData: vi.fn(() => ({ target: 'test', kind: 'function', results: [] })), exportsData: vi.fn(() => ({ file: 'test', @@ -327,6 +329,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: fnDepsMock, fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -387,6 +390,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: fnImpactMock, contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -444,6 +448,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -504,6 +509,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -565,6 +571,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: fnDepsMock, fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -624,6 +631,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -677,6 +685,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -732,6 +741,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: fnDepsMock, fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -797,6 +807,7 @@ describe('startMCPServer 
handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -855,6 +866,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -904,6 +916,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -953,6 +966,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -1002,6 +1016,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), @@ -1052,6 +1067,7 @@ describe('startMCPServer handler dispatch', () => { fnDepsData: vi.fn(), fnImpactData: vi.fn(), contextData: vi.fn(), + childrenData: vi.fn(), explainData: vi.fn(), exportsData: vi.fn(), whereData: vi.fn(), From 8a0b8cf1311c5380fe835ef6898641903f599c93 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 2 Mar 2026 22:29:26 -0700 Subject: [PATCH 30/30] fix: include cfg_edges and cfg_blocks in full rebuild cleanup The full rebuild DELETE chain was missing the two CFG tables, which would leave orphaned CFG data after a fresh build. 
Impact: 1 function changed, 0 affected --- src/builder.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/builder.js b/src/builder.js index 8b51e300..c966de7d 100644 --- a/src/builder.js +++ b/src/builder.js @@ -557,7 +557,7 @@ export async function buildGraph(rootDir, opts = {}) { if (isFullBuild) { const deletions = - 'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; + 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; db.exec( hasEmbeddings ? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;`