From 7377fd903c0131828067c3f994aa1c7a534b46f1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:04:09 -0700 Subject: [PATCH 1/7] fix(builder): preserve structure data during incremental builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit buildStructure() clears all contains edges and directory nodes before rebuilding, but during incremental builds it only received the changed files — causing all unchanged files to lose their directory containment edges and metrics. This made `codegraph structure` show "0 files" for most directories after any incremental build. Fix: before calling buildStructure, load all existing file nodes and their symbols from the DB so the complete file set is available for structure rebuild. Closes #89 Impact: 1 functions changed, 0 affected --- src/builder.js | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/builder.js b/src/builder.js index d48d9b8a..724002b0 100644 --- a/src/builder.js +++ b/src/builder.js @@ -827,6 +827,43 @@ export async function buildGraph(rootDir, opts = {}) { } } + // For incremental builds, buildStructure needs ALL files (not just changed ones) + // because it clears and rebuilds all contains edges and directory metrics. + // Load unchanged files from the DB so structure data stays complete. + if (!isFullBuild) { + const existingFiles = db.prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'").all(); + const defsByFile = db.prepare( + "SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file' AND kind != 'directory'", + ); + const importsByFile = db.prepare( + `SELECT DISTINCT n2.file AS source FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.file = ? 
AND e.kind = 'imports'`, + ); + let loadedFromDb = 0; + for (const { file: relPath } of existingFiles) { + if (!fileSymbols.has(relPath)) { + fileSymbols.set(relPath, { + definitions: defsByFile.all(relPath), + imports: importsByFile.all(relPath), + exports: [], + }); + loadedFromDb++; + } + if (!lineCountMap.has(relPath)) { + const absPath = path.join(rootDir, relPath); + try { + const content = fs.readFileSync(absPath, 'utf-8'); + lineCountMap.set(relPath, content.split('\n').length); + } catch { + lineCountMap.set(relPath, 0); + } + } + } + debug(`Structure: ${fileSymbols.size} files (${loadedFromDb} loaded from DB)`); + } + // Build directory structure, containment edges, and metrics const relDirs = new Set(); for (const absDir of discoveredDirs) { From ffcf2bde95364f0998c7d4fafa49f310c095f5d0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:06:56 -0700 Subject: [PATCH 2/7] docs: add dogfood report for v2.3.0 --- generated/DOGFOOD_REPORT_v2.3.0.md | 351 +++++++++++++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 generated/DOGFOOD_REPORT_v2.3.0.md diff --git a/generated/DOGFOOD_REPORT_v2.3.0.md b/generated/DOGFOOD_REPORT_v2.3.0.md new file mode 100644 index 00000000..18d09810 --- /dev/null +++ b/generated/DOGFOOD_REPORT_v2.3.0.md @@ -0,0 +1,351 @@ +# Dogfooding Report: @optave/codegraph@2.3.0 + +**Date:** 2026-02-25 +**Platform:** Windows 11 Pro (win32-x64), Node.js v22.18.0 +**Native binary:** @optave/codegraph-win32-x64-msvc@2.3.0 +**Active engine:** native (v0.1.0), auto-detected +**Target repo:** codegraph itself (99 files, 2 languages: JS 80, Rust 19) + +--- + +## 1. 
Setup & Installation + +| Step | Result | +|------|--------| +| `npm install @optave/codegraph@2.3.0` | 207 packages, 6s, 0 vulnerabilities | +| `npx codegraph --version` | `2.3.0` | +| Native binary package | `@optave/codegraph-win32-x64-msvc@2.3.0` present | +| `optionalDependencies` pinned | All 4 platform packages pinned to `2.3.0` | +| `npx codegraph info` | `engine: native (v0.1.0)` | + +Installation is clean. Native binary loads correctly. All platform packages properly version-pinned. + +--- + +## 2. Cold Start (Pre-Build) + +Every command was tested against a non-existent database path before building: + +| Command | Status | Message | +|---------|--------|---------| +| `query buildGraph` | PASS | "No codegraph database found... Run `codegraph build` first" | +| `stats` | PASS | Same graceful message | +| `cycles` | PASS | Same graceful message | +| `export` | PASS | Same graceful message | +| `embed` | PASS | Same graceful message (note: `--db` not supported on `embed`) | +| `search "test"` | PASS | Same graceful message | +| `map` | PASS | Same graceful message | +| `deps src/cli.js` | PASS | Same graceful message | +| `fn buildGraph` | PASS | Same graceful message | +| `fn-impact buildGraph` | PASS | Same graceful message | +| `context buildGraph` | PASS | Same graceful message | +| `explain src/cli.js` | PASS | Same graceful message | +| `where buildGraph` | PASS | Same graceful message | +| `impact src/cli.js` | PASS | Same graceful message | +| `diff-impact` | PASS | Same graceful message | +| `structure` | PASS | Same graceful message | +| `hotspots` | PASS | Same graceful message | +| `models` | PASS | Lists 7 models (no DB needed) | +| `registry list` | PASS | Lists registered repos (no DB needed) | +| `info` | PASS | Engine diagnostics (no DB needed) | + +**All 20 commands pass cold-start gracefully.** No crashes, no stack traces. + +--- + +## 3. 
Full Command Sweep + +### Build + +``` +codegraph build --engine native --no-incremental --verbose +``` +- 99 files parsed, 576 nodes, 787 edges (build output) +- Stats: 898 edges (includes 111 `contains` edges added by structure analysis) +- Time: sub-second + +### Query Commands + +| Command | Flags Tested | Status | Notes | +|---------|-------------|--------|-------| +| `query ` | `-T`, `-j`, `--db` | PASS | `--depth` not supported (not in help) | +| `impact ` | default | PASS | Shows 6 transitive dependents | +| `map` | `-n 10`, `-j` | PASS | Coupling score present in JSON | +| `stats` | `-j` | PASS | Valid JSON, 82/100 quality | +| `deps ` | default | PASS | Shows imports and importers | +| `fn ` | `--depth 2`, `-f`, `-k`, `-T`, `-j` | PASS | All flags work | +| `fn-impact ` | `-T`, `-j` | PASS | 5 transitive dependents | +| `context ` | `--depth`, `--no-source`, `--with-test-source`, `-j` | PASS | Source included by default | +| `explain ` | file path, function name, `--depth 2`, `-j` | PASS | Structural summary accurate | +| `where ` | default, `-f `, `-j` | PASS | Fast lookup, file overview mode works | +| `diff-impact [ref]` | `main`, `HEAD`, `--staged`, `--format mermaid`, `-j` | PASS | Mermaid output generates flowchart | +| `cycles` | default, `--functions` | PASS | 1 file-level, 2 function-level cycles | +| `structure [dir]` | `.`, `--depth 1`, `--sort cohesion/fan-in`, `-j` | PASS | `.` filter works (v2.2.0 bug fixed) | +| `hotspots` | `--metric fan-in/fan-out/density/coupling`, `--level file/directory`, `-n`, `-j` | PASS | All metrics and levels work | + +### Export Commands + +| Command | Flags | Status | Notes | +|---------|-------|--------|-------| +| `export -f dot` | default, `--functions`, `--min-confidence` | PASS | Valid DOT graph | +| `export -f mermaid` | default | PASS | Valid Mermaid syntax | +| `export -f json` | `-o ` | PASS | 69KB JSON file written | + +### Embedding & Search + +| Command | Flags | Status | Notes | 
+|---------|-------|--------|-------| +| `models` | default | PASS | 7 models listed | +| `embed` | `-m minilm`, `--strategy structured` | PASS | 434 symbols embedded | +| `embed` | `--strategy source` | PASS | 434 symbols, 111 truncation warnings | +| `search` | `-n`, `--min-score`, `-k`, `--file`, multi-query `;` | PASS | Relevant results, buildGraph tops "build graph" query | +| `search --json` | N/A | MISSING | `-j/--json` flag not available on search | + +### Infrastructure Commands + +| Command | Status | Notes | +|---------|--------|-------| +| `info` | PASS | Shows version, engine, platform | +| `--version` | PASS | `2.3.0` | +| `registry list` | PASS | Lists registered repos, `-j` works | +| `registry add` | PASS | Custom name with `-n` | +| `registry remove` | PASS | Removes by name | +| `registry prune --ttl 0` | PASS | Prunes expired entries | +| `mcp` (single-repo) | PASS | 16 tools, no `list_repos`, no `repo` param | +| `mcp --multi-repo` | PASS | 17 tools, `list_repos` present, `repo` param on tools | + +### Edge Cases + +| Scenario | Result | Status | +|----------|--------|--------| +| Non-existent symbol: `query nonexistent` | "No results" | PASS | +| Non-existent file: `deps nonexistent.js` | "No file matching" | PASS | +| Non-existent function: `fn nonexistent` | "No function/method/class matching" | PASS | +| `--kind invalid` | "Invalid kind... Valid: function, method, ..." | PASS | +| `search` with no embeddings | "No embeddings found. Run `codegraph embed` first." 
| PASS | +| `--json` on all commands | Valid JSON (tested: stats, map, hotspots, fn, context, where, explain, structure, registry) | PASS | +| `--no-tests` effect | Reduces callers from 7 to 4 for buildGraph | PASS | +| Pipe output: `map --json 2>/dev/null` | Clean JSON on stdout | PASS | +| `build --no-incremental` | Force full rebuild | PASS | +| `build --verbose` | Per-file parsing details | PASS | + +### JSON Output Validation + +All commands that support `-j/--json` produce valid JSON: +- `stats`, `map`, `hotspots`, `fn`, `fn-impact`, `context`, `where`, `explain`, `structure`, `query`, `registry list`, `diff-impact`, `export -f json` + +--- + +## 4. Rebuild & Staleness + +### Incremental No-Op +``` +Graph is up to date. +``` +PASS — no files re-parsed when nothing changed. + +### Three-Tier Change Detection +- **Touch only (mtime change):** "Self-healed mtime/size for 1 files" — content hash verified, no re-parse. PASS. +- **Content change:** Tier 1 detects mtime+size change → Tier 2 confirms hash change → 1 file re-parsed. PASS. + +### Embed → Rebuild → Search Pipeline +1. Build embeddings (434 symbols) → search "build graph" → buildGraph ranks #1 (46.4%). PASS. +2. Touch file → rebuild → search still works (embeddings remain valid for unchanged symbols). PASS. +3. Delete DB → rebuild from scratch → "No embeddings table found" on search. PASS. + +### DB Migrations +Deleting `graph.db` and rebuilding triggers migrations v1→v4. PASS. + +### Incremental Build Structure Bug (FIXED) +See Bug #1 below. Incremental builds corrupted structure data by clearing ALL `contains` edges but only rebuilding for changed files. **Fixed in this session.** + +--- + +## 5. 
Engine Comparison + +| Metric | Native | WASM | Delta | +|--------|--------|------|-------| +| Nodes | 576 | 576 | 0 | +| Edges (total) | 898 | 898 | 0 | +| Calls | 647 | 647 | 0 | +| Imports | 115 | 115 | 0 | +| Contains | 111 | 111 | 0 | +| Reexports | 25 | 25 | 0 | +| Files | 99 | 99 | 0 | +| Quality Score | 82/100 | 82/100 | 0 | +| Caller Coverage | 56.6% | 56.6% | 0% | +| Call Confidence | 97.8% | 97.8% | 0% | +| Cycles (file) | 1 | 1 | 0 | +| Cycles (fn) | 2 | 2 | 0 | + +**Perfect engine parity.** Both engines produce identical results across all metrics. This is a significant improvement over v2.1.0 which had parity gaps. + +--- + +## 6. Release-Specific Tests + +### v2.3.0 CHANGELOG Features + +| Feature | Test | Result | +|---------|------|--------| +| Graph-enriched embedding strategy (`--strategy structured`) | `embed -m minilm --strategy structured` → 434 symbols, ~100 tokens avg | PASS | +| `--strategy source` option | `embed --strategy source` → 434 symbols, 111 truncated | PASS | +| Context overflow detection | Warning: "111 symbol(s) exceeded model context window (256 tokens)" | PASS | +| `excludeTests` config option | `{ "query": { "excludeTests": true } }` → test files hidden | PASS | +| `--include-tests` CLI override | Overrides config, shows test files | PASS | +| `--depth` on `explain` | `explain src/builder.js --depth 2` → includes recursive deps | PASS | +| Coupling score in `map` | `map -j` → `topNodes[].coupling` field present | PASS | +| Mermaid output in `diff-impact` | `diff-impact main --format mermaid` → flowchart output | PASS | +| `--min-confidence` on export | `export -f dot --min-confidence 0.5` → filters low-confidence edges | PASS | +| `/dogfood` skill | Currently running! 
| PASS | + +### v2.3.0 Bug Fixes Verified + +| Fix | Test | Result | +|-----|------|--------| +| Graceful error for `cycles`/`export`/`embed` with no DB | All tested pre-build | PASS | +| Default model changed to minilm | `embed` without `-m` uses minilm | PASS | +| `splitIdentifier` camelCase fix | Search "build graph" → `buildGraph` ranks high | PASS | +| `structure .` treated as no filter | `structure .` shows full project | PASS | +| Engine status messages to stderr | `build 2>/dev/null` produces no stdout | PASS | +| `--with-test-source` rename | `context --with-test-source` works, old `--include-test-source` gone | PASS | +| Embedding invalidation on node deletion | Orphan warning mechanism in builder.js | VERIFIED in code | + +--- + +## 7. Additional Testing + +### Programmatic API + +ESM import of `@optave/codegraph` exports all expected symbols: +- Functions: `buildGraph`, `loadConfig`, `openDb`, `findDbPath`, `contextData`, `explainData`, `whereData`, `fnDepsData`, `diffImpactData`, `statsData`, `isNativeAvailable`, and 40+ more +- Constants: `ALL_SYMBOL_KINDS` (10 kinds), `EXTENSIONS` (15 extensions), `MODELS` (7 models), `IGNORE_DIRS`, `FALSE_POSITIVE_NAMES` +- CJS `require()` correctly fails with `ERR_PACKAGE_PATH_NOT_EXPORTED` (ESM-only package) + +### MCP Server + +| Mode | Tools | `list_repos` | `repo` param | Status | +|------|-------|-------------|-------------|--------| +| Single-repo (default) | 16 | absent | absent | PASS | +| `--multi-repo` | 17 | present | present | PASS | + +MCP initializes via JSON-RPC, responds to `tools/list`, correct tool schemas. + +### Config & Registry + +- `.codegraphrc.json` with `query.excludeTests: true` → works +- `registry add/list/remove/prune` → all work +- `registry list -j` → valid JSON with timestamps + +### Version Upgrade Path +- Incremental build on a graph from a previous version says "Graph is up to date" even if the engine version changed. 
Users should run `--no-incremental` after upgrading to ensure consistent data. (Not a bug per se, but worth documenting.) + +--- + +## 8. Bugs Found + +### BUG 1: Incremental builds corrupt structure/contains edges (Medium) +- **Issue:** [#89](https://github.com/optave/codegraph/issues/89) +- **PR:** [#91](https://github.com/optave/codegraph/pull/91) +- **Symptoms:** After any incremental build, `codegraph structure` shows most directories as "0 files, 0 symbols". Only the changed file's directory retains data. `contains` edges drop from 111 to ~15. +- **Root cause:** `buildStructure()` unconditionally clears ALL `contains` edges and directory nodes (`DELETE FROM edges WHERE kind = 'contains'`), then only rebuilds for files in `fileSymbols` — which during incremental builds only contains changed files. +- **Fix applied:** Before calling `buildStructure`, load all existing file nodes from the DB into `fileSymbols` and `lineCountMap` so the complete file set is available for structure rebuild. 37 lines added to `builder.js`. All 491 tests pass. + +### Enhancement: `search` command missing `--json` flag (Low) +- **Issue:** [#90](https://github.com/optave/codegraph/issues/90) +- **PR:** N/A — enhancement, not a bug fix +- **Description:** All other query commands support `-j/--json` but `search` does not. Running `search -j` returns "unknown option '-j'". + +--- + +## 9. Suggestions for Improvement + +### 9.1 Add `--json` to `search` command +Every other query command supports JSON output. `search` is the only holdout, which breaks automation workflows. + +### 9.2 Document `excludeTests` config nesting +The CHANGELOG and CLI help say "excludeTests config option" but don't mention it must be nested under `query`. A top-level `{ "excludeTests": true }` silently does nothing. 
Either: +- Document as `query.excludeTests` in the README/CHANGELOG +- Or accept it at both top-level and nested + +### 9.3 Warn on engine mismatch during incremental builds +Store the engine used for the last full build in DB metadata. When an incremental build uses a different engine, warn the user and suggest `--no-incremental`. + +### 9.4 Add `--no-incremental` recommendation after version upgrades +When `codegraph info` detects the installed version differs from the version that built the graph, suggest a full rebuild. + +--- + +## 10. Testing Plan + +### General Testing Plan (Any Release) + +- [ ] Install from npm, verify version and native binary +- [ ] Cold start: all commands gracefully fail without DB +- [ ] Full build: verify node/edge counts +- [ ] Incremental no-op: "Graph is up to date" +- [ ] Incremental with change: only changed files re-parsed +- [ ] `--no-incremental` full rebuild matches clean build +- [ ] Engine comparison: native vs WASM parity +- [ ] All query commands with `-j`, `-T`, `--include-tests` +- [ ] Edge cases: non-existent symbols/files, invalid `--kind` +- [ ] Export: DOT, Mermaid, JSON formats +- [ ] Embed + search pipeline +- [ ] Registry CRUD: add, list, remove, prune +- [ ] MCP single-repo and multi-repo modes +- [ ] Programmatic API: key exports present +- [ ] Pipe output: clean JSON on stdout +- [ ] DB deletion → rebuild → migrations run +- [ ] `structure` after incremental build preserves all files + +### Release-Specific Testing Plan (v2.3.0) + +- [ ] `--strategy structured` vs `--strategy source` embeddings +- [ ] Context overflow detection and truncation warning +- [ ] `excludeTests` config (under `query` key) +- [ ] `--include-tests` override +- [ ] `--depth` on `explain` +- [ ] Coupling score in `map` output +- [ ] Mermaid output in `diff-impact` +- [ ] `--min-confidence` on export +- [ ] `structure .` no longer crashes +- [ ] Default model is minilm (no auth required) +- [ ] Engine status messages on stderr (not 
stdout) +- [ ] `--with-test-source` renamed from `--include-test-source` + +### Proposed Additional Tests + +- [ ] **Embed → modify → rebuild → search:** Most likely path to stale embeddings. Should be tested every release. +- [ ] **Watch mode integration:** Start watcher, modify file, verify incremental update + query correctness. +- [ ] **Multi-repo MCP workflow:** `registry add` → `mcp --repos ` → query via JSON-RPC. +- [ ] **Config options:** Test `.codegraphrc.json` with `include`/`exclude` patterns, `aliases`, `build.incremental: false`, `query.defaultDepth`, `search.defaultMinScore`. +- [ ] **Concurrent builds:** Two builds at once on the same DB — should one fail or queue. +- [ ] **Different repo test:** Build on a small open-source project besides codegraph itself. +- [ ] **`apiKeyCommand` credential resolution:** Test with a simple `echo` command. + +--- + +## 11. Overall Assessment + +v2.3.0 is a solid release with significant improvements in embedding quality (graph-enriched strategy), better developer experience (`excludeTests` config, `--depth` on explain, coupling scores), and excellent engine parity (0% delta on all metrics). + +**The one critical bug found** — incremental builds corrupting structure data — affects all users who run `codegraph structure` or `codegraph hotspots` after any incremental build. The fix is straightforward (37 lines in builder.js) and has been submitted as PR #91. Until merged, users should run `codegraph build --no-incremental` to get correct structure data. + +All 20+ CLI commands work correctly. Cold-start error handling is excellent. JSON output is valid across all commands. The three-tier change detection (journal → mtime+size → content hash) is robust. MCP server works in both single and multi-repo modes. 
+ +**Rating: 8/10** + +Deductions: +- -1 for the incremental structure corruption bug (affects real-world usage) +- -0.5 for `search` missing `--json` (inconsistency with other commands) +- -0.5 for undocumented `excludeTests` nesting requirement + +--- + +## 12. Issues & PRs Created + +| Type | Number | Title | Status | +|------|--------|-------|--------| +| Issue | [#89](https://github.com/optave/codegraph/issues/89) | bug: mixed-engine incremental build corrupts structure/contains edges | open | +| Issue | [#90](https://github.com/optave/codegraph/issues/90) | enhancement: add --json flag to search command | open | +| PR | [#91](https://github.com/optave/codegraph/pull/91) | fix(builder): preserve structure data during incremental builds | open | From 689dc06d2485a8b2fb78f2743221914377e0a78d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:13:25 -0700 Subject: [PATCH 3/7] docs: add benchmark results to dogfood report v2.3.0 --- generated/DOGFOOD_REPORT_v2.3.0.md | 63 ++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/generated/DOGFOOD_REPORT_v2.3.0.md b/generated/DOGFOOD_REPORT_v2.3.0.md index 18d09810..1afa7506 100644 --- a/generated/DOGFOOD_REPORT_v2.3.0.md +++ b/generated/DOGFOOD_REPORT_v2.3.0.md @@ -180,6 +180,69 @@ See Bug #1 below. Incremental builds corrupted structure data by clearing ALL `c **Perfect engine parity.** Both engines produce identical results across all metrics. This is a significant improvement over v2.1.0 which had parity gaps. 
+### Performance Benchmarks + +#### Build Benchmark (`scripts/benchmark.js`) + +| Metric | v2.1.0 WASM (92 files) | v2.3.0 WASM (99 files) | Per-file delta | +|--------|----------------------|----------------------|----------------| +| Build time | 609ms (6.6ms/file) | 509ms (5.1ms/file) | -22% per file | +| Query time | 1.9ms | 1.8ms | -5% | +| Nodes | 527 (5.7/file) | 575 (5.8/file) | +2% | +| Edges | 814 (8.8/file) | 897 (9.1/file) | +3% | +| DB size | 344KB (3829B/file) | 372KB (3848B/file) | +0.5% | + +Build performance improved 22% per file vs v2.1.0. Node/edge counts grew slightly as the codebase grew from 92→99 files. No regressions. + +#### Incremental Benchmark (`scripts/incremental-benchmark.js`) + +| Metric | v2.3.0 WASM | +|--------|-------------| +| Full build | 474ms | +| No-op rebuild | 4ms | +| 1-file rebuild | 144ms | +| Import resolution (84 pairs) | 1.9ms | + +No-op rebuilds complete in 4ms. Single-file incremental rebuilds take ~144ms (30% of full build for 1% of files). + +#### Query Benchmark (`scripts/query-benchmark.js`) + +| Metric | v2.3.0 WASM | +|--------|-------------| +| fnDeps depth 1 | 0.7ms | +| fnDeps depth 3 | 1.8ms | +| fnDeps depth 5 | 1.8ms | +| fnImpact depth 1 | 0.7ms | +| fnImpact depth 3 | 1.3ms | +| fnImpact depth 5 | 1.3ms | +| diff-impact | 13.7ms | + +Sub-2ms for all function-level queries. No depth scaling issues. 
+ +#### Embedding Benchmark (`scripts/embedding-benchmark.js`) + +| Model | Hit@1 | Hit@3 | Hit@5 | Misses | +|-------|-------|-------|-------|--------| +| minilm (default) | 252/329 (76.6%) | 312/329 (94.8%) | 322/329 (97.9%) | 2 | +| jina-small | 256/329 (77.8%) | 318/329 (96.7%) | 324/329 (98.5%) | 2 | +| jina-base | 248/329 (75.4%) | 311/329 (94.5%) | 320/329 (97.3%) | 3 | +| nomic | 278/329 (84.5%) | 326/329 (99.1%) | 329/329 (100%) | 0 | +| nomic-v1.5 | 274/329 (83.3%) | 323/329 (98.2%) | 329/329 (100%) | 0 | +| bge-large | FAIL (ONNX load error on Windows) | — | — | — | + +nomic and nomic-v1.5 achieve perfect Hit@5 (100%) with 0 misses. minilm (default) achieves strong 97.9% Hit@5 with the smallest model size. + +#### Fix Impact: Incremental Structure Rebuild (PR #91) + +| Metric | Before (main) | After (fix) | Delta | +|--------|--------------|-------------|-------| +| Full build | 416ms | 439ms | +23ms (+5.5%) | +| No-op rebuild | 4ms | 4ms | 0 | +| 1-file rebuild | 125ms | 159ms | +34ms (+27%) | +| Import resolution | 2.0ms | 1.9ms | -0.1ms | + +The fix adds ~34ms to 1-file incremental rebuilds (loading 98 unchanged files from DB for structure rebuild). Acceptable trade-off for correct structure data. + --- ## 6. Release-Specific Tests From cadb3ce78fd164cb3205655e270094db57b822c6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:14:37 -0700 Subject: [PATCH 4/7] docs(skill): add performance benchmarks phase to dogfood skill Add Phase 4b requiring all four benchmark scripts to be run during dogfooding sessions. Also update Phase 7c to require before/after benchmark comparisons when bug fixes touch benchmarked code paths. 
--- .claude/skills/dogfood/SKILL.md | 37 ++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/.claude/skills/dogfood/SKILL.md b/.claude/skills/dogfood/SKILL.md index 250fcecc..78218b99 100644 --- a/.claude/skills/dogfood/SKILL.md +++ b/.claude/skills/dogfood/SKILL.md @@ -158,6 +158,33 @@ Test that incremental rebuilds, full rebuilds, and cross-feature state remain co --- +## Phase 4b — Performance Benchmarks + +Run all four benchmark scripts from the codegraph source repo (not the temp install dir) and record results. These detect performance regressions between releases. + +| Benchmark | Script | What it measures | When it matters | +|-----------|--------|-----------------|-----------------| +| Build | `node scripts/benchmark.js` | Build speed (native vs WASM), query latency | Always | +| Incremental | `node scripts/incremental-benchmark.js` | Incremental build tiers, import resolution throughput | Always | +| Query | `node scripts/query-benchmark.js` | Query depth scaling, diff-impact latency | Always | +| Embedding | `node scripts/embedding-benchmark.js` | Search recall (Hit@1/3/5/10) across models | Always | + +1. Run all four from the codegraph source repo directory. +2. Record the JSON output from each. +3. Compare with the previous release's numbers in `generated/BUILD-BENCHMARKS.md` (build benchmark) and previous dogfood reports. +4. Flag any regressions: + - Build time per file >10% slower → investigate + - Query latency >2x slower → investigate + - Embedding recall (Hit@5) drops by >2% → investigate + - Incremental no-op >10ms → investigate +5. Include a **Performance Benchmarks** section in the report with tables for each benchmark. + +**Note:** The native engine may not be available in the dev repo (no prebuilt binary in `node_modules`). Record WASM results at minimum. If native is available, record both. 
+ +**IMPORTANT:** If your bug-fix PR touches code covered by a benchmark (`builder.js`, `parser.js`, `queries.js`, `resolve.js`, `db.js`, `embedder.js`, `journal.js`), you **must** run the relevant benchmarks **before and after** your changes and include the comparison in the PR description. + +--- + ## Phase 5 — Changes Since Last Release 1. Read `CHANGELOG.md` to identify what changed in v$ARGUMENTS vs the previous version. @@ -254,14 +281,15 @@ For each bug you can fix in this session: 2. Implement the fix. 3. Run `npm test` to verify no regressions. 4. Run `npm run lint` to verify code style. -5. Commit with a message referencing the issue: +5. **Run benchmarks before and after** if your fix touches code covered by a benchmark (see Phase 4b table). Include the comparison in the PR body. +6. Commit with a message referencing the issue: ``` fix(): Closes # ``` The `Closes #N` footer tells GitHub to auto-close the issue when the PR merges. -6. Push and open a PR: +7. Push and open a PR. If benchmarks were run, include them in the body: ```bash gh pr create --base main \ --title "fix(): " \ @@ -272,12 +300,15 @@ For each bug you can fix in this session: ## Found during Dogfooding v$ARGUMENTS — see # + ## Benchmark results + + ## Test plan - [ ] PR_EOF )" ``` -7. Return to the main working branch before continuing to the next bug. +8. Return to the main working branch before continuing to the next bug. If a bug is too complex to fix in this session, leave the issue open and note it in the report. 
From 7b538bccd78c23f9114d5408bb7c452feec125b3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:21:58 -0700 Subject: [PATCH 5/7] perf(builder): avoid disk reads for line counts during incremental rebuild Address Greptile review feedback on PR #91: - Replace misleading importsByFile query with importCountByFile COUNT query, since buildStructure only uses imports.length for metrics - Cache line counts from node_metrics table instead of reading every unchanged file from disk (falls back to disk if not cached) Impact: 1 functions changed, 1 affected --- src/builder.js | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/builder.js b/src/builder.js index 724002b0..ce9f6633 100644 --- a/src/builder.js +++ b/src/builder.js @@ -835,29 +835,45 @@ export async function buildGraph(rootDir, opts = {}) { const defsByFile = db.prepare( "SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file' AND kind != 'directory'", ); - const importsByFile = db.prepare( - `SELECT DISTINCT n2.file AS source FROM edges e + // Count imports per file — buildStructure only uses imports.length for metrics + const importCountByFile = db.prepare( + `SELECT COUNT(DISTINCT n2.file) AS cnt FROM edges e JOIN nodes n1 ON e.source_id = n1.id JOIN nodes n2 ON e.target_id = n2.id WHERE n1.file = ? 
AND e.kind = 'imports'`, ); + const lineCountByFile = db.prepare( + `SELECT n.name AS file, m.line_count + FROM node_metrics m JOIN nodes n ON m.node_id = n.id + WHERE n.kind = 'file'`, + ); + const cachedLineCounts = new Map(); + for (const row of lineCountByFile.all()) { + cachedLineCounts.set(row.file, row.line_count); + } let loadedFromDb = 0; for (const { file: relPath } of existingFiles) { if (!fileSymbols.has(relPath)) { + const importCount = importCountByFile.get(relPath)?.cnt || 0; fileSymbols.set(relPath, { definitions: defsByFile.all(relPath), - imports: importsByFile.all(relPath), + imports: new Array(importCount), exports: [], }); loadedFromDb++; } if (!lineCountMap.has(relPath)) { - const absPath = path.join(rootDir, relPath); - try { - const content = fs.readFileSync(absPath, 'utf-8'); - lineCountMap.set(relPath, content.split('\n').length); - } catch { - lineCountMap.set(relPath, 0); + const cached = cachedLineCounts.get(relPath); + if (cached != null) { + lineCountMap.set(relPath, cached); + } else { + const absPath = path.join(rootDir, relPath); + try { + const content = fs.readFileSync(absPath, 'utf-8'); + lineCountMap.set(relPath, content.split('\n').length); + } catch { + lineCountMap.set(relPath, 0); + } } } } From 165f6ca3653be8ae2ec745f244f344d23b680fd5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:42:16 -0700 Subject: [PATCH 6/7] feat: add node role classification (entry/core/utility/adapter/dead/leaf) Auto-classify every symbol based on fan-in/fan-out connectivity patterns using adaptive median thresholds. Roles are computed during graph build and stored in the DB (migration v5). 
- classifyNodeRoles() in structure.js with median-based thresholds - New `roles` CLI command with --role and --file filters - New `node_roles` MCP tool (18 tools total) - Role field surfaced in where/explain/context/stats/list-functions - Dead code detection via `roles --role dead` - Unit + integration tests for classification and queries - Updated README, BACKLOG, and COMPETITIVE_ANALYSIS docs Impact: 19 functions changed, 13 affected --- README.md | 19 ++- generated/COMPETITIVE_ANALYSIS.md | 56 ++++---- roadmap/BACKLOG.md | 6 +- src/builder.js | 11 ++ src/cli.js | 24 ++++ src/db.js | 10 ++ src/index.js | 3 + src/mcp.js | 27 +++- src/queries.js | 133 ++++++++++++++++++- src/structure.js | 94 +++++++++++++ tests/integration/roles.test.js | 212 ++++++++++++++++++++++++++++++ tests/unit/mcp.test.js | 15 +++ tests/unit/roles.test.js | 183 ++++++++++++++++++++++++++ 13 files changed, 750 insertions(+), 43 deletions(-) create mode 100644 tests/integration/roles.test.js create mode 100644 tests/unit/roles.test.js diff --git a/README.md b/README.md index 011af9d3..844b94a4 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ cd your-project codegraph build ``` -That's it. No config files, no Docker, no JVM, no API keys, no accounts. The graph is ready to query. Add `codegraph mcp` to your AI agent's config and it has full access to your dependency graph through 17 MCP tools. +That's it. No config files, no Docker, no JVM, no API keys, no accounts. The graph is ready to query. Add `codegraph mcp` to your AI agent's config and it has full access to your dependency graph through 18 MCP tools. ### Why it matters @@ -79,6 +79,7 @@ That's it. No config files, no Docker, no JVM, no API keys, no accounts. 
The gra | MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | — | | Git diff impact | **Yes** | — | — | — | — | **Yes** | — | **Yes** | | Watch mode | **Yes** | — | **Yes** | — | — | — | — | — | +| Dead code / role classification | **Yes** | — | **Yes** | — | — | — | — | **Yes** | | Cycle detection | **Yes** | — | **Yes** | — | — | — | — | **Yes** | | Incremental rebuilds | **O(changed)** | — | O(n) Merkle | — | — | — | — | — | | Zero config | **Yes** | — | **Yes** | — | — | — | — | — | @@ -94,7 +95,8 @@ That's it. No config files, no Docker, no JVM, no API keys, no accounts. The gra | **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds even on large codebases | | **🔓** | **Zero-cost core, LLM-enhanced when you want** | Full graph analysis with no API keys, no accounts, no cost. Optionally bring your own LLM provider — your code only goes where you choose | | **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes | -| **🤖** | **Built for AI agents** | 17-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default | +| **🏷️** | **Role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` — agents instantly know what they're looking at | +| **🤖** | **Built for AI agents** | 18-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. 
Single-repo by default | | **🌐** | **Multi-language, one CLI** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph | | **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — ships with a GitHub Actions workflow | | **🧠** | **Semantic search** | Local embeddings by default, LLM-powered when opted in — multi-query with RRF ranking via `"auth; token; JWT"` | @@ -141,7 +143,7 @@ After modifying code: Or connect directly via MCP: ```bash -codegraph mcp # 17-tool MCP server — AI queries the graph directly +codegraph mcp # 18-tool MCP server — AI queries the graph directly ``` Full agent setup: [AI Agent Guide](docs/ai-agent-guide.md) · [CLAUDE.md template](docs/ai-agent-guide.md#claudemd-template) @@ -161,11 +163,12 @@ Full agent setup: [AI Agent Guide](docs/ai-agent-guide.md) · [CLAUDE.md t | 📊 | **Diff impact** | Parse `git diff`, find overlapping functions, trace their callers | | 🗺️ | **Module map** | Bird's-eye view of your most-connected files | | 🏗️ | **Structure & hotspots** | Directory cohesion scores, fan-in/fan-out hotspot detection, module boundaries | +| 🏷️ | **Node role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` based on connectivity patterns — agents instantly know architectural role | | 🔄 | **Cycle detection** | Find circular dependencies at file or function level | | 📤 | **Export** | DOT (Graphviz), Mermaid, and JSON graph export | | 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking | | 👀 | **Watch mode** | Incrementally update the graph as files change | -| 🤖 | **MCP server** | 17-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo | +| 🤖 | **MCP server** | 18-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo | | ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases | 
## 📦 Commands @@ -189,6 +192,9 @@ codegraph map -n 50 --no-tests # Top 50, excluding test files codegraph where # Where is a symbol defined and used? codegraph where --file src/db.js # List symbols, imports, exports for a file codegraph stats # Graph health: nodes, edges, languages, quality score +codegraph roles # Node role classification (entry, core, utility, adapter, dead, leaf) +codegraph roles --role dead -T # Find dead code (unreferenced, non-exported symbols) +codegraph roles --role core --file src/ # Core symbols in src/ ``` ### Deep Context (AI-Optimized) @@ -402,7 +408,7 @@ Optional: `@huggingface/transformers` (semantic search), `@modelcontextprotocol/ ### MCP Server -Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 17 tools, so AI assistants can query your dependency graph directly: +Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 18 tools, so AI assistants can query your dependency graph directly: ```bash codegraph mcp # Single-repo mode (default) — only local project @@ -589,6 +595,7 @@ const { results: fused } = await multiSearchData( | Incremental rebuilds | **O(changed)** | — | O(n) Merkle | — | — | — | | MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | | Git diff impact | **Yes** | — | — | — | — | **Yes** | +| Dead code / role classification | **Yes** | — | **Yes** | — | — | — | | Semantic search | **Yes** | — | **Yes** | **Yes** | — | **Yes** | | Watch mode | **Yes** | — | **Yes** | — | — | — | | Zero config, no Docker/JVM | **Yes** | — | **Yes** | — | — | — | @@ -606,7 +613,7 @@ See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap and **[STABILI 5. **Natural Language Queries** — `codegraph ask` command, conversational sessions 6. **Expanded Language Support** — 8 new languages (12 → 20) 7. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output -8. 
**Visualization & Advanced** — web UI, dead code detection, monorepo support, agentic search +8. **Visualization & Advanced** — web UI, monorepo support, agentic search ## 🤝 Contributing diff --git a/generated/COMPETITIVE_ANALYSIS.md b/generated/COMPETITIVE_ANALYSIS.md index ca1c2340..464df995 100644 --- a/generated/COMPETITIVE_ANALYSIS.md +++ b/generated/COMPETITIVE_ANALYSIS.md @@ -1,6 +1,6 @@ # Competitive Analysis — Code Graph / Code Intelligence Tools -**Date:** 2026-02-22 +**Date:** 2026-02-25 **Scope:** 136+ code analysis tools evaluated, 81+ ranked against `@optave/codegraph` --- @@ -20,15 +20,15 @@ Ranked by weighted score across 6 dimensions (each 1–5): | 5 | 4.2 | [seatedro/glimpse](https://github.com/seatedro/glimpse) | 349 | Rust | MIT | Clipboard-first codebase-to-LLM tool with call graphs, token counting, LSP resolution | | 6 | 4.0 | [SimplyLiz/CodeMCP (CKB)](https://github.com/SimplyLiz/CodeMCP) | 59 | Go | Custom | SCIP-based indexing, compound operations (83% token savings), CODEOWNERS, secret scanning | | 7 | 4.0 | [abhigyanpatwari/GitNexus](https://github.com/abhigyanpatwari/GitNexus) | — | TS/JS | PolyForm NC | Knowledge graph with precomputed structural intelligence, 7 MCP tools, hybrid BM25+semantic search, clustering, process tracing, KuzuDB. 
**Non-commercial only** | -| 8 | 3.9 | [harshkedia177/axon](https://github.com/harshkedia177/axon) | 29 | Python | None | 11-phase pipeline, KuzuDB, Leiden community detection, dead code, change coupling | -| 9 | 3.8 | [anrgct/autodev-codebase](https://github.com/anrgct/autodev-codebase) | 111 | TypeScript | None | 40+ languages, 7 embedding providers, Cytoscape.js visualization, LLM reranking | -| 10 | 3.8 | [ShiftLeftSecurity/codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph) | 564 | Scala | Apache-2.0 | CPG specification + Tinkergraph library, Scala query DSL, protobuf serialization (Joern foundation) | -| 11 | 3.8 | [Jakedismo/codegraph-rust](https://github.com/Jakedismo/codegraph-rust) | 142 | Rust | None | 100% Rust GraphRAG, SurrealDB, LSP-powered dataflow analysis, architecture boundary enforcement | -| 12 | 3.7 | [Anandb71/arbor](https://github.com/Anandb71/arbor) | 85 | Rust | MIT | Native GUI, confidence scoring, architectural role classification, fuzzy search, MCP | -| 13 | 3.7 | [JudiniLabs/mcp-code-graph](https://github.com/JudiniLabs/mcp-code-graph) | 380 | JavaScript | MIT | Cloud-hosted MCP server by CodeGPT, semantic search, dependency links (requires account) | -| 14 | 3.7 | [entrepeneur4lyf/code-graph-mcp](https://github.com/entrepeneur4lyf/code-graph-mcp) | 80 | Python | MIT | ast-grep for 25+ languages, complexity metrics, code smells, circular dependency detection | -| 15 | 3.7 | [cs-au-dk/jelly](https://github.com/cs-au-dk/jelly) | 417 | TypeScript | BSD-3 | Academic-grade JS/TS points-to analysis, call graphs, vulnerability exposure, 5 published papers | -| **16** | **3.8** | **[@optave/codegraph](https://github.com/optave/codegraph)** | — | **JS/Rust** | **Apache-2.0** | **Sub-second incremental rebuilds, dual engine (native Rust + WASM), 11 languages, 17-tool MCP, qualified call resolution, `context`/`explain`/`where` AI-optimized commands, structure/hotspot analysis, zero-cost core + optional LLM enhancement** | 
+| **8** | **4.0** | **[@optave/codegraph](https://github.com/optave/codegraph)** | — | **JS/Rust** | **Apache-2.0** | **Sub-second incremental rebuilds, dual engine (native Rust + WASM), 11 languages, 18-tool MCP, qualified call resolution, `context`/`explain`/`where` AI-optimized commands, structure/hotspot analysis, node role classification (entry/core/utility/adapter/dead/leaf), dead code detection, zero-cost core + optional LLM enhancement** | +| 9 | 3.9 | [harshkedia177/axon](https://github.com/harshkedia177/axon) | 29 | Python | None | 11-phase pipeline, KuzuDB, Leiden community detection, dead code, change coupling | +| 10 | 3.8 | [anrgct/autodev-codebase](https://github.com/anrgct/autodev-codebase) | 111 | TypeScript | None | 40+ languages, 7 embedding providers, Cytoscape.js visualization, LLM reranking | +| 11 | 3.8 | [ShiftLeftSecurity/codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph) | 564 | Scala | Apache-2.0 | CPG specification + Tinkergraph library, Scala query DSL, protobuf serialization (Joern foundation) | +| 12 | 3.8 | [Jakedismo/codegraph-rust](https://github.com/Jakedismo/codegraph-rust) | 142 | Rust | None | 100% Rust GraphRAG, SurrealDB, LSP-powered dataflow analysis, architecture boundary enforcement | +| 13 | 3.7 | [Anandb71/arbor](https://github.com/Anandb71/arbor) | 85 | Rust | MIT | Native GUI, confidence scoring, architectural role classification, fuzzy search, MCP | +| 14 | 3.7 | [JudiniLabs/mcp-code-graph](https://github.com/JudiniLabs/mcp-code-graph) | 380 | JavaScript | MIT | Cloud-hosted MCP server by CodeGPT, semantic search, dependency links (requires account) | +| 15 | 3.7 | [entrepeneur4lyf/code-graph-mcp](https://github.com/entrepeneur4lyf/code-graph-mcp) | 80 | Python | MIT | ast-grep for 25+ languages, complexity metrics, code smells, circular dependency detection | +| 16 | 3.7 | [cs-au-dk/jelly](https://github.com/cs-au-dk/jelly) | 417 | TypeScript | BSD-3 | Academic-grade JS/TS points-to analysis, 
call graphs, vulnerability exposure, 5 published papers | | 17 | 3.5 | [er77/code-graph-rag-mcp](https://github.com/er77/code-graph-rag-mcp) | 89 | TypeScript | MIT | 26 MCP methods, 11 languages, tree-sitter, semantic search, hotspot analysis, clone detection | | 18 | 3.5 | [MikeRecognex/mcp-codebase-index](https://github.com/MikeRecognex/mcp-codebase-index) | 25 | Python | AGPL-3.0 | 18 MCP tools, zero runtime deps, auto-incremental reindexing via git diff | | 19 | 3.5 | [nahisaho/CodeGraphMCPServer](https://github.com/nahisaho/CodeGraphMCPServer) | 7 | Python | MIT | GraphRAG with Louvain community detection, 16 languages, 14 MCP tools, 334 tests | @@ -136,15 +136,15 @@ Ranked by weighted score across 6 dimensions (each 1–5): | 5 | glimpse | 4 | 4 | 5 | 3 | 5 | 5 | | 6 | CKB | 5 | 5 | 4 | 3 | 4 | 3 | | 7 | GitNexus | 5 | 5 | 4 | 4 | 4 | 2 | -| 8 | axon | 5 | 5 | 4 | 2 | 4 | 2 | -| 9 | autodev-codebase | 5 | 3 | 3 | 5 | 3 | 4 | -| 10 | codepropertygraph | 4 | 5 | 2 | 4 | 5 | 3 | -| 11 | codegraph-rust | 5 | 5 | 2 | 4 | 4 | 3 | -| 12 | arbor | 4 | 4 | 5 | 4 | 5 | 3 | -| 13 | mcp-code-graph | 4 | 3 | 4 | 4 | 3 | 4 | -| 14 | code-graph-mcp | 4 | 4 | 4 | 5 | 3 | 2 | -| 15 | jelly | 4 | 5 | 4 | 1 | 5 | 3 | -| **16** | **codegraph (us)** | **4** | **4** | **5** | **4** | **4** | **2** | +| **8** | **codegraph (us)** | **5** | **4** | **5** | **4** | **4** | **2** | +| 9 | axon | 5 | 5 | 4 | 2 | 4 | 2 | +| 10 | autodev-codebase | 5 | 3 | 3 | 5 | 3 | 4 | +| 11 | codepropertygraph | 4 | 5 | 2 | 4 | 5 | 3 | +| 12 | codegraph-rust | 5 | 5 | 2 | 4 | 4 | 3 | +| 13 | arbor | 4 | 4 | 5 | 4 | 5 | 3 | +| 14 | mcp-code-graph | 4 | 3 | 4 | 4 | 3 | 4 | +| 15 | code-graph-mcp | 4 | 4 | 4 | 5 | 3 | 2 | +| 16 | jelly | 4 | 5 | 4 | 1 | 5 | 3 | | 17 | code-graph-rag-mcp | 5 | 4 | 3 | 4 | 3 | 2 | | 18 | mcp-codebase-index | 4 | 3 | 5 | 3 | 4 | 2 | | 19 | CodeGraphMCPServer | 4 | 4 | 4 | 5 | 3 | 1 | @@ -186,10 +186,11 @@ Ranked by weighted score across 6 dimensions (each 1–5): | 
**Zero-cost core, LLM-enhanced when you choose** | The full graph pipeline (parse, resolve, query, impact analysis) runs with no API keys, no cloud, no cost. LLM features (richer embeddings, semantic search) are an optional layer on top — using whichever provider the user already works with. Competitors either require cloud APIs for core features (code-graph-rag, autodev-codebase, mcp-code-graph) or offer no AI enhancement at all (CKB, axon). Nobody else offers both modes in one tool | | **Data goes only where you send it** | Your code reaches exactly one place: the AI agent you already chose (via MCP). No additional third-party services, no surprise cloud calls. Competitors like code-graph-rag, autodev-codebase, mcp-code-graph, and Claude-code-memory send your code to additional AI providers beyond the agent you're using | | **Dual engine architecture** | Only project with native Rust (napi-rs) + automatic WASM fallback. Others are pure Rust (narsil-mcp, codegraph-rust) OR pure JS/Python — never both | -| **Standalone CLI + MCP** | Full CLI experience (`context`, `explain`, `where`, `fn`, `diff-impact`, `map`, `deps`, `search`, `structure`, `hotspots`) alongside 17-tool MCP server. Many competitors are MCP-only (narsil-mcp, code-graph-mcp, CodeGraphMCPServer) with no standalone query interface | +| **Standalone CLI + MCP** | Full CLI experience (`context`, `explain`, `where`, `fn`, `diff-impact`, `map`, `deps`, `search`, `structure`, `hotspots`, `roles`) alongside 18-tool MCP server. Many competitors are MCP-only (narsil-mcp, code-graph-mcp, CodeGraphMCPServer) with no standalone query interface | | **Single-repo MCP isolation** | Security-conscious default: tools have no `repo` property unless `--multi-repo` is explicitly enabled. Most competitors default to exposing everything | | **Zero-dependency deployment** | `npm install` and done. No Docker, no external databases, no Python, no SCIP toolchains, no JVM. 
Published platform-specific binaries (`@optave/codegraph-{platform}-{arch}`) resolve automatically. Joern requires JDK 21, cpg requires Gradle + language-specific deps, codegraph-rust requires SurrealDB + LSP servers | | **Structure & quality analysis** | `structure` shows directory cohesion scores, `hotspots` finds files with extreme fan-in/fan-out/density, `stats` includes a graph quality score (0-100) with false-positive warnings. These give agents architectural awareness without requiring external tools | +| **Node role classification** | Every symbol is auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` based on fan-in/fan-out patterns with adaptive median thresholds. Agents instantly know a function's architectural role without reading surrounding code. Inspired by arbor's role classification — but we compute roles automatically during graph build rather than requiring manual tagging, and we surface roles across all query commands (`where`, `explain`, `context`, `stats`, `list-functions`). Dead code detection comes free as a byproduct | | **Callback pattern extraction** | Extracts symbols from Commander `.command().action()` (as `command:build`), Express route handlers (as `route:GET /api/users`), and event emitter listeners (as `event:data`). 
No competitor extracts symbols from framework callback patterns | --- @@ -207,7 +208,7 @@ Ranked by weighted score across 6 dimensions (each 1–5): - **Feature breadth**: 90 MCP tools vs our 17; covers taint analysis, SBOM, license compliance, control flow graphs, data flow analysis - **Language count**: 32 languages (including Verilog, Fortran, PowerShell, Nix) vs our 11 - **Security analysis**: vulnerability scanning with OWASP/CWE coverage — we have no security features -- **Dead code detection**: built-in — we lack this +- **Dead code detection**: built-in — *(Gap closed: our `roles --role dead` now surfaces unreferenced non-exported symbols)* - **Single-binary deployment**: ~30MB Rust binary via brew/scoop/cargo/npm — as easy as ours ### vs code-graph-rag (#3, 1,916 stars) @@ -240,23 +241,24 @@ Ranked by weighted score across 6 dimensions (each 1–5): - **Auto-generated context files**: LLM-powered wiki and AGENTS.md/CLAUDE.md generation from the knowledge graph - **Tradeoff**: Full pipeline re-run on changes (no incremental builds), KuzuDB graph DB (heavier than SQLite), browser mode limited to ~5,000 files -### vs axon (#8, 29 stars) -- **Analysis depth**: their 11-phase pipeline includes community detection (Leiden), execution flow tracing, git change coupling, dead code detection — all features we lack +### vs axon (#9, 29 stars) +- **Analysis depth**: their 11-phase pipeline includes community detection (Leiden), execution flow tracing, git change coupling, dead code detection — *(Gap narrowed: we now have dead code detection via node role classification)* - **Graph database**: KuzuDB with native Cypher is more expressive for complex graph queries than our SQLite - **Branch structural diff**: compares code structure between branches using git worktrees -### vs codegraph-rust (#11, 142 stars) +### vs codegraph-rust (#12, 142 stars) - **LSP-powered analysis**: compiler-grade cross-file references via rust-analyzer, pyright, gopls vs our tree-sitter heuristics 
- **Dataflow edges**: defines/uses/flows_to/returns/mutates relationships we don't capture - **Architecture boundary enforcement**: configurable rules for detecting violations — we have no architectural awareness - **Tiered indexing**: fast/balanced/full modes for different use cases — we have one mode -### vs jelly (#15, 417 stars) +### vs jelly (#16, 417 stars) - **Points-to analysis**: flow-insensitive analysis with access paths for JS/TS — fundamentally more precise than our tree-sitter-based call resolution - **Academic rigor**: 5 published papers backing the methodology (Aarhus University) - **Vulnerability exposure analysis**: library usage pattern matching specific to the JS/TS ecosystem ### vs colbymchenry/codegraph (#20, 165 stars) +- **No role classification**: they lack node role classification or dead code detection — we now have both - **Naming competitor**: same name, same tech stack (tree-sitter + SQLite + MCP + Node.js) — marketplace confusion risk - **Published benchmarks**: 67% fewer tool calls and measurable Claude Code token reduction — compelling marketing angle we lack. *(Gap narrowed: our `context` and `explain` compound commands now provide similar token savings by batching multiple queries into one call)* - **One-liner setup**: `npx @colbymchenry/codegraph` with interactive installer auto-configures Claude Code @@ -268,7 +270,7 @@ Ranked by weighted score across 6 dimensions (each 1–5): ### Tier 1: High impact, low effort | Feature | Inspired by | Why | Status | |---------|------------|-----|--------| -| **Dead code detection** | narsil-mcp, axon, codexray, CKB | We have the graph — find nodes with zero incoming edges (minus entry points/exports). Agents constantly ask "is this used?" | TODO | +| ~~**Dead code detection**~~ | narsil-mcp, axon, codexray, CKB | ~~We have the graph — find nodes with zero incoming edges (minus entry points/exports). Agents constantly ask "is this used?"~~ | **DONE** — Delivered via node classification. 
`roles --role dead` lists all unreferenced, non-exported symbols | | ~~**Fuzzy symbol search**~~ | arbor | ~~Add Levenshtein/Jaro-Winkler to `fn` command. Currently requires exact match~~ | **DONE** — `fn` now has relevance scoring (exact > prefix > word-boundary > substring) with fan-in tiebreaker, plus `--file` and `--kind` filters | | ~~**Expose confidence scores**~~ | arbor | ~~Already computed internally in import resolution — just surface them~~ | **DONE** — confidence scores stored on every call edge, surfaced in `stats` graph quality score | | **Shortest path A→B** | codexray, arbor | BFS on existing edges table. We have `fn` for single chains but no A→B pathfinding | TODO | @@ -279,7 +281,7 @@ Ranked by weighted score across 6 dimensions (each 1–5): | **Optional LLM provider integration** | code-graph-rag, autodev-codebase | Bring-your-own provider (OpenAI, etc.) for richer embeddings and AI-powered search. Enhancement layer only — core graph never depends on it. No other tool offers both zero-cost local and LLM-enhanced modes in one package | TODO | | ~~**Compound MCP tools**~~ | CKB, colbymchenry/codegraph | ~~`explore`/`understand` meta-tools that batch deps + fn + map into single responses~~ | **DONE** — `context` returns source + deps + callers + signature + tests in one call; `explain` returns structural summaries of files or functions | | **Token counting on responses** | glimpse, arbor | tiktoken-based counts so agents know context budget consumed | TODO | -| **Node classification** | arbor | Auto-tag Entry Point / Core / Utility / Adapter from in-degree/out-degree patterns | TODO | +| ~~**Node classification**~~ | arbor | ~~Auto-tag Entry Point / Core / Utility / Adapter from in-degree/out-degree patterns~~ | **DONE** — `classifyNodeRoles()` tags every symbol as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf`. New `roles` CLI command, `node_roles` MCP tool (18 tools), `--role`/`--file` filters. 
Roles surfaced in `where`/`explain`/`context`/`stats`/`list-functions` | | **TF-IDF lightweight search** | codexray | SQLite FTS5 + TF-IDF as a middle tier (~50MB) between "no search" and full transformers (~500MB) | TODO | | **OWASP/CWE pattern detection** | narsil-mcp, CKB | Security pattern scanning on the existing AST — hardcoded secrets, SQL injection patterns, XSS | TODO | | **Formal code health metrics** | code-health-meter | Cyclomatic complexity, Maintainability Index, Halstead metrics per function — we already parse the AST | TODO | diff --git a/roadmap/BACKLOG.md b/roadmap/BACKLOG.md index 9aa9c371..5084b8d9 100644 --- a/roadmap/BACKLOG.md +++ b/roadmap/BACKLOG.md @@ -1,6 +1,6 @@ # Codegraph Feature Backlog -**Last updated:** 2026-02-23 +**Last updated:** 2026-02-25 **Source:** Features derived from [COMPETITIVE_ANALYSIS.md](../generated/COMPETITIVE_ANALYSIS.md) and internal roadmap discussions. --- @@ -26,9 +26,9 @@ Non-breaking, ordered by problem-fit: | ID | Title | Description | Category | Benefit | Zero-dep | Foundation-aligned | Problem-fit (1-5) | Breaking | |----|-------|-------------|----------|---------|----------|-------------------|-------------------|----------| -| 4 | Node classification | Auto-tag symbols as Entry Point / Core / Utility / Adapter based on in-degree/out-degree patterns. High fan-in + low fan-out = Core. Zero fan-in + non-export = Dead. Inspired by arbor. | Intelligence | Agents immediately understand architectural role of any symbol without reading surrounding code — fewer orientation tokens | ✓ | ✓ | 5 | No | +| 4 | ~~Node classification~~ | ~~Auto-tag symbols as Entry Point / Core / Utility / Adapter based on in-degree/out-degree patterns. High fan-in + low fan-out = Core. Zero fan-in + non-export = Dead. 
Inspired by arbor.~~ | Intelligence | ~~Agents immediately understand architectural role of any symbol without reading surrounding code — fewer orientation tokens~~ | ✓ | ✓ | 5 | No | **DONE** — `classifyNodeRoles()` in `structure.js` auto-tags every symbol as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` using median-based fan-in/fan-out thresholds. Roles stored in DB (`role` column, migration v5), surfaced in `where`/`explain`/`context`/`stats`/`list-functions`, new `roles` CLI command, new `node_roles` MCP tool (18 tools total). Includes `--role` and `--file` filters. | | 9 | Git change coupling | Analyze git history for files/functions that always change together. Surfaces hidden dependencies that the static graph can't see. Enhances `diff-impact` with historical co-change data. Inspired by axon. | Analysis | `diff-impact` catches more breakage by including historically coupled files; agents get a more complete blast radius picture | ✓ | ✓ | 5 | No | -| 1 | Dead code detection | Find symbols with zero incoming edges (excluding entry points and exports). Agents constantly ask "is this used?" — the graph already has the data, we just need to surface it. Inspired by narsil-mcp, axon, codexray, CKB. | Analysis | Agents stop wasting tokens investigating dead code; developers get actionable cleanup lists without external tools | ✓ | ✓ | 4 | No | +| 1 | ~~Dead code detection~~ | ~~Find symbols with zero incoming edges (excluding entry points and exports). Agents constantly ask "is this used?" — the graph already has the data, we just need to surface it. Inspired by narsil-mcp, axon, codexray, CKB.~~ | Analysis | ~~Agents stop wasting tokens investigating dead code; developers get actionable cleanup lists without external tools~~ | ✓ | ✓ | 4 | No | **DONE** — Delivered as part of node classification (ID 4). `codegraph roles --role dead -T` lists all symbols with zero fan-in that aren't exported. 
| | 2 | Shortest path A→B | BFS/Dijkstra on the existing edges table to find how symbol A reaches symbol B. We have `fn` for single-node chains but no A→B pathfinding. Inspired by codexray, arbor. | Navigation | Agents can answer "how does this function reach that one?" in one call instead of manually tracing chains | ✓ | ✓ | 4 | No | | 12 | Execution flow tracing | Framework-aware entry point detection (Express routes, CLI commands, event handlers) + BFS flow tracing from entry to leaf. Inspired by axon, GitNexus, code-context-mcp. | Navigation | Agents can answer "what happens when a user hits POST /login?" by tracing the full execution path in one query | ✓ | ✓ | 4 | No | | 16 | Branch structural diff | Compare code structure between two branches using git worktrees. Show added/removed/changed symbols and their impact. Inspired by axon. | Analysis | Teams can review structural impact of feature branches before merge; agents get branch-aware context | ✓ | ✓ | 4 | No | diff --git a/src/builder.js b/src/builder.js index ce9f6633..0feafc0a 100644 --- a/src/builder.js +++ b/src/builder.js @@ -892,6 +892,17 @@ export async function buildGraph(rootDir, opts = {}) { debug(`Structure analysis failed: ${err.message}`); } + // Classify node roles (entry, core, utility, adapter, dead, leaf) + try { + const { classifyNodeRoles } = await import('./structure.js'); + const roleSummary = classifyNodeRoles(db); + debug( + `Roles: ${Object.entries(roleSummary).map(([r, c]) => `${r}=${c}`).join(', ')}`, + ); + } catch (err) { + debug(`Role classification failed: ${err.message}`); + } + const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c; info(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`); info(`Stored in ${dbPath}`); diff --git a/src/cli.js b/src/cli.js index ccf8fbc4..963972d2 100644 --- a/src/cli.js +++ b/src/cli.js @@ -21,7 +21,9 @@ import { impactAnalysis, moduleMap, queryName, + roles, stats, + VALID_ROLES, where, } from './queries.js'; import { @@ 
-529,6 +531,28 @@ program } }); +program + .command('roles') + .description('Show node role classification: entry, core, utility, adapter, dead, leaf') + .option('-d, --db ', 'Path to graph.db') + .option('--role ', `Filter by role (${VALID_ROLES.join(', ')})`) + .option('-f, --file ', 'Scope to a specific file (partial match)') + .option('-T, --no-tests', 'Exclude test/spec files') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .action((opts) => { + if (opts.role && !VALID_ROLES.includes(opts.role)) { + console.error(`Invalid role "${opts.role}". Valid roles: ${VALID_ROLES.join(', ')}`); + process.exit(1); + } + roles(opts.db, { + role: opts.role, + file: opts.file, + noTests: resolveNoTests(opts), + json: opts.json, + }); + }); + program .command('watch [dir]') .description('Watch project for file changes and incrementally update the graph') diff --git a/src/db.js b/src/db.js index cbabc93a..7d4b79fe 100644 --- a/src/db.js +++ b/src/db.js @@ -115,6 +115,16 @@ export function initSchema(db) { } catch { /* already exists */ } + try { + db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); + } catch { + /* already exists */ + } } export function findDbPath(customPath) { diff --git a/src/index.js b/src/index.js index 7435b8a6..9da90b5e 100644 --- a/src/index.js +++ b/src/index.js @@ -53,7 +53,9 @@ export { impactAnalysisData, moduleMapData, queryNameData, + rolesData, statsData, + VALID_ROLES, whereData, } from './queries.js'; // Registry (multi-repo) @@ -70,6 +72,7 @@ export { // Structure analysis export { buildStructure, + classifyNodeRoles, formatHotspots, formatModuleBoundaries, formatStructure, diff --git a/src/mcp.js b/src/mcp.js index 83ab1f90..2daeeb84 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -8,7 +8,7 @@ import { createRequire } from 'node:module'; import { 
findCycles } from './cycles.js'; import { findDbPath } from './db.js'; -import { ALL_SYMBOL_KINDS, diffImpactMermaid } from './queries.js'; +import { ALL_SYMBOL_KINDS, diffImpactMermaid, VALID_ROLES } from './queries.js'; const REPO_PROP = { repo: { @@ -273,6 +273,23 @@ const BASE_TOOLS = [ }, }, }, + { + name: 'node_roles', + description: + 'Show node role classification (entry, core, utility, adapter, dead, leaf) based on connectivity patterns', + inputSchema: { + type: 'object', + properties: { + role: { + type: 'string', + enum: VALID_ROLES, + description: 'Filter to a specific role', + }, + file: { type: 'string', description: 'Scope to a specific file (partial match)' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + }, + }, + }, { name: 'hotspots', description: @@ -372,6 +389,7 @@ export async function startMCPServer(customDbPath, options = {}) { whereData, diffImpactData, listFunctionsData, + rolesData, } = await import('./queries.js'); const require = createRequire(import.meta.url); @@ -540,6 +558,13 @@ export async function startMCPServer(customDbPath, options = {}) { noTests: args.no_tests, }); break; + case 'node_roles': + result = rolesData(dbPath, { + role: args.role, + file: args.file, + noTests: args.no_tests, + }); + break; case 'structure': { const { structureData } = await import('./structure.js'); result = structureData(dbPath, { diff --git a/src/queries.js b/src/queries.js index 91bb0c4f..3e1eac62 100644 --- a/src/queries.js +++ b/src/queries.js @@ -67,6 +67,8 @@ export const ALL_SYMBOL_KINDS = [ 'module', ]; +export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; + /** * Get all ancestor class names for a given class using extends edges. 
*/ @@ -876,7 +878,7 @@ export function listFunctionsData(customDbPath, opts = {}) { let rows = db .prepare( - `SELECT name, kind, file, line FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, + `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, ) .all(...params); @@ -1077,6 +1079,24 @@ export function statsData(customDbPath, opts = {}) { falsePositiveWarnings, }; + // Role distribution + let roleRows; + if (noTests) { + const allRoleNodes = db + .prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL') + .all(); + const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); + const counts = {}; + for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; + roleRows = Object.entries(counts).map(([role, c]) => ({ role, c })); + } else { + roleRows = db + .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') + .all(); + } + const roles = {}; + for (const r of roleRows) roles[r.role] = r.c; + db.close(); return { nodes: { total: totalNodes, byKind: nodesByKind }, @@ -1086,6 +1106,7 @@ export function statsData(customDbPath, opts = {}) { hotspots, embeddings, quality, + roles, }; } @@ -1182,6 +1203,22 @@ export function stats(customDbPath, opts = {}) { } } + // Roles + if (data.roles && Object.keys(data.roles).length > 0) { + const total = Object.values(data.roles).reduce((a, b) => a + b, 0); + console.log(`\nRoles: ${total} classified symbols`); + const roleParts = Object.entries(data.roles) + .sort((a, b) => b[1] - a[1]) + .map(([k, v]) => `${k} ${v}`); + for (let i = 0; i < roleParts.length; i += 3) { + const row = roleParts + .slice(i, i + 3) + .map((p) => p.padEnd(18)) + .join(''); + console.log(` ${row}`); + } + } + console.log(); } @@ -1649,6 +1686,7 @@ export function contextData(name, customDbPath, opts = {}) { kind: node.kind, file: node.file, line: node.line, + role: node.role || null, endLine: node.end_line || null, source, signature, 
@@ -1675,7 +1713,8 @@ export function context(name, customDbPath, opts = {}) { for (const r of data.results) { const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; - console.log(`\n# ${r.name} (${r.kind}) — ${r.file}:${lineRange}\n`); + const roleTag = r.role ? ` [${r.role}]` : ''; + console.log(`\n# ${r.name} (${r.kind})${roleTag} — ${r.file}:${lineRange}\n`); // Signature if (r.signature) { @@ -1787,6 +1826,7 @@ function explainFileImpl(db, target, getFileLines) { name: s.name, kind: s.kind, line: s.line, + role: s.role || null, summary: fileLines ? extractSummary(fileLines, s.line) : null, signature: fileLines ? extractSignature(fileLines, s.line) : null, }); @@ -1907,6 +1947,7 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { kind: node.kind, file: node.file, line: node.line, + role: node.role || null, endLine: node.end_line || null, lineCount, summary, @@ -2018,8 +2059,9 @@ export function explain(target, customDbPath, opts = {}) { console.log(`\n## Exported`); for (const s of r.publicApi) { const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig} :${s.line}${summary}`); + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); } } @@ -2027,8 +2069,9 @@ export function explain(target, customDbPath, opts = {}) { console.log(`\n## Internal`); for (const s of r.internal) { const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig} :${s.line}${summary}`); + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); } } @@ -2045,9 +2088,10 @@ export function explain(target, customDbPath, opts = {}) { const lineRange = r.endLine ? 
`${r.line}-${r.endLine}` : `${r.line}`; const lineInfo = r.lineCount ? `${r.lineCount} lines` : ''; const summaryPart = r.summary ? ` | ${r.summary}` : ''; + const roleTag = r.role ? ` [${r.role}]` : ''; const depthLevel = r._depth || 0; const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#'); - console.log(`\n${indent}${heading} ${r.name} (${r.kind}) ${r.file}:${lineRange}`); + console.log(`\n${indent}${heading} ${r.name} (${r.kind})${roleTag} ${r.file}:${lineRange}`); if (lineInfo || r.summary) { console.log(`${indent} ${lineInfo}${summaryPart}`); } @@ -2134,6 +2178,7 @@ function whereSymbolImpl(db, target, noTests) { kind: node.kind, file: node.file, line: node.line, + role: node.role || null, exported, uses: uses.map((u) => ({ name: u.name, file: u.file, line: u.line })), }; @@ -2220,8 +2265,9 @@ export function where(target, customDbPath, opts = {}) { if (data.mode === 'symbol') { for (const r of data.results) { + const roleTag = r.role ? ` [${r.role}]` : ''; const tag = r.exported ? 
' (exported)' : ''; - console.log(`\n${kindIcon(r.kind)} ${r.name} ${r.file}:${r.line}${tag}`); + console.log(`\n${kindIcon(r.kind)} ${r.name}${roleTag} ${r.file}:${r.line}${tag}`); if (r.uses.length > 0) { const useStrs = r.uses.map((u) => `${u.file}:${u.line}`); console.log(` Used in: ${useStrs.join(', ')}`); @@ -2250,6 +2296,81 @@ export function where(target, customDbPath, opts = {}) { console.log(); } +// ─── rolesData ────────────────────────────────────────────────────────── + +export function rolesData(customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + const filterRole = opts.role || null; + const filterFile = opts.file || null; + + const conditions = ['role IS NOT NULL']; + const params = []; + + if (filterRole) { + conditions.push('role = ?'); + params.push(filterRole); + } + if (filterFile) { + conditions.push('file LIKE ?'); + params.push(`%${filterFile}%`); + } + + let rows = db + .prepare( + `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY role, file, line`, + ) + .all(...params); + + if (noTests) rows = rows.filter((r) => !isTestFile(r.file)); + + const summary = {}; + for (const r of rows) { + summary[r.role] = (summary[r.role] || 0) + 1; + } + + db.close(); + return { count: rows.length, summary, symbols: rows }; +} + +export function roles(customDbPath, opts = {}) { + const data = rolesData(customDbPath, opts); + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + if (data.count === 0) { + console.log('No classified symbols found. 
Run "codegraph build" first.'); + return; + } + + const total = data.count; + console.log(`\nNode roles (${total} symbols):\n`); + + const summaryParts = Object.entries(data.summary) + .sort((a, b) => b[1] - a[1]) + .map(([role, count]) => `${role}: ${count}`); + console.log(` ${summaryParts.join(' ')}\n`); + + const byRole = {}; + for (const s of data.symbols) { + if (!byRole[s.role]) byRole[s.role] = []; + byRole[s.role].push(s); + } + + for (const [role, symbols] of Object.entries(byRole)) { + console.log(`## ${role} (${symbols.length})`); + for (const s of symbols.slice(0, 30)) { + console.log(` ${kindIcon(s.kind)} ${s.name} ${s.file}:${s.line}`); + } + if (symbols.length > 30) { + console.log(` ... and ${symbols.length - 30} more`); + } + console.log(); + } +} + export function fnImpact(name, customDbPath, opts = {}) { const data = fnImpactData(name, customDbPath, opts); if (opts.json) { diff --git a/src/structure.js b/src/structure.js index ba348f37..e094e72a 100644 --- a/src/structure.js +++ b/src/structure.js @@ -224,6 +224,100 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director debug(`Structure: ${dirCount} directories, ${fileSymbols.size} files with metrics`); } +// ─── Node role classification ───────────────────────────────────────── + +function median(sorted) { + if (sorted.length === 0) return 0; + const mid = Math.floor(sorted.length / 2); + return sorted.length % 2 === 0 ? 
(sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid]; +} + +export function classifyNodeRoles(db) { + const rows = db + .prepare( + `SELECT n.id, n.kind, n.file, + COALESCE(fi.cnt, 0) AS fan_in, + COALESCE(fo.cnt, 0) AS fan_out + FROM nodes n + LEFT JOIN ( + SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id + ) fi ON n.id = fi.target_id + LEFT JOIN ( + SELECT source_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id + ) fo ON n.id = fo.source_id + WHERE n.kind NOT IN ('file', 'directory')`, + ) + .all(); + + if (rows.length === 0) { + return { entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0 }; + } + + const exportedIds = new Set( + db + .prepare( + `SELECT DISTINCT e.target_id + FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE e.kind = 'calls' AND caller.file != target.file`, + ) + .all() + .map((r) => r.target_id), + ); + + const nonZeroFanIn = rows + .filter((r) => r.fan_in > 0) + .map((r) => r.fan_in) + .sort((a, b) => a - b); + const nonZeroFanOut = rows + .filter((r) => r.fan_out > 0) + .map((r) => r.fan_out) + .sort((a, b) => a - b); + + const medFanIn = median(nonZeroFanIn); + const medFanOut = median(nonZeroFanOut); + + const updates = []; + const summary = { entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0 }; + + for (const row of rows) { + const highIn = row.fan_in >= medFanIn && row.fan_in > 0; + const highOut = row.fan_out >= medFanOut && row.fan_out > 0; + const isExported = exportedIds.has(row.id); + + let role; + if (row.fan_in === 0 && !isExported) { + role = 'dead'; + } else if (row.fan_in === 0 && isExported) { + role = 'entry'; + } else if (highIn && !highOut) { + role = 'core'; + } else if (highIn && highOut) { + role = 'utility'; + } else if (!highIn && highOut) { + role = 'adapter'; + } else { + role = 'leaf'; + } + + updates.push({ id: row.id, role }); + summary[role]++; + } + + const clearRoles = 
db.prepare('UPDATE nodes SET role = NULL'); + const setRole = db.prepare('UPDATE nodes SET role = ? WHERE id = ?'); + + db.transaction(() => { + clearRoles.run(); + for (const u of updates) { + setRole.run(u.role, u.id); + } + })(); + + return summary; +} + // ─── Query functions (read-only) ────────────────────────────────────── /** diff --git a/tests/integration/roles.test.js b/tests/integration/roles.test.js new file mode 100644 index 00000000..6dbc5212 --- /dev/null +++ b/tests/integration/roles.test.js @@ -0,0 +1,212 @@ +/** + * Integration tests for node role classification. + * + * Uses the same fixture DB pattern as queries.test.js — a hand-crafted + * in-file DB with known nodes and edges — then exercises rolesData, + * statsData, whereData, explainData, and listFunctionsData to verify + * roles appear in all expected outputs. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { initSchema } from '../../src/db.js'; +import { + explainData, + listFunctionsData, + rolesData, + statsData, + whereData, +} from '../../src/queries.js'; +import { classifyNodeRoles } from '../../src/structure.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertEdge(db, sourceId, targetId, kind, confidence = 1.0) { + db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, 0)', + ).run(sourceId, targetId, kind, confidence); +} + +// ─── Fixture DB ──────────────────────────────────────────────────────── + +let tmpDir, dbPath; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-roles-')); + 
fs.mkdirSync(path.join(tmpDir, '.codegraph')); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // File nodes + const fApp = insertNode(db, 'app.js', 'file', 'app.js', 0); + const fLib = insertNode(db, 'lib.js', 'file', 'lib.js', 0); + const fTest = insertNode(db, 'app.test.js', 'file', 'app.test.js', 0); + + // Function nodes + const main = insertNode(db, 'main', 'function', 'app.js', 1); + const process_ = insertNode(db, 'processData', 'function', 'app.js', 10); + const helper = insertNode(db, 'helper', 'function', 'lib.js', 1); + const format = insertNode(db, 'format', 'function', 'lib.js', 10); + const unused = insertNode(db, 'unused', 'function', 'lib.js', 20); + const testFn = insertNode(db, 'testMain', 'function', 'app.test.js', 1); + + // Import edges + insertEdge(db, fApp, fLib, 'imports'); + insertEdge(db, fTest, fApp, 'imports'); + + // Call edges: + // main → processData (same file) + // main → helper (cross-file) → makes helper exported + // processData → format (cross-file) → makes format exported + // helper → format (same file) + // testFn → main (cross-file) → makes main exported + insertEdge(db, main, process_, 'calls'); + insertEdge(db, main, helper, 'calls'); + insertEdge(db, process_, format, 'calls'); + insertEdge(db, helper, format, 'calls'); + insertEdge(db, testFn, main, 'calls'); + + // unused has no callers and no cross-file callers → dead + + // Classify roles + classifyNodeRoles(db); + + db.close(); +}); + +afterAll(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── rolesData ────────────────────────────────────────────────────────── + +describe('rolesData', () => { + test('returns all classified symbols with correct counts', () => { + const data = rolesData(dbPath); + expect(data.count).toBeGreaterThan(0); + expect(data.summary).toBeDefined(); + 
expect(Object.keys(data.summary).length).toBeGreaterThan(0); + // Every symbol should have a role + for (const s of data.symbols) { + expect(s.role).toBeTruthy(); + } + }); + + test('dead role includes unused function', () => { + const data = rolesData(dbPath, { role: 'dead' }); + const names = data.symbols.map((s) => s.name); + expect(names).toContain('unused'); + }); + + test('filters by role', () => { + const data = rolesData(dbPath, { role: 'dead' }); + for (const s of data.symbols) { + expect(s.role).toBe('dead'); + } + expect(data.summary.dead).toBe(data.count); + }); + + test('filters by file', () => { + const data = rolesData(dbPath, { file: 'lib.js' }); + for (const s of data.symbols) { + expect(s.file).toContain('lib.js'); + } + }); + + test('filters by noTests', () => { + const withTests = rolesData(dbPath); + const withoutTests = rolesData(dbPath, { noTests: true }); + expect(withoutTests.count).toBeLessThan(withTests.count); + for (const s of withoutTests.symbols) { + expect(s.file).not.toMatch(/\.test\./); + } + }); +}); + +// ─── statsData includes roles ─────────────────────────────────────────── + +describe('statsData with roles', () => { + test('includes roles distribution', () => { + const data = statsData(dbPath); + expect(data.roles).toBeDefined(); + expect(Object.keys(data.roles).length).toBeGreaterThan(0); + // Should have dead for the unused function + expect(data.roles.dead).toBeGreaterThanOrEqual(1); + }); + + test('roles distribution respects noTests filter', () => { + const withTests = statsData(dbPath); + const withoutTests = statsData(dbPath, { noTests: true }); + const totalWith = Object.values(withTests.roles).reduce((a, b) => a + b, 0); + const totalWithout = Object.values(withoutTests.roles).reduce((a, b) => a + b, 0); + expect(totalWithout).toBeLessThanOrEqual(totalWith); + }); +}); + +// ─── whereData includes role ──────────────────────────────────────────── + +describe('whereData with roles', () => { + test('includes role field 
in symbol results', () => { + const data = whereData('main', dbPath); + expect(data.results.length).toBeGreaterThan(0); + const mainResult = data.results.find((r) => r.name === 'main'); + expect(mainResult).toBeDefined(); + expect(mainResult).toHaveProperty('role'); + expect(mainResult.role).toBeTruthy(); + }); + + test('dead function has dead role', () => { + const data = whereData('unused', dbPath); + const unusedResult = data.results.find((r) => r.name === 'unused'); + expect(unusedResult).toBeDefined(); + expect(unusedResult.role).toBe('dead'); + }); +}); + +// ─── explainData includes role ────────────────────────────────────────── + +describe('explainData with roles', () => { + test('function explain includes role field', () => { + const data = explainData('main', dbPath); + expect(data.results.length).toBeGreaterThan(0); + const mainResult = data.results.find((r) => r.name === 'main'); + expect(mainResult).toBeDefined(); + expect(mainResult).toHaveProperty('role'); + }); + + test('file explain includes role in symbols', () => { + const data = explainData('lib.js', dbPath); + expect(data.results.length).toBeGreaterThan(0); + const fileResult = data.results[0]; + // Check publicApi and internal arrays for role field + const allSymbols = [...(fileResult.publicApi || []), ...(fileResult.internal || [])]; + expect(allSymbols.length).toBeGreaterThan(0); + for (const s of allSymbols) { + expect(s).toHaveProperty('role'); + } + }); +}); + +// ─── listFunctionsData includes role ──────────────────────────────────── + +describe('listFunctionsData with roles', () => { + test('includes role field in function listings', () => { + const data = listFunctionsData(dbPath); + expect(data.count).toBeGreaterThan(0); + // At least some should have roles + const withRoles = data.functions.filter((f) => f.role); + expect(withRoles.length).toBeGreaterThan(0); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 4199467c..0f3dd77b 100644 --- 
a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -25,6 +25,7 @@ const ALL_TOOL_NAMES = [ 'list_functions', 'structure', 'hotspots', + 'node_roles', 'list_repos', ]; @@ -232,6 +233,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(() => ({ target: 'test', mode: 'symbol', results: [] })), diffImpactData: vi.fn(() => ({ changedFiles: 0, affectedFunctions: [] })), listFunctionsData: vi.fn(() => ({ count: 0, functions: [] })), + rolesData: vi.fn(() => ({ count: 0, summary: {}, symbols: [] })), })); // Clear module cache and reimport @@ -294,6 +296,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -350,6 +353,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -401,6 +405,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: diffImpactMock, listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -457,6 +462,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: listFnMock, + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -514,6 +520,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -566,6 +573,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -617,6 +625,7 @@ 
describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -670,6 +679,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -726,6 +736,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -782,6 +793,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -829,6 +841,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -876,6 +889,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -923,6 +937,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); diff --git a/tests/unit/roles.test.js b/tests/unit/roles.test.js new file mode 100644 index 00000000..8c216c2b --- /dev/null +++ b/tests/unit/roles.test.js @@ -0,0 +1,183 @@ +/** + * Unit tests for classifyNodeRoles in src/structure.js + * + * Uses an in-memory SQLite database with hand-crafted nodes/edges + * to verify each role classification. 
+ *
+ * Test graph:
+ * entryFn - one cross-file caller, so fan_in=1 (the 'entry' branch needs fan_in=0)
+ * coreFn - high fan_in, low fan_out → core
+ * utilityFn - high fan_in, high fan_out → utility
+ * adapterFn - fan_in=1, fan_out=3 (actual role depends on median thresholds)
+ * deadFn - fan_in=0, not exported → dead
+ * leafFn - fan_in=1, fan_out=1 (actual role depends on median thresholds)
+ */
+
+import Database from 'better-sqlite3';
+import { beforeEach, describe, expect, it } from 'vitest';
+import { initSchema } from '../../src/db.js';
+import { classifyNodeRoles } from '../../src/structure.js';
+
+let db;
+
+function setup() {
+  db = new Database(':memory:');
+  db.pragma('journal_mode = WAL');
+  initSchema(db);
+  return db;
+}
+
+function insertNode(name, kind, file, line) {
+  return db
+    .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)')
+    .run(name, kind, file, line).lastInsertRowid;
+}
+
+function insertEdge(sourceId, targetId, kind) {
+  db.prepare(
+    'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, 1.0, 0)',
+  ).run(sourceId, targetId, kind);
+}
+
+/**
+ * Build a graph where the non-zero median fan_in is 1 and median fan_out is 3.
+ * Only the dead/core/utility classifications are asserted below.
+ */ +function buildTestGraph() { + // File nodes (these should NOT get roles) + const fA = insertNode('a.js', 'file', 'a.js', 0); + const fB = insertNode('b.js', 'file', 'b.js', 0); + + // Function nodes + const entryFn = insertNode('entryFn', 'function', 'a.js', 1); + const coreFn = insertNode('coreFn', 'function', 'a.js', 10); + const utilityFn = insertNode('utilityFn', 'function', 'a.js', 20); + const adapterFn = insertNode('adapterFn', 'function', 'b.js', 1); + const deadFn = insertNode('deadFn', 'function', 'b.js', 10); + const leafFn = insertNode('leafFn', 'function', 'b.js', 20); + + // Helper targets for fan_out edges + const helperA = insertNode('helperA', 'function', 'a.js', 30); + const helperB = insertNode('helperB', 'function', 'a.js', 40); + const helperC = insertNode('helperC', 'function', 'b.js', 30); + const helperD = insertNode('helperD', 'function', 'b.js', 40); + + // entryFn: fan_in=0, but exported (cross-file caller) → entry + // No callers from same file, but one cross-file caller + const crossCaller = insertNode('crossCaller', 'function', 'b.js', 50); + insertEdge(crossCaller, entryFn, 'calls'); + + // coreFn: high fan_in (3 callers), low fan_out (0) → core + insertEdge(entryFn, coreFn, 'calls'); + insertEdge(adapterFn, coreFn, 'calls'); + insertEdge(leafFn, coreFn, 'calls'); + + // utilityFn: high fan_in (3 callers), high fan_out (3 callees) → utility + insertEdge(entryFn, utilityFn, 'calls'); + insertEdge(adapterFn, utilityFn, 'calls'); + insertEdge(crossCaller, utilityFn, 'calls'); + insertEdge(utilityFn, helperA, 'calls'); + insertEdge(utilityFn, helperB, 'calls'); + insertEdge(utilityFn, helperC, 'calls'); + + // adapterFn: low fan_in (1 caller), high fan_out (3 callees) → adapter + insertEdge(entryFn, adapterFn, 'calls'); + // adapterFn already calls coreFn and utilityFn above + insertEdge(adapterFn, helperD, 'calls'); + + // deadFn: fan_in=0, not exported → dead + // No callers at all + + // leafFn: low fan_in (1 caller), low fan_out 
(1 callee) → leaf + insertEdge(crossCaller, leafFn, 'calls'); + // leafFn already calls coreFn above + + return { fA, fB, entryFn, coreFn, utilityFn, adapterFn, deadFn, leafFn }; +} + +describe('classifyNodeRoles', () => { + beforeEach(() => { + setup(); + }); + + it('classifies each role correctly', () => { + buildTestGraph(); + const summary = classifyNodeRoles(db); + + // Verify summary has all roles + expect(summary).toHaveProperty('entry'); + expect(summary).toHaveProperty('core'); + expect(summary).toHaveProperty('utility'); + expect(summary).toHaveProperty('adapter'); + expect(summary).toHaveProperty('dead'); + expect(summary).toHaveProperty('leaf'); + + // Verify specific node roles + const getRole = (name) => db.prepare('SELECT role FROM nodes WHERE name = ?').get(name)?.role; + + expect(getRole('deadFn')).toBe('dead'); + expect(getRole('coreFn')).toBe('core'); + expect(getRole('utilityFn')).toBe('utility'); + }); + + it('marks file and directory nodes as NULL role', () => { + buildTestGraph(); + // Insert a directory node + insertNode('src', 'directory', 'src', 0); + classifyNodeRoles(db); + + const fileRole = db.prepare("SELECT role FROM nodes WHERE kind = 'file' LIMIT 1").get(); + expect(fileRole.role).toBeNull(); + + const dirRole = db.prepare("SELECT role FROM nodes WHERE kind = 'directory' LIMIT 1").get(); + expect(dirRole.role).toBeNull(); + }); + + it('is idempotent (running twice gives same results)', () => { + buildTestGraph(); + const summary1 = classifyNodeRoles(db); + const roles1 = db + .prepare('SELECT name, role FROM nodes WHERE role IS NOT NULL ORDER BY name') + .all(); + + const summary2 = classifyNodeRoles(db); + const roles2 = db + .prepare('SELECT name, role FROM nodes WHERE role IS NOT NULL ORDER BY name') + .all(); + + expect(summary1).toEqual(summary2); + expect(roles1).toEqual(roles2); + }); + + it('handles empty graph without crashing', () => { + const summary = classifyNodeRoles(db); + expect(summary).toEqual({ entry: 0, core: 0, 
utility: 0, adapter: 0, dead: 0, leaf: 0 }); + }); + + it('adapts median thresholds to data', () => { + // Create a small graph: 2 functions with fan_in=[1,1], fan_out=[1,1] + // median of non-zero = 1 for both, so fan_in >= 1 = high, fan_out >= 1 = high + const fA = insertNode('a.js', 'file', 'a.js', 0); + const fn1 = insertNode('fn1', 'function', 'a.js', 1); + const fn2 = insertNode('fn2', 'function', 'a.js', 10); + + // fn1 calls fn2, fn2 calls fn1 (mutual) + insertEdge(fn1, fn2, 'calls'); + insertEdge(fn2, fn1, 'calls'); + + const summary = classifyNodeRoles(db); + // Both have fan_in=1 (>= median 1) and fan_out=1 (>= median 1) → utility + expect(summary.utility).toBe(2); + }); + + it('classifies nodes with only non-call edges as dead', () => { + const fA = insertNode('a.js', 'file', 'a.js', 0); + const fn1 = insertNode('fn1', 'function', 'a.js', 1); + // Only import edge, no call edge + insertEdge(fA, fn1, 'imports'); + + const summary = classifyNodeRoles(db); + const role = db.prepare("SELECT role FROM nodes WHERE name = 'fn1'").get(); + expect(role.role).toBe('dead'); + }); +}); From 20c1f7192b94e1eb1bc97ab39e50e31c6664af06 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:42:47 -0700 Subject: [PATCH 7/7] style: fix lint warnings and format issues in roles feature Impact: 2 functions changed, 1 affected --- src/builder.js | 4 +++- src/queries.js | 4 +--- tests/integration/roles.test.js | 2 +- tests/unit/roles.test.js | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/builder.js b/src/builder.js index 0feafc0a..daed94d2 100644 --- a/src/builder.js +++ b/src/builder.js @@ -897,7 +897,9 @@ export async function buildGraph(rootDir, opts = {}) { const { classifyNodeRoles } = await import('./structure.js'); const roleSummary = classifyNodeRoles(db); debug( - `Roles: ${Object.entries(roleSummary).map(([r, c]) => `${r}=${c}`).join(', ')}`, + `Roles: ${Object.entries(roleSummary) + .map(([r, c]) => `${r}=${c}`) + 
.join(', ')}`, ); } catch (err) { debug(`Role classification failed: ${err.message}`); diff --git a/src/queries.js b/src/queries.js index 3e1eac62..3aaeaf67 100644 --- a/src/queries.js +++ b/src/queries.js @@ -1082,9 +1082,7 @@ export function statsData(customDbPath, opts = {}) { // Role distribution let roleRows; if (noTests) { - const allRoleNodes = db - .prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL') - .all(); + const allRoleNodes = db.prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL').all(); const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); const counts = {}; for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; diff --git a/tests/integration/roles.test.js b/tests/integration/roles.test.js index 6dbc5212..c76b5719 100644 --- a/tests/integration/roles.test.js +++ b/tests/integration/roles.test.js @@ -59,7 +59,7 @@ beforeAll(() => { const process_ = insertNode(db, 'processData', 'function', 'app.js', 10); const helper = insertNode(db, 'helper', 'function', 'lib.js', 1); const format = insertNode(db, 'format', 'function', 'lib.js', 10); - const unused = insertNode(db, 'unused', 'function', 'lib.js', 20); + insertNode(db, 'unused', 'function', 'lib.js', 20); const testFn = insertNode(db, 'testMain', 'function', 'app.test.js', 1); // Import edges diff --git a/tests/unit/roles.test.js b/tests/unit/roles.test.js index 8c216c2b..5f8c0e1a 100644 --- a/tests/unit/roles.test.js +++ b/tests/unit/roles.test.js @@ -157,7 +157,7 @@ describe('classifyNodeRoles', () => { it('adapts median thresholds to data', () => { // Create a small graph: 2 functions with fan_in=[1,1], fan_out=[1,1] // median of non-zero = 1 for both, so fan_in >= 1 = high, fan_out >= 1 = high - const fA = insertNode('a.js', 'file', 'a.js', 0); + insertNode('a.js', 'file', 'a.js', 0); const fn1 = insertNode('fn1', 'function', 'a.js', 1); const fn2 = insertNode('fn2', 'function', 'a.js', 10); @@ -176,7 +176,7 @@ describe('classifyNodeRoles', () => { 
// Only import edge, no call edge insertEdge(fA, fn1, 'imports'); - const summary = classifyNodeRoles(db); + classifyNodeRoles(db); const role = db.prepare("SELECT role FROM nodes WHERE name = 'fn1'").get(); expect(role.role).toBe('dead'); });