From 7377fd903c0131828067c3f994aa1c7a534b46f1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:04:09 -0700 Subject: [PATCH 1/7] fix(builder): preserve structure data during incremental builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit buildStructure() clears all contains edges and directory nodes before rebuilding, but during incremental builds it only received the changed files — causing all unchanged files to lose their directory containment edges and metrics. This made `codegraph structure` show "0 files" for most directories after any incremental build. Fix: before calling buildStructure, load all existing file nodes and their symbols from the DB so the complete file set is available for structure rebuild. Closes #89 Impact: 1 functions changed, 0 affected --- src/builder.js | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/builder.js b/src/builder.js index d48d9b8a..724002b0 100644 --- a/src/builder.js +++ b/src/builder.js @@ -827,6 +827,43 @@ export async function buildGraph(rootDir, opts = {}) { } } + // For incremental builds, buildStructure needs ALL files (not just changed ones) + // because it clears and rebuilds all contains edges and directory metrics. + // Load unchanged files from the DB so structure data stays complete. + if (!isFullBuild) { + const existingFiles = db.prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'").all(); + const defsByFile = db.prepare( + "SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file' AND kind != 'directory'", + ); + const importsByFile = db.prepare( + `SELECT DISTINCT n2.file AS source FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE n1.file = ? 
AND e.kind = 'imports'`, + ); + let loadedFromDb = 0; + for (const { file: relPath } of existingFiles) { + if (!fileSymbols.has(relPath)) { + fileSymbols.set(relPath, { + definitions: defsByFile.all(relPath), + imports: importsByFile.all(relPath), + exports: [], + }); + loadedFromDb++; + } + if (!lineCountMap.has(relPath)) { + const absPath = path.join(rootDir, relPath); + try { + const content = fs.readFileSync(absPath, 'utf-8'); + lineCountMap.set(relPath, content.split('\n').length); + } catch { + lineCountMap.set(relPath, 0); + } + } + } + debug(`Structure: ${fileSymbols.size} files (${loadedFromDb} loaded from DB)`); + } + // Build directory structure, containment edges, and metrics const relDirs = new Set(); for (const absDir of discoveredDirs) { From ffcf2bde95364f0998c7d4fafa49f310c095f5d0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:06:56 -0700 Subject: [PATCH 2/7] docs: add dogfood report for v2.3.0 --- generated/DOGFOOD_REPORT_v2.3.0.md | 351 +++++++++++++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 generated/DOGFOOD_REPORT_v2.3.0.md diff --git a/generated/DOGFOOD_REPORT_v2.3.0.md b/generated/DOGFOOD_REPORT_v2.3.0.md new file mode 100644 index 00000000..18d09810 --- /dev/null +++ b/generated/DOGFOOD_REPORT_v2.3.0.md @@ -0,0 +1,351 @@ +# Dogfooding Report: @optave/codegraph@2.3.0 + +**Date:** 2026-02-25 +**Platform:** Windows 11 Pro (win32-x64), Node.js v22.18.0 +**Native binary:** @optave/codegraph-win32-x64-msvc@2.3.0 +**Active engine:** native (v0.1.0), auto-detected +**Target repo:** codegraph itself (99 files, 2 languages: JS 80, Rust 19) + +--- + +## 1. 
Setup & Installation + +| Step | Result | +|------|--------| +| `npm install @optave/codegraph@2.3.0` | 207 packages, 6s, 0 vulnerabilities | +| `npx codegraph --version` | `2.3.0` | +| Native binary package | `@optave/codegraph-win32-x64-msvc@2.3.0` present | +| `optionalDependencies` pinned | All 4 platform packages pinned to `2.3.0` | +| `npx codegraph info` | `engine: native (v0.1.0)` | + +Installation is clean. Native binary loads correctly. All platform packages properly version-pinned. + +--- + +## 2. Cold Start (Pre-Build) + +Every command was tested against a non-existent database path before building: + +| Command | Status | Message | +|---------|--------|---------| +| `query buildGraph` | PASS | "No codegraph database found... Run `codegraph build` first" | +| `stats` | PASS | Same graceful message | +| `cycles` | PASS | Same graceful message | +| `export` | PASS | Same graceful message | +| `embed` | PASS | Same graceful message (note: `--db` not supported on `embed`) | +| `search "test"` | PASS | Same graceful message | +| `map` | PASS | Same graceful message | +| `deps src/cli.js` | PASS | Same graceful message | +| `fn buildGraph` | PASS | Same graceful message | +| `fn-impact buildGraph` | PASS | Same graceful message | +| `context buildGraph` | PASS | Same graceful message | +| `explain src/cli.js` | PASS | Same graceful message | +| `where buildGraph` | PASS | Same graceful message | +| `impact src/cli.js` | PASS | Same graceful message | +| `diff-impact` | PASS | Same graceful message | +| `structure` | PASS | Same graceful message | +| `hotspots` | PASS | Same graceful message | +| `models` | PASS | Lists 7 models (no DB needed) | +| `registry list` | PASS | Lists registered repos (no DB needed) | +| `info` | PASS | Engine diagnostics (no DB needed) | + +**All 20 commands pass cold-start gracefully.** No crashes, no stack traces. + +--- + +## 3. 
Full Command Sweep + +### Build + +``` +codegraph build --engine native --no-incremental --verbose +``` +- 99 files parsed, 576 nodes, 787 edges (build output) +- Stats: 898 edges (includes 111 `contains` edges added by structure analysis) +- Time: sub-second + +### Query Commands + +| Command | Flags Tested | Status | Notes | +|---------|-------------|--------|-------| +| `query ` | `-T`, `-j`, `--db` | PASS | `--depth` not supported (not in help) | +| `impact ` | default | PASS | Shows 6 transitive dependents | +| `map` | `-n 10`, `-j` | PASS | Coupling score present in JSON | +| `stats` | `-j` | PASS | Valid JSON, 82/100 quality | +| `deps ` | default | PASS | Shows imports and importers | +| `fn ` | `--depth 2`, `-f`, `-k`, `-T`, `-j` | PASS | All flags work | +| `fn-impact ` | `-T`, `-j` | PASS | 5 transitive dependents | +| `context ` | `--depth`, `--no-source`, `--with-test-source`, `-j` | PASS | Source included by default | +| `explain ` | file path, function name, `--depth 2`, `-j` | PASS | Structural summary accurate | +| `where ` | default, `-f `, `-j` | PASS | Fast lookup, file overview mode works | +| `diff-impact [ref]` | `main`, `HEAD`, `--staged`, `--format mermaid`, `-j` | PASS | Mermaid output generates flowchart | +| `cycles` | default, `--functions` | PASS | 1 file-level, 2 function-level cycles | +| `structure [dir]` | `.`, `--depth 1`, `--sort cohesion/fan-in`, `-j` | PASS | `.` filter works (v2.2.0 bug fixed) | +| `hotspots` | `--metric fan-in/fan-out/density/coupling`, `--level file/directory`, `-n`, `-j` | PASS | All metrics and levels work | + +### Export Commands + +| Command | Flags | Status | Notes | +|---------|-------|--------|-------| +| `export -f dot` | default, `--functions`, `--min-confidence` | PASS | Valid DOT graph | +| `export -f mermaid` | default | PASS | Valid Mermaid syntax | +| `export -f json` | `-o ` | PASS | 69KB JSON file written | + +### Embedding & Search + +| Command | Flags | Status | Notes | 
+|---------|-------|--------|-------| +| `models` | default | PASS | 7 models listed | +| `embed` | `-m minilm`, `--strategy structured` | PASS | 434 symbols embedded | +| `embed` | `--strategy source` | PASS | 434 symbols, 111 truncation warnings | +| `search` | `-n`, `--min-score`, `-k`, `--file`, multi-query `;` | PASS | Relevant results, buildGraph tops "build graph" query | +| `search --json` | N/A | MISSING | `-j/--json` flag not available on search | + +### Infrastructure Commands + +| Command | Status | Notes | +|---------|--------|-------| +| `info` | PASS | Shows version, engine, platform | +| `--version` | PASS | `2.3.0` | +| `registry list` | PASS | Lists registered repos, `-j` works | +| `registry add` | PASS | Custom name with `-n` | +| `registry remove` | PASS | Removes by name | +| `registry prune --ttl 0` | PASS | Prunes expired entries | +| `mcp` (single-repo) | PASS | 16 tools, no `list_repos`, no `repo` param | +| `mcp --multi-repo` | PASS | 17 tools, `list_repos` present, `repo` param on tools | + +### Edge Cases + +| Scenario | Result | Status | +|----------|--------|--------| +| Non-existent symbol: `query nonexistent` | "No results" | PASS | +| Non-existent file: `deps nonexistent.js` | "No file matching" | PASS | +| Non-existent function: `fn nonexistent` | "No function/method/class matching" | PASS | +| `--kind invalid` | "Invalid kind... Valid: function, method, ..." | PASS | +| `search` with no embeddings | "No embeddings found. Run `codegraph embed` first." 
| PASS | +| `--json` on all commands | Valid JSON (tested: stats, map, hotspots, fn, context, where, explain, structure, registry) | PASS | +| `--no-tests` effect | Reduces callers from 7 to 4 for buildGraph | PASS | +| Pipe output: `map --json 2>/dev/null` | Clean JSON on stdout | PASS | +| `build --no-incremental` | Force full rebuild | PASS | +| `build --verbose` | Per-file parsing details | PASS | + +### JSON Output Validation + +All commands that support `-j/--json` produce valid JSON: +- `stats`, `map`, `hotspots`, `fn`, `fn-impact`, `context`, `where`, `explain`, `structure`, `query`, `registry list`, `diff-impact`, `export -f json` + +--- + +## 4. Rebuild & Staleness + +### Incremental No-Op +``` +Graph is up to date. +``` +PASS — no files re-parsed when nothing changed. + +### Three-Tier Change Detection +- **Touch only (mtime change):** "Self-healed mtime/size for 1 files" — content hash verified, no re-parse. PASS. +- **Content change:** Tier 1 detects mtime+size change → Tier 2 confirms hash change → 1 file re-parsed. PASS. + +### Embed → Rebuild → Search Pipeline +1. Build embeddings (434 symbols) → search "build graph" → buildGraph ranks #1 (46.4%). PASS. +2. Touch file → rebuild → search still works (embeddings remain valid for unchanged symbols). PASS. +3. Delete DB → rebuild from scratch → "No embeddings table found" on search. PASS. + +### DB Migrations +Deleting `graph.db` and rebuilding triggers migrations v1→v4. PASS. + +### Incremental Build Structure Bug (FIXED) +See Bug #1 below. Incremental builds corrupted structure data by clearing ALL `contains` edges but only rebuilding for changed files. **Fixed in this session.** + +--- + +## 5. 
Engine Comparison + +| Metric | Native | WASM | Delta | +|--------|--------|------|-------| +| Nodes | 576 | 576 | 0 | +| Edges (total) | 898 | 898 | 0 | +| Calls | 647 | 647 | 0 | +| Imports | 115 | 115 | 0 | +| Contains | 111 | 111 | 0 | +| Reexports | 25 | 25 | 0 | +| Files | 99 | 99 | 0 | +| Quality Score | 82/100 | 82/100 | 0 | +| Caller Coverage | 56.6% | 56.6% | 0% | +| Call Confidence | 97.8% | 97.8% | 0% | +| Cycles (file) | 1 | 1 | 0 | +| Cycles (fn) | 2 | 2 | 0 | + +**Perfect engine parity.** Both engines produce identical results across all metrics. This is a significant improvement over v2.1.0 which had parity gaps. + +--- + +## 6. Release-Specific Tests + +### v2.3.0 CHANGELOG Features + +| Feature | Test | Result | +|---------|------|--------| +| Graph-enriched embedding strategy (`--strategy structured`) | `embed -m minilm --strategy structured` → 434 symbols, ~100 tokens avg | PASS | +| `--strategy source` option | `embed --strategy source` → 434 symbols, 111 truncated | PASS | +| Context overflow detection | Warning: "111 symbol(s) exceeded model context window (256 tokens)" | PASS | +| `excludeTests` config option | `{ "query": { "excludeTests": true } }` → test files hidden | PASS | +| `--include-tests` CLI override | Overrides config, shows test files | PASS | +| `--depth` on `explain` | `explain src/builder.js --depth 2` → includes recursive deps | PASS | +| Coupling score in `map` | `map -j` → `topNodes[].coupling` field present | PASS | +| Mermaid output in `diff-impact` | `diff-impact main --format mermaid` → flowchart output | PASS | +| `--min-confidence` on export | `export -f dot --min-confidence 0.5` → filters low-confidence edges | PASS | +| `/dogfood` skill | Currently running! 
| PASS | + +### v2.3.0 Bug Fixes Verified + +| Fix | Test | Result | +|-----|------|--------| +| Graceful error for `cycles`/`export`/`embed` with no DB | All tested pre-build | PASS | +| Default model changed to minilm | `embed` without `-m` uses minilm | PASS | +| `splitIdentifier` camelCase fix | Search "build graph" → `buildGraph` ranks high | PASS | +| `structure .` treated as no filter | `structure .` shows full project | PASS | +| Engine status messages to stderr | `build 2>/dev/null` produces no stdout | PASS | +| `--with-test-source` rename | `context --with-test-source` works, old `--include-test-source` gone | PASS | +| Embedding invalidation on node deletion | Orphan warning mechanism in builder.js | VERIFIED in code | + +--- + +## 7. Additional Testing + +### Programmatic API + +ESM import of `@optave/codegraph` exports all expected symbols: +- Functions: `buildGraph`, `loadConfig`, `openDb`, `findDbPath`, `contextData`, `explainData`, `whereData`, `fnDepsData`, `diffImpactData`, `statsData`, `isNativeAvailable`, and 40+ more +- Constants: `ALL_SYMBOL_KINDS` (10 kinds), `EXTENSIONS` (15 extensions), `MODELS` (7 models), `IGNORE_DIRS`, `FALSE_POSITIVE_NAMES` +- CJS `require()` correctly fails with `ERR_PACKAGE_PATH_NOT_EXPORTED` (ESM-only package) + +### MCP Server + +| Mode | Tools | `list_repos` | `repo` param | Status | +|------|-------|-------------|-------------|--------| +| Single-repo (default) | 16 | absent | absent | PASS | +| `--multi-repo` | 17 | present | present | PASS | + +MCP initializes via JSON-RPC, responds to `tools/list`, correct tool schemas. + +### Config & Registry + +- `.codegraphrc.json` with `query.excludeTests: true` → works +- `registry add/list/remove/prune` → all work +- `registry list -j` → valid JSON with timestamps + +### Version Upgrade Path +- Incremental build on a graph from a previous version says "Graph is up to date" even if the engine version changed. 
Users should run `--no-incremental` after upgrading to ensure consistent data. (Not a bug per se, but worth documenting.) + +--- + +## 8. Bugs Found + +### BUG 1: Incremental builds corrupt structure/contains edges (Medium) +- **Issue:** [#89](https://github.com/optave/codegraph/issues/89) +- **PR:** [#91](https://github.com/optave/codegraph/pull/91) +- **Symptoms:** After any incremental build, `codegraph structure` shows most directories as "0 files, 0 symbols". Only the changed file's directory retains data. `contains` edges drop from 111 to ~15. +- **Root cause:** `buildStructure()` unconditionally clears ALL `contains` edges and directory nodes (`DELETE FROM edges WHERE kind = 'contains'`), then only rebuilds for files in `fileSymbols` — which during incremental builds only contains changed files. +- **Fix applied:** Before calling `buildStructure`, load all existing file nodes from the DB into `fileSymbols` and `lineCountMap` so the complete file set is available for structure rebuild. 37 lines added to `builder.js`. All 491 tests pass. + +### Enhancement: `search` command missing `--json` flag (Low) +- **Issue:** [#90](https://github.com/optave/codegraph/issues/90) +- **PR:** N/A — enhancement, not a bug fix +- **Description:** All other query commands support `-j/--json` but `search` does not. Running `search -j` returns "unknown option '-j'". + +--- + +## 9. Suggestions for Improvement + +### 9.1 Add `--json` to `search` command +Every other query command supports JSON output. `search` is the only holdout, which breaks automation workflows. + +### 9.2 Document `excludeTests` config nesting +The CHANGELOG and CLI help say "excludeTests config option" but don't mention it must be nested under `query`. A top-level `{ "excludeTests": true }` silently does nothing. 
Either: +- Document as `query.excludeTests` in the README/CHANGELOG +- Or accept it at both top-level and nested + +### 9.3 Warn on engine mismatch during incremental builds +Store the engine used for the last full build in DB metadata. When an incremental build uses a different engine, warn the user and suggest `--no-incremental`. + +### 9.4 Add `--no-incremental` recommendation after version upgrades +When `codegraph info` detects the installed version differs from the version that built the graph, suggest a full rebuild. + +--- + +## 10. Testing Plan + +### General Testing Plan (Any Release) + +- [ ] Install from npm, verify version and native binary +- [ ] Cold start: all commands gracefully fail without DB +- [ ] Full build: verify node/edge counts +- [ ] Incremental no-op: "Graph is up to date" +- [ ] Incremental with change: only changed files re-parsed +- [ ] `--no-incremental` full rebuild matches clean build +- [ ] Engine comparison: native vs WASM parity +- [ ] All query commands with `-j`, `-T`, `--include-tests` +- [ ] Edge cases: non-existent symbols/files, invalid `--kind` +- [ ] Export: DOT, Mermaid, JSON formats +- [ ] Embed + search pipeline +- [ ] Registry CRUD: add, list, remove, prune +- [ ] MCP single-repo and multi-repo modes +- [ ] Programmatic API: key exports present +- [ ] Pipe output: clean JSON on stdout +- [ ] DB deletion → rebuild → migrations run +- [ ] `structure` after incremental build preserves all files + +### Release-Specific Testing Plan (v2.3.0) + +- [ ] `--strategy structured` vs `--strategy source` embeddings +- [ ] Context overflow detection and truncation warning +- [ ] `excludeTests` config (under `query` key) +- [ ] `--include-tests` override +- [ ] `--depth` on `explain` +- [ ] Coupling score in `map` output +- [ ] Mermaid output in `diff-impact` +- [ ] `--min-confidence` on export +- [ ] `structure .` no longer crashes +- [ ] Default model is minilm (no auth required) +- [ ] Engine status messages on stderr (not 
stdout) +- [ ] `--with-test-source` renamed from `--include-test-source` + +### Proposed Additional Tests + +- [ ] **Embed → modify → rebuild → search:** Most likely path to stale embeddings. Should be tested every release. +- [ ] **Watch mode integration:** Start watcher, modify file, verify incremental update + query correctness. +- [ ] **Multi-repo MCP workflow:** `registry add` → `mcp --repos ` → query via JSON-RPC. +- [ ] **Config options:** Test `.codegraphrc.json` with `include`/`exclude` patterns, `aliases`, `build.incremental: false`, `query.defaultDepth`, `search.defaultMinScore`. +- [ ] **Concurrent builds:** Two builds at once on the same DB — should one fail or queue. +- [ ] **Different repo test:** Build on a small open-source project besides codegraph itself. +- [ ] **`apiKeyCommand` credential resolution:** Test with a simple `echo` command. + +--- + +## 11. Overall Assessment + +v2.3.0 is a solid release with significant improvements in embedding quality (graph-enriched strategy), better developer experience (`excludeTests` config, `--depth` on explain, coupling scores), and excellent engine parity (0% delta on all metrics). + +**The one critical bug found** — incremental builds corrupting structure data — affects all users who run `codegraph structure` or `codegraph hotspots` after any incremental build. The fix is straightforward (37 lines in builder.js) and has been submitted as PR #91. Until merged, users should run `codegraph build --no-incremental` to get correct structure data. + +All 20+ CLI commands work correctly. Cold-start error handling is excellent. JSON output is valid across all commands. The three-tier change detection (journal → mtime+size → content hash) is robust. MCP server works in both single and multi-repo modes. 
+ +**Rating: 8/10** + +Deductions: +- -1 for the incremental structure corruption bug (affects real-world usage) +- -0.5 for `search` missing `--json` (inconsistency with other commands) +- -0.5 for undocumented `excludeTests` nesting requirement + +--- + +## 12. Issues & PRs Created + +| Type | Number | Title | Status | +|------|--------|-------|--------| +| Issue | [#89](https://github.com/optave/codegraph/issues/89) | bug: mixed-engine incremental build corrupts structure/contains edges | open | +| Issue | [#90](https://github.com/optave/codegraph/issues/90) | enhancement: add --json flag to search command | open | +| PR | [#91](https://github.com/optave/codegraph/pull/91) | fix(builder): preserve structure data during incremental builds | open | From 689dc06d2485a8b2fb78f2743221914377e0a78d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:13:25 -0700 Subject: [PATCH 3/7] docs: add benchmark results to dogfood report v2.3.0 --- generated/DOGFOOD_REPORT_v2.3.0.md | 63 ++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/generated/DOGFOOD_REPORT_v2.3.0.md b/generated/DOGFOOD_REPORT_v2.3.0.md index 18d09810..1afa7506 100644 --- a/generated/DOGFOOD_REPORT_v2.3.0.md +++ b/generated/DOGFOOD_REPORT_v2.3.0.md @@ -180,6 +180,69 @@ See Bug #1 below. Incremental builds corrupted structure data by clearing ALL `c **Perfect engine parity.** Both engines produce identical results across all metrics. This is a significant improvement over v2.1.0 which had parity gaps. 
+### Performance Benchmarks + +#### Build Benchmark (`scripts/benchmark.js`) + +| Metric | v2.1.0 WASM (92 files) | v2.3.0 WASM (99 files) | Per-file delta | +|--------|----------------------|----------------------|----------------| +| Build time | 609ms (6.6ms/file) | 509ms (5.1ms/file) | -22% per file | +| Query time | 1.9ms | 1.8ms | -5% | +| Nodes | 527 (5.7/file) | 575 (5.8/file) | +2% | +| Edges | 814 (8.8/file) | 897 (9.1/file) | +3% | +| DB size | 344KB (3829B/file) | 372KB (3848B/file) | +0.5% | + +Build performance improved 22% per file vs v2.1.0. Node/edge counts grew slightly as the codebase grew from 92→99 files. No regressions. + +#### Incremental Benchmark (`scripts/incremental-benchmark.js`) + +| Metric | v2.3.0 WASM | +|--------|-------------| +| Full build | 474ms | +| No-op rebuild | 4ms | +| 1-file rebuild | 144ms | +| Import resolution (84 pairs) | 1.9ms | + +No-op rebuilds complete in 4ms. Single-file incremental rebuilds take ~144ms (30% of full build for 1% of files). + +#### Query Benchmark (`scripts/query-benchmark.js`) + +| Metric | v2.3.0 WASM | +|--------|-------------| +| fnDeps depth 1 | 0.7ms | +| fnDeps depth 3 | 1.8ms | +| fnDeps depth 5 | 1.8ms | +| fnImpact depth 1 | 0.7ms | +| fnImpact depth 3 | 1.3ms | +| fnImpact depth 5 | 1.3ms | +| diff-impact | 13.7ms | + +Sub-2ms for all function-level queries. No depth scaling issues. 
+ +#### Embedding Benchmark (`scripts/embedding-benchmark.js`) + +| Model | Hit@1 | Hit@3 | Hit@5 | Misses | +|-------|-------|-------|-------|--------| +| minilm (default) | 252/329 (76.6%) | 312/329 (94.8%) | 322/329 (97.9%) | 2 | +| jina-small | 256/329 (77.8%) | 318/329 (96.7%) | 324/329 (98.5%) | 2 | +| jina-base | 248/329 (75.4%) | 311/329 (94.5%) | 320/329 (97.3%) | 3 | +| nomic | 278/329 (84.5%) | 326/329 (99.1%) | 329/329 (100%) | 0 | +| nomic-v1.5 | 274/329 (83.3%) | 323/329 (98.2%) | 329/329 (100%) | 0 | +| bge-large | FAIL (ONNX load error on Windows) | — | — | — | + +nomic and nomic-v1.5 achieve perfect Hit@5 (100%) with 0 misses. minilm (default) achieves strong 97.9% Hit@5 with the smallest model size. + +#### Fix Impact: Incremental Structure Rebuild (PR #91) + +| Metric | Before (main) | After (fix) | Delta | +|--------|--------------|-------------|-------| +| Full build | 416ms | 439ms | +23ms (+5.5%) | +| No-op rebuild | 4ms | 4ms | 0 | +| 1-file rebuild | 125ms | 159ms | +34ms (+27%) | +| Import resolution | 2.0ms | 1.9ms | -0.1ms | + +The fix adds ~34ms to 1-file incremental rebuilds (loading 98 unchanged files from DB for structure rebuild). Acceptable trade-off for correct structure data. + --- ## 6. Release-Specific Tests From cadb3ce78fd164cb3205655e270094db57b822c6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:14:37 -0700 Subject: [PATCH 4/7] docs(skill): add performance benchmarks phase to dogfood skill Add Phase 4b requiring all four benchmark scripts to be run during dogfooding sessions. Also update Phase 7c to require before/after benchmark comparisons when bug fixes touch benchmarked code paths. 
--- .claude/skills/dogfood/SKILL.md | 37 ++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/.claude/skills/dogfood/SKILL.md b/.claude/skills/dogfood/SKILL.md index 250fcecc..78218b99 100644 --- a/.claude/skills/dogfood/SKILL.md +++ b/.claude/skills/dogfood/SKILL.md @@ -158,6 +158,33 @@ Test that incremental rebuilds, full rebuilds, and cross-feature state remain co --- +## Phase 4b — Performance Benchmarks + +Run all four benchmark scripts from the codegraph source repo (not the temp install dir) and record results. These detect performance regressions between releases. + +| Benchmark | Script | What it measures | When it matters | +|-----------|--------|-----------------|-----------------| +| Build | `node scripts/benchmark.js` | Build speed (native vs WASM), query latency | Always | +| Incremental | `node scripts/incremental-benchmark.js` | Incremental build tiers, import resolution throughput | Always | +| Query | `node scripts/query-benchmark.js` | Query depth scaling, diff-impact latency | Always | +| Embedding | `node scripts/embedding-benchmark.js` | Search recall (Hit@1/3/5/10) across models | Always | + +1. Run all four from the codegraph source repo directory. +2. Record the JSON output from each. +3. Compare with the previous release's numbers in `generated/BUILD-BENCHMARKS.md` (build benchmark) and previous dogfood reports. +4. Flag any regressions: + - Build time per file >10% slower → investigate + - Query latency >2x slower → investigate + - Embedding recall (Hit@5) drops by >2% → investigate + - Incremental no-op >10ms → investigate +5. Include a **Performance Benchmarks** section in the report with tables for each benchmark. + +**Note:** The native engine may not be available in the dev repo (no prebuilt binary in `node_modules`). Record WASM results at minimum. If native is available, record both. 
+ +**IMPORTANT:** If your bug-fix PR touches code covered by a benchmark (`builder.js`, `parser.js`, `queries.js`, `resolve.js`, `db.js`, `embedder.js`, `journal.js`), you **must** run the relevant benchmarks **before and after** your changes and include the comparison in the PR description. + +--- + ## Phase 5 — Changes Since Last Release 1. Read `CHANGELOG.md` to identify what changed in v$ARGUMENTS vs the previous version. @@ -254,14 +281,15 @@ For each bug you can fix in this session: 2. Implement the fix. 3. Run `npm test` to verify no regressions. 4. Run `npm run lint` to verify code style. -5. Commit with a message referencing the issue: +5. **Run benchmarks before and after** if your fix touches code covered by a benchmark (see Phase 4b table). Include the comparison in the PR body. +6. Commit with a message referencing the issue: ``` fix(): Closes # ``` The `Closes #N` footer tells GitHub to auto-close the issue when the PR merges. -6. Push and open a PR: +7. Push and open a PR. If benchmarks were run, include them in the body: ```bash gh pr create --base main \ --title "fix(): " \ @@ -272,12 +300,15 @@ For each bug you can fix in this session: ## Found during Dogfooding v$ARGUMENTS — see # + ## Benchmark results + + ## Test plan - [ ] PR_EOF )" ``` -7. Return to the main working branch before continuing to the next bug. +8. Return to the main working branch before continuing to the next bug. If a bug is too complex to fix in this session, leave the issue open and note it in the report. 
From 7b538bccd78c23f9114d5408bb7c452feec125b3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:21:58 -0700 Subject: [PATCH 5/7] perf(builder): avoid disk reads for line counts during incremental rebuild Address Greptile review feedback on PR #91: - Replace misleading importsByFile query with importCountByFile COUNT query, since buildStructure only uses imports.length for metrics - Cache line counts from node_metrics table instead of reading every unchanged file from disk (falls back to disk if not cached) Impact: 1 functions changed, 1 affected --- src/builder.js | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/builder.js b/src/builder.js index 724002b0..ce9f6633 100644 --- a/src/builder.js +++ b/src/builder.js @@ -835,29 +835,45 @@ export async function buildGraph(rootDir, opts = {}) { const defsByFile = db.prepare( "SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file' AND kind != 'directory'", ); - const importsByFile = db.prepare( - `SELECT DISTINCT n2.file AS source FROM edges e + // Count imports per file — buildStructure only uses imports.length for metrics + const importCountByFile = db.prepare( + `SELECT COUNT(DISTINCT n2.file) AS cnt FROM edges e JOIN nodes n1 ON e.source_id = n1.id JOIN nodes n2 ON e.target_id = n2.id WHERE n1.file = ? 
AND e.kind = 'imports'`, ); + const lineCountByFile = db.prepare( + `SELECT n.name AS file, m.line_count + FROM node_metrics m JOIN nodes n ON m.node_id = n.id + WHERE n.kind = 'file'`, + ); + const cachedLineCounts = new Map(); + for (const row of lineCountByFile.all()) { + cachedLineCounts.set(row.file, row.line_count); + } let loadedFromDb = 0; for (const { file: relPath } of existingFiles) { if (!fileSymbols.has(relPath)) { + const importCount = importCountByFile.get(relPath)?.cnt || 0; fileSymbols.set(relPath, { definitions: defsByFile.all(relPath), - imports: importsByFile.all(relPath), + imports: new Array(importCount), exports: [], }); loadedFromDb++; } if (!lineCountMap.has(relPath)) { - const absPath = path.join(rootDir, relPath); - try { - const content = fs.readFileSync(absPath, 'utf-8'); - lineCountMap.set(relPath, content.split('\n').length); - } catch { - lineCountMap.set(relPath, 0); + const cached = cachedLineCounts.get(relPath); + if (cached != null) { + lineCountMap.set(relPath, cached); + } else { + const absPath = path.join(rootDir, relPath); + try { + const content = fs.readFileSync(absPath, 'utf-8'); + lineCountMap.set(relPath, content.split('\n').length); + } catch { + lineCountMap.set(relPath, 0); + } } } } From 165f6ca3653be8ae2ec745f244f344d23b680fd5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:42:16 -0700 Subject: [PATCH 6/7] feat: add node role classification (entry/core/utility/adapter/dead/leaf) Auto-classify every symbol based on fan-in/fan-out connectivity patterns using adaptive median thresholds. Roles are computed during graph build and stored in the DB (migration v5). 
- classifyNodeRoles() in structure.js with median-based thresholds - New `roles` CLI command with --role and --file filters - New `node_roles` MCP tool (18 tools total) - Role field surfaced in where/explain/context/stats/list-functions - Dead code detection via `roles --role dead` - Unit + integration tests for classification and queries - Updated README, BACKLOG, and COMPETITIVE_ANALYSIS docs Impact: 19 functions changed, 13 affected --- README.md | 19 ++- generated/COMPETITIVE_ANALYSIS.md | 56 ++++---- roadmap/BACKLOG.md | 6 +- src/builder.js | 11 ++ src/cli.js | 24 ++++ src/db.js | 10 ++ src/index.js | 3 + src/mcp.js | 27 +++- src/queries.js | 133 ++++++++++++++++++- src/structure.js | 94 +++++++++++++ tests/integration/roles.test.js | 212 ++++++++++++++++++++++++++++++ tests/unit/mcp.test.js | 15 +++ tests/unit/roles.test.js | 183 ++++++++++++++++++++++++++ 13 files changed, 750 insertions(+), 43 deletions(-) create mode 100644 tests/integration/roles.test.js create mode 100644 tests/unit/roles.test.js diff --git a/README.md b/README.md index 011af9d3..844b94a4 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ cd your-project codegraph build ``` -That's it. No config files, no Docker, no JVM, no API keys, no accounts. The graph is ready to query. Add `codegraph mcp` to your AI agent's config and it has full access to your dependency graph through 17 MCP tools. +That's it. No config files, no Docker, no JVM, no API keys, no accounts. The graph is ready to query. Add `codegraph mcp` to your AI agent's config and it has full access to your dependency graph through 18 MCP tools. ### Why it matters @@ -79,6 +79,7 @@ That's it. No config files, no Docker, no JVM, no API keys, no accounts. 
The gra | MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | — | | Git diff impact | **Yes** | — | — | — | — | **Yes** | — | **Yes** | | Watch mode | **Yes** | — | **Yes** | — | — | — | — | — | +| Dead code / role classification | **Yes** | — | **Yes** | — | — | — | — | **Yes** | | Cycle detection | **Yes** | — | **Yes** | — | — | — | — | **Yes** | | Incremental rebuilds | **O(changed)** | — | O(n) Merkle | — | — | — | — | — | | Zero config | **Yes** | — | **Yes** | — | — | — | — | — | @@ -94,7 +95,8 @@ That's it. No config files, no Docker, no JVM, no API keys, no accounts. The gra | **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds even on large codebases | | **🔓** | **Zero-cost core, LLM-enhanced when you want** | Full graph analysis with no API keys, no accounts, no cost. Optionally bring your own LLM provider — your code only goes where you choose | | **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes | -| **🤖** | **Built for AI agents** | 17-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default | +| **🏷️** | **Role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` — agents instantly know what they're looking at | +| **🤖** | **Built for AI agents** | 18-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. 
Single-repo by default | | **🌐** | **Multi-language, one CLI** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph | | **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — ships with a GitHub Actions workflow | | **🧠** | **Semantic search** | Local embeddings by default, LLM-powered when opted in — multi-query with RRF ranking via `"auth; token; JWT"` | @@ -141,7 +143,7 @@ After modifying code: Or connect directly via MCP: ```bash -codegraph mcp # 17-tool MCP server — AI queries the graph directly +codegraph mcp # 18-tool MCP server — AI queries the graph directly ``` Full agent setup: [AI Agent Guide](docs/ai-agent-guide.md) · [CLAUDE.md template](docs/ai-agent-guide.md#claudemd-template) @@ -161,11 +163,12 @@ Full agent setup: [AI Agent Guide](docs/ai-agent-guide.md) · [CLAUDE.md t | 📊 | **Diff impact** | Parse `git diff`, find overlapping functions, trace their callers | | 🗺️ | **Module map** | Bird's-eye view of your most-connected files | | 🏗️ | **Structure & hotspots** | Directory cohesion scores, fan-in/fan-out hotspot detection, module boundaries | +| 🏷️ | **Node role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` based on connectivity patterns — agents instantly know architectural role | | 🔄 | **Cycle detection** | Find circular dependencies at file or function level | | 📤 | **Export** | DOT (Graphviz), Mermaid, and JSON graph export | | 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking | | 👀 | **Watch mode** | Incrementally update the graph as files change | -| 🤖 | **MCP server** | 17-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo | +| 🤖 | **MCP server** | 18-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo | | ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases | 
## 📦 Commands @@ -189,6 +192,9 @@ codegraph map -n 50 --no-tests # Top 50, excluding test files codegraph where # Where is a symbol defined and used? codegraph where --file src/db.js # List symbols, imports, exports for a file codegraph stats # Graph health: nodes, edges, languages, quality score +codegraph roles # Node role classification (entry, core, utility, adapter, dead, leaf) +codegraph roles --role dead -T # Find dead code (unreferenced, non-exported symbols) +codegraph roles --role core --file src/ # Core symbols in src/ ``` ### Deep Context (AI-Optimized) @@ -402,7 +408,7 @@ Optional: `@huggingface/transformers` (semantic search), `@modelcontextprotocol/ ### MCP Server -Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 17 tools, so AI assistants can query your dependency graph directly: +Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 18 tools, so AI assistants can query your dependency graph directly: ```bash codegraph mcp # Single-repo mode (default) — only local project @@ -589,6 +595,7 @@ const { results: fused } = await multiSearchData( | Incremental rebuilds | **O(changed)** | — | O(n) Merkle | — | — | — | | MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | | Git diff impact | **Yes** | — | — | — | — | **Yes** | +| Dead code / role classification | **Yes** | — | **Yes** | — | — | — | | Semantic search | **Yes** | — | **Yes** | **Yes** | — | **Yes** | | Watch mode | **Yes** | — | **Yes** | — | — | — | | Zero config, no Docker/JVM | **Yes** | — | **Yes** | — | — | — | @@ -606,7 +613,7 @@ See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap and **[STABILI 5. **Natural Language Queries** — `codegraph ask` command, conversational sessions 6. **Expanded Language Support** — 8 new languages (12 → 20) 7. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output -8. 
**Visualization & Advanced** — web UI, dead code detection, monorepo support, agentic search +8. **Visualization & Advanced** — web UI, monorepo support, agentic search ## 🤝 Contributing diff --git a/generated/COMPETITIVE_ANALYSIS.md b/generated/COMPETITIVE_ANALYSIS.md index ca1c2340..464df995 100644 --- a/generated/COMPETITIVE_ANALYSIS.md +++ b/generated/COMPETITIVE_ANALYSIS.md @@ -1,6 +1,6 @@ # Competitive Analysis — Code Graph / Code Intelligence Tools -**Date:** 2026-02-22 +**Date:** 2026-02-25 **Scope:** 136+ code analysis tools evaluated, 81+ ranked against `@optave/codegraph` --- @@ -20,15 +20,15 @@ Ranked by weighted score across 6 dimensions (each 1–5): | 5 | 4.2 | [seatedro/glimpse](https://github.com/seatedro/glimpse) | 349 | Rust | MIT | Clipboard-first codebase-to-LLM tool with call graphs, token counting, LSP resolution | | 6 | 4.0 | [SimplyLiz/CodeMCP (CKB)](https://github.com/SimplyLiz/CodeMCP) | 59 | Go | Custom | SCIP-based indexing, compound operations (83% token savings), CODEOWNERS, secret scanning | | 7 | 4.0 | [abhigyanpatwari/GitNexus](https://github.com/abhigyanpatwari/GitNexus) | — | TS/JS | PolyForm NC | Knowledge graph with precomputed structural intelligence, 7 MCP tools, hybrid BM25+semantic search, clustering, process tracing, KuzuDB. 
**Non-commercial only** | -| 8 | 3.9 | [harshkedia177/axon](https://github.com/harshkedia177/axon) | 29 | Python | None | 11-phase pipeline, KuzuDB, Leiden community detection, dead code, change coupling | -| 9 | 3.8 | [anrgct/autodev-codebase](https://github.com/anrgct/autodev-codebase) | 111 | TypeScript | None | 40+ languages, 7 embedding providers, Cytoscape.js visualization, LLM reranking | -| 10 | 3.8 | [ShiftLeftSecurity/codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph) | 564 | Scala | Apache-2.0 | CPG specification + Tinkergraph library, Scala query DSL, protobuf serialization (Joern foundation) | -| 11 | 3.8 | [Jakedismo/codegraph-rust](https://github.com/Jakedismo/codegraph-rust) | 142 | Rust | None | 100% Rust GraphRAG, SurrealDB, LSP-powered dataflow analysis, architecture boundary enforcement | -| 12 | 3.7 | [Anandb71/arbor](https://github.com/Anandb71/arbor) | 85 | Rust | MIT | Native GUI, confidence scoring, architectural role classification, fuzzy search, MCP | -| 13 | 3.7 | [JudiniLabs/mcp-code-graph](https://github.com/JudiniLabs/mcp-code-graph) | 380 | JavaScript | MIT | Cloud-hosted MCP server by CodeGPT, semantic search, dependency links (requires account) | -| 14 | 3.7 | [entrepeneur4lyf/code-graph-mcp](https://github.com/entrepeneur4lyf/code-graph-mcp) | 80 | Python | MIT | ast-grep for 25+ languages, complexity metrics, code smells, circular dependency detection | -| 15 | 3.7 | [cs-au-dk/jelly](https://github.com/cs-au-dk/jelly) | 417 | TypeScript | BSD-3 | Academic-grade JS/TS points-to analysis, call graphs, vulnerability exposure, 5 published papers | -| **16** | **3.8** | **[@optave/codegraph](https://github.com/optave/codegraph)** | — | **JS/Rust** | **Apache-2.0** | **Sub-second incremental rebuilds, dual engine (native Rust + WASM), 11 languages, 17-tool MCP, qualified call resolution, `context`/`explain`/`where` AI-optimized commands, structure/hotspot analysis, zero-cost core + optional LLM enhancement** | 
+| **8** | **4.0** | **[@optave/codegraph](https://github.com/optave/codegraph)** | — | **JS/Rust** | **Apache-2.0** | **Sub-second incremental rebuilds, dual engine (native Rust + WASM), 11 languages, 18-tool MCP, qualified call resolution, `context`/`explain`/`where` AI-optimized commands, structure/hotspot analysis, node role classification (entry/core/utility/adapter/dead/leaf), dead code detection, zero-cost core + optional LLM enhancement** | +| 9 | 3.9 | [harshkedia177/axon](https://github.com/harshkedia177/axon) | 29 | Python | None | 11-phase pipeline, KuzuDB, Leiden community detection, dead code, change coupling | +| 10 | 3.8 | [anrgct/autodev-codebase](https://github.com/anrgct/autodev-codebase) | 111 | TypeScript | None | 40+ languages, 7 embedding providers, Cytoscape.js visualization, LLM reranking | +| 11 | 3.8 | [ShiftLeftSecurity/codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph) | 564 | Scala | Apache-2.0 | CPG specification + Tinkergraph library, Scala query DSL, protobuf serialization (Joern foundation) | +| 12 | 3.8 | [Jakedismo/codegraph-rust](https://github.com/Jakedismo/codegraph-rust) | 142 | Rust | None | 100% Rust GraphRAG, SurrealDB, LSP-powered dataflow analysis, architecture boundary enforcement | +| 13 | 3.7 | [Anandb71/arbor](https://github.com/Anandb71/arbor) | 85 | Rust | MIT | Native GUI, confidence scoring, architectural role classification, fuzzy search, MCP | +| 14 | 3.7 | [JudiniLabs/mcp-code-graph](https://github.com/JudiniLabs/mcp-code-graph) | 380 | JavaScript | MIT | Cloud-hosted MCP server by CodeGPT, semantic search, dependency links (requires account) | +| 15 | 3.7 | [entrepeneur4lyf/code-graph-mcp](https://github.com/entrepeneur4lyf/code-graph-mcp) | 80 | Python | MIT | ast-grep for 25+ languages, complexity metrics, code smells, circular dependency detection | +| 16 | 3.7 | [cs-au-dk/jelly](https://github.com/cs-au-dk/jelly) | 417 | TypeScript | BSD-3 | Academic-grade JS/TS points-to analysis, 
call graphs, vulnerability exposure, 5 published papers | | 17 | 3.5 | [er77/code-graph-rag-mcp](https://github.com/er77/code-graph-rag-mcp) | 89 | TypeScript | MIT | 26 MCP methods, 11 languages, tree-sitter, semantic search, hotspot analysis, clone detection | | 18 | 3.5 | [MikeRecognex/mcp-codebase-index](https://github.com/MikeRecognex/mcp-codebase-index) | 25 | Python | AGPL-3.0 | 18 MCP tools, zero runtime deps, auto-incremental reindexing via git diff | | 19 | 3.5 | [nahisaho/CodeGraphMCPServer](https://github.com/nahisaho/CodeGraphMCPServer) | 7 | Python | MIT | GraphRAG with Louvain community detection, 16 languages, 14 MCP tools, 334 tests | @@ -136,15 +136,15 @@ Ranked by weighted score across 6 dimensions (each 1–5): | 5 | glimpse | 4 | 4 | 5 | 3 | 5 | 5 | | 6 | CKB | 5 | 5 | 4 | 3 | 4 | 3 | | 7 | GitNexus | 5 | 5 | 4 | 4 | 4 | 2 | -| 8 | axon | 5 | 5 | 4 | 2 | 4 | 2 | -| 9 | autodev-codebase | 5 | 3 | 3 | 5 | 3 | 4 | -| 10 | codepropertygraph | 4 | 5 | 2 | 4 | 5 | 3 | -| 11 | codegraph-rust | 5 | 5 | 2 | 4 | 4 | 3 | -| 12 | arbor | 4 | 4 | 5 | 4 | 5 | 3 | -| 13 | mcp-code-graph | 4 | 3 | 4 | 4 | 3 | 4 | -| 14 | code-graph-mcp | 4 | 4 | 4 | 5 | 3 | 2 | -| 15 | jelly | 4 | 5 | 4 | 1 | 5 | 3 | -| **16** | **codegraph (us)** | **4** | **4** | **5** | **4** | **4** | **2** | +| **8** | **codegraph (us)** | **5** | **4** | **5** | **4** | **4** | **2** | +| 9 | axon | 5 | 5 | 4 | 2 | 4 | 2 | +| 10 | autodev-codebase | 5 | 3 | 3 | 5 | 3 | 4 | +| 11 | codepropertygraph | 4 | 5 | 2 | 4 | 5 | 3 | +| 12 | codegraph-rust | 5 | 5 | 2 | 4 | 4 | 3 | +| 13 | arbor | 4 | 4 | 5 | 4 | 5 | 3 | +| 14 | mcp-code-graph | 4 | 3 | 4 | 4 | 3 | 4 | +| 15 | code-graph-mcp | 4 | 4 | 4 | 5 | 3 | 2 | +| 16 | jelly | 4 | 5 | 4 | 1 | 5 | 3 | | 17 | code-graph-rag-mcp | 5 | 4 | 3 | 4 | 3 | 2 | | 18 | mcp-codebase-index | 4 | 3 | 5 | 3 | 4 | 2 | | 19 | CodeGraphMCPServer | 4 | 4 | 4 | 5 | 3 | 1 | @@ -186,10 +186,11 @@ Ranked by weighted score across 6 dimensions (each 1–5): | 
**Zero-cost core, LLM-enhanced when you choose** | The full graph pipeline (parse, resolve, query, impact analysis) runs with no API keys, no cloud, no cost. LLM features (richer embeddings, semantic search) are an optional layer on top — using whichever provider the user already works with. Competitors either require cloud APIs for core features (code-graph-rag, autodev-codebase, mcp-code-graph) or offer no AI enhancement at all (CKB, axon). Nobody else offers both modes in one tool | | **Data goes only where you send it** | Your code reaches exactly one place: the AI agent you already chose (via MCP). No additional third-party services, no surprise cloud calls. Competitors like code-graph-rag, autodev-codebase, mcp-code-graph, and Claude-code-memory send your code to additional AI providers beyond the agent you're using | | **Dual engine architecture** | Only project with native Rust (napi-rs) + automatic WASM fallback. Others are pure Rust (narsil-mcp, codegraph-rust) OR pure JS/Python — never both | -| **Standalone CLI + MCP** | Full CLI experience (`context`, `explain`, `where`, `fn`, `diff-impact`, `map`, `deps`, `search`, `structure`, `hotspots`) alongside 17-tool MCP server. Many competitors are MCP-only (narsil-mcp, code-graph-mcp, CodeGraphMCPServer) with no standalone query interface | +| **Standalone CLI + MCP** | Full CLI experience (`context`, `explain`, `where`, `fn`, `diff-impact`, `map`, `deps`, `search`, `structure`, `hotspots`, `roles`) alongside 18-tool MCP server. Many competitors are MCP-only (narsil-mcp, code-graph-mcp, CodeGraphMCPServer) with no standalone query interface | | **Single-repo MCP isolation** | Security-conscious default: tools have no `repo` property unless `--multi-repo` is explicitly enabled. Most competitors default to exposing everything | | **Zero-dependency deployment** | `npm install` and done. No Docker, no external databases, no Python, no SCIP toolchains, no JVM. 
Published platform-specific binaries (`@optave/codegraph-{platform}-{arch}`) resolve automatically. Joern requires JDK 21, cpg requires Gradle + language-specific deps, codegraph-rust requires SurrealDB + LSP servers | | **Structure & quality analysis** | `structure` shows directory cohesion scores, `hotspots` finds files with extreme fan-in/fan-out/density, `stats` includes a graph quality score (0-100) with false-positive warnings. These give agents architectural awareness without requiring external tools | +| **Node role classification** | Every symbol is auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` based on fan-in/fan-out patterns with adaptive median thresholds. Agents instantly know a function's architectural role without reading surrounding code. Inspired by arbor's role classification — but we compute roles automatically during graph build rather than requiring manual tagging, and we surface roles across all query commands (`where`, `explain`, `context`, `stats`, `list-functions`). Dead code detection comes free as a byproduct | | **Callback pattern extraction** | Extracts symbols from Commander `.command().action()` (as `command:build`), Express route handlers (as `route:GET /api/users`), and event emitter listeners (as `event:data`). 
No competitor extracts symbols from framework callback patterns | --- @@ -207,7 +208,7 @@ Ranked by weighted score across 6 dimensions (each 1–5): - **Feature breadth**: 90 MCP tools vs our 17; covers taint analysis, SBOM, license compliance, control flow graphs, data flow analysis - **Language count**: 32 languages (including Verilog, Fortran, PowerShell, Nix) vs our 11 - **Security analysis**: vulnerability scanning with OWASP/CWE coverage — we have no security features -- **Dead code detection**: built-in — we lack this +- **Dead code detection**: built-in — *(Gap closed: our `roles --role dead` now surfaces unreferenced non-exported symbols)* - **Single-binary deployment**: ~30MB Rust binary via brew/scoop/cargo/npm — as easy as ours ### vs code-graph-rag (#3, 1,916 stars) @@ -240,23 +241,24 @@ Ranked by weighted score across 6 dimensions (each 1–5): - **Auto-generated context files**: LLM-powered wiki and AGENTS.md/CLAUDE.md generation from the knowledge graph - **Tradeoff**: Full pipeline re-run on changes (no incremental builds), KuzuDB graph DB (heavier than SQLite), browser mode limited to ~5,000 files -### vs axon (#8, 29 stars) -- **Analysis depth**: their 11-phase pipeline includes community detection (Leiden), execution flow tracing, git change coupling, dead code detection — all features we lack +### vs axon (#9, 29 stars) +- **Analysis depth**: their 11-phase pipeline includes community detection (Leiden), execution flow tracing, git change coupling, dead code detection — *(Gap narrowed: we now have dead code detection via node role classification)* - **Graph database**: KuzuDB with native Cypher is more expressive for complex graph queries than our SQLite - **Branch structural diff**: compares code structure between branches using git worktrees -### vs codegraph-rust (#11, 142 stars) +### vs codegraph-rust (#12, 142 stars) - **LSP-powered analysis**: compiler-grade cross-file references via rust-analyzer, pyright, gopls vs our tree-sitter heuristics 
- **Dataflow edges**: defines/uses/flows_to/returns/mutates relationships we don't capture - **Architecture boundary enforcement**: configurable rules for detecting violations — we have no architectural awareness - **Tiered indexing**: fast/balanced/full modes for different use cases — we have one mode -### vs jelly (#15, 417 stars) +### vs jelly (#16, 417 stars) - **Points-to analysis**: flow-insensitive analysis with access paths for JS/TS — fundamentally more precise than our tree-sitter-based call resolution - **Academic rigor**: 5 published papers backing the methodology (Aarhus University) - **Vulnerability exposure analysis**: library usage pattern matching specific to the JS/TS ecosystem ### vs colbymchenry/codegraph (#20, 165 stars) +- **No role classification**: they lack node role classification or dead code detection — we now have both - **Naming competitor**: same name, same tech stack (tree-sitter + SQLite + MCP + Node.js) — marketplace confusion risk - **Published benchmarks**: 67% fewer tool calls and measurable Claude Code token reduction — compelling marketing angle we lack. *(Gap narrowed: our `context` and `explain` compound commands now provide similar token savings by batching multiple queries into one call)* - **One-liner setup**: `npx @colbymchenry/codegraph` with interactive installer auto-configures Claude Code @@ -268,7 +270,7 @@ Ranked by weighted score across 6 dimensions (each 1–5): ### Tier 1: High impact, low effort | Feature | Inspired by | Why | Status | |---------|------------|-----|--------| -| **Dead code detection** | narsil-mcp, axon, codexray, CKB | We have the graph — find nodes with zero incoming edges (minus entry points/exports). Agents constantly ask "is this used?" | TODO | +| ~~**Dead code detection**~~ | narsil-mcp, axon, codexray, CKB | ~~We have the graph — find nodes with zero incoming edges (minus entry points/exports). Agents constantly ask "is this used?"~~ | **DONE** — Delivered via node classification. 
`roles --role dead` lists all unreferenced, non-exported symbols | | ~~**Fuzzy symbol search**~~ | arbor | ~~Add Levenshtein/Jaro-Winkler to `fn` command. Currently requires exact match~~ | **DONE** — `fn` now has relevance scoring (exact > prefix > word-boundary > substring) with fan-in tiebreaker, plus `--file` and `--kind` filters | | ~~**Expose confidence scores**~~ | arbor | ~~Already computed internally in import resolution — just surface them~~ | **DONE** — confidence scores stored on every call edge, surfaced in `stats` graph quality score | | **Shortest path A→B** | codexray, arbor | BFS on existing edges table. We have `fn` for single chains but no A→B pathfinding | TODO | @@ -279,7 +281,7 @@ Ranked by weighted score across 6 dimensions (each 1–5): | **Optional LLM provider integration** | code-graph-rag, autodev-codebase | Bring-your-own provider (OpenAI, etc.) for richer embeddings and AI-powered search. Enhancement layer only — core graph never depends on it. No other tool offers both zero-cost local and LLM-enhanced modes in one package | TODO | | ~~**Compound MCP tools**~~ | CKB, colbymchenry/codegraph | ~~`explore`/`understand` meta-tools that batch deps + fn + map into single responses~~ | **DONE** — `context` returns source + deps + callers + signature + tests in one call; `explain` returns structural summaries of files or functions | | **Token counting on responses** | glimpse, arbor | tiktoken-based counts so agents know context budget consumed | TODO | -| **Node classification** | arbor | Auto-tag Entry Point / Core / Utility / Adapter from in-degree/out-degree patterns | TODO | +| ~~**Node classification**~~ | arbor | ~~Auto-tag Entry Point / Core / Utility / Adapter from in-degree/out-degree patterns~~ | **DONE** — `classifyNodeRoles()` tags every symbol as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf`. New `roles` CLI command, `node_roles` MCP tool (18 tools), `--role`/`--file` filters. 
Roles surfaced in `where`/`explain`/`context`/`stats`/`list-functions` | | **TF-IDF lightweight search** | codexray | SQLite FTS5 + TF-IDF as a middle tier (~50MB) between "no search" and full transformers (~500MB) | TODO | | **OWASP/CWE pattern detection** | narsil-mcp, CKB | Security pattern scanning on the existing AST — hardcoded secrets, SQL injection patterns, XSS | TODO | | **Formal code health metrics** | code-health-meter | Cyclomatic complexity, Maintainability Index, Halstead metrics per function — we already parse the AST | TODO | diff --git a/roadmap/BACKLOG.md b/roadmap/BACKLOG.md index 9aa9c371..5084b8d9 100644 --- a/roadmap/BACKLOG.md +++ b/roadmap/BACKLOG.md @@ -1,6 +1,6 @@ # Codegraph Feature Backlog -**Last updated:** 2026-02-23 +**Last updated:** 2026-02-25 **Source:** Features derived from [COMPETITIVE_ANALYSIS.md](../generated/COMPETITIVE_ANALYSIS.md) and internal roadmap discussions. --- @@ -26,9 +26,9 @@ Non-breaking, ordered by problem-fit: | ID | Title | Description | Category | Benefit | Zero-dep | Foundation-aligned | Problem-fit (1-5) | Breaking | |----|-------|-------------|----------|---------|----------|-------------------|-------------------|----------| -| 4 | Node classification | Auto-tag symbols as Entry Point / Core / Utility / Adapter based on in-degree/out-degree patterns. High fan-in + low fan-out = Core. Zero fan-in + non-export = Dead. Inspired by arbor. | Intelligence | Agents immediately understand architectural role of any symbol without reading surrounding code — fewer orientation tokens | ✓ | ✓ | 5 | No | +| 4 | ~~Node classification~~ | ~~Auto-tag symbols as Entry Point / Core / Utility / Adapter based on in-degree/out-degree patterns. High fan-in + low fan-out = Core. Zero fan-in + non-export = Dead. 
Inspired by arbor.~~ | Intelligence | ~~Agents immediately understand architectural role of any symbol without reading surrounding code — fewer orientation tokens~~ | ✓ | ✓ | 5 | No | **DONE** — `classifyNodeRoles()` in `structure.js` auto-tags every symbol as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` using median-based fan-in/fan-out thresholds. Roles stored in DB (`role` column, migration v5), surfaced in `where`/`explain`/`context`/`stats`/`list-functions`, new `roles` CLI command, new `node_roles` MCP tool (18 tools total). Includes `--role` and `--file` filters. | | 9 | Git change coupling | Analyze git history for files/functions that always change together. Surfaces hidden dependencies that the static graph can't see. Enhances `diff-impact` with historical co-change data. Inspired by axon. | Analysis | `diff-impact` catches more breakage by including historically coupled files; agents get a more complete blast radius picture | ✓ | ✓ | 5 | No | -| 1 | Dead code detection | Find symbols with zero incoming edges (excluding entry points and exports). Agents constantly ask "is this used?" — the graph already has the data, we just need to surface it. Inspired by narsil-mcp, axon, codexray, CKB. | Analysis | Agents stop wasting tokens investigating dead code; developers get actionable cleanup lists without external tools | ✓ | ✓ | 4 | No | +| 1 | ~~Dead code detection~~ | ~~Find symbols with zero incoming edges (excluding entry points and exports). Agents constantly ask "is this used?" — the graph already has the data, we just need to surface it. Inspired by narsil-mcp, axon, codexray, CKB.~~ | Analysis | ~~Agents stop wasting tokens investigating dead code; developers get actionable cleanup lists without external tools~~ | ✓ | ✓ | 4 | No | **DONE** — Delivered as part of node classification (ID 4). `codegraph roles --role dead -T` lists all symbols with zero fan-in that aren't exported. 
| | 2 | Shortest path A→B | BFS/Dijkstra on the existing edges table to find how symbol A reaches symbol B. We have `fn` for single-node chains but no A→B pathfinding. Inspired by codexray, arbor. | Navigation | Agents can answer "how does this function reach that one?" in one call instead of manually tracing chains | ✓ | ✓ | 4 | No | | 12 | Execution flow tracing | Framework-aware entry point detection (Express routes, CLI commands, event handlers) + BFS flow tracing from entry to leaf. Inspired by axon, GitNexus, code-context-mcp. | Navigation | Agents can answer "what happens when a user hits POST /login?" by tracing the full execution path in one query | ✓ | ✓ | 4 | No | | 16 | Branch structural diff | Compare code structure between two branches using git worktrees. Show added/removed/changed symbols and their impact. Inspired by axon. | Analysis | Teams can review structural impact of feature branches before merge; agents get branch-aware context | ✓ | ✓ | 4 | No | diff --git a/src/builder.js b/src/builder.js index ce9f6633..0feafc0a 100644 --- a/src/builder.js +++ b/src/builder.js @@ -892,6 +892,17 @@ export async function buildGraph(rootDir, opts = {}) { debug(`Structure analysis failed: ${err.message}`); } + // Classify node roles (entry, core, utility, adapter, dead, leaf) + try { + const { classifyNodeRoles } = await import('./structure.js'); + const roleSummary = classifyNodeRoles(db); + debug( + `Roles: ${Object.entries(roleSummary).map(([r, c]) => `${r}=${c}`).join(', ')}`, + ); + } catch (err) { + debug(`Role classification failed: ${err.message}`); + } + const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c; info(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`); info(`Stored in ${dbPath}`); diff --git a/src/cli.js b/src/cli.js index ccf8fbc4..963972d2 100644 --- a/src/cli.js +++ b/src/cli.js @@ -21,7 +21,9 @@ import { impactAnalysis, moduleMap, queryName, + roles, stats, + VALID_ROLES, where, } from './queries.js'; import { @@ 
-529,6 +531,28 @@ program } }); +program + .command('roles') + .description('Show node role classification: entry, core, utility, adapter, dead, leaf') + .option('-d, --db ', 'Path to graph.db') + .option('--role ', `Filter by role (${VALID_ROLES.join(', ')})`) + .option('-f, --file ', 'Scope to a specific file (partial match)') + .option('-T, --no-tests', 'Exclude test/spec files') + .option('--include-tests', 'Include test/spec files (overrides excludeTests config)') + .option('-j, --json', 'Output as JSON') + .action((opts) => { + if (opts.role && !VALID_ROLES.includes(opts.role)) { + console.error(`Invalid role "${opts.role}". Valid roles: ${VALID_ROLES.join(', ')}`); + process.exit(1); + } + roles(opts.db, { + role: opts.role, + file: opts.file, + noTests: resolveNoTests(opts), + json: opts.json, + }); + }); + program .command('watch [dir]') .description('Watch project for file changes and incrementally update the graph') diff --git a/src/db.js b/src/db.js index cbabc93a..7d4b79fe 100644 --- a/src/db.js +++ b/src/db.js @@ -115,6 +115,16 @@ export function initSchema(db) { } catch { /* already exists */ } + try { + db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); + } catch { + /* already exists */ + } } export function findDbPath(customPath) { diff --git a/src/index.js b/src/index.js index 7435b8a6..9da90b5e 100644 --- a/src/index.js +++ b/src/index.js @@ -53,7 +53,9 @@ export { impactAnalysisData, moduleMapData, queryNameData, + rolesData, statsData, + VALID_ROLES, whereData, } from './queries.js'; // Registry (multi-repo) @@ -70,6 +72,7 @@ export { // Structure analysis export { buildStructure, + classifyNodeRoles, formatHotspots, formatModuleBoundaries, formatStructure, diff --git a/src/mcp.js b/src/mcp.js index 83ab1f90..2daeeb84 100644 --- a/src/mcp.js +++ b/src/mcp.js @@ -8,7 +8,7 @@ import { createRequire } from 'node:module'; import { 
findCycles } from './cycles.js'; import { findDbPath } from './db.js'; -import { ALL_SYMBOL_KINDS, diffImpactMermaid } from './queries.js'; +import { ALL_SYMBOL_KINDS, diffImpactMermaid, VALID_ROLES } from './queries.js'; const REPO_PROP = { repo: { @@ -273,6 +273,23 @@ const BASE_TOOLS = [ }, }, }, + { + name: 'node_roles', + description: + 'Show node role classification (entry, core, utility, adapter, dead, leaf) based on connectivity patterns', + inputSchema: { + type: 'object', + properties: { + role: { + type: 'string', + enum: VALID_ROLES, + description: 'Filter to a specific role', + }, + file: { type: 'string', description: 'Scope to a specific file (partial match)' }, + no_tests: { type: 'boolean', description: 'Exclude test files', default: false }, + }, + }, + }, { name: 'hotspots', description: @@ -372,6 +389,7 @@ export async function startMCPServer(customDbPath, options = {}) { whereData, diffImpactData, listFunctionsData, + rolesData, } = await import('./queries.js'); const require = createRequire(import.meta.url); @@ -540,6 +558,13 @@ export async function startMCPServer(customDbPath, options = {}) { noTests: args.no_tests, }); break; + case 'node_roles': + result = rolesData(dbPath, { + role: args.role, + file: args.file, + noTests: args.no_tests, + }); + break; case 'structure': { const { structureData } = await import('./structure.js'); result = structureData(dbPath, { diff --git a/src/queries.js b/src/queries.js index 91bb0c4f..3e1eac62 100644 --- a/src/queries.js +++ b/src/queries.js @@ -67,6 +67,8 @@ export const ALL_SYMBOL_KINDS = [ 'module', ]; +export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; + /** * Get all ancestor class names for a given class using extends edges. 
*/ @@ -876,7 +878,7 @@ export function listFunctionsData(customDbPath, opts = {}) { let rows = db .prepare( - `SELECT name, kind, file, line FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, + `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, ) .all(...params); @@ -1077,6 +1079,24 @@ export function statsData(customDbPath, opts = {}) { falsePositiveWarnings, }; + // Role distribution + let roleRows; + if (noTests) { + const allRoleNodes = db + .prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL') + .all(); + const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); + const counts = {}; + for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; + roleRows = Object.entries(counts).map(([role, c]) => ({ role, c })); + } else { + roleRows = db + .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') + .all(); + } + const roles = {}; + for (const r of roleRows) roles[r.role] = r.c; + db.close(); return { nodes: { total: totalNodes, byKind: nodesByKind }, @@ -1086,6 +1106,7 @@ export function statsData(customDbPath, opts = {}) { hotspots, embeddings, quality, + roles, }; } @@ -1182,6 +1203,22 @@ export function stats(customDbPath, opts = {}) { } } + // Roles + if (data.roles && Object.keys(data.roles).length > 0) { + const total = Object.values(data.roles).reduce((a, b) => a + b, 0); + console.log(`\nRoles: ${total} classified symbols`); + const roleParts = Object.entries(data.roles) + .sort((a, b) => b[1] - a[1]) + .map(([k, v]) => `${k} ${v}`); + for (let i = 0; i < roleParts.length; i += 3) { + const row = roleParts + .slice(i, i + 3) + .map((p) => p.padEnd(18)) + .join(''); + console.log(` ${row}`); + } + } + console.log(); } @@ -1649,6 +1686,7 @@ export function contextData(name, customDbPath, opts = {}) { kind: node.kind, file: node.file, line: node.line, + role: node.role || null, endLine: node.end_line || null, source, signature, 
@@ -1675,7 +1713,8 @@ export function context(name, customDbPath, opts = {}) { for (const r of data.results) { const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; - console.log(`\n# ${r.name} (${r.kind}) — ${r.file}:${lineRange}\n`); + const roleTag = r.role ? ` [${r.role}]` : ''; + console.log(`\n# ${r.name} (${r.kind})${roleTag} — ${r.file}:${lineRange}\n`); // Signature if (r.signature) { @@ -1787,6 +1826,7 @@ function explainFileImpl(db, target, getFileLines) { name: s.name, kind: s.kind, line: s.line, + role: s.role || null, summary: fileLines ? extractSummary(fileLines, s.line) : null, signature: fileLines ? extractSignature(fileLines, s.line) : null, }); @@ -1907,6 +1947,7 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { kind: node.kind, file: node.file, line: node.line, + role: node.role || null, endLine: node.end_line || null, lineCount, summary, @@ -2018,8 +2059,9 @@ export function explain(target, customDbPath, opts = {}) { console.log(`\n## Exported`); for (const s of r.publicApi) { const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig} :${s.line}${summary}`); + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); } } @@ -2027,8 +2069,9 @@ export function explain(target, customDbPath, opts = {}) { console.log(`\n## Internal`); for (const s of r.internal) { const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig} :${s.line}${summary}`); + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); } } @@ -2045,9 +2088,10 @@ export function explain(target, customDbPath, opts = {}) { const lineRange = r.endLine ? 
`${r.line}-${r.endLine}` : `${r.line}`; const lineInfo = r.lineCount ? `${r.lineCount} lines` : ''; const summaryPart = r.summary ? ` | ${r.summary}` : ''; + const roleTag = r.role ? ` [${r.role}]` : ''; const depthLevel = r._depth || 0; const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#'); - console.log(`\n${indent}${heading} ${r.name} (${r.kind}) ${r.file}:${lineRange}`); + console.log(`\n${indent}${heading} ${r.name} (${r.kind})${roleTag} ${r.file}:${lineRange}`); if (lineInfo || r.summary) { console.log(`${indent} ${lineInfo}${summaryPart}`); } @@ -2134,6 +2178,7 @@ function whereSymbolImpl(db, target, noTests) { kind: node.kind, file: node.file, line: node.line, + role: node.role || null, exported, uses: uses.map((u) => ({ name: u.name, file: u.file, line: u.line })), }; @@ -2220,8 +2265,9 @@ export function where(target, customDbPath, opts = {}) { if (data.mode === 'symbol') { for (const r of data.results) { + const roleTag = r.role ? ` [${r.role}]` : ''; const tag = r.exported ? 
' (exported)' : ''; - console.log(`\n${kindIcon(r.kind)} ${r.name} ${r.file}:${r.line}${tag}`); + console.log(`\n${kindIcon(r.kind)} ${r.name}${roleTag} ${r.file}:${r.line}${tag}`); if (r.uses.length > 0) { const useStrs = r.uses.map((u) => `${u.file}:${u.line}`); console.log(` Used in: ${useStrs.join(', ')}`); @@ -2250,6 +2296,81 @@ export function where(target, customDbPath, opts = {}) { console.log(); } +// ─── rolesData ────────────────────────────────────────────────────────── + +export function rolesData(customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + const noTests = opts.noTests || false; + const filterRole = opts.role || null; + const filterFile = opts.file || null; + + const conditions = ['role IS NOT NULL']; + const params = []; + + if (filterRole) { + conditions.push('role = ?'); + params.push(filterRole); + } + if (filterFile) { + conditions.push('file LIKE ?'); + params.push(`%${filterFile}%`); + } + + let rows = db + .prepare( + `SELECT name, kind, file, line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY role, file, line`, + ) + .all(...params); + + if (noTests) rows = rows.filter((r) => !isTestFile(r.file)); + + const summary = {}; + for (const r of rows) { + summary[r.role] = (summary[r.role] || 0) + 1; + } + + db.close(); + return { count: rows.length, summary, symbols: rows }; +} + +export function roles(customDbPath, opts = {}) { + const data = rolesData(customDbPath, opts); + if (opts.json) { + console.log(JSON.stringify(data, null, 2)); + return; + } + + if (data.count === 0) { + console.log('No classified symbols found. 
Run "codegraph build" first.'); + return; + } + + const total = data.count; + console.log(`\nNode roles (${total} symbols):\n`); + + const summaryParts = Object.entries(data.summary) + .sort((a, b) => b[1] - a[1]) + .map(([role, count]) => `${role}: ${count}`); + console.log(` ${summaryParts.join(' ')}\n`); + + const byRole = {}; + for (const s of data.symbols) { + if (!byRole[s.role]) byRole[s.role] = []; + byRole[s.role].push(s); + } + + for (const [role, symbols] of Object.entries(byRole)) { + console.log(`## ${role} (${symbols.length})`); + for (const s of symbols.slice(0, 30)) { + console.log(` ${kindIcon(s.kind)} ${s.name} ${s.file}:${s.line}`); + } + if (symbols.length > 30) { + console.log(` ... and ${symbols.length - 30} more`); + } + console.log(); + } +} + export function fnImpact(name, customDbPath, opts = {}) { const data = fnImpactData(name, customDbPath, opts); if (opts.json) { diff --git a/src/structure.js b/src/structure.js index ba348f37..e094e72a 100644 --- a/src/structure.js +++ b/src/structure.js @@ -224,6 +224,100 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director debug(`Structure: ${dirCount} directories, ${fileSymbols.size} files with metrics`); } +// ─── Node role classification ───────────────────────────────────────── + +function median(sorted) { + if (sorted.length === 0) return 0; + const mid = Math.floor(sorted.length / 2); + return sorted.length % 2 === 0 ? 
(sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid]; +} + +export function classifyNodeRoles(db) { + const rows = db + .prepare( + `SELECT n.id, n.kind, n.file, + COALESCE(fi.cnt, 0) AS fan_in, + COALESCE(fo.cnt, 0) AS fan_out + FROM nodes n + LEFT JOIN ( + SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id + ) fi ON n.id = fi.target_id + LEFT JOIN ( + SELECT source_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id + ) fo ON n.id = fo.source_id + WHERE n.kind NOT IN ('file', 'directory')`, + ) + .all(); + + if (rows.length === 0) { + return { entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0 }; + } + + const exportedIds = new Set( + db + .prepare( + `SELECT DISTINCT e.target_id + FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE e.kind = 'calls' AND caller.file != target.file`, + ) + .all() + .map((r) => r.target_id), + ); + + const nonZeroFanIn = rows + .filter((r) => r.fan_in > 0) + .map((r) => r.fan_in) + .sort((a, b) => a - b); + const nonZeroFanOut = rows + .filter((r) => r.fan_out > 0) + .map((r) => r.fan_out) + .sort((a, b) => a - b); + + const medFanIn = median(nonZeroFanIn); + const medFanOut = median(nonZeroFanOut); + + const updates = []; + const summary = { entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0 }; + + for (const row of rows) { + const highIn = row.fan_in >= medFanIn && row.fan_in > 0; + const highOut = row.fan_out >= medFanOut && row.fan_out > 0; + const isExported = exportedIds.has(row.id); + + let role; + if (row.fan_in === 0 && !isExported) { + role = 'dead'; + } else if (row.fan_in === 0 && isExported) { + role = 'entry'; + } else if (highIn && !highOut) { + role = 'core'; + } else if (highIn && highOut) { + role = 'utility'; + } else if (!highIn && highOut) { + role = 'adapter'; + } else { + role = 'leaf'; + } + + updates.push({ id: row.id, role }); + summary[role]++; + } + + const clearRoles = 
db.prepare('UPDATE nodes SET role = NULL'); + const setRole = db.prepare('UPDATE nodes SET role = ? WHERE id = ?'); + + db.transaction(() => { + clearRoles.run(); + for (const u of updates) { + setRole.run(u.role, u.id); + } + })(); + + return summary; +} + // ─── Query functions (read-only) ────────────────────────────────────── /** diff --git a/tests/integration/roles.test.js b/tests/integration/roles.test.js new file mode 100644 index 00000000..6dbc5212 --- /dev/null +++ b/tests/integration/roles.test.js @@ -0,0 +1,212 @@ +/** + * Integration tests for node role classification. + * + * Uses the same fixture DB pattern as queries.test.js — a hand-crafted + * in-file DB with known nodes and edges — then exercises rolesData, + * statsData, whereData, explainData, and listFunctionsData to verify + * roles appear in all expected outputs. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { initSchema } from '../../src/db.js'; +import { + explainData, + listFunctionsData, + rolesData, + statsData, + whereData, +} from '../../src/queries.js'; +import { classifyNodeRoles } from '../../src/structure.js'; + +// ─── Helpers ─────────────────────────────────────────────────────────── + +function insertNode(db, name, kind, file, line) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)') + .run(name, kind, file, line).lastInsertRowid; +} + +function insertEdge(db, sourceId, targetId, kind, confidence = 1.0) { + db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, 0)', + ).run(sourceId, targetId, kind, confidence); +} + +// ─── Fixture DB ──────────────────────────────────────────────────────── + +let tmpDir, dbPath; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-roles-')); + 
fs.mkdirSync(path.join(tmpDir, '.codegraph')); + dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); + + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + initSchema(db); + + // File nodes + const fApp = insertNode(db, 'app.js', 'file', 'app.js', 0); + const fLib = insertNode(db, 'lib.js', 'file', 'lib.js', 0); + const fTest = insertNode(db, 'app.test.js', 'file', 'app.test.js', 0); + + // Function nodes + const main = insertNode(db, 'main', 'function', 'app.js', 1); + const process_ = insertNode(db, 'processData', 'function', 'app.js', 10); + const helper = insertNode(db, 'helper', 'function', 'lib.js', 1); + const format = insertNode(db, 'format', 'function', 'lib.js', 10); + const unused = insertNode(db, 'unused', 'function', 'lib.js', 20); + const testFn = insertNode(db, 'testMain', 'function', 'app.test.js', 1); + + // Import edges + insertEdge(db, fApp, fLib, 'imports'); + insertEdge(db, fTest, fApp, 'imports'); + + // Call edges: + // main → processData (same file) + // main → helper (cross-file) → makes helper exported + // processData → format (cross-file) → makes format exported + // helper → format (same file) + // testFn → main (cross-file) → makes main exported + insertEdge(db, main, process_, 'calls'); + insertEdge(db, main, helper, 'calls'); + insertEdge(db, process_, format, 'calls'); + insertEdge(db, helper, format, 'calls'); + insertEdge(db, testFn, main, 'calls'); + + // unused has no callers and no cross-file callers → dead + + // Classify roles + classifyNodeRoles(db); + + db.close(); +}); + +afterAll(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// ─── rolesData ────────────────────────────────────────────────────────── + +describe('rolesData', () => { + test('returns all classified symbols with correct counts', () => { + const data = rolesData(dbPath); + expect(data.count).toBeGreaterThan(0); + expect(data.summary).toBeDefined(); + 
expect(Object.keys(data.summary).length).toBeGreaterThan(0); + // Every symbol should have a role + for (const s of data.symbols) { + expect(s.role).toBeTruthy(); + } + }); + + test('dead role includes unused function', () => { + const data = rolesData(dbPath, { role: 'dead' }); + const names = data.symbols.map((s) => s.name); + expect(names).toContain('unused'); + }); + + test('filters by role', () => { + const data = rolesData(dbPath, { role: 'dead' }); + for (const s of data.symbols) { + expect(s.role).toBe('dead'); + } + expect(data.summary.dead).toBe(data.count); + }); + + test('filters by file', () => { + const data = rolesData(dbPath, { file: 'lib.js' }); + for (const s of data.symbols) { + expect(s.file).toContain('lib.js'); + } + }); + + test('filters by noTests', () => { + const withTests = rolesData(dbPath); + const withoutTests = rolesData(dbPath, { noTests: true }); + expect(withoutTests.count).toBeLessThan(withTests.count); + for (const s of withoutTests.symbols) { + expect(s.file).not.toMatch(/\.test\./); + } + }); +}); + +// ─── statsData includes roles ─────────────────────────────────────────── + +describe('statsData with roles', () => { + test('includes roles distribution', () => { + const data = statsData(dbPath); + expect(data.roles).toBeDefined(); + expect(Object.keys(data.roles).length).toBeGreaterThan(0); + // Should have dead for the unused function + expect(data.roles.dead).toBeGreaterThanOrEqual(1); + }); + + test('roles distribution respects noTests filter', () => { + const withTests = statsData(dbPath); + const withoutTests = statsData(dbPath, { noTests: true }); + const totalWith = Object.values(withTests.roles).reduce((a, b) => a + b, 0); + const totalWithout = Object.values(withoutTests.roles).reduce((a, b) => a + b, 0); + expect(totalWithout).toBeLessThanOrEqual(totalWith); + }); +}); + +// ─── whereData includes role ──────────────────────────────────────────── + +describe('whereData with roles', () => { + test('includes role field 
in symbol results', () => { + const data = whereData('main', dbPath); + expect(data.results.length).toBeGreaterThan(0); + const mainResult = data.results.find((r) => r.name === 'main'); + expect(mainResult).toBeDefined(); + expect(mainResult).toHaveProperty('role'); + expect(mainResult.role).toBeTruthy(); + }); + + test('dead function has dead role', () => { + const data = whereData('unused', dbPath); + const unusedResult = data.results.find((r) => r.name === 'unused'); + expect(unusedResult).toBeDefined(); + expect(unusedResult.role).toBe('dead'); + }); +}); + +// ─── explainData includes role ────────────────────────────────────────── + +describe('explainData with roles', () => { + test('function explain includes role field', () => { + const data = explainData('main', dbPath); + expect(data.results.length).toBeGreaterThan(0); + const mainResult = data.results.find((r) => r.name === 'main'); + expect(mainResult).toBeDefined(); + expect(mainResult).toHaveProperty('role'); + }); + + test('file explain includes role in symbols', () => { + const data = explainData('lib.js', dbPath); + expect(data.results.length).toBeGreaterThan(0); + const fileResult = data.results[0]; + // Check publicApi and internal arrays for role field + const allSymbols = [...(fileResult.publicApi || []), ...(fileResult.internal || [])]; + expect(allSymbols.length).toBeGreaterThan(0); + for (const s of allSymbols) { + expect(s).toHaveProperty('role'); + } + }); +}); + +// ─── listFunctionsData includes role ──────────────────────────────────── + +describe('listFunctionsData with roles', () => { + test('includes role field in function listings', () => { + const data = listFunctionsData(dbPath); + expect(data.count).toBeGreaterThan(0); + // At least some should have roles + const withRoles = data.functions.filter((f) => f.role); + expect(withRoles.length).toBeGreaterThan(0); + }); +}); diff --git a/tests/unit/mcp.test.js b/tests/unit/mcp.test.js index 4199467c..0f3dd77b 100644 --- 
a/tests/unit/mcp.test.js +++ b/tests/unit/mcp.test.js @@ -25,6 +25,7 @@ const ALL_TOOL_NAMES = [ 'list_functions', 'structure', 'hotspots', + 'node_roles', 'list_repos', ]; @@ -232,6 +233,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(() => ({ target: 'test', mode: 'symbol', results: [] })), diffImpactData: vi.fn(() => ({ changedFiles: 0, affectedFunctions: [] })), listFunctionsData: vi.fn(() => ({ count: 0, functions: [] })), + rolesData: vi.fn(() => ({ count: 0, summary: {}, symbols: [] })), })); // Clear module cache and reimport @@ -294,6 +296,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -350,6 +353,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -401,6 +405,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: diffImpactMock, listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -457,6 +462,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: listFnMock, + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -514,6 +520,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -566,6 +573,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -617,6 +625,7 @@ 
describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -670,6 +679,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -726,6 +736,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -782,6 +793,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -829,6 +841,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -876,6 +889,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); @@ -923,6 +937,7 @@ describe('startMCPServer handler dispatch', () => { whereData: vi.fn(), diffImpactData: vi.fn(), listFunctionsData: vi.fn(), + rolesData: vi.fn(), })); const { startMCPServer } = await import('../../src/mcp.js'); diff --git a/tests/unit/roles.test.js b/tests/unit/roles.test.js new file mode 100644 index 00000000..8c216c2b --- /dev/null +++ b/tests/unit/roles.test.js @@ -0,0 +1,183 @@ +/** + * Unit tests for classifyNodeRoles in src/structure.js + * + * Uses an in-memory SQLite database with hand-crafted nodes/edges + * to verify each role classification. 
+ *
+ * Test graph:
+ * entryFn - one cross-file caller, so fan_in=1 (the 'entry' branch needs fan_in=0)
+ * coreFn - high fan_in, low fan_out → core
+ * utilityFn - high fan_in, high fan_out → utility
+ * adapterFn - fan_in=1, fan_out=3 (actual role depends on median thresholds)
+ * deadFn - fan_in=0, not exported → dead
+ * leafFn - fan_in=1, fan_out=1 (actual role depends on median thresholds)
+ */
+
+import Database from 'better-sqlite3';
+import { beforeEach, describe, expect, it } from 'vitest';
+import { initSchema } from '../../src/db.js';
+import { classifyNodeRoles } from '../../src/structure.js';
+
+let db;
+
+function setup() {
+  db = new Database(':memory:');
+  db.pragma('journal_mode = WAL');
+  initSchema(db);
+  return db;
+}
+
+function insertNode(name, kind, file, line) {
+  return db
+    .prepare('INSERT INTO nodes (name, kind, file, line) VALUES (?, ?, ?, ?)')
+    .run(name, kind, file, line).lastInsertRowid;
+}
+
+function insertEdge(sourceId, targetId, kind) {
+  db.prepare(
+    'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, 1.0, 0)',
+  ).run(sourceId, targetId, kind);
+}
+
+/**
+ * Build a graph where the non-zero median fan_in is 1 and median fan_out is 3.
+ * Only the dead/core/utility classifications are asserted below.
+ */ +function buildTestGraph() { + // File nodes (these should NOT get roles) + const fA = insertNode('a.js', 'file', 'a.js', 0); + const fB = insertNode('b.js', 'file', 'b.js', 0); + + // Function nodes + const entryFn = insertNode('entryFn', 'function', 'a.js', 1); + const coreFn = insertNode('coreFn', 'function', 'a.js', 10); + const utilityFn = insertNode('utilityFn', 'function', 'a.js', 20); + const adapterFn = insertNode('adapterFn', 'function', 'b.js', 1); + const deadFn = insertNode('deadFn', 'function', 'b.js', 10); + const leafFn = insertNode('leafFn', 'function', 'b.js', 20); + + // Helper targets for fan_out edges + const helperA = insertNode('helperA', 'function', 'a.js', 30); + const helperB = insertNode('helperB', 'function', 'a.js', 40); + const helperC = insertNode('helperC', 'function', 'b.js', 30); + const helperD = insertNode('helperD', 'function', 'b.js', 40); + + // entryFn: fan_in=0, but exported (cross-file caller) → entry + // No callers from same file, but one cross-file caller + const crossCaller = insertNode('crossCaller', 'function', 'b.js', 50); + insertEdge(crossCaller, entryFn, 'calls'); + + // coreFn: high fan_in (3 callers), low fan_out (0) → core + insertEdge(entryFn, coreFn, 'calls'); + insertEdge(adapterFn, coreFn, 'calls'); + insertEdge(leafFn, coreFn, 'calls'); + + // utilityFn: high fan_in (3 callers), high fan_out (3 callees) → utility + insertEdge(entryFn, utilityFn, 'calls'); + insertEdge(adapterFn, utilityFn, 'calls'); + insertEdge(crossCaller, utilityFn, 'calls'); + insertEdge(utilityFn, helperA, 'calls'); + insertEdge(utilityFn, helperB, 'calls'); + insertEdge(utilityFn, helperC, 'calls'); + + // adapterFn: low fan_in (1 caller), high fan_out (3 callees) → adapter + insertEdge(entryFn, adapterFn, 'calls'); + // adapterFn already calls coreFn and utilityFn above + insertEdge(adapterFn, helperD, 'calls'); + + // deadFn: fan_in=0, not exported → dead + // No callers at all + + // leafFn: low fan_in (1 caller), low fan_out 
(1 callee) → leaf + insertEdge(crossCaller, leafFn, 'calls'); + // leafFn already calls coreFn above + + return { fA, fB, entryFn, coreFn, utilityFn, adapterFn, deadFn, leafFn }; +} + +describe('classifyNodeRoles', () => { + beforeEach(() => { + setup(); + }); + + it('classifies each role correctly', () => { + buildTestGraph(); + const summary = classifyNodeRoles(db); + + // Verify summary has all roles + expect(summary).toHaveProperty('entry'); + expect(summary).toHaveProperty('core'); + expect(summary).toHaveProperty('utility'); + expect(summary).toHaveProperty('adapter'); + expect(summary).toHaveProperty('dead'); + expect(summary).toHaveProperty('leaf'); + + // Verify specific node roles + const getRole = (name) => db.prepare('SELECT role FROM nodes WHERE name = ?').get(name)?.role; + + expect(getRole('deadFn')).toBe('dead'); + expect(getRole('coreFn')).toBe('core'); + expect(getRole('utilityFn')).toBe('utility'); + }); + + it('marks file and directory nodes as NULL role', () => { + buildTestGraph(); + // Insert a directory node + insertNode('src', 'directory', 'src', 0); + classifyNodeRoles(db); + + const fileRole = db.prepare("SELECT role FROM nodes WHERE kind = 'file' LIMIT 1").get(); + expect(fileRole.role).toBeNull(); + + const dirRole = db.prepare("SELECT role FROM nodes WHERE kind = 'directory' LIMIT 1").get(); + expect(dirRole.role).toBeNull(); + }); + + it('is idempotent (running twice gives same results)', () => { + buildTestGraph(); + const summary1 = classifyNodeRoles(db); + const roles1 = db + .prepare('SELECT name, role FROM nodes WHERE role IS NOT NULL ORDER BY name') + .all(); + + const summary2 = classifyNodeRoles(db); + const roles2 = db + .prepare('SELECT name, role FROM nodes WHERE role IS NOT NULL ORDER BY name') + .all(); + + expect(summary1).toEqual(summary2); + expect(roles1).toEqual(roles2); + }); + + it('handles empty graph without crashing', () => { + const summary = classifyNodeRoles(db); + expect(summary).toEqual({ entry: 0, core: 0, 
utility: 0, adapter: 0, dead: 0, leaf: 0 }); + }); + + it('adapts median thresholds to data', () => { + // Create a small graph: 2 functions with fan_in=[1,1], fan_out=[1,1] + // median of non-zero = 1 for both, so fan_in >= 1 = high, fan_out >= 1 = high + const fA = insertNode('a.js', 'file', 'a.js', 0); + const fn1 = insertNode('fn1', 'function', 'a.js', 1); + const fn2 = insertNode('fn2', 'function', 'a.js', 10); + + // fn1 calls fn2, fn2 calls fn1 (mutual) + insertEdge(fn1, fn2, 'calls'); + insertEdge(fn2, fn1, 'calls'); + + const summary = classifyNodeRoles(db); + // Both have fan_in=1 (>= median 1) and fan_out=1 (>= median 1) → utility + expect(summary.utility).toBe(2); + }); + + it('classifies nodes with only non-call edges as dead', () => { + const fA = insertNode('a.js', 'file', 'a.js', 0); + const fn1 = insertNode('fn1', 'function', 'a.js', 1); + // Only import edge, no call edge + insertEdge(fA, fn1, 'imports'); + + const summary = classifyNodeRoles(db); + const role = db.prepare("SELECT role FROM nodes WHERE name = 'fn1'").get(); + expect(role.role).toBe('dead'); + }); +}); From 20c1f7192b94e1eb1bc97ab39e50e31c6664af06 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Feb 2026 01:42:47 -0700 Subject: [PATCH 7/7] style: fix lint warnings and format issues in roles feature Impact: 2 functions changed, 1 affected --- src/builder.js | 4 +++- src/queries.js | 4 +--- tests/integration/roles.test.js | 2 +- tests/unit/roles.test.js | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/builder.js b/src/builder.js index 0feafc0a..daed94d2 100644 --- a/src/builder.js +++ b/src/builder.js @@ -897,7 +897,9 @@ export async function buildGraph(rootDir, opts = {}) { const { classifyNodeRoles } = await import('./structure.js'); const roleSummary = classifyNodeRoles(db); debug( - `Roles: ${Object.entries(roleSummary).map(([r, c]) => `${r}=${c}`).join(', ')}`, + `Roles: ${Object.entries(roleSummary) + .map(([r, c]) => `${r}=${c}`) + 
.join(', ')}`, ); } catch (err) { debug(`Role classification failed: ${err.message}`); diff --git a/src/queries.js b/src/queries.js index 3e1eac62..3aaeaf67 100644 --- a/src/queries.js +++ b/src/queries.js @@ -1082,9 +1082,7 @@ export function statsData(customDbPath, opts = {}) { // Role distribution let roleRows; if (noTests) { - const allRoleNodes = db - .prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL') - .all(); + const allRoleNodes = db.prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL').all(); const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); const counts = {}; for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; diff --git a/tests/integration/roles.test.js b/tests/integration/roles.test.js index 6dbc5212..c76b5719 100644 --- a/tests/integration/roles.test.js +++ b/tests/integration/roles.test.js @@ -59,7 +59,7 @@ beforeAll(() => { const process_ = insertNode(db, 'processData', 'function', 'app.js', 10); const helper = insertNode(db, 'helper', 'function', 'lib.js', 1); const format = insertNode(db, 'format', 'function', 'lib.js', 10); - const unused = insertNode(db, 'unused', 'function', 'lib.js', 20); + insertNode(db, 'unused', 'function', 'lib.js', 20); const testFn = insertNode(db, 'testMain', 'function', 'app.test.js', 1); // Import edges diff --git a/tests/unit/roles.test.js b/tests/unit/roles.test.js index 8c216c2b..5f8c0e1a 100644 --- a/tests/unit/roles.test.js +++ b/tests/unit/roles.test.js @@ -157,7 +157,7 @@ describe('classifyNodeRoles', () => { it('adapts median thresholds to data', () => { // Create a small graph: 2 functions with fan_in=[1,1], fan_out=[1,1] // median of non-zero = 1 for both, so fan_in >= 1 = high, fan_out >= 1 = high - const fA = insertNode('a.js', 'file', 'a.js', 0); + insertNode('a.js', 'file', 'a.js', 0); const fn1 = insertNode('fn1', 'function', 'a.js', 1); const fn2 = insertNode('fn2', 'function', 'a.js', 10); @@ -176,7 +176,7 @@ describe('classifyNodeRoles', () => { 
// Only import edge, no call edge insertEdge(fA, fn1, 'imports'); - const summary = classifyNodeRoles(db); + classifyNodeRoles(db); const role = db.prepare("SELECT role FROM nodes WHERE name = 'fn1'").get(); expect(role.role).toBe('dead'); });