optave · carlos-alm · Feb 26, 2026 · Feb 26, 2026 · Feb 26, 2026 · Feb 26, 2026
diff --git a/.claude/skills/dogfood/SKILL.md b/.claude/skills/dogfood/SKILL.md
@@ -203,7 +203,7 @@ Before writing the report, **stop and think** about:
 
 - What testing approaches am I missing?
 - **Cross-command pipelines:** Have I tested `build` → `embed` → `search` → modify → `build` → `search`? Have I tested `watch` detecting changes then `diff-impact`?
-- **MCP server:** Have I tested the `mcp` command? Initialize via JSON-RPC on stdin, send `tools/list`, verify all 17 tools are present. Test single-repo mode (default — `list_repos` should be absent, no `repo` parameter on tools) vs `--multi-repo` mode.
+- **MCP server:** Have I tested the `mcp` command? Initialize via JSON-RPC on stdin, send `tools/list`, verify all 21 tools are present. Test single-repo mode (default — `list_repos` should be absent, no `repo` parameter on tools) vs `--multi-repo` mode.
 - **Programmatic API:** Have I tested `require('@optave/codegraph')` or `import` from `index.js`? Key exports to verify: `buildGraph`, `loadConfig`, `openDb`, `findDbPath`, `contextData`, `explainData`, `whereData`, `fnDepsData`, `diffImpactData`, `statsData`, `isNativeAvailable`, `EXTENSIONS`, `IGNORE_DIRS`, `ALL_SYMBOL_KINDS`, `MODELS`.
 - **Config options:** Have I tested `.codegraphrc.json`? Create one with `include`/`exclude` patterns, custom `aliases`, `build.incremental: false`, `query.defaultDepth`, `search.defaultMinScore`. Verify overrides work.
 - **Env var overrides:** `CODEGRAPH_LLM_PROVIDER`, `CODEGRAPH_LLM_API_KEY`, `CODEGRAPH_LLM_MODEL`, `CODEGRAPH_REGISTRY_PATH`.

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -54,25 +54,30 @@ jobs:
             echo "changed=true" >> "$GITHUB_OUTPUT"
           fi
 
+      - name: Extract version from result
+        id: version
+        run: echo "version=$(node -p "require('./benchmark-result.json').version")" >> "$GITHUB_OUTPUT"
+
       - name: Commit and push via PR
         if: steps.changes.outputs.changed == 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          VERSION: ${{ steps.version.outputs.version }}
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
 
-          BRANCH="docs/benchmark-build-$(date +%Y%m%d-%H%M%S)"
+          BRANCH="benchmark/build-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
           git checkout -b "$BRANCH"
           git add generated/BUILD-BENCHMARKS.md README.md
-          git commit -m "docs: update build performance benchmarks"
+          git commit -m "docs: update build performance benchmarks (v${VERSION})"
           git push origin "$BRANCH"
 
           gh pr create \
             --base main \
             --head "$BRANCH" \
-            --title "docs: update build performance benchmarks" \
-            --body "Automated build benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
+            --title "docs: update build performance benchmarks (v${VERSION})" \
+            --body "Automated build benchmark update for **v${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
 
   embedding-benchmark:
     runs-on: ubuntu-latest
@@ -131,25 +136,30 @@ jobs:
             echo "changed=true" >> "$GITHUB_OUTPUT"
           fi
 
+      - name: Extract version from result
+        id: version
+        run: echo "version=$(node -p "require('./embedding-benchmark-result.json').version")" >> "$GITHUB_OUTPUT"
+
       - name: Commit and push via PR
         if: steps.changes.outputs.changed == 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          VERSION: ${{ steps.version.outputs.version }}
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
 
-          BRANCH="docs/benchmark-embedding-$(date +%Y%m%d-%H%M%S)"
+          BRANCH="benchmark/embedding-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
           git checkout -b "$BRANCH"
           git add generated/EMBEDDING-BENCHMARKS.md
-          git commit -m "docs: update embedding benchmarks"
+          git commit -m "docs: update embedding benchmarks (v${VERSION})"
           git push origin "$BRANCH"
 
           gh pr create \
             --base main \
             --head "$BRANCH" \
-            --title "docs: update embedding benchmarks" \
-            --body "Automated embedding benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
+            --title "docs: update embedding benchmarks (v${VERSION})" \
+            --body "Automated embedding benchmark update for **v${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
 
   query-benchmark:
     runs-on: ubuntu-latest
@@ -196,25 +206,30 @@ jobs:
             echo "changed=true" >> "$GITHUB_OUTPUT"
           fi
 
+      - name: Extract version from result
+        id: version
+        run: echo "version=$(node -p "require('./query-benchmark-result.json').version")" >> "$GITHUB_OUTPUT"
+
       - name: Commit and push via PR
         if: steps.changes.outputs.changed == 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          VERSION: ${{ steps.version.outputs.version }}
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
 
-          BRANCH="docs/benchmark-query-$(date +%Y%m%d-%H%M%S)"
+          BRANCH="benchmark/query-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
           git checkout -b "$BRANCH"
           git add generated/QUERY-BENCHMARKS.md
-          git commit -m "docs: update query benchmarks"
+          git commit -m "docs: update query benchmarks (v${VERSION})"
           git push origin "$BRANCH"
 
           gh pr create \
             --base main \
             --head "$BRANCH" \
-            --title "docs: update query benchmarks" \
-            --body "Automated query benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
+            --title "docs: update query benchmarks (v${VERSION})" \
+            --body "Automated query benchmark update for **v${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
 
   incremental-benchmark:
     runs-on: ubuntu-latest
@@ -261,22 +276,27 @@ jobs:
             echo "changed=true" >> "$GITHUB_OUTPUT"
           fi
 
+      - name: Extract version from result
+        id: version
+        run: echo "version=$(node -p "require('./incremental-benchmark-result.json').version")" >> "$GITHUB_OUTPUT"
+
       - name: Commit and push via PR
         if: steps.changes.outputs.changed == 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          VERSION: ${{ steps.version.outputs.version }}
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
 
-          BRANCH="docs/benchmark-incremental-$(date +%Y%m%d-%H%M%S)"
+          BRANCH="benchmark/incremental-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
           git checkout -b "$BRANCH"
           git add generated/INCREMENTAL-BENCHMARKS.md
-          git commit -m "docs: update incremental benchmarks"
+          git commit -m "docs: update incremental benchmarks (v${VERSION})"
           git push origin "$BRANCH"
 
           gh pr create \
             --base main \
             --head "$BRANCH" \
-            --title "docs: update incremental benchmarks" \
-            --body "Automated incremental benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
+            --title "docs: update incremental benchmarks (v${VERSION})" \
+            --body "Automated incremental benchmark update for **v${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
diff --git a/README.md b/README.md
@@ -55,7 +55,7 @@ cd your-project
 codegraph build
 ```
 
-That's it. No config files, no Docker, no JVM, no API keys, no accounts. The graph is ready to query. Add `codegraph mcp` to your AI agent's config and it has full access to your dependency graph through 19 MCP tools.
+That's it. No config files, no Docker, no JVM, no API keys, no accounts. The graph is ready to query. Add `codegraph mcp` to your AI agent's config and it has full access to your dependency graph through 21 MCP tools (22 in multi-repo mode).
 
 ### Why it matters
 
@@ -97,7 +97,7 @@ That's it. No config files, no Docker, no JVM, no API keys, no accounts. The gra
 | **🔓** | **Zero-cost core, LLM-enhanced when you want** | Full graph analysis with no API keys, no accounts, no cost. Optionally bring your own LLM provider — your code only goes where you choose |
 | **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes |
 | **🏷️** | **Role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` — agents instantly know what they're looking at |
-| **🤖** | **Built for AI agents** | 19-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default |
+| **🤖** | **Built for AI agents** | 21-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default |
 | **🌐** | **Multi-language, one CLI** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph |
 | **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — enriched with historically coupled files from git co-change analysis. Ships with a GitHub Actions workflow |
 | **🧠** | **Semantic search** | Local embeddings by default, LLM-powered when opted in — multi-query with RRF ranking via `"auth; token; JWT"` |
@@ -144,7 +144,7 @@ After modifying code:
 Or connect directly via MCP:
 
 ```bash
-codegraph mcp          # 19-tool MCP server — AI queries the graph directly
+codegraph mcp          # 21-tool MCP server — AI queries the graph directly
 ```
 
 Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) &middot; [CLAUDE.md template](docs/guides/ai-agent-guide.md#claudemd-template)
@@ -158,7 +158,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) &middot; [CLAU
 | 🔍 | **Symbol search** | Find any function, class, or method by name — exact match priority, relevance scoring, `--file` and `--kind` filters |
 | 📁 | **File dependencies** | See what a file imports and what imports it |
 | 💥 | **Impact analysis** | Trace every file affected by a change (transitive) |
-| 🧬 | **Function-level tracing** | Call chains, caller trees, and function-level impact with qualified call resolution |
+| 🧬 | **Function-level tracing** | Call chains, caller trees, function-level impact, and A→B pathfinding with qualified call resolution |
 | 🎯 | **Deep context** | `context` gives AI agents source, deps, callers, signature, and tests for a function in one call; `explain` gives structural summaries of files or functions |
 | 📍 | **Fast lookup** | `where` shows exactly where a symbol is defined and used — minimal, fast |
 | 📊 | **Diff impact** | Parse `git diff`, find overlapping functions, trace their callers |
@@ -170,7 +170,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) &middot; [CLAU
 | 📤 | **Export** | DOT (Graphviz), Mermaid, and JSON graph export |
 | 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking |
 | 👀 | **Watch mode** | Incrementally update the graph as files change |
-| 🤖 | **MCP server** | 19-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
+| 🤖 | **MCP server** | 21-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
 | ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases |
 
 See [docs/examples](docs/examples) for real-world CLI and MCP usage examples.
@@ -217,6 +217,9 @@ codegraph impact <file>        # Transitive reverse dependency trace
 codegraph fn <name>            # Function-level: callers, callees, call chain
 codegraph fn <name> --no-tests --depth 5
 codegraph fn-impact <name>     # What functions break if this one changes
+codegraph path <from> <to>     # Shortest path between two symbols (A calls...calls B)
+codegraph path <from> <to> --reverse  # Follow edges backward
+codegraph path <from> <to> --max-depth 5 --kinds calls,imports
 codegraph diff-impact          # Impact of unstaged git changes
 codegraph diff-impact --staged # Impact of staged changes
 codegraph diff-impact HEAD~3   # Impact vs a specific ref
@@ -316,7 +319,7 @@ codegraph registry remove <name>  # Unregister
 | Flag | Description |
 |---|---|
 | `-d, --db <path>` | Custom path to `graph.db` |
-| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files (available on `fn`, `fn-impact`, `context`, `explain`, `where`, `diff-impact`, `search`, `map`, `hotspots`, `deps`, `impact`) |
+| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files (available on `fn`, `fn-impact`, `path`, `context`, `explain`, `where`, `diff-impact`, `search`, `map`, `hotspots`, `roles`, `co-change`, `deps`, `impact`) |
 | `--depth <n>` | Transitive trace depth (default varies by command) |
 | `-j, --json` | Output as JSON |
 | `-v, --verbose` | Enable debug output |
@@ -428,7 +431,7 @@ Optional: `@huggingface/transformers` (semantic search), `@modelcontextprotocol/
 
 ### MCP Server
 
-Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 19 tools, so AI assistants can query your dependency graph directly:
+Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 21 tools (22 in multi-repo mode), so AI assistants can query your dependency graph directly:
 
 ```bash
 codegraph mcp                  # Single-repo mode (default) — only local project
@@ -462,7 +465,11 @@ This project uses codegraph. The database is at `.codegraph/graph.db`.
 - `codegraph build .` — rebuild the graph (incremental by default)
 - `codegraph map` — module overview
 - `codegraph fn <name> -T` — function call chain
+- `codegraph path <from> <to> -T` — shortest call path between two symbols
 - `codegraph deps <file>` — file-level dependencies
+- `codegraph roles --role dead -T` — find dead code (unreferenced symbols)
+- `codegraph roles --role core -T` — find core symbols (high fan-in)
+- `codegraph co-change <file>` — files that historically change together
 - `codegraph search "<query>"` — semantic search (requires `codegraph embed`)
 - `codegraph cycles` — check for circular dependencies
 

diff --git a/docs/benchmarks/README.md b/docs/benchmarks/README.md
@@ -0,0 +1,129 @@
+# Token Savings Benchmark
+
+Quantifies how much codegraph reduces token usage when AI agents navigate large codebases, compared to raw file exploration (Glob/Grep/Read/Bash).
+
+## Prerequisites
+
+1. **Claude Agent SDK**
+   ```bash
+   npm install @anthropic-ai/claude-agent-sdk
+   ```
+
+2. **API key**
+   ```bash
+   export ANTHROPIC_API_KEY=sk-ant-...
+   ```
+
+3. **Git** (for cloning Next.js)
+
+4. **codegraph** installed in this repo (`npm install`)
+
+## Quick Start
+
+```bash
+# Smoke test — 1 issue, 1 run (~$2-4)
+node scripts/token-benchmark.js --issues csrf-case-insensitive --runs 1 > result.json
+
+# View the JSON
+cat result.json | jq .aggregate
+
+# Generate the markdown report
+node scripts/update-token-report.js result.json
+cat docs/benchmarks/TOKEN-SAVINGS.md
+```
+
+## Full Run
+
+```bash
+# All 5 issues × 3 runs (~$30-60)
+node scripts/token-benchmark.js > result.json
+node scripts/update-token-report.js result.json
+```
+
+## CLI Flags
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--runs <N>` | `3` | Number of runs per issue (medians used) |
+| `--model <model>` | `sonnet` | Claude model to use |
+| `--issues <id,...>` | all | Comma-separated subset of issue IDs |
+| `--nextjs-dir <path>` | `$TMPDIR/...` | Reuse existing Next.js clone |
+| `--skip-graph` | `false` | Skip codegraph rebuild (use existing DB) |
+| `--max-turns <N>` | `50` | Max agent turns per session |
+| `--max-budget <$>` | `2.00` | Max USD per session |
+| `--perf` | `false` | Also run build/query perf benchmarks on the Next.js graph |
+
+## Available Issues
+
+| ID | Difficulty | PR | Description |
+|----|:----------:|---:|-------------|
+| `csrf-case-insensitive` | Easy | #89127 | Case-insensitive CSRF origin matching |
+| `ready-in-time` | Medium | #88589 | Incorrect "Ready in" time display |
+| `aggregate-error-inspect` | Medium | #88999 | AggregateError.errors missing in output |
+| `otel-propagation` | Hard | #90181 | OTEL trace context propagation broken |
+| `static-rsc-payloads` | Hard | #89202 | Static RSC payloads not emitted/served |
+
+## Methodology
+
+### Setup
+- **Target repo:** [vercel/next.js](https://github.com/vercel/next.js) (~4,000 TypeScript files)
+- Each issue is a real closed PR with a known set of affected source files
+
+### Two conditions (identical except codegraph access)
+
+**Baseline:** Agent has `Glob`, `Grep`, `Read`, `Bash` tools. No codegraph.
+
+**Codegraph:** Agent has the same tools **plus** a codegraph MCP server providing structural navigation (symbol search, dependency tracking, impact analysis, call chains).
+
+### Controls
+- Same model for both conditions
+- Same issue prompt (bug description only — no hints about the solution)
+- Checkout pinned to the commit *before* the fix (agent can't see the answer in git history)
+- Same `maxTurns` and `maxBudgetUsd` budget caps
+
+### Metrics
+- **Input tokens:** Total tokens sent to the model (primary metric)
+- **Cost:** USD cost of the session
+- **Turns:** Number of agent turns (tool-use round-trips)
+- **Hit rate:** Percentage of ground-truth files correctly identified
+- **Tool calls:** Breakdown by tool type
+
+### Statistical handling
+- N runs per issue (default 3), median used to handle non-determinism
+- Error runs are excluded from aggregation
+
+## Cost Estimate
+
+| Scenario | Approximate cost |
+|----------|----------------:|
+| 1 issue × 1 run | $2-4 |
+| 1 issue × 3 runs | $6-12 |
+| 5 issues × 3 runs | $30-60 |
+
+Costs depend on model choice and issue difficulty. The `--max-budget` flag caps individual sessions.
+
+## Adding New Issues
+
+Edit `scripts/token-benchmark-issues.js` and add an entry to the `ISSUES` array:
+
+```js
+{
+  id: 'short-slug',
+  difficulty: 'easy|medium|hard',
+  pr: 12345,
+  title: 'PR title',
+  description: 'Bug description for the agent (no solution hints)',
+  commitBefore: 'abc123def...',  // SHA before the fix
+  expectedFiles: ['packages/next/src/path/to/file.ts'],
+}
+```
+
+Requirements:
+- Use a real closed PR with a clear bug description
+- `commitBefore` must be the parent of the merge commit (not the merge itself)
+- `expectedFiles` should list only source files, not tests
+- Verify the SHA exists: `git log --oneline <sha> -1` in the Next.js repo
+
+## Output Format
+
+The runner outputs JSON to stdout. See [TOKEN-SAVINGS.md](TOKEN-SAVINGS.md) for the generated report.
diff --git a/docs/benchmarks/TOKEN-SAVINGS.md b/docs/benchmarks/TOKEN-SAVINGS.md
@@ -0,0 +1,17 @@
+# Token Savings Benchmark: codegraph vs Raw Navigation
+
+Measures how much codegraph reduces token usage when an AI agent navigates
+the [Next.js](https://github.com/vercel/next.js) codebase (~4,000 TypeScript files).
+
+*No benchmark data yet. Run the benchmark to populate this report:*
+
+```bash
+node scripts/token-benchmark.js > result.json
+node scripts/update-token-report.js result.json
+```
+
+See [README.md](README.md) for full instructions.
+
+<!-- TOKEN_BENCHMARK_DATA
+[]
+-->