optave · carlos-alm · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -1,15 +1,19 @@
 name: Benchmark
 
 on:
-  release:
-    types: [published]
+  workflow_run:
+    workflows: ["Publish"]
+    types: [completed]
   workflow_dispatch:
 
 permissions: {}
 
 jobs:
   benchmark:
     runs-on: ubuntu-latest
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      github.event.workflow_run.conclusion == 'success'
     permissions:
       contents: write
       pull-requests: write

diff --git a/README.md b/README.md
@@ -93,7 +93,7 @@ Most code graph tools make you choose: **fast local analysis with no AI, or powe
 | **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds even on large codebases. Competitors re-index everything from scratch; Merkle-tree approaches still require O(n) filesystem scanning |
 | **🔓** | **Zero-cost core, LLM-enhanced when you want** | Full graph analysis with no API keys, no accounts, no cost. Optionally bring your own LLM provider for richer embeddings and AI-powered search — your code only goes to the provider you already chose |
 | **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes |
-| **🤖** | **Built for AI agents** | 17-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default, your code doesn't leak to other projects |
+| **🤖** | **Built for AI agents** | 17-tool [MCP server](https://modelcontextprotocol.io/) with `context` and `explain` compound commands — AI assistants get full function context in one call. Single-repo by default, your code doesn't leak to other projects |
 | **🌐** | **Multi-language, one CLI** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph — no juggling Madge, pyan, and cflow |
 | **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — ships with a GitHub Actions workflow |
 | **🧠** | **Semantic search** | Local embeddings by default, LLM-powered embeddings when opted in — multi-query with RRF ranking via `"auth; token; JWT"` |
@@ -180,12 +180,15 @@ codegraph deps src/index.ts  # file-level import/export map
 
 | | Feature | Description |
 |---|---|---|
-| 🔍 | **Symbol search** | Find any function, class, or method by name with callers/callees |
+| 🔍 | **Symbol search** | Find any function, class, or method by name — exact match priority, relevance scoring, `--file` and `--kind` filters |
 | 📁 | **File dependencies** | See what a file imports and what imports it |
 | 💥 | **Impact analysis** | Trace every file affected by a change (transitive) |
-| 🧬 | **Function-level tracing** | Call chains, caller trees, and function-level impact |
+| 🧬 | **Function-level tracing** | Call chains, caller trees, and function-level impact with qualified call resolution |
+| 🎯 | **Deep context** | `context` gives AI agents source, deps, callers, signature, and tests for a function in one call; `explain` gives structural summaries of files or functions |
+| 📍 | **Fast lookup** | `where` shows exactly where a symbol is defined and used — minimal, fast |
 | 📊 | **Diff impact** | Parse `git diff`, find overlapping functions, trace their callers |
 | 🗺️ | **Module map** | Bird's-eye view of your most-connected files |
+| 🏗️ | **Structure & hotspots** | Directory cohesion scores, fan-in/fan-out hotspot detection, module boundaries |
 | 🔄 | **Cycle detection** | Find circular dependencies at file or function level |
 | 📤 | **Export** | DOT (Graphviz), Mermaid, and JSON graph export |
 | 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking |
@@ -210,7 +213,19 @@ codegraph watch [dir]          # Watch for changes, update graph incrementally
 codegraph query <name>         # Find a symbol — shows callers and callees
 codegraph deps <file>          # File imports/exports
 codegraph map                  # Top 20 most-connected files
-codegraph map -n 50            # Top 50
+codegraph map -n 50 --no-tests # Top 50, excluding test files
+codegraph where <name>         # Where is a symbol defined and used?
+codegraph where --file src/db.js  # List symbols, imports, exports for a file
+codegraph stats                # Graph health: nodes, edges, languages, quality score
+```
+
+### Deep Context (AI-Optimized)
+
+```bash
+codegraph context <name>       # Full context: source, deps, callers, signature, tests
+codegraph context <name> --depth 2 --no-tests  # Include callee source 2 levels deep
+codegraph explain <file>       # Structural summary: public API, internals, data flow
+codegraph explain <function>   # Function summary: signature, calls, callers, tests
 ```
 
 ### Impact Analysis
@@ -225,6 +240,14 @@ codegraph diff-impact --staged # Impact of staged changes
 codegraph diff-impact HEAD~3   # Impact vs a specific ref
 ```
 
+### Structure & Hotspots
+
+```bash
+codegraph structure            # Directory overview with cohesion scores
+codegraph hotspots             # Files with extreme fan-in, fan-out, or density
+codegraph hotspots --metric coupling --level directory --no-tests
+```
+
 ### Export & Visualization
 
 ```bash
@@ -268,9 +291,9 @@ A single trailing semicolon is ignored (falls back to single-query mode). The `-
 | `minilm` | all-MiniLM-L6-v2 | 384 | ~23 MB | Apache-2.0 | Fastest, good for quick iteration |
 | `jina-small` | jina-embeddings-v2-small-en | 512 | ~33 MB | Apache-2.0 | Better quality, still small |
 | `jina-base` | jina-embeddings-v2-base-en | 768 | ~137 MB | Apache-2.0 | High quality, 8192 token context |
-| `jina-code` (default) | jina-embeddings-v2-base-code | 768 | ~137 MB | Apache-2.0 | **Best for code search**, trained on code+text |
+| `jina-code` | jina-embeddings-v2-base-code | 768 | ~137 MB | Apache-2.0 | Best for code search, trained on code+text (requires HF token) |
 | `nomic` | nomic-embed-text-v1 | 768 | ~137 MB | Apache-2.0 | Good quality, 8192 context |
-| `nomic-v1.5` | nomic-embed-text-v1.5 | 768 | ~137 MB | Apache-2.0 | Improved nomic, Matryoshka dimensions |
+| `nomic-v1.5` (default) | nomic-embed-text-v1.5 | 768 | ~137 MB | Apache-2.0 | **Improved nomic, Matryoshka dimensions** |
 | `bge-large` | bge-large-en-v1.5 | 1024 | ~335 MB | MIT | Best general retrieval, top MTEB scores |
 
 The model used during `embed` is stored in the database, so `search` auto-detects it — no need to pass `--model` when searching.
@@ -304,13 +327,13 @@ By default, the MCP server only exposes the local project's graph. AI agents can
 | Flag | Description |
 |---|---|
 | `-d, --db <path>` | Custom path to `graph.db` |
-| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files |
+| `-T, --no-tests` | Exclude `.test.`, `.spec.`, `__test__` files (available on `fn`, `fn-impact`, `context`, `explain`, `where`, `diff-impact`, `search`, `map`, `hotspots`, `deps`, `impact`) |
 | `--depth <n>` | Transitive trace depth (default varies by command) |
 | `-j, --json` | Output as JSON |
 | `-v, --verbose` | Enable debug output |
 | `--engine <engine>` | Parser engine: `native`, `wasm`, or `auto` (default: `auto`) |
-| `-k, --kind <kind>` | Filter by kind: `function`, `method`, `class`, `struct`, `enum`, `trait`, `record`, `module` (search) |
-| `--file <pattern>` | Filter by file path pattern (search) |
+| `-k, --kind <kind>` | Filter by kind: `function`, `method`, `class`, `struct`, `enum`, `trait`, `record`, `module` (`fn`, `context`, `search`) |
+| `-f, --file <path>` | Scope to a specific file (`fn`, `context`, `where`) |
 | `--rrf-k <n>` | RRF smoothing constant for multi-query search (default 60) |
 
 ## 🌐 Language Support
@@ -361,18 +384,19 @@ Both engines produce identical output. Use `--engine native|wasm|auto` to contro
 
 ### Call Resolution
 
-Calls are resolved with priority and confidence scoring:
+Calls are resolved with **qualified resolution** — method calls (`obj.method()`) are distinguished from standalone function calls, and built-in receivers (`console`, `Math`, `JSON`, `Array`, `Promise`, etc.) are filtered out automatically. Import scope is respected: a call to `foo()` resolves first to functions that are actually imported or defined in the same file; standalone calls may then fall back to definitions in nearby directories at lower confidence, which reduces false positives from name collisions.
 
 | Priority | Source | Confidence |
 |---|---|---|
 | 1 | **Import-aware** — `import { foo } from './bar'` → link to `bar` | `1.0` |
 | 2 | **Same-file** — definitions in the current file | `1.0` |
-| 3 | **Same directory** — definitions in sibling files | `0.7` |
-| 4 | **Same parent directory** — definitions in sibling dirs | `0.5` |
-| 5 | **Global fallback** — match by name across codebase | `0.3` |
-| 6 | **Method hierarchy** — resolved through `extends`/`implements` | — |
+| 3 | **Same directory** — definitions in sibling files (standalone calls only) | `0.7` |
+| 4 | **Same parent directory** — definitions in sibling dirs (standalone calls only) | `0.5` |
+| 5 | **Method hierarchy** — resolved through `extends`/`implements` | varies |
+
+Method calls on unknown receivers skip global fallback entirely — `stmt.run()` will never resolve to a standalone `run` function in another file. Duplicate caller/callee edges are deduplicated automatically. Dynamic patterns like `fn.call()`, `fn.apply()`, `fn.bind()`, and `obj["method"]()` are also detected on a best-effort basis.
 
-Dynamic patterns like `fn.call()`, `fn.apply()`, `fn.bind()`, and `obj["method"]()` are also detected on a best-effort basis.
+Codegraph also extracts symbols from common callback patterns: Commander `.command().action()` callbacks (as `command:build`), Express route handlers (as `route:GET /api/users`), and event emitter listeners (as `event:data`).
 
 ## 📊 Performance
 

diff --git a/generated/DOGFOOD_REPORT_v2.2.0.md b/generated/DOGFOOD_REPORT_v2.2.0.md
@@ -0,0 +1,82 @@
+# Dogfooding Report: @optave/codegraph@2.2.0
+
+**Date:** 2026-02-23
+**Tested against:** codegraph repo itself (92 files, 527 nodes)
+**Engine:** Native v0.1.0 (auto)
+
+## Working Commands (20/22)
+
+| Command | Status | Notes |
+|---------|--------|-------|
+| `build` | PASS | Native engine, 92 files, 527 nodes, 526 edges |
+| `query` | PASS | Correct callers/callees for `buildGraph` |
+| `impact` | PASS | 13 transitive deps for `src/db.js` |
+| `map` | PASS | Clean module overview |
+| `stats` | PASS | Full graph health overview |
+| `deps` | PASS | Correct imports/imported-by |
+| `fn` | PASS | Function-level call chain |
+| `fn-impact` | PASS | 3 transitive dependents |
+| `context` | PASS | Full source, deps, callers, tests |
+| `explain` (file) | PASS | Clean structural summary |
+| `explain` (function) | PASS | Calls, callers, tests |
+| `where` | PASS | Fast symbol lookup |
+| `diff-impact` | PASS | 11 changed functions, 44 callers affected |
+| `cycles` | PASS | 1 cycle: queries.js <-> cycles.js |
+| `hotspots` | PASS | Correct fan-in rankings |
+| `export` (DOT/Mermaid/JSON) | PASS | All 3 formats work |
+| `info` | PASS | Correct version + engine info |
+| `models` | PASS | Lists all 7 models |
+| `registry` | PASS | list/add/remove/prune subcommands |
+| `watch` | PASS | Starts, watches for changes |
+| `mcp` | PASS | Server initializes correctly via JSON-RPC |
+
+## Bugs Found
+
+### 1. `structure .` returns empty results (Medium severity)
+
+- `codegraph structure .` → "No directory structure found"
+- `codegraph structure` (no arg) → works perfectly (18 directories)
+- `codegraph structure src` → works correctly
+
+**Root cause:** In `structureData()` (`src/structure.js`), passing `.` as the `directory` filter normalizes to `"."` and then filters `d.name === '.' || d.name.startsWith('./')` — which matches nothing since directory names stored in the DB are relative paths like `src`, `tests`, etc.
+
+**Fix:** Treat `.` (or current dir equivalent) as `null`/no filter in `structureData()`.
+
+### 2. Stale embeddings after rebuild (Medium severity)
+
+- After an incremental `build`, embedding `node_id`s become orphaned (e.g. old IDs in the 3077 range, new IDs in the 4335 range)
+- `search` returns 0 results even at `--min-score 0.05` because no embeddings join to current nodes
+- Verified: 310 embeddings existed but 0 matched any node in the `nodes` table
+
+**Root cause:** `build` deletes and re-inserts nodes (getting new auto-increment IDs) but does not invalidate or rebuild embeddings.
+
+**Fix:** Preserve node IDs across rebuilds, invalidate embeddings when node IDs change, or warn the user to re-run `embed`.
+
+### 3. `embed` default model requires HuggingFace auth (Medium severity)
+
+- `codegraph embed .` crashes with `Error: Unauthorized access to file` for the default `jina-code` model
+- The Jina model is gated on HuggingFace and requires an `HF_TOKEN` environment variable
+- `codegraph embed . --model minilm` works fine (public model)
+- The error is an unhandled exception with a full stack trace — not user-friendly
+
+**Fix:** Default to a public model (e.g. `minilm`), fall back to `minilm` automatically on auth failure, or catch the error and provide a clear message with instructions.
+
+### 4. Cross-language false positive in export (Low severity)
+
+- One low-confidence (0.3) call edge: `main` (build.rs) → `setup` (tests/unit/structure.test.js)
+- Shows up in Mermaid/DOT exports as a spurious connection
+- Only 1 instance found across the entire graph
+
+**Fix:** Export commands could support a `--min-confidence` filter, or the default export could exclude edges below a threshold (e.g. 0.5).
+
+## `--no-tests` Flag
+
+Tested on `stats` and `map` — both correctly filter out test files:
+- `stats --no-tests`: 427 nodes (vs 527 total), 59 files (vs 92)
+- `map --no-tests`: excludes test files from ranking
+
+## Embedding & Search
+
+- `embed --model minilm` successfully generated 392 embeddings (384d)
+- `search "build graph"` returned 15 results after fresh embeddings (top hit: 37.9% `test_triangle_cycle`)
+- Search quality is reasonable but not ideal — `buildGraph` itself didn't appear in results for "build graph"
diff --git a/src/cli.js b/src/cli.js
@@ -374,7 +374,7 @@ program
   .action(() => {
     console.log('\nAvailable embedding models:\n');
     for (const [key, config] of Object.entries(MODELS)) {
-      const def = key === 'jina-code' ? ' (default)' : '';
+      const def = key === 'nomic-v1.5' ? ' (default)' : '';
       console.log(`  ${key.padEnd(12)} ${String(config.dim).padStart(4)}d  ${config.desc}${def}`);
     }
     console.log('\nUsage: codegraph embed --model <name>');
@@ -388,8 +388,8 @@ program
   )
   .option(
     '-m, --model <name>',
-    'Embedding model: minilm, jina-small, jina-base, jina-code (default), nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
-    'jina-code',
+    'Embedding model: minilm, jina-small, jina-base, jina-code, nomic, nomic-v1.5 (default), bge-large. Run `codegraph models` for details',
+    'nomic-v1.5',
   )
   .action(async (dir, opts) => {
     const root = path.resolve(dir || '.');

diff --git a/src/config.js b/src/config.js
@@ -19,7 +19,7 @@ export const DEFAULTS = {
     defaultDepth: 3,
     defaultLimit: 20,
   },
-  embeddings: { model: 'jina-code', llmProvider: null },
+  embeddings: { model: 'nomic-v1.5', llmProvider: null },
   llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },
   search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 },
   ci: { failOnCycles: false, impactThreshold: null },

diff --git a/src/embedder.js b/src/embedder.js
@@ -55,7 +55,7 @@ export const MODELS = {
   },
 };
 
-export const DEFAULT_MODEL = 'jina-code';
+export const DEFAULT_MODEL = 'nomic-v1.5';
 const BATCH_SIZE_MAP = {
   minilm: 32,
   'jina-small': 16,

diff --git a/tests/unit/config.test.js b/tests/unit/config.test.js
@@ -55,7 +55,7 @@ describe('DEFAULTS', () => {
   });
 
   it('has embeddings defaults', () => {
-    expect(DEFAULTS.embeddings).toEqual({ model: 'jina-code', llmProvider: null });
+    expect(DEFAULTS.embeddings).toEqual({ model: 'nomic-v1.5', llmProvider: null });
   });
 
   it('has llm defaults', () => {