Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 228 additions & 0 deletions docs/json-schema.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
# JSON Schema — Stable Symbol Metadata

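Every codegraph command that returns symbol data includes a **stable base shape** of 7 fields. Commands may add extra fields (e.g. `similarity`, `callees`), but these 7 are always present — the one exception is the streaming iterator functions, which omit `fileHash` (see [Notes](#notes)).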

## Base Symbol Shape

| Field | Type | Description |
|------------|-------------------|-------------|
| `name` | `string` | Symbol identifier (e.g. `"buildGraph"`, `"MyClass.method"`) |
| `kind` | `string` | Symbol kind — see [Valid Kinds](#valid-kinds) |
| `file` | `string` | Repo-relative file path (forward slashes) |
| `line` | `number` | 1-based start line |
| `endLine` | `number \| null` | 1-based end line, or `null` if unavailable |
| `role` | `string \| null` | Architectural role classification, or `null` if unclassified — see [Valid Roles](#valid-roles) |
| `fileHash` | `string \| null` | SHA-256 hash of the file at build time, or `null` if unavailable |

### Valid Kinds

```
function method class interface type struct enum trait record module
```

Language-specific types use their native kind (e.g. Go structs use `struct`, Rust traits use `trait`, Ruby modules use `module`).

### Valid Roles

```
entry core utility adapter dead leaf
```

Roles are assigned during `codegraph build` based on call-graph topology. Symbols without enough signal remain `null`.

## Command Envelopes

### `where` (symbol mode)

```jsonc
{
"target": "buildGraph",
"mode": "symbol",
"results": [
{
"name": "buildGraph", // ← base 7
"kind": "function",
"file": "src/builder.js",
"line": 42,
"endLine": 180,
"role": "core",
"fileHash": "abc123...",
"exported": true, // ← command-specific
"uses": [ // lightweight refs (3 fields here: name, file, line)
{ "name": "parseFile", "file": "src/parser.js", "line": 10 }
]
}
]
}
```

### `query`

```jsonc
{
"query": "buildGraph",
"results": [
{
"name": "buildGraph", // ← base 7
"kind": "function",
"file": "src/builder.js",
"line": 42,
"endLine": 180,
"role": "core",
"fileHash": "abc123...",
"callees": [ // lightweight refs
{ "name": "parseFile", "kind": "function", "file": "src/parser.js", "line": 10, "edgeKind": "calls" }
],
"callers": [
{ "name": "main", "kind": "function", "file": "src/cli.js", "line": 5, "edgeKind": "calls" }
]
}
]
}
```

### `fn` (fnDeps)

```jsonc
{
"name": "buildGraph",
"results": [
{
"name": "buildGraph", // ← base 7
"kind": "function",
"file": "src/builder.js",
"line": 42,
"endLine": 180,
"role": "core",
"fileHash": "abc123...",
"callees": [/* lightweight */],
"callers": [/* lightweight */],
"transitiveCallers": { "2": [/* lightweight */] }
}
]
}
```

### `fn-impact`

```jsonc
{
"name": "buildGraph",
"results": [
{
"name": "buildGraph", // ← base 7
"kind": "function",
"file": "src/builder.js",
"line": 42,
"endLine": 180,
"role": "core",
"fileHash": "abc123...",
"levels": { "1": [/* lightweight */], "2": [/* lightweight */] },
"totalDependents": 5
}
]
}
```

### `explain` (function mode)

```jsonc
{
"kind": "function",
"results": [
{
"name": "buildGraph", // ← base 7
"kind": "function",
"file": "src/builder.js",
"line": 42,
"endLine": 180,
"role": "core",
"fileHash": "abc123...",
"lineCount": 139, // ← command-specific
"summary": "...",
"signature": "...",
"complexity": { /* ... */ },
"callees": [/* lightweight */],
"callers": [/* lightweight */],
"relatedTests": [/* { file } */]
}
]
}
```

### `search` / `multi-search` / `fts` / `hybrid`

```jsonc
{
"results": [
{
"name": "buildGraph", // ← base 7
"kind": "function",
"file": "src/builder.js",
"line": 42,
"endLine": 180,
"role": "core",
"fileHash": "abc123...",
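"similarity": 0.85 // ← search-specific, varies by mode: `similarity` (search), `rrf` + `queryScores` (multi-search), `bm25Score` (fts), `rrf` + `bm25Score` + `bm25Rank` among others (hybrid)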
}
]
}
```

### `list-functions`

```jsonc
{
"count": 42,
"functions": [
{
"name": "buildGraph", // ← base 7
"kind": "function",
"file": "src/builder.js",
"line": 42,
"endLine": 180,
"role": "core",
"fileHash": "abc123..."
}
]
}
```

### `roles`

```jsonc
{
"count": 42,
"summary": { "core": 10, "utility": 20, "entry": 5, "leaf": 7 },
"symbols": [
{
"name": "buildGraph", // ← base 7
"kind": "function",
"file": "src/builder.js",
"line": 42,
"endLine": 180,
"role": "core",
"fileHash": "abc123..."
}
]
}
```

## Lightweight Inner References

Nested/secondary references (callees, callers, transitive hops, path nodes) use a lightweight 4-field shape:

| Field | Type |
|--------|----------|
| `name` | `string` |
| `kind` | `string` |
| `file` | `string` |
| `line` | `number` |

Some contexts add extra fields like `edgeKind` or `viaHierarchy`.

## Notes

- `variable` is not a tracked kind — codegraph tracks function/type-level symbols only.
- Iterator functions (`iterListFunctions`, `iterRoles`) yield `endLine` and `role` but not `fileHash` (streaming avoids holding the DB open for per-row hash lookups).
- The `normalizeSymbol(row, db, hashCache)` utility is exported from both `src/queries.js` and `src/index.js` for programmatic consumers.
32 changes: 16 additions & 16 deletions src/embedder.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import path from 'node:path';
import { createInterface } from 'node:readline';
import { closeDb, findDbPath, openDb, openReadonlyOrFail } from './db.js';
import { info, warn } from './logger.js';
import { normalizeSymbol } from './queries.js';

/**
* Split an identifier into readable words.
Expand Down Expand Up @@ -582,7 +583,7 @@ function _prepareSearch(customDbPath, opts = {}) {
const noTests = opts.noTests || false;
const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;
let sql = `
SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line
SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line, n.end_line, n.role
FROM embeddings e
JOIN nodes n ON e.node_id = n.id
`;
Expand Down Expand Up @@ -638,17 +639,15 @@ export async function searchData(query, customDbPath, opts = {}) {
return null;
}

const hc = new Map();
const results = [];
for (const row of rows) {
const vec = new Float32Array(new Uint8Array(row.vector).buffer);
const sim = cosineSim(queryVec, vec);

if (sim >= minScore) {
results.push({
name: row.name,
kind: row.kind,
file: row.file,
line: row.line,
...normalizeSymbol(row, db, hc),
similarity: sim,
});
}
Expand Down Expand Up @@ -734,14 +733,12 @@ export async function multiSearchData(queries, customDbPath, opts = {}) {
}

// Build results sorted by RRF score
const hc = new Map();
const results = [];
for (const [rowIndex, entry] of fusionMap) {
const row = rows[rowIndex];
results.push({
name: row.name,
kind: row.kind,
file: row.file,
line: row.line,
...normalizeSymbol(row, db, hc),
rrf: entry.rrfScore,
queryScores: entry.queryScores,
});
Expand Down Expand Up @@ -804,7 +801,7 @@ export function ftsSearchData(query, customDbPath, opts = {}) {

let sql = `
SELECT f.rowid AS node_id, rank AS bm25_score,
n.name, n.kind, n.file, n.line
n.name, n.kind, n.file, n.line, n.end_line, n.role
FROM fts_index f
JOIN nodes n ON f.rowid = n.id
WHERE fts_index MATCH ?
Expand Down Expand Up @@ -841,16 +838,13 @@ export function ftsSearchData(query, customDbPath, opts = {}) {
rows = rows.filter((row) => !TEST_PATTERN.test(row.file));
}

db.close();

const hc = new Map();
const results = rows.slice(0, limit).map((row) => ({
name: row.name,
kind: row.kind,
file: row.file,
line: row.line,
...normalizeSymbol(row, db, hc),
bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display
}));

db.close();
return { results };
}

Expand Down Expand Up @@ -924,6 +918,9 @@ export async function hybridSearchData(query, customDbPath, opts = {}) {
kind: item.kind,
file: item.file,
line: item.line,
endLine: item.endLine ?? null,
role: item.role ?? null,
fileHash: item.fileHash ?? null,
rrfScore: 0,
bm25Score: null,
bm25Rank: null,
Expand Down Expand Up @@ -955,6 +952,9 @@ export async function hybridSearchData(query, customDbPath, opts = {}) {
kind: e.kind,
file: e.file,
line: e.line,
endLine: e.endLine,
role: e.role,
fileHash: e.fileHash,
rrf: e.rrfScore,
bm25Score: e.bm25Score,
bm25Rank: e.bm25Rank,
Expand Down
1 change: 1 addition & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ export {
iterWhere,
kindIcon,
moduleMapData,
normalizeSymbol,
pathData,
queryNameData,
rolesData,
Expand Down
Loading