From 376aa456a6e2edb88e754bae5f4ca945d5b48ea7 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 04:29:21 -0600 Subject: [PATCH 01/10] refactor: split db.js into db/ module directory Move connection/locking logic to src/db/connection.js and schema migrations to src/db/migrations.js. src/db.js becomes a barrel re-export file so all 55 existing import sites continue to work unchanged. Impact: 10 functions changed, 102 affected --- src/db/connection.js | 88 ++++++++++++ src/db/migrations.js | 312 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 400 insertions(+) create mode 100644 src/db/connection.js create mode 100644 src/db/migrations.js diff --git a/src/db/connection.js b/src/db/connection.js new file mode 100644 index 00000000..beffdc41 --- /dev/null +++ b/src/db/connection.js @@ -0,0 +1,88 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { warn } from '../logger.js'; + +function isProcessAlive(pid) { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +function acquireAdvisoryLock(dbPath) { + const lockPath = `${dbPath}.lock`; + try { + if (fs.existsSync(lockPath)) { + const content = fs.readFileSync(lockPath, 'utf-8').trim(); + const pid = Number(content); + if (pid && pid !== process.pid && isProcessAlive(pid)) { + warn(`Another process (PID ${pid}) may be using this database. Proceeding with caution.`); + } + } + } catch { + /* ignore read errors */ + } + try { + fs.writeFileSync(lockPath, String(process.pid), 'utf-8'); + } catch { + /* best-effort */ + } +} + +function releaseAdvisoryLock(lockPath) { + try { + const content = fs.readFileSync(lockPath, 'utf-8').trim(); + if (Number(content) === process.pid) { + fs.unlinkSync(lockPath); + } + } catch { + /* ignore */ + } +} + +export function openDb(dbPath) { + const dir = path.dirname(dbPath); + if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); + acquireAdvisoryLock(dbPath); + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + db.pragma('busy_timeout = 5000'); + db.__lockPath = `${dbPath}.lock`; + return db; +} + +export function closeDb(db) { + db.close(); + if (db.__lockPath) releaseAdvisoryLock(db.__lockPath); +} + +export function findDbPath(customPath) { + if (customPath) return path.resolve(customPath); + let dir = process.cwd(); + while (true) { + const candidate = path.join(dir, '.codegraph', 'graph.db'); + if (fs.existsSync(candidate)) return candidate; + const parent = path.dirname(dir); + if (parent === dir) break; + dir = parent; + } + return path.join(process.cwd(), '.codegraph', 'graph.db'); +} + +/** + * Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. + */ +export function openReadonlyOrFail(customPath) { + const dbPath = findDbPath(customPath); + if (!fs.existsSync(dbPath)) { + console.error( + `No codegraph database found at ${dbPath}.\n` + + `Run "codegraph build" first to analyze your codebase.`, + ); + process.exit(1); + } + return new Database(dbPath, { readonly: true }); +} diff --git a/src/db/migrations.js b/src/db/migrations.js new file mode 100644 index 00000000..3f0d60ce --- /dev/null +++ b/src/db/migrations.js @@ -0,0 +1,312 @@ +import { debug } from '../logger.js'; + +// ─── Schema Migrations ───────────────────────────────────────────────── +export const MIGRATIONS = [ + { + version: 1, + up: ` + CREATE TABLE IF NOT EXISTS nodes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + kind TEXT NOT NULL, + file TEXT NOT NULL, + line INTEGER, + end_line INTEGER, + UNIQUE(name, kind, file, line) + ); + CREATE TABLE IF NOT EXISTS edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id INTEGER NOT NULL, + target_id INTEGER NOT NULL, + kind TEXT NOT NULL, + confidence REAL DEFAULT 1.0, + dynamic INTEGER DEFAULT 0, + FOREIGN KEY(source_id) REFERENCES nodes(id), + FOREIGN KEY(target_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name); + CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(file); + CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind); + CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id); + CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id); + CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind); + CREATE TABLE IF NOT EXISTS node_metrics ( + node_id INTEGER PRIMARY KEY, + line_count INTEGER, + symbol_count INTEGER, + import_count INTEGER, + export_count INTEGER, + fan_in INTEGER, + fan_out INTEGER, + cohesion REAL, + file_count INTEGER, + FOREIGN KEY(node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_node_metrics_node ON node_metrics(node_id); + `, + }, + { + version: 2, + up: ` + CREATE INDEX IF NOT EXISTS idx_nodes_name_kind_file ON nodes(name, kind, file); + CREATE INDEX IF NOT EXISTS idx_nodes_file_kind ON nodes(file, kind); + CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_id, kind); + CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_id, kind); + `, + }, + { + version: 3, + up: ` + CREATE TABLE IF NOT EXISTS file_hashes ( + file TEXT PRIMARY KEY, + hash TEXT NOT NULL, + mtime INTEGER NOT NULL + ); + `, + }, + { + version: 4, + up: `ALTER TABLE file_hashes ADD COLUMN size INTEGER DEFAULT 0;`, + }, + { + version: 5, + up: ` + CREATE TABLE IF NOT EXISTS co_changes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_a TEXT NOT NULL, + file_b TEXT NOT NULL, + commit_count INTEGER NOT NULL, + jaccard REAL NOT NULL, + last_commit_epoch INTEGER, + UNIQUE(file_a, file_b) + ); + CREATE INDEX IF NOT EXISTS idx_co_changes_file_a ON co_changes(file_a); + CREATE INDEX IF NOT EXISTS idx_co_changes_file_b ON co_changes(file_b); + CREATE INDEX IF NOT EXISTS idx_co_changes_jaccard ON co_changes(jaccard DESC); + CREATE TABLE IF NOT EXISTS co_change_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + `, + }, + { + version: 6, + up: ` + CREATE TABLE IF NOT EXISTS file_commit_counts ( + file TEXT PRIMARY KEY, + commit_count INTEGER NOT NULL DEFAULT 0 + ); + `, + }, + { + version: 7, + up: ` + CREATE TABLE IF NOT EXISTS build_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + `, + }, + { + version: 8, + up: ` + CREATE TABLE IF NOT EXISTS function_complexity ( + node_id INTEGER PRIMARY KEY, + cognitive INTEGER NOT NULL, + cyclomatic INTEGER NOT NULL, + max_nesting INTEGER NOT NULL, + FOREIGN KEY(node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_fc_cognitive ON function_complexity(cognitive DESC); + CREATE INDEX IF NOT EXISTS idx_fc_cyclomatic ON function_complexity(cyclomatic DESC); + `, + }, + { + version: 9, + up: ` + ALTER TABLE function_complexity ADD COLUMN loc INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN sloc INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN comment_lines INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_n1 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_n2 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_big_n1 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_big_n2 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_vocabulary INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_length INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_volume REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_difficulty REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_effort REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_bugs REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN maintainability_index REAL DEFAULT 0; + CREATE INDEX IF NOT EXISTS idx_fc_mi ON function_complexity(maintainability_index ASC); + `, + }, + { + version: 10, + up: ` + CREATE TABLE IF NOT EXISTS dataflow ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id INTEGER NOT NULL, + target_id INTEGER NOT NULL, + kind TEXT NOT NULL, + param_index INTEGER, + expression TEXT, + line INTEGER, + confidence REAL DEFAULT 1.0, + FOREIGN KEY(source_id) REFERENCES nodes(id), + FOREIGN KEY(target_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_dataflow_source ON dataflow(source_id); + CREATE INDEX IF NOT EXISTS idx_dataflow_target ON dataflow(target_id); + CREATE INDEX IF NOT EXISTS idx_dataflow_kind ON dataflow(kind); + CREATE INDEX IF NOT EXISTS idx_dataflow_source_kind ON dataflow(source_id, kind); + `, + }, + { + version: 11, + up: ` + ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id); + CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id); + CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); + `, + }, + { + version: 12, + up: ` + CREATE TABLE IF NOT EXISTS cfg_blocks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + block_index INTEGER NOT NULL, + block_type TEXT NOT NULL, + start_line INTEGER, + end_line INTEGER, + label TEXT, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + UNIQUE(function_node_id, block_index) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_blocks_fn ON cfg_blocks(function_node_id); + + CREATE TABLE IF NOT EXISTS cfg_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + source_block_id INTEGER NOT NULL, + target_block_id INTEGER NOT NULL, + kind TEXT NOT NULL, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + FOREIGN KEY(source_block_id) REFERENCES cfg_blocks(id), + FOREIGN KEY(target_block_id) REFERENCES cfg_blocks(id) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_fn ON cfg_edges(function_node_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_src ON cfg_edges(source_block_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); + `, + }, + { + version: 13, + up: ` + CREATE TABLE IF NOT EXISTS ast_nodes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file TEXT NOT NULL, + line INTEGER NOT NULL, + kind TEXT NOT NULL, + name TEXT NOT NULL, + text TEXT, + receiver TEXT, + parent_node_id INTEGER, + FOREIGN KEY(parent_node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_ast_kind ON ast_nodes(kind); + CREATE INDEX IF NOT EXISTS idx_ast_name ON ast_nodes(name); + CREATE INDEX IF NOT EXISTS idx_ast_file ON ast_nodes(file); + CREATE INDEX IF NOT EXISTS idx_ast_parent ON ast_nodes(parent_node_id); + CREATE INDEX IF NOT EXISTS idx_ast_kind_name ON ast_nodes(kind, name); + `, + }, + { + version: 14, + up: ` + ALTER TABLE nodes ADD COLUMN exported INTEGER DEFAULT 0; + CREATE INDEX IF NOT EXISTS idx_nodes_exported ON nodes(exported); + `, + }, +]; + +export function getBuildMeta(db, key) { + try { + const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); + return row ? row.value : null; + } catch { + return null; + } +} + +export function setBuildMeta(db, entries) { + const upsert = db.prepare('INSERT OR REPLACE INTO build_meta (key, value) VALUES (?, ?)'); + const tx = db.transaction(() => { + for (const [key, value] of Object.entries(entries)) { + upsert.run(key, String(value)); + } + }); + tx(); +} + +export function initSchema(db) { + db.exec(`CREATE TABLE IF NOT EXISTS schema_version (version INTEGER NOT NULL DEFAULT 0)`); + + const row = db.prepare('SELECT version FROM schema_version').get(); + let currentVersion = row ? row.version : 0; + + if (!row) { + db.prepare('INSERT INTO schema_version (version) VALUES (0)').run(); + } + + for (const migration of MIGRATIONS) { + if (migration.version > currentVersion) { + debug(`Running migration v${migration.version}`); + db.exec(migration.up); + db.prepare('UPDATE schema_version SET version = ?').run(migration.version); + currentVersion = migration.version; + } + } + + try { + db.exec('ALTER TABLE nodes ADD COLUMN end_line INTEGER'); + } catch { + /* already exists */ + } + try { + db.exec('ALTER TABLE edges ADD COLUMN confidence REAL DEFAULT 1.0'); + } catch { + /* already exists */ + } + try { + db.exec('ALTER TABLE edges ADD COLUMN dynamic INTEGER DEFAULT 0'); + } catch { + /* already exists */ + } + try { + db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); + } catch { + /* already exists */ + } + try { + db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); + } catch { + /* already exists */ + } +} From c9406117b28ec3703bb701e3e622aba90c4b5231 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 04:29:41 -0600 Subject: [PATCH 02/10] feat: add query builder and repository for node queries Introduce NodeQuery fluent builder and standalone helpers (testFilterSQL, kindInClause, fanInJoinSQL, fanOutJoinSQL) to eliminate duplicated WHERE clause and JOIN patterns across modules. Add repository.js with centralized read methods (findNodesForTriage, listFunctionNodes, iterateFunctionNodes, findNodesWithFanIn, count*). Includes 45 new unit tests covering builder composition and repository functions. Impact: 31 functions changed, 33 affected --- src/db.js | 418 ++----------------------------- src/db/query-builder.js | 211 ++++++++++++++++ src/db/repository.js | 125 +++++++++ src/index.js | 12 + tests/unit/query-builder.test.js | 238 ++++++++++++++++++ tests/unit/repository.test.js | 163 ++++++++++++ 6 files changed, 768 insertions(+), 399 deletions(-) create mode 100644 src/db/query-builder.js create mode 100644 src/db/repository.js create mode 100644 tests/unit/query-builder.test.js create mode 100644 tests/unit/repository.test.js diff --git a/src/db.js b/src/db.js index 31c0a428..ab5e7950 100644 --- a/src/db.js +++ b/src/db.js @@ -1,399 +1,19 @@ -import fs from 'node:fs'; -import path from 'node:path'; -import Database from 'better-sqlite3'; -import { debug, warn } from './logger.js'; - -// ─── Schema Migrations ───────────────────────────────────────────────── -export const MIGRATIONS = [ - { - version: 1, - up: ` - CREATE TABLE IF NOT EXISTS nodes ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL, - kind TEXT NOT NULL, - file TEXT NOT NULL, - line INTEGER, - end_line INTEGER, - UNIQUE(name, kind, file, line) - ); - CREATE TABLE IF NOT EXISTS edges ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_id INTEGER NOT NULL, - target_id INTEGER NOT NULL, - kind TEXT NOT NULL, - confidence REAL DEFAULT 1.0, - dynamic INTEGER DEFAULT 0, - FOREIGN KEY(source_id) REFERENCES nodes(id), - FOREIGN KEY(target_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name); - CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(file); - CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind); - CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id); - CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id); - CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind); - CREATE TABLE IF NOT EXISTS node_metrics ( - node_id INTEGER PRIMARY KEY, - line_count INTEGER, - symbol_count INTEGER, - import_count INTEGER, - export_count INTEGER, - fan_in INTEGER, - fan_out INTEGER, - cohesion REAL, - file_count INTEGER, - FOREIGN KEY(node_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_node_metrics_node ON node_metrics(node_id); - `, - }, - { - version: 2, - up: ` - CREATE INDEX IF NOT EXISTS idx_nodes_name_kind_file ON nodes(name, kind, file); - CREATE INDEX IF NOT EXISTS idx_nodes_file_kind ON nodes(file, kind); - CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_id, kind); - CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_id, kind); - `, - }, - { - version: 3, - up: ` - CREATE TABLE IF NOT EXISTS file_hashes ( - file TEXT PRIMARY KEY, - hash TEXT NOT NULL, - mtime INTEGER NOT NULL - ); - `, - }, - { - version: 4, - up: `ALTER TABLE file_hashes ADD COLUMN size INTEGER DEFAULT 0;`, - }, - { - version: 5, - up: ` - CREATE TABLE IF NOT EXISTS co_changes ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_a TEXT NOT NULL, - file_b TEXT NOT NULL, - commit_count INTEGER NOT NULL, - jaccard REAL NOT NULL, - last_commit_epoch INTEGER, - UNIQUE(file_a, file_b) - ); - CREATE INDEX IF NOT EXISTS idx_co_changes_file_a ON co_changes(file_a); - CREATE INDEX IF NOT EXISTS idx_co_changes_file_b ON co_changes(file_b); - CREATE INDEX IF NOT EXISTS idx_co_changes_jaccard ON co_changes(jaccard DESC); - CREATE TABLE IF NOT EXISTS co_change_meta ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL - ); - `, - }, - { - version: 6, - up: ` - CREATE TABLE IF NOT EXISTS file_commit_counts ( - file TEXT PRIMARY KEY, - commit_count INTEGER NOT NULL DEFAULT 0 - ); - `, - }, - { - version: 7, - up: ` - CREATE TABLE IF NOT EXISTS build_meta ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL - ); - `, - }, - { - version: 8, - up: ` - CREATE TABLE IF NOT EXISTS function_complexity ( - node_id INTEGER PRIMARY KEY, - cognitive INTEGER NOT NULL, - cyclomatic INTEGER NOT NULL, - max_nesting INTEGER NOT NULL, - FOREIGN KEY(node_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_fc_cognitive ON function_complexity(cognitive DESC); - CREATE INDEX IF NOT EXISTS idx_fc_cyclomatic ON function_complexity(cyclomatic DESC); - `, - }, - { - version: 9, - up: ` - ALTER TABLE function_complexity ADD COLUMN loc INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN sloc INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN comment_lines INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_n1 INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_n2 INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_big_n1 INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_big_n2 INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_vocabulary INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_length INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_volume REAL DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_difficulty REAL DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_effort REAL DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_bugs REAL DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN maintainability_index REAL DEFAULT 0; - CREATE INDEX IF NOT EXISTS idx_fc_mi ON function_complexity(maintainability_index ASC); - `, - }, - { - version: 10, - up: ` - CREATE TABLE IF NOT EXISTS dataflow ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_id INTEGER NOT NULL, - target_id INTEGER NOT NULL, - kind TEXT NOT NULL, - param_index INTEGER, - expression TEXT, - line INTEGER, - confidence REAL DEFAULT 1.0, - FOREIGN KEY(source_id) REFERENCES nodes(id), - FOREIGN KEY(target_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_dataflow_source ON dataflow(source_id); - CREATE INDEX IF NOT EXISTS idx_dataflow_target ON dataflow(target_id); - CREATE INDEX IF NOT EXISTS idx_dataflow_kind ON dataflow(kind); - CREATE INDEX IF NOT EXISTS idx_dataflow_source_kind ON dataflow(source_id, kind); - `, - }, - { - version: 11, - up: ` - ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id); - CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id); - CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); - `, - }, - { - version: 12, - up: ` - CREATE TABLE IF NOT EXISTS cfg_blocks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - function_node_id INTEGER NOT NULL, - block_index INTEGER NOT NULL, - block_type TEXT NOT NULL, - start_line INTEGER, - end_line INTEGER, - label TEXT, - FOREIGN KEY(function_node_id) REFERENCES nodes(id), - UNIQUE(function_node_id, block_index) - ); - CREATE INDEX IF NOT EXISTS idx_cfg_blocks_fn ON cfg_blocks(function_node_id); - - CREATE TABLE IF NOT EXISTS cfg_edges ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - function_node_id INTEGER NOT NULL, - source_block_id INTEGER NOT NULL, - target_block_id INTEGER NOT NULL, - kind TEXT NOT NULL, - FOREIGN KEY(function_node_id) REFERENCES nodes(id), - FOREIGN KEY(source_block_id) REFERENCES cfg_blocks(id), - FOREIGN KEY(target_block_id) REFERENCES cfg_blocks(id) - ); - CREATE INDEX IF NOT EXISTS idx_cfg_edges_fn ON cfg_edges(function_node_id); - CREATE INDEX IF NOT EXISTS idx_cfg_edges_src ON cfg_edges(source_block_id); - CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); - `, - }, - { - version: 13, - up: ` - CREATE TABLE IF NOT EXISTS ast_nodes ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file TEXT NOT NULL, - line INTEGER NOT NULL, - kind TEXT NOT NULL, - name TEXT NOT NULL, - text TEXT, - receiver TEXT, - parent_node_id INTEGER, - FOREIGN KEY(parent_node_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_ast_kind ON ast_nodes(kind); - CREATE INDEX IF NOT EXISTS idx_ast_name ON ast_nodes(name); - CREATE INDEX IF NOT EXISTS idx_ast_file ON ast_nodes(file); - CREATE INDEX IF NOT EXISTS idx_ast_parent ON ast_nodes(parent_node_id); - CREATE INDEX IF NOT EXISTS idx_ast_kind_name ON ast_nodes(kind, name); - `, - }, - { - version: 14, - up: ` - ALTER TABLE nodes ADD COLUMN exported INTEGER DEFAULT 0; - CREATE INDEX IF NOT EXISTS idx_nodes_exported ON nodes(exported); - `, - }, -]; - -export function getBuildMeta(db, key) { - try { - const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); - return row ? row.value : null; - } catch { - return null; - } -} - -export function setBuildMeta(db, entries) { - const upsert = db.prepare('INSERT OR REPLACE INTO build_meta (key, value) VALUES (?, ?)'); - const tx = db.transaction(() => { - for (const [key, value] of Object.entries(entries)) { - upsert.run(key, String(value)); - } - }); - tx(); -} - -export function openDb(dbPath) { - const dir = path.dirname(dbPath); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - acquireAdvisoryLock(dbPath); - const db = new Database(dbPath); - db.pragma('journal_mode = WAL'); - db.pragma('busy_timeout = 5000'); - db.__lockPath = `${dbPath}.lock`; - return db; -} - -export function closeDb(db) { - db.close(); - if (db.__lockPath) releaseAdvisoryLock(db.__lockPath); -} - -function isProcessAlive(pid) { - try { - process.kill(pid, 0); - return true; - } catch { - return false; - } -} - -function acquireAdvisoryLock(dbPath) { - const lockPath = `${dbPath}.lock`; - try { - if (fs.existsSync(lockPath)) { - const content = fs.readFileSync(lockPath, 'utf-8').trim(); - const pid = Number(content); - if (pid && pid !== process.pid && isProcessAlive(pid)) { - warn(`Another process (PID ${pid}) may be using this database. Proceeding with caution.`); - } - } - } catch { - /* ignore read errors */ - } - try { - fs.writeFileSync(lockPath, String(process.pid), 'utf-8'); - } catch { - /* best-effort */ - } -} - -function releaseAdvisoryLock(lockPath) { - try { - const content = fs.readFileSync(lockPath, 'utf-8').trim(); - if (Number(content) === process.pid) { - fs.unlinkSync(lockPath); - } - } catch { - /* ignore */ - } -} - -export function initSchema(db) { - db.exec(`CREATE TABLE IF NOT EXISTS schema_version (version INTEGER NOT NULL DEFAULT 0)`); - - const row = db.prepare('SELECT version FROM schema_version').get(); - let currentVersion = row ? row.version : 0; - - if (!row) { - db.prepare('INSERT INTO schema_version (version) VALUES (0)').run(); - } - - for (const migration of MIGRATIONS) { - if (migration.version > currentVersion) { - debug(`Running migration v${migration.version}`); - db.exec(migration.up); - db.prepare('UPDATE schema_version SET version = ?').run(migration.version); - currentVersion = migration.version; - } - } - - try { - db.exec('ALTER TABLE nodes ADD COLUMN end_line INTEGER'); - } catch { - /* already exists */ - } - try { - db.exec('ALTER TABLE edges ADD COLUMN confidence REAL DEFAULT 1.0'); - } catch { - /* already exists */ - } - try { - db.exec('ALTER TABLE edges ADD COLUMN dynamic INTEGER DEFAULT 0'); - } catch { - /* already exists */ - } - try { - db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); - } catch { - /* already exists */ - } - try { - db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); - } catch { - /* already exists */ - } -} - -export function findDbPath(customPath) { - if (customPath) return path.resolve(customPath); - let dir = process.cwd(); - while (true) { - const candidate = path.join(dir, '.codegraph', 'graph.db'); - if (fs.existsSync(candidate)) return candidate; - const parent = path.dirname(dir); - if (parent === dir) break; - dir = parent; - } - return path.join(process.cwd(), '.codegraph', 'graph.db'); -} - -/** - * Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. - */ -export function openReadonlyOrFail(customPath) { - const dbPath = findDbPath(customPath); - if (!fs.existsSync(dbPath)) { - console.error( - `No codegraph database found at ${dbPath}.\n` + - `Run "codegraph build" first to analyze your codebase.`, - ); - process.exit(1); - } - return new Database(dbPath, { readonly: true }); -} +// Barrel re-export — keeps all existing `import { ... } from './db.js'` working. +export { closeDb, findDbPath, openDb, openReadonlyOrFail } from './db/connection.js'; +export { getBuildMeta, initSchema, MIGRATIONS, setBuildMeta } from './db/migrations.js'; +export { + fanInJoinSQL, + fanOutJoinSQL, + kindInClause, + NodeQuery, + testFilterSQL, +} from './db/query-builder.js'; +export { + countEdges, + countFiles, + countNodes, + findNodesForTriage, + findNodesWithFanIn, + iterateFunctionNodes, + listFunctionNodes, +} from './db/repository.js'; diff --git a/src/db/query-builder.js b/src/db/query-builder.js new file mode 100644 index 00000000..eaef2b97 --- /dev/null +++ b/src/db/query-builder.js @@ -0,0 +1,211 @@ +// ─── Standalone Helpers ────────────────────────────────────────────── + +/** + * Return a SQL AND clause that excludes test/spec/stories files. + * Returns empty string when disabled. + * @param {string} [column='n.file'] - Column to filter on + * @param {boolean} [enabled=true] - No-op when false + */ +export function testFilterSQL(column = 'n.file', enabled = true) { + if (!enabled) return ''; + return `AND ${column} NOT LIKE '%.test.%' + AND ${column} NOT LIKE '%.spec.%' + AND ${column} NOT LIKE '%__test__%' + AND ${column} NOT LIKE '%__tests__%' + AND ${column} NOT LIKE '%.stories.%'`; +} + +/** + * Build IN (?, ?, ?) placeholders and params array for a kind filter. + * @param {string[]} kinds + * @returns {{ placeholders: string, params: string[] }} + */ +export function kindInClause(kinds) { + return { + placeholders: kinds.map(() => '?').join(', '), + params: [...kinds], + }; +} + +/** + * Return a LEFT JOIN subquery for fan-in (incoming edge count). + * @param {string} [edgeKind='calls'] - Edge kind to count + * @param {string} [alias='fi'] - Subquery alias + */ +export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi') { + return `LEFT JOIN ( + SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = '${edgeKind}' GROUP BY target_id + ) ${alias} ON ${alias}.target_id = n.id`; +} + +/** + * Return a LEFT JOIN subquery for fan-out (outgoing edge count). + * @param {string} [edgeKind='calls'] - Edge kind to count + * @param {string} [alias='fo'] - Subquery alias + */ +export function fanOutJoinSQL(edgeKind = 'calls', alias = 'fo') { + return `LEFT JOIN ( + SELECT source_id, COUNT(*) AS cnt FROM edges WHERE kind = '${edgeKind}' GROUP BY source_id + ) ${alias} ON ${alias}.source_id = n.id`; +} + +// ─── NodeQuery Fluent Builder ──────────────────────────────────────── + +/** + * Fluent builder for the common `SELECT ... FROM nodes n WHERE ...` pattern. + * Not an ORM — complex queries (BFS, correlated subqueries) stay as raw SQL. + */ +export class NodeQuery { + #selectCols = 'n.*'; + #joins = []; + #conditions = []; + #params = []; + #orderByClause = ''; + #limitValue = null; + + /** Set SELECT columns (default: `n.*`). */ + select(cols) { + this.#selectCols = cols; + return this; + } + + /** WHERE n.kind IN (?, ?, ...) */ + kinds(kindArray) { + if (!kindArray || kindArray.length === 0) return this; + const { placeholders, params } = kindInClause(kindArray); + this.#conditions.push(`n.kind IN (${placeholders})`); + this.#params.push(...params); + return this; + } + + /** Add 5 NOT LIKE conditions to exclude test files. No-op when enabled is falsy. */ + excludeTests(enabled) { + if (!enabled) return this; + this.#conditions.push( + `n.file NOT LIKE '%.test.%'`, + `n.file NOT LIKE '%.spec.%'`, + `n.file NOT LIKE '%__test__%'`, + `n.file NOT LIKE '%__tests__%'`, + `n.file NOT LIKE '%.stories.%'`, + ); + return this; + } + + /** WHERE n.file LIKE ? (no-op if falsy). */ + fileFilter(file) { + if (!file) return this; + this.#conditions.push('n.file LIKE ?'); + this.#params.push(`%${file}%`); + return this; + } + + /** WHERE n.kind = ? (no-op if falsy). */ + kindFilter(kind) { + if (!kind) return this; + this.#conditions.push('n.kind = ?'); + this.#params.push(kind); + return this; + } + + /** WHERE n.role = ? (no-op if falsy). */ + roleFilter(role) { + if (!role) return this; + this.#conditions.push('n.role = ?'); + this.#params.push(role); + return this; + } + + /** WHERE n.name LIKE ? (no-op if falsy). */ + nameLike(pattern) { + if (!pattern) return this; + this.#conditions.push('n.name LIKE ?'); + this.#params.push(`%${pattern}%`); + return this; + } + + /** Raw WHERE condition escape hatch. */ + where(sql, ...params) { + this.#conditions.push(sql); + this.#params.push(...params); + return this; + } + + /** Add fan-in LEFT JOIN subquery. */ + withFanIn(edgeKind = 'calls') { + this.#joins.push(fanInJoinSQL(edgeKind)); + return this; + } + + /** Add fan-out LEFT JOIN subquery. */ + withFanOut(edgeKind = 'calls') { + this.#joins.push(fanOutJoinSQL(edgeKind)); + return this; + } + + /** LEFT JOIN function_complexity. */ + withComplexity() { + this.#joins.push('LEFT JOIN function_complexity fc ON fc.node_id = n.id'); + return this; + } + + /** LEFT JOIN file_commit_counts. */ + withChurn() { + this.#joins.push('LEFT JOIN file_commit_counts fcc ON n.file = fcc.file'); + return this; + } + + /** Raw JOIN escape hatch. */ + join(sql) { + this.#joins.push(sql); + return this; + } + + /** ORDER BY clause. */ + orderBy(clause) { + this.#orderByClause = clause; + return this; + } + + /** LIMIT ?. */ + limit(n) { + if (n == null) return this; + this.#limitValue = n; + return this; + } + + /** Build the SQL and params without executing. */ + build() { + const joins = this.#joins.length > 0 ? `\n ${this.#joins.join('\n ')}` : ''; + const where = + this.#conditions.length > 0 ? `\n WHERE ${this.#conditions.join(' AND ')}` : ''; + const orderBy = this.#orderByClause ? `\n ORDER BY ${this.#orderByClause}` : ''; + + let limitClause = ''; + const params = [...this.#params]; + if (this.#limitValue != null) { + limitClause = '\n LIMIT ?'; + params.push(this.#limitValue); + } + + const sql = `SELECT ${this.#selectCols}\n FROM nodes n${joins}${where}${orderBy}${limitClause}`; + return { sql, params }; + } + + /** Execute and return all rows. */ + all(db) { + const { sql, params } = this.build(); + return db.prepare(sql).all(...params); + } + + /** Execute and return first row. */ + get(db) { + const { sql, params } = this.build(); + return db.prepare(sql).get(...params); + } + + /** Execute and return an iterator. */ + iterate(db) { + const { sql, params } = this.build(); + return db.prepare(sql).iterate(...params); + } +} diff --git a/src/db/repository.js b/src/db/repository.js new file mode 100644 index 00000000..6b656744 --- /dev/null +++ b/src/db/repository.js @@ -0,0 +1,125 @@ +import { NodeQuery } from './query-builder.js'; + +/** + * Find nodes matching a name pattern, with fan-in count. + * Used by findMatchingNodes in queries.js. + * + * @param {object} db - Database instance + * @param {string} namePattern - LIKE pattern (already wrapped with %) + * @param {object} [opts] + * @param {string[]} [opts.kinds] - Node kinds to match + * @param {string} [opts.file] - File filter (partial match) + * @returns {object[]} + */ +export function findNodesWithFanIn(db, namePattern, opts = {}) { + const q = new NodeQuery() + .select('n.*, COALESCE(fi.cnt, 0) AS fan_in') + .withFanIn() + .where('n.name LIKE ?', namePattern); + + if (opts.kinds) { + q.kinds(opts.kinds); + } + if (opts.file) { + q.fileFilter(opts.file); + } + + return q.all(db); +} + +/** + * Fetch nodes for triage scoring: fan-in + complexity + churn. + * Used by triageData in triage.js. + * + * @param {object} db + * @param {object} [opts] + * @returns {object[]} + */ +export function findNodesForTriage(db, opts = {}) { + const q = new NodeQuery() + .select( + `n.id, n.name, n.kind, n.file, n.line, n.end_line, n.role, + COALESCE(fi.cnt, 0) AS fan_in, + COALESCE(fc.cognitive, 0) AS cognitive, + COALESCE(fc.maintainability_index, 0) AS mi, + COALESCE(fc.cyclomatic, 0) AS cyclomatic, + COALESCE(fc.max_nesting, 0) AS max_nesting, + COALESCE(fcc.commit_count, 0) AS churn`, + ) + .kinds(['function', 'method', 'class']) + .withFanIn() + .withComplexity() + .withChurn() + .excludeTests(opts.noTests) + .fileFilter(opts.file) + .kindFilter(opts.kind) + .roleFilter(opts.role) + .orderBy('n.file, n.line'); + + return q.all(db); +} + +/** + * List function/method/class nodes with basic info. + * Used by listFunctionsData in queries.js. + * + * @param {object} db + * @param {object} [opts] + * @returns {object[]} + */ +export function listFunctionNodes(db, opts = {}) { + const q = new NodeQuery() + .select('name, kind, file, line, end_line, role') + .kinds(['function', 'method', 'class']) + .fileFilter(opts.file) + .nameLike(opts.pattern) + .orderBy('file, line'); + + return q.all(db); +} + +/** + * Iterator version of listFunctionNodes for memory efficiency. + * Used by iterListFunctions in queries.js. + * + * @param {object} db + * @param {object} [opts] + * @returns {IterableIterator} + */ +export function iterateFunctionNodes(db, opts = {}) { + const q = new NodeQuery() + .select('name, kind, file, line, end_line, role') + .kinds(['function', 'method', 'class']) + .fileFilter(opts.file) + .nameLike(opts.pattern) + .orderBy('file, line'); + + return q.iterate(db); +} + +/** + * Count total nodes. + * @param {object} db + * @returns {number} + */ +export function countNodes(db) { + return db.prepare('SELECT COUNT(*) AS cnt FROM nodes').get().cnt; +} + +/** + * Count total edges. + * @param {object} db + * @returns {number} + */ +export function countEdges(db) { + return db.prepare('SELECT COUNT(*) AS cnt FROM edges').get().cnt; +} + +/** + * Count distinct files. + * @param {object} db + * @returns {number} + */ +export function countFiles(db) { + return db.prepare('SELECT COUNT(DISTINCT file) AS cnt FROM nodes').get().cnt; +} diff --git a/src/index.js b/src/index.js index 9f51e6cf..20961a20 100644 --- a/src/index.js +++ b/src/index.js @@ -77,12 +77,24 @@ export { } from './dataflow.js'; // Database utilities export { + countEdges, + countFiles, + countNodes, + fanInJoinSQL, + fanOutJoinSQL, findDbPath, + findNodesForTriage, + findNodesWithFanIn, getBuildMeta, initSchema, + iterateFunctionNodes, + kindInClause, + listFunctionNodes, + NodeQuery, openDb, openReadonlyOrFail, setBuildMeta, + testFilterSQL, } from './db.js'; // Embeddings export { diff --git a/tests/unit/query-builder.test.js b/tests/unit/query-builder.test.js new file mode 100644 index 00000000..ade37b7b --- /dev/null +++ b/tests/unit/query-builder.test.js @@ -0,0 +1,238 @@ +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initSchema } from '../../src/db/migrations.js'; +import { + fanInJoinSQL, + fanOutJoinSQL, + kindInClause, + NodeQuery, + testFilterSQL, +} from '../../src/db/query-builder.js'; + +// ─── testFilterSQL ─────────────────────────────────────────────────── + +describe('testFilterSQL', () => { + it('returns 5 NOT LIKE conditions with default column', () => { + const sql = testFilterSQL(); + expect(sql).toContain("n.file NOT LIKE '%.test.%'"); + expect(sql).toContain("n.file NOT LIKE '%.spec.%'"); + expect(sql).toContain("n.file NOT LIKE '%__test__%'"); + expect(sql).toContain("n.file NOT LIKE '%__tests__%'"); + expect(sql).toContain("n.file NOT LIKE '%.stories.%'"); + }); + + it('uses custom column', () => { + const sql = testFilterSQL('n.name'); + expect(sql).toContain("n.name NOT LIKE '%.test.%'"); + expect(sql).not.toContain('n.file'); + }); + + it('returns empty string when disabled', () => { + expect(testFilterSQL('n.file', false)).toBe(''); + }); +}); + +// ─── kindInClause ──────────────────────────────────────────────────── + +describe('kindInClause', () => { + it('returns correct placeholders and params', () => { + const result = kindInClause(['function', 'method', 'class']); + expect(result.placeholders).toBe('?, ?, ?'); + expect(result.params).toEqual(['function', 'method', 'class']); + }); + + it('handles single kind', () => { + const result = kindInClause(['function']); + expect(result.placeholders).toBe('?'); + expect(result.params).toEqual(['function']); + }); +}); + +// ─── fanInJoinSQL / fanOutJoinSQL ──────────────────────────────────── + +describe('fanInJoinSQL', () => { + it('returns LEFT JOIN with default alias and kind', () => { + const sql = fanInJoinSQL(); + expect(sql).toContain('LEFT JOIN'); + expect(sql).toContain('target_id'); + expect(sql).toContain("kind = 'calls'"); + expect(sql).toContain('fi'); + }); + + it('accepts custom edge kind and alias', () => { + const sql = fanInJoinSQL('imports', 'imp'); + expect(sql).toContain("kind = 'imports'"); + expect(sql).toContain('imp'); + }); +}); + +describe('fanOutJoinSQL', () => { + it('returns LEFT JOIN with source_id', () => { + const sql = fanOutJoinSQL(); + expect(sql).toContain('LEFT JOIN'); + expect(sql).toContain('source_id'); + expect(sql).toContain("kind = 'calls'"); + expect(sql).toContain('fo'); + }); + + it('accepts custom edge kind and alias', () => { + const sql = fanOutJoinSQL('imports', 'imp'); + expect(sql).toContain("kind = 'imports'"); + expect(sql).toContain('imp'); + }); +}); + +// ─── NodeQuery ─────────────────────────────────────────────────────── + +describe('NodeQuery', () => { + let db; + + beforeEach(() => { + db = new Database(':memory:'); + initSchema(db); + // Seed test data + const insert = db.prepare( + 'INSERT INTO nodes (name, kind, file, line, role) VALUES (?, ?, ?, ?, ?)', + ); + insert.run('foo', 'function', 'src/foo.js', 1, 'core'); + insert.run('bar', 'method', 'src/bar.js', 10, 'utility'); + insert.run('Baz', 'class', 'src/baz.js', 20, 'entry'); + insert.run('testHelper', 'function', 'src/foo.test.js', 1, null); + insert.run('specHelper', 'function', 'src/bar.spec.js', 1, null); + + // Add an edge for fan-in + const fooId = db.prepare("SELECT id FROM nodes WHERE name = 'foo'").get().id; + const barId = db.prepare("SELECT id FROM nodes WHERE name = 'bar'").get().id; + db.prepare('INSERT INTO edges (source_id, target_id, kind) VALUES (?, ?, ?)').run( + barId, + fooId, + 'calls', + ); + }); + + afterEach(() => { + db.close(); + }); + + it('.build() returns sql and params', () => { + const { sql, params } = new NodeQuery().kinds(['function']).build(); + expect(sql).toContain('SELECT n.*'); + expect(sql).toContain('FROM nodes n'); + expect(sql).toContain('n.kind IN (?)'); + expect(params).toEqual(['function']); + }); + + it('.select() changes columns', () => { + const { sql } = new NodeQuery().select('n.name, n.kind').build(); + expect(sql).toContain('SELECT n.name, n.kind'); + }); + + it('.kinds() filters by kind', () => { + const rows = new NodeQuery().kinds(['function']).all(db); + expect(rows.every((r) => r.kind === 'function')).toBe(true); + }); + + it('.excludeTests() filters test files', () => { + const all = new NodeQuery().all(db); + const noTests = new NodeQuery().excludeTests(true).all(db); + expect(all.length).toBeGreaterThan(noTests.length); + expect(noTests.every((r) => !r.file.includes('.test.') && !r.file.includes('.spec.'))).toBe( + true, + ); + }); + + it('.excludeTests(false) is a no-op', () => { + const all = new NodeQuery().all(db); + const noOp = new NodeQuery().excludeTests(false).all(db); + expect(noOp.length).toBe(all.length); + }); + + it('.fileFilter() filters by file', () => { + const rows = new NodeQuery().fileFilter('foo').all(db); + expect(rows.every((r) => r.file.includes('foo'))).toBe(true); + }); + + it('.kindFilter() filters by exact kind', () => { + const rows = new NodeQuery().kindFilter('class').all(db); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('Baz'); + }); + + it('.roleFilter() filters by role', () => { + const rows = new NodeQuery().roleFilter('core').all(db); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('foo'); + }); + + it('.nameLike() filters by name pattern', () => { + const rows = new NodeQuery().nameLike('ba').all(db); + expect(rows.every((r) => r.name.toLowerCase().includes('ba'))).toBe(true); + }); + + it('.where() adds raw condition', () => { + const rows = new NodeQuery().where('n.line > ?', 5).all(db); + expect(rows.every((r) => r.line > 5)).toBe(true); + }); + + it('.withFanIn() adds fan-in join', () => { + const rows = new NodeQuery() + .select('n.name, COALESCE(fi.cnt, 0) AS fan_in') + .withFanIn() + .where("n.name = 'foo'") + .all(db); + expect(rows[0].fan_in).toBe(1); + }); + + it('.withComplexity() adds complexity join', () => { + const { sql } = new NodeQuery().withComplexity().build(); + expect(sql).toContain('function_complexity'); + }); + + it('.withChurn() adds churn join', () => { + const { sql } = new NodeQuery().withChurn().build(); + expect(sql).toContain('file_commit_counts'); + }); + + it('.join() adds raw join', () => { + const { sql } = new NodeQuery().join('JOIN node_metrics nm ON n.id = nm.node_id').build(); + expect(sql).toContain('JOIN node_metrics nm ON n.id = nm.node_id'); + }); + + it('.orderBy() adds ORDER BY', () => { + const { sql } = new NodeQuery().orderBy('n.file, n.line').build(); + expect(sql).toContain('ORDER BY n.file, n.line'); + }); + + it('.limit() adds LIMIT param', () => { + const { sql, params } = new NodeQuery().limit(10).build(); + expect(sql).toContain('LIMIT ?'); + expect(params).toContain(10); + }); + + it('.get() returns first row', () => { + const row = new NodeQuery().where("n.name = 'foo'").get(db); + expect(row.name).toBe('foo'); + }); + + it('.iterate() returns an iterator', () => { + const iter = new NodeQuery().kinds(['function']).excludeTests(true).iterate(db); + const rows = [...iter]; + expect(rows.length).toBeGreaterThan(0); + expect(rows.every((r) => r.kind === 'function')).toBe(true); + }); + + it('chaining composes multiple conditions with AND', () => { + const { sql, params } = new NodeQuery() + .kinds(['function', 'method']) + .fileFilter('src') + .roleFilter('core') + .build(); + expect(sql).toContain('n.kind IN (?, ?)'); + expect(sql).toContain('n.file LIKE ?'); + expect(sql).toContain('n.role = ?'); + // All connected with AND + const whereClause = sql.split('WHERE')[1]; + expect(whereClause.match(/AND/g).length).toBe(2); + expect(params).toEqual(['function', 'method', '%src%', 'core']); + }); +}); diff --git a/tests/unit/repository.test.js b/tests/unit/repository.test.js new file mode 100644 index 00000000..9adda662 --- /dev/null +++ b/tests/unit/repository.test.js @@ -0,0 +1,163 @@ +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initSchema } from '../../src/db/migrations.js'; +import { + countEdges, + countFiles, + countNodes, + findNodesForTriage, + findNodesWithFanIn, + iterateFunctionNodes, + listFunctionNodes, +} from '../../src/db/repository.js'; + +describe('repository', () => { + let db; + + beforeEach(() => { + db = new Database(':memory:'); + initSchema(db); + + const insertNode = db.prepare( + 'INSERT INTO nodes (name, kind, file, line, role) VALUES (?, ?, ?, ?, ?)', + ); + insertNode.run('foo', 'function', 'src/foo.js', 1, 'core'); + insertNode.run('bar', 'method', 'src/bar.js', 10, 'utility'); + insertNode.run('Baz', 'class', 'src/baz.js', 20, 'entry'); + insertNode.run('qux', 'interface', 'src/qux.js', 30, null); + insertNode.run('testFn', 'function', 'tests/foo.test.js', 1, null); + + // Edges + const fooId = db.prepare("SELECT id FROM nodes WHERE name = 'foo'").get().id; + const barId = db.prepare("SELECT id FROM nodes WHERE name = 'bar'").get().id; + const bazId = db.prepare("SELECT id FROM nodes WHERE name = 'Baz'").get().id; + db.prepare('INSERT INTO edges (source_id, target_id, kind) VALUES (?, ?, ?)').run( + barId, + fooId, + 'calls', + ); + db.prepare('INSERT INTO edges (source_id, target_id, kind) VALUES (?, ?, ?)').run( + bazId, + fooId, + 'calls', + ); + + // Complexity + db.prepare( + 'INSERT INTO function_complexity (node_id, cognitive, cyclomatic, max_nesting) VALUES (?, ?, ?, ?)', + ).run(fooId, 5, 3, 2); + }); + + afterEach(() => { + db.close(); + }); + + describe('findNodesWithFanIn', () => { + it('returns nodes with fan-in count', () => { + const rows = findNodesWithFanIn(db, '%foo%'); + const foo = rows.find((r) => r.name === 'foo'); + expect(foo).toBeDefined(); + expect(foo.fan_in).toBe(2); + }); + + it('filters by kinds', () => { + const rows = findNodesWithFanIn(db, '%foo%', { kinds: ['method'] }); + expect(rows.length).toBe(0); + }); + + it('filters by file', () => { + const rows = findNodesWithFanIn(db, '%foo%', { file: 'src' }); + expect(rows.every((r) => r.file.includes('src'))).toBe(true); + }); + }); + + describe('findNodesForTriage', () => { + it('returns function/method/class nodes with signals', () => { + const rows = findNodesForTriage(db); + expect(rows.length).toBe(4); // foo, bar, Baz, testFn + const foo = rows.find((r) => r.name === 'foo'); + expect(foo.fan_in).toBe(2); + expect(foo.cognitive).toBe(5); + }); + + it('excludes test files when noTests is set', () => { + const rows = findNodesForTriage(db, { noTests: true }); + expect(rows.every((r) => !r.file.includes('.test.'))).toBe(true); + }); + + it('filters by kind', () => { + const rows = findNodesForTriage(db, { kind: 'class' }); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('Baz'); + }); + + it('filters by role', () => { + const rows = findNodesForTriage(db, { role: 'core' }); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('foo'); + }); + + it('filters by file', () => { + const rows = findNodesForTriage(db, { file: 'bar' }); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('bar'); + }); + }); + + describe('listFunctionNodes', () => { + it('returns function/method/class nodes', () => { + const rows = listFunctionNodes(db); + expect(rows.length).toBe(4); // foo, bar, Baz, testFn + expect(rows.every((r) => ['function', 'method', 'class'].includes(r.kind))).toBe(true); + }); + + it('filters by file', () => { + const rows = listFunctionNodes(db, { file: 'foo' }); + expect(rows.every((r) => r.file.includes('foo'))).toBe(true); + }); + + it('filters by pattern', () => { + const rows = listFunctionNodes(db, { pattern: 'Baz' }); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('Baz'); + }); + + it('orders by file, line', () => { + const rows = listFunctionNodes(db); + for (let i = 1; i < rows.length; i++) { + const prev = `${rows[i - 1].file}:${String(rows[i - 1].line).padStart(6, '0')}`; + const curr = `${rows[i].file}:${String(rows[i].line).padStart(6, '0')}`; + expect(prev <= curr).toBe(true); + } + }); + }); + + describe('iterateFunctionNodes', () => { + it('returns an iterator over function nodes', () => { + const iter = iterateFunctionNodes(db); + const rows = [...iter]; + expect(rows.length).toBe(4); + expect(rows.every((r) => ['function', 'method', 'class'].includes(r.kind))).toBe(true); + }); + + it('filters by file and pattern', () => { + const rows = [...iterateFunctionNodes(db, { file: 'foo', pattern: 'foo' })]; + expect(rows.length).toBeGreaterThan(0); + expect(rows.every((r) => r.file.includes('foo'))).toBe(true); + }); + }); + + describe('countNodes / countEdges / countFiles', () => { + it('countNodes returns total', () => { + expect(countNodes(db)).toBe(5); + }); + + it('countEdges returns total', () => { + expect(countEdges(db)).toBe(2); + }); + + it('countFiles returns distinct file count', () => { + expect(countFiles(db)).toBe(5); + }); + }); +}); From 16d13a5fcb2d32d31733416713b0e81bc6928276 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 04:29:57 -0600 Subject: [PATCH 03/10] refactor: migrate consumers to query builder and repository Replace inline SQL clause building with query-builder helpers and repository functions in 7 call sites across 3 files: - triage.js: triageData() uses findNodesForTriage() - queries.js: findMatchingNodes() uses findNodesWithFanIn(), listFunctionsData() uses listFunctionNodes(), iterListFunctions() uses iterateFunctionNodes(), moduleMapData() and statsData() use testFilterSQL() - structure.js: hotspotsData() uses testFilterSQL() Net reduction of ~100 lines of duplicated WHERE/JOIN patterns. Impact: 6 functions changed, 25 affected --- src/queries.js | 83 ++++++++---------------------------------------- src/structure.js | 11 ++----- src/triage.js | 50 ++++------------------------- 3 files changed, 22 insertions(+), 122 deletions(-) diff --git a/src/queries.js b/src/queries.js index 24d53e32..7d4aa03d 100644 --- a/src/queries.js +++ b/src/queries.js @@ -5,7 +5,14 @@ import { evaluateBoundaries } from './boundaries.js'; import { coChangeForFiles } from './cochange.js'; import { loadConfig } from './config.js'; import { findCycles } from './cycles.js'; -import { findDbPath, openReadonlyOrFail } from './db.js'; +import { + findDbPath, + findNodesWithFanIn, + iterateFunctionNodes, + listFunctionNodes, + openReadonlyOrFail, + testFilterSQL, +} from './db.js'; import { debug } from './logger.js'; import { ownersForFiles } from './owners.js'; import { paginateResult, printNdjson } from './paginate.js'; @@ -165,25 +172,8 @@ function resolveMethodViaHierarchy(db, methodName) { */ export function findMatchingNodes(db, name, opts = {}) { const kinds = opts.kind ? [opts.kind] : FUNCTION_KINDS; - const placeholders = kinds.map(() => '?').join(', '); - const params = [`%${name}%`, ...kinds]; - let fileCondition = ''; - if (opts.file) { - fileCondition = ' AND n.file LIKE ?'; - params.push(`%${opts.file}%`); - } - - const rows = db - .prepare(` - SELECT n.*, COALESCE(fi.cnt, 0) AS fan_in - FROM nodes n - LEFT JOIN ( - SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id - ) fi ON fi.target_id = n.id - WHERE n.name LIKE ? AND n.kind IN (${placeholders})${fileCondition} - `) - .all(...params); + const rows = findNodesWithFanIn(db, `%${name}%`, { kinds, file: opts.file }); const nodes = opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; @@ -355,13 +345,7 @@ export function moduleMapData(customDbPath, limit = 20, opts = {}) { const db = openReadonlyOrFail(customDbPath); const noTests = opts.noTests || false; - const testFilter = noTests - ? `AND n.file NOT LIKE '%.test.%' - AND n.file NOT LIKE '%.spec.%' - AND n.file NOT LIKE '%__test__%' - AND n.file NOT LIKE '%__tests__%' - AND n.file NOT LIKE '%.stories.%'` - : ''; + const testFilter = testFilterSQL('n.file', noTests); const nodes = db .prepare(` @@ -1212,26 +1196,8 @@ export function diffImpactMermaid(customDbPath, opts = {}) { export function listFunctionsData(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); const noTests = opts.noTests || false; - const kinds = ['function', 'method', 'class']; - const placeholders = kinds.map(() => '?').join(', '); - const conditions = [`kind IN (${placeholders})`]; - const params = [...kinds]; - - if (opts.file) { - conditions.push('file LIKE ?'); - params.push(`%${opts.file}%`); - } - if (opts.pattern) { - conditions.push('name LIKE ?'); - params.push(`%${opts.pattern}%`); - } - - let rows = db - .prepare( - `SELECT name, kind, file, line, end_line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, - ) - .all(...params); + let rows = listFunctionNodes(db, { file: opts.file, pattern: opts.pattern }); if (noTests) rows = rows.filter((r) => !isTestFile(r.file)); @@ -1255,25 +1221,8 @@ export function* iterListFunctions(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { const noTests = opts.noTests || false; - const kinds = ['function', 'method', 'class']; - const placeholders = kinds.map(() => '?').join(', '); - - const conditions = [`kind IN (${placeholders})`]; - const params = [...kinds]; - - if (opts.file) { - conditions.push('file LIKE ?'); - params.push(`%${opts.file}%`); - } - if (opts.pattern) { - conditions.push('name LIKE ?'); - params.push(`%${opts.pattern}%`); - } - const stmt = db.prepare( - `SELECT name, kind, file, line, end_line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, - ); - for (const row of stmt.iterate(...params)) { + for (const row of iterateFunctionNodes(db, { file: opts.file, pattern: opts.pattern })) { if (noTests && isTestFile(row.file)) continue; yield { name: row.name, @@ -1464,13 +1413,7 @@ export function statsData(customDbPath, opts = {}) { const fnCycles = findCycles(db, { fileLevel: false, noTests }); // Top 5 coupling hotspots (fan-in + fan-out, file nodes) - const testFilter = noTests - ? `AND n.file NOT LIKE '%.test.%' - AND n.file NOT LIKE '%.spec.%' - AND n.file NOT LIKE '%__test__%' - AND n.file NOT LIKE '%__tests__%' - AND n.file NOT LIKE '%.stories.%'` - : ''; + const testFilter = testFilterSQL('n.file', noTests); const hotspotRows = db .prepare(` SELECT n.file, diff --git a/src/structure.js b/src/structure.js index f83445bd..fc085601 100644 --- a/src/structure.js +++ b/src/structure.js @@ -1,6 +1,6 @@ import path from 'node:path'; import { normalizePath } from './constants.js'; -import { openReadonlyOrFail } from './db.js'; +import { openReadonlyOrFail, testFilterSQL } from './db.js'; import { debug } from './logger.js'; import { paginateResult } from './paginate.js'; import { isTestFile } from './queries.js'; @@ -536,14 +536,7 @@ export function hotspotsData(customDbPath, opts = {}) { const kind = level === 'directory' ? 'directory' : 'file'; - const testFilter = - noTests && kind === 'file' - ? `AND n.name NOT LIKE '%.test.%' - AND n.name NOT LIKE '%.spec.%' - AND n.name NOT LIKE '%__test__%' - AND n.name NOT LIKE '%__tests__%' - AND n.name NOT LIKE '%.stories.%'` - : ''; + const testFilter = testFilterSQL('n.name', noTests && kind === 'file'); const HOTSPOT_QUERIES = { 'fan-in': db.prepare(` diff --git a/src/triage.js b/src/triage.js index 193f9493..05ae67e4 100644 --- a/src/triage.js +++ b/src/triage.js @@ -1,4 +1,4 @@ -import { openReadonlyOrFail } from './db.js'; +import { findNodesForTriage, openReadonlyOrFail } from './db.js'; import { warn } from './logger.js'; import { paginateResult, printNdjson } from './paginate.js'; import { isTestFile } from './queries.js'; @@ -54,50 +54,14 @@ export function triageData(customDbPath, opts = {}) { const sort = opts.sort || 'risk'; const weights = { ...DEFAULT_WEIGHTS, ...(opts.weights || {}) }; - // Build WHERE clause - let where = "WHERE n.kind IN ('function','method','class')"; - const params = []; - - if (noTests) { - where += ` AND n.file NOT LIKE '%.test.%' - AND n.file NOT LIKE '%.spec.%' - AND n.file NOT LIKE '%__test__%' - AND n.file NOT LIKE '%__tests__%' - AND n.file NOT LIKE '%.stories.%'`; - } - if (fileFilter) { - where += ' AND n.file LIKE ?'; - params.push(`%${fileFilter}%`); - } - if (kindFilter) { - where += ' AND n.kind = ?'; - params.push(kindFilter); - } - if (roleFilter) { - where += ' AND n.role = ?'; - params.push(roleFilter); - } - let rows; try { - rows = db - .prepare( - `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line, n.role, - COALESCE(fi.cnt, 0) AS fan_in, - COALESCE(fc.cognitive, 0) AS cognitive, - COALESCE(fc.maintainability_index, 0) AS mi, - COALESCE(fc.cyclomatic, 0) AS cyclomatic, - COALESCE(fc.max_nesting, 0) AS max_nesting, - COALESCE(fcc.commit_count, 0) AS churn - FROM nodes n - LEFT JOIN (SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind='calls' GROUP BY target_id) fi - ON n.id = fi.target_id - LEFT JOIN function_complexity fc ON fc.node_id = n.id - LEFT JOIN file_commit_counts fcc ON n.file = fcc.file - ${where} - ORDER BY n.file, n.line`, - ) - .all(...params); + rows = findNodesForTriage(db, { + noTests, + file: fileFilter, + kind: kindFilter, + role: roleFilter, + }); } catch (err) { warn(`triage query failed: ${err.message}`); db.close(); From 472c385f4232d9e87bac4f97fa2a62e1a9295778 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 19:38:46 -0600 Subject: [PATCH 04/10] =?UTF-8?q?fix:=20address=20review=20=E2=80=94=20SQL?= =?UTF-8?q?=20injection=20guard,=20triage=20kind=20filter,=20dedup=20node?= =?UTF-8?q?=20query?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: 8 functions changed, 6 affected --- src/db/query-builder.js | 22 ++++++++++++++++++++++ src/db/repository.js | 41 +++++++++++++++++++++++------------------ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/src/db/query-builder.js b/src/db/query-builder.js index eaef2b97..cc9e1f3f 100644 --- a/src/db/query-builder.js +++ b/src/db/query-builder.js @@ -1,3 +1,21 @@ +import { EVERY_EDGE_KIND } from '../queries.js'; + +// ─── Validation Helpers ───────────────────────────────────────────── + +const SAFE_ALIAS_RE = /^[a-z_][a-z0-9_]*$/i; + +function validateAlias(alias) { + if (!SAFE_ALIAS_RE.test(alias)) { + throw new Error(`Invalid SQL alias: ${alias}`); + } +} + +function validateEdgeKind(edgeKind) { + if (!EVERY_EDGE_KIND.includes(edgeKind)) { + throw new Error(`Invalid edge kind: ${edgeKind} (expected one of ${EVERY_EDGE_KIND.join(', ')})`); + } +} + // ─── Standalone Helpers ────────────────────────────────────────────── /** @@ -33,6 +51,8 @@ export function kindInClause(kinds) { * @param {string} [alias='fi'] - Subquery alias */ export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi') { + validateEdgeKind(edgeKind); + validateAlias(alias); return `LEFT JOIN ( SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = '${edgeKind}' GROUP BY target_id ) ${alias} ON ${alias}.target_id = n.id`; @@ -44,6 +64,8 @@ export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi') { * @param {string} [alias='fo'] - Subquery alias */ export function fanOutJoinSQL(edgeKind = 'calls', alias = 'fo') { + validateEdgeKind(edgeKind); + validateAlias(alias); return `LEFT JOIN ( SELECT source_id, COUNT(*) AS cnt FROM edges WHERE kind = '${edgeKind}' GROUP BY source_id ) ${alias} ON ${alias}.source_id = n.id`; diff --git a/src/db/repository.js b/src/db/repository.js index 6b656744..0cc0499c 100644 --- a/src/db/repository.js +++ b/src/db/repository.js @@ -1,4 +1,5 @@ import { NodeQuery } from './query-builder.js'; +import { EVERY_SYMBOL_KIND } from '../queries.js'; /** * Find nodes matching a name pattern, with fan-in count. @@ -36,6 +37,11 @@ export function findNodesWithFanIn(db, namePattern, opts = {}) { * @returns {object[]} */ export function findNodesForTriage(db, opts = {}) { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + throw new Error(`Invalid kind: ${opts.kind} (expected one of ${EVERY_SYMBOL_KIND.join(', ')})`); + } + + const kindsToUse = opts.kind ? [opts.kind] : ['function', 'method', 'class']; const q = new NodeQuery() .select( `n.id, n.name, n.kind, n.file, n.line, n.end_line, n.role, @@ -46,13 +52,12 @@ export function findNodesForTriage(db, opts = {}) { COALESCE(fc.max_nesting, 0) AS max_nesting, COALESCE(fcc.commit_count, 0) AS churn`, ) - .kinds(['function', 'method', 'class']) + .kinds(kindsToUse) .withFanIn() .withComplexity() .withChurn() .excludeTests(opts.noTests) .fileFilter(opts.file) - .kindFilter(opts.kind) .roleFilter(opts.role) .orderBy('n.file, n.line'); @@ -60,22 +65,29 @@ export function findNodesForTriage(db, opts = {}) { } /** - * List function/method/class nodes with basic info. - * Used by listFunctionsData in queries.js. - * - * @param {object} db + * Shared query builder for function/method/class node listing. * @param {object} [opts] - * @returns {object[]} + * @returns {NodeQuery} */ -export function listFunctionNodes(db, opts = {}) { - const q = new NodeQuery() +function _functionNodeQuery(opts = {}) { + return new NodeQuery() .select('name, kind, file, line, end_line, role') .kinds(['function', 'method', 'class']) .fileFilter(opts.file) .nameLike(opts.pattern) .orderBy('file, line'); +} - return q.all(db); +/** + * List function/method/class nodes with basic info. + * Used by listFunctionsData in queries.js. + * + * @param {object} db + * @param {object} [opts] + * @returns {object[]} + */ +export function listFunctionNodes(db, opts = {}) { + return _functionNodeQuery(opts).all(db); } /** @@ -87,14 +99,7 @@ export function listFunctionNodes(db, opts = {}) { * @returns {IterableIterator} */ export function iterateFunctionNodes(db, opts = {}) { - const q = new NodeQuery() - .select('name, kind, file, line, end_line, role') - .kinds(['function', 'method', 'class']) - .fileFilter(opts.file) - .nameLike(opts.pattern) - .orderBy('file, line'); - - return q.iterate(db); + return _functionNodeQuery(opts).iterate(db); } /** From b80a3bcdc4faa9ad345020d3a13d25d9b26b75e0 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 19:39:09 -0600 Subject: [PATCH 05/10] style: fix formatting and import order for lint Impact: 1 functions changed, 4 affected --- src/db/query-builder.js | 4 +++- src/db/repository.js | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/db/query-builder.js b/src/db/query-builder.js index cc9e1f3f..203f927d 100644 --- a/src/db/query-builder.js +++ b/src/db/query-builder.js @@ -12,7 +12,9 @@ function validateAlias(alias) { function validateEdgeKind(edgeKind) { if (!EVERY_EDGE_KIND.includes(edgeKind)) { - throw new Error(`Invalid edge kind: ${edgeKind} (expected one of ${EVERY_EDGE_KIND.join(', ')})`); + throw new Error( + `Invalid edge kind: ${edgeKind} (expected one of ${EVERY_EDGE_KIND.join(', ')})`, + ); } } diff --git a/src/db/repository.js b/src/db/repository.js index 0cc0499c..2ea1e0d7 100644 --- a/src/db/repository.js +++ b/src/db/repository.js @@ -1,5 +1,5 @@ -import { NodeQuery } from './query-builder.js'; import { EVERY_SYMBOL_KIND } from '../queries.js'; +import { NodeQuery } from './query-builder.js'; /** * Find nodes matching a name pattern, with fan-in count. From f744150ae2bc0bbaf468afc819b2b42d7921cd98 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 19:51:11 -0600 Subject: [PATCH 06/10] fix: validate column parameter in testFilterSQL to prevent SQL injection Impact: 2 functions changed, 1 affected --- src/db/query-builder.js | 8 ++++++++ tests/unit/query-builder.test.js | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/src/db/query-builder.js b/src/db/query-builder.js index 203f927d..d12368ba 100644 --- a/src/db/query-builder.js +++ b/src/db/query-builder.js @@ -3,6 +3,7 @@ import { EVERY_EDGE_KIND } from '../queries.js'; // ─── Validation Helpers ───────────────────────────────────────────── const SAFE_ALIAS_RE = /^[a-z_][a-z0-9_]*$/i; +const SAFE_COLUMN_RE = /^[a-z_][a-z0-9_]*(?:\.[a-z_][a-z0-9_]*)?$/i; function validateAlias(alias) { if (!SAFE_ALIAS_RE.test(alias)) { @@ -10,6 +11,12 @@ function validateAlias(alias) { } } +function validateColumn(column) { + if (!SAFE_COLUMN_RE.test(column)) { + throw new Error(`Invalid SQL column: ${column}`); + } +} + function validateEdgeKind(edgeKind) { if (!EVERY_EDGE_KIND.includes(edgeKind)) { throw new Error( @@ -28,6 +35,7 @@ function validateEdgeKind(edgeKind) { */ export function testFilterSQL(column = 'n.file', enabled = true) { if (!enabled) return ''; + validateColumn(column); return `AND ${column} NOT LIKE '%.test.%' AND ${column} NOT LIKE '%.spec.%' AND ${column} NOT LIKE '%__test__%' diff --git a/tests/unit/query-builder.test.js b/tests/unit/query-builder.test.js index ade37b7b..1da9de13 100644 --- a/tests/unit/query-builder.test.js +++ b/tests/unit/query-builder.test.js @@ -30,6 +30,11 @@ describe('testFilterSQL', () => { it('returns empty string when disabled', () => { expect(testFilterSQL('n.file', false)).toBe(''); }); + + it('rejects malicious column names', () => { + expect(() => testFilterSQL("1; DROP TABLE nodes --")).toThrow('Invalid SQL column'); + expect(() => testFilterSQL("n.file OR 1=1 --")).toThrow('Invalid SQL column'); + }); }); // ─── kindInClause ──────────────────────────────────────────────────── From 9001ae342dc16d2bb584caa156497a7f2c3db981 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 19:51:24 -0600 Subject: [PATCH 07/10] style: fix quote style in query-builder test --- tests/unit/query-builder.test.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/query-builder.test.js b/tests/unit/query-builder.test.js index 1da9de13..5c24140b 100644 --- a/tests/unit/query-builder.test.js +++ b/tests/unit/query-builder.test.js @@ -32,8 +32,8 @@ describe('testFilterSQL', () => { }); it('rejects malicious column names', () => { - expect(() => testFilterSQL("1; DROP TABLE nodes --")).toThrow('Invalid SQL column'); - expect(() => testFilterSQL("n.file OR 1=1 --")).toThrow('Invalid SQL column'); + expect(() => testFilterSQL('1; DROP TABLE nodes --')).toThrow('Invalid SQL column'); + expect(() => testFilterSQL('n.file OR 1=1 --')).toThrow('Invalid SQL column'); }); }); From 131cde6ed2d2b08b3e0df8c19515050cbfca0510 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 20:15:02 -0600 Subject: [PATCH 08/10] fix: break circular dep, validate orderBy/select in NodeQuery Extract kind/edge constants to src/kinds.js so db/ modules no longer import from queries.js (which itself imports from db.js), breaking the circular module dependency. Add input validation to NodeQuery.orderBy() and .select() methods to prevent SQL injection through unvalidated string interpolation. Impact: 6 functions changed, 3 affected --- src/db/query-builder.js | 43 ++++++++++++++++++++++- src/db/repository.js | 2 +- src/kinds.js | 49 ++++++++++++++++++++++++++ src/queries.js | 60 +++++++------------------------- tests/unit/query-builder.test.js | 28 +++++++++++++++ 5 files changed, 132 insertions(+), 50 deletions(-) create mode 100644 src/kinds.js diff --git a/src/db/query-builder.js b/src/db/query-builder.js index d12368ba..321643cf 100644 --- a/src/db/query-builder.js +++ b/src/db/query-builder.js @@ -1,9 +1,14 @@ -import { EVERY_EDGE_KIND } from '../queries.js'; +import { EVERY_EDGE_KIND } from '../kinds.js'; // ─── Validation Helpers ───────────────────────────────────────────── const SAFE_ALIAS_RE = /^[a-z_][a-z0-9_]*$/i; const SAFE_COLUMN_RE = /^[a-z_][a-z0-9_]*(?:\.[a-z_][a-z0-9_]*)?$/i; +// Matches: column, table.column, column ASC, table.column DESC +const SAFE_ORDER_TERM_RE = /^[a-z_][a-z0-9_]*(?:\.[a-z_][a-z0-9_]*)?\s*(?:asc|desc)?$/i; +// Matches safe SELECT expressions: column refs, *, table.*, COALESCE(...) AS alias +const SAFE_SELECT_TOKEN_RE = + /^(?:[a-z_][a-z0-9_]*(?:\.[a-z_*][a-z0-9_]*)?\s*(?:as\s+[a-z_][a-z0-9_]*)?|[a-z_]+\([^)]*\)\s*(?:as\s+[a-z_][a-z0-9_]*)?)$/i; function validateAlias(alias) { if (!SAFE_ALIAS_RE.test(alias)) { @@ -17,6 +22,40 @@ function validateColumn(column) { } } +function validateOrderBy(clause) { + const terms = clause.split(',').map((t) => t.trim()); + for (const term of terms) { + if (!SAFE_ORDER_TERM_RE.test(term)) { + throw new Error(`Invalid ORDER BY term: ${term}`); + } + } +} + +function splitTopLevelCommas(str) { + const parts = []; + let depth = 0; + let start = 0; + for (let i = 0; i < str.length; i++) { + if (str[i] === '(') depth++; + else if (str[i] === ')') depth--; + else if (str[i] === ',' && depth === 0) { + parts.push(str.slice(start, i).trim()); + start = i + 1; + } + } + parts.push(str.slice(start).trim()); + return parts; +} + +function validateSelectCols(cols) { + const tokens = splitTopLevelCommas(cols); + for (const token of tokens) { + if (!SAFE_SELECT_TOKEN_RE.test(token)) { + throw new Error(`Invalid SELECT expression: ${token}`); + } + } +} + function validateEdgeKind(edgeKind) { if (!EVERY_EDGE_KIND.includes(edgeKind)) { throw new Error( @@ -97,6 +136,7 @@ export class NodeQuery { /** Set SELECT columns (default: `n.*`). */ select(cols) { + validateSelectCols(cols); this.#selectCols = cols; return this; } @@ -194,6 +234,7 @@ export class NodeQuery { /** ORDER BY clause. */ orderBy(clause) { + validateOrderBy(clause); this.#orderByClause = clause; return this; } diff --git a/src/db/repository.js b/src/db/repository.js index 2ea1e0d7..e752e8ee 100644 --- a/src/db/repository.js +++ b/src/db/repository.js @@ -1,4 +1,4 @@ -import { EVERY_SYMBOL_KIND } from '../queries.js'; +import { EVERY_SYMBOL_KIND } from '../kinds.js'; import { NodeQuery } from './query-builder.js'; /** diff --git a/src/kinds.js b/src/kinds.js new file mode 100644 index 00000000..60d363fc --- /dev/null +++ b/src/kinds.js @@ -0,0 +1,49 @@ +// ── Symbol kind constants ─────────────────────────────────────────── +// Original 10 kinds — used as default query scope +export const CORE_SYMBOL_KINDS = [ + 'function', + 'method', + 'class', + 'interface', + 'type', + 'struct', + 'enum', + 'trait', + 'record', + 'module', +]; + +// Sub-declaration kinds (Phase 1) +export const EXTENDED_SYMBOL_KINDS = [ + 'parameter', + 'property', + 'constant', + // Phase 2 (reserved, not yet extracted): + // 'constructor', 'namespace', 'decorator', 'getter', 'setter', +]; + +// Full set for --kind validation and MCP enum +export const EVERY_SYMBOL_KIND = [...CORE_SYMBOL_KINDS, ...EXTENDED_SYMBOL_KINDS]; + +// Backward compat: ALL_SYMBOL_KINDS stays as the core 10 +export const ALL_SYMBOL_KINDS = CORE_SYMBOL_KINDS; + +// ── Edge kind constants ───────────────────────────────────────────── +// Core edge kinds — coupling and dependency relationships +export const CORE_EDGE_KINDS = [ + 'imports', + 'imports-type', + 'reexports', + 'calls', + 'extends', + 'implements', + 'contains', +]; + +// Structural edge kinds — parent/child and type relationships +export const STRUCTURAL_EDGE_KINDS = ['parameter_of', 'receiver']; + +// Full set for MCP enum and validation +export const EVERY_EDGE_KIND = [...CORE_EDGE_KINDS, ...STRUCTURAL_EDGE_KINDS]; + +export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; diff --git a/src/queries.js b/src/queries.js index 7d4aa03d..2a68b55c 100644 --- a/src/queries.js +++ b/src/queries.js @@ -13,6 +13,7 @@ import { openReadonlyOrFail, testFilterSQL, } from './db.js'; +import { ALL_SYMBOL_KINDS } from './kinds.js'; import { debug } from './logger.js'; import { ownersForFiles } from './owners.js'; import { paginateResult, printNdjson } from './paginate.js'; @@ -67,54 +68,17 @@ export const FALSE_POSITIVE_CALLER_THRESHOLD = 20; const FUNCTION_KINDS = ['function', 'method', 'class']; -// Original 10 kinds — used as default query scope -export const CORE_SYMBOL_KINDS = [ - 'function', - 'method', - 'class', - 'interface', - 'type', - 'struct', - 'enum', - 'trait', - 'record', - 'module', -]; - -// Sub-declaration kinds (Phase 1) -export const EXTENDED_SYMBOL_KINDS = [ - 'parameter', - 'property', - 'constant', - // Phase 2 (reserved, not yet extracted): - // 'constructor', 'namespace', 'decorator', 'getter', 'setter', -]; - -// Full set for --kind validation and MCP enum -export const EVERY_SYMBOL_KIND = [...CORE_SYMBOL_KINDS, ...EXTENDED_SYMBOL_KINDS]; - -// Backward compat: ALL_SYMBOL_KINDS stays as the core 10 -export const ALL_SYMBOL_KINDS = CORE_SYMBOL_KINDS; - -// ── Edge kind constants ───────────────────────────────────────────── -// Core edge kinds — coupling and dependency relationships -export const CORE_EDGE_KINDS = [ - 'imports', - 'imports-type', - 'reexports', - 'calls', - 'extends', - 'implements', - 'contains', -]; - -// Structural edge kinds — parent/child and type relationships -export const STRUCTURAL_EDGE_KINDS = ['parameter_of', 'receiver']; - -// Full set for MCP enum and validation -export const EVERY_EDGE_KIND = [...CORE_EDGE_KINDS, ...STRUCTURAL_EDGE_KINDS]; - -export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; +// Re-export kind/edge constants from kinds.js (canonical source) +export { + ALL_SYMBOL_KINDS, + CORE_EDGE_KINDS, + CORE_SYMBOL_KINDS, + EVERY_EDGE_KIND, + EVERY_SYMBOL_KIND, + EXTENDED_SYMBOL_KINDS, + STRUCTURAL_EDGE_KINDS, + VALID_ROLES, +} from './kinds.js'; /** * Get all ancestor class names for a given class using extends edges. diff --git a/tests/unit/query-builder.test.js b/tests/unit/query-builder.test.js index 5c24140b..7edbe320 100644 --- a/tests/unit/query-builder.test.js +++ b/tests/unit/query-builder.test.js @@ -208,6 +208,34 @@ describe('NodeQuery', () => { expect(sql).toContain('ORDER BY n.file, n.line'); }); + it('.orderBy() accepts ASC/DESC modifiers', () => { + const { sql } = new NodeQuery().orderBy('n.file ASC, n.line DESC').build(); + expect(sql).toContain('ORDER BY n.file ASC, n.line DESC'); + }); + + it('.orderBy() rejects SQL injection', () => { + expect(() => new NodeQuery().orderBy('n.file; DROP TABLE nodes --')).toThrow( + 'Invalid ORDER BY term', + ); + expect(() => new NodeQuery().orderBy('1=1 --')).toThrow('Invalid ORDER BY term'); + }); + + it('.select() rejects SQL injection', () => { + expect(() => new NodeQuery().select('*; DROP TABLE nodes --')).toThrow( + 'Invalid SELECT expression', + ); + expect(() => new NodeQuery().select('1 UNION SELECT * FROM edges')).toThrow( + 'Invalid SELECT expression', + ); + }); + + it('.select() accepts COALESCE expressions', () => { + const { sql } = new NodeQuery() + .select('n.name, COALESCE(fi.cnt, 0) AS fan_in') + .build(); + expect(sql).toContain('SELECT n.name, COALESCE(fi.cnt, 0) AS fan_in'); + }); + it('.limit() adds LIMIT param', () => { const { sql, params } = new NodeQuery().limit(10).build(); expect(sql).toContain('LIMIT ?'); From 633a2a21e75e0f5b12eaafc0831dbba9ea3a3224 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 20:15:22 -0600 Subject: [PATCH 09/10] style: fix formatting in query-builder test --- tests/unit/query-builder.test.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/unit/query-builder.test.js b/tests/unit/query-builder.test.js index 7edbe320..f98f0f0d 100644 --- a/tests/unit/query-builder.test.js +++ b/tests/unit/query-builder.test.js @@ -230,9 +230,7 @@ describe('NodeQuery', () => { }); it('.select() accepts COALESCE expressions', () => { - const { sql } = new NodeQuery() - .select('n.name, COALESCE(fi.cnt, 0) AS fan_in') - .build(); + const { sql } = new NodeQuery().select('n.name, COALESCE(fi.cnt, 0) AS fan_in').build(); expect(sql).toContain('SELECT n.name, COALESCE(fi.cnt, 0) AS fan_in'); }); From 9c9ab51c18384d05e170c1834450bb71a3c1827a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 8 Mar 2026 20:28:03 -0600 Subject: [PATCH 10/10] =?UTF-8?q?fix:=20address=20round-5=20review=20?= =?UTF-8?q?=E2=80=94=20private=20join(),=20role=20validation,=20noTests=20?= =?UTF-8?q?in=20=5FfunctionNodeQuery?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename NodeQuery.join() to _join() to remove raw SQL injection from public API - Add opts.role validation against VALID_ROLES in findNodesForTriage (mirrors opts.kind) - Add excludeTests(opts.noTests) to _functionNodeQuery so listFunctionNodes/iterateFunctionNodes filter test files at the SQL layer instead of post-fetch - Tests added for all three changes Impact: 8 functions changed, 2 affected --- src/db/query-builder.js | 16 ++++++---------- src/db/repository.js | 6 +++++- tests/unit/query-builder.test.js | 9 +++++++-- tests/unit/repository.test.js | 16 ++++++++++++++++ 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/db/query-builder.js b/src/db/query-builder.js index 321643cf..29b87686 100644 --- a/src/db/query-builder.js +++ b/src/db/query-builder.js @@ -204,30 +204,26 @@ export class NodeQuery { /** Add fan-in LEFT JOIN subquery. */ withFanIn(edgeKind = 'calls') { - this.#joins.push(fanInJoinSQL(edgeKind)); - return this; + return this._join(fanInJoinSQL(edgeKind)); } /** Add fan-out LEFT JOIN subquery. */ withFanOut(edgeKind = 'calls') { - this.#joins.push(fanOutJoinSQL(edgeKind)); - return this; + return this._join(fanOutJoinSQL(edgeKind)); } /** LEFT JOIN function_complexity. */ withComplexity() { - this.#joins.push('LEFT JOIN function_complexity fc ON fc.node_id = n.id'); - return this; + return this._join('LEFT JOIN function_complexity fc ON fc.node_id = n.id'); } /** LEFT JOIN file_commit_counts. */ withChurn() { - this.#joins.push('LEFT JOIN file_commit_counts fcc ON n.file = fcc.file'); - return this; + return this._join('LEFT JOIN file_commit_counts fcc ON n.file = fcc.file'); } - /** Raw JOIN escape hatch. */ - join(sql) { + /** @private Raw JOIN — internal use only; external callers should use withFanIn/withFanOut/withComplexity/withChurn. */ + _join(sql) { this.#joins.push(sql); return this; } diff --git a/src/db/repository.js b/src/db/repository.js index e752e8ee..d63edaf4 100644 --- a/src/db/repository.js +++ b/src/db/repository.js @@ -1,4 +1,4 @@ -import { EVERY_SYMBOL_KIND } from '../kinds.js'; +import { EVERY_SYMBOL_KIND, VALID_ROLES } from '../kinds.js'; import { NodeQuery } from './query-builder.js'; /** @@ -40,6 +40,9 @@ export function findNodesForTriage(db, opts = {}) { if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { throw new Error(`Invalid kind: ${opts.kind} (expected one of ${EVERY_SYMBOL_KIND.join(', ')})`); } + if (opts.role && !VALID_ROLES.includes(opts.role)) { + throw new Error(`Invalid role: ${opts.role} (expected one of ${VALID_ROLES.join(', ')})`); + } const kindsToUse = opts.kind ? [opts.kind] : ['function', 'method', 'class']; const q = new NodeQuery() @@ -75,6 +78,7 @@ function _functionNodeQuery(opts = {}) { .kinds(['function', 'method', 'class']) .fileFilter(opts.file) .nameLike(opts.pattern) + .excludeTests(opts.noTests) .orderBy('file, line'); } diff --git a/tests/unit/query-builder.test.js b/tests/unit/query-builder.test.js index f98f0f0d..e53c5e70 100644 --- a/tests/unit/query-builder.test.js +++ b/tests/unit/query-builder.test.js @@ -198,11 +198,16 @@ describe('NodeQuery', () => { expect(sql).toContain('file_commit_counts'); }); - it('.join() adds raw join', () => { - const { sql } = new NodeQuery().join('JOIN node_metrics nm ON n.id = nm.node_id').build(); + it('._join() adds raw join (internal API)', () => { + const { sql } = new NodeQuery()._join('JOIN node_metrics nm ON n.id = nm.node_id').build(); expect(sql).toContain('JOIN node_metrics nm ON n.id = nm.node_id'); }); + it('does not expose a public .join() method', () => { + const q = new NodeQuery(); + expect(typeof q.join).toBe('undefined'); + }); + it('.orderBy() adds ORDER BY', () => { const { sql } = new NodeQuery().orderBy('n.file, n.line').build(); expect(sql).toContain('ORDER BY n.file, n.line'); diff --git a/tests/unit/repository.test.js b/tests/unit/repository.test.js index 9adda662..aed2fabd 100644 --- a/tests/unit/repository.test.js +++ b/tests/unit/repository.test.js @@ -102,6 +102,10 @@ describe('repository', () => { expect(rows.length).toBe(1); expect(rows[0].name).toBe('bar'); }); + + it('throws on invalid role', () => { + expect(() => findNodesForTriage(db, { role: 'supervisor' })).toThrow('Invalid role'); + }); }); describe('listFunctionNodes', () => { @@ -122,6 +126,12 @@ describe('repository', () => { expect(rows[0].name).toBe('Baz'); }); + it('excludes test files when noTests is set', () => { + const rows = listFunctionNodes(db, { noTests: true }); + expect(rows.every((r) => !r.file.includes('.test.'))).toBe(true); + expect(rows.length).toBe(3); // foo, bar, Baz — excludes testFn + }); + it('orders by file, line', () => { const rows = listFunctionNodes(db); for (let i = 1; i < rows.length; i++) { @@ -145,6 +155,12 @@ describe('repository', () => { expect(rows.length).toBeGreaterThan(0); expect(rows.every((r) => r.file.includes('foo'))).toBe(true); }); + + it('excludes test files when noTests is set', () => { + const rows = [...iterateFunctionNodes(db, { noTests: true })]; + expect(rows.every((r) => !r.file.includes('.test.'))).toBe(true); + expect(rows.length).toBe(3); + }); }); describe('countNodes / countEdges / countFiles', () => {