diff --git a/src/db.js b/src/db.js index 31c0a428..ab5e7950 100644 --- a/src/db.js +++ b/src/db.js @@ -1,399 +1,19 @@ -import fs from 'node:fs'; -import path from 'node:path'; -import Database from 'better-sqlite3'; -import { debug, warn } from './logger.js'; - -// ─── Schema Migrations ───────────────────────────────────────────────── -export const MIGRATIONS = [ - { - version: 1, - up: ` - CREATE TABLE IF NOT EXISTS nodes ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL, - kind TEXT NOT NULL, - file TEXT NOT NULL, - line INTEGER, - end_line INTEGER, - UNIQUE(name, kind, file, line) - ); - CREATE TABLE IF NOT EXISTS edges ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_id INTEGER NOT NULL, - target_id INTEGER NOT NULL, - kind TEXT NOT NULL, - confidence REAL DEFAULT 1.0, - dynamic INTEGER DEFAULT 0, - FOREIGN KEY(source_id) REFERENCES nodes(id), - FOREIGN KEY(target_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name); - CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(file); - CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind); - CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id); - CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id); - CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind); - CREATE TABLE IF NOT EXISTS node_metrics ( - node_id INTEGER PRIMARY KEY, - line_count INTEGER, - symbol_count INTEGER, - import_count INTEGER, - export_count INTEGER, - fan_in INTEGER, - fan_out INTEGER, - cohesion REAL, - file_count INTEGER, - FOREIGN KEY(node_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_node_metrics_node ON node_metrics(node_id); - `, - }, - { - version: 2, - up: ` - CREATE INDEX IF NOT EXISTS idx_nodes_name_kind_file ON nodes(name, kind, file); - CREATE INDEX IF NOT EXISTS idx_nodes_file_kind ON nodes(file, kind); - CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_id, kind); - CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_id, kind); - `, - }, - { - version: 3, - up: ` - CREATE TABLE IF NOT EXISTS file_hashes ( - file TEXT PRIMARY KEY, - hash TEXT NOT NULL, - mtime INTEGER NOT NULL - ); - `, - }, - { - version: 4, - up: `ALTER TABLE file_hashes ADD COLUMN size INTEGER DEFAULT 0;`, - }, - { - version: 5, - up: ` - CREATE TABLE IF NOT EXISTS co_changes ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_a TEXT NOT NULL, - file_b TEXT NOT NULL, - commit_count INTEGER NOT NULL, - jaccard REAL NOT NULL, - last_commit_epoch INTEGER, - UNIQUE(file_a, file_b) - ); - CREATE INDEX IF NOT EXISTS idx_co_changes_file_a ON co_changes(file_a); - CREATE INDEX IF NOT EXISTS idx_co_changes_file_b ON co_changes(file_b); - CREATE INDEX IF NOT EXISTS idx_co_changes_jaccard ON co_changes(jaccard DESC); - CREATE TABLE IF NOT EXISTS co_change_meta ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL - ); - `, - }, - { - version: 6, - up: ` - CREATE TABLE IF NOT EXISTS file_commit_counts ( - file TEXT PRIMARY KEY, - commit_count INTEGER NOT NULL DEFAULT 0 - ); - `, - }, - { - version: 7, - up: ` - CREATE TABLE IF NOT EXISTS build_meta ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL - ); - `, - }, - { - version: 8, - up: ` - CREATE TABLE IF NOT EXISTS function_complexity ( - node_id INTEGER PRIMARY KEY, - cognitive INTEGER NOT NULL, - cyclomatic INTEGER NOT NULL, - max_nesting INTEGER NOT NULL, - FOREIGN KEY(node_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_fc_cognitive ON function_complexity(cognitive DESC); - CREATE INDEX IF NOT EXISTS idx_fc_cyclomatic ON function_complexity(cyclomatic DESC); - `, - }, - { - version: 9, - up: ` - ALTER TABLE function_complexity ADD COLUMN loc INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN sloc INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN comment_lines INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_n1 INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_n2 INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_big_n1 INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_big_n2 INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_vocabulary INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_length INTEGER DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_volume REAL DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_difficulty REAL DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_effort REAL DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN halstead_bugs REAL DEFAULT 0; - ALTER TABLE function_complexity ADD COLUMN maintainability_index REAL DEFAULT 0; - CREATE INDEX IF NOT EXISTS idx_fc_mi ON function_complexity(maintainability_index ASC); - `, - }, - { - version: 10, - up: ` - CREATE TABLE IF NOT EXISTS dataflow ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_id INTEGER NOT NULL, - target_id INTEGER NOT NULL, - kind TEXT NOT NULL, - param_index INTEGER, - expression TEXT, - line INTEGER, - confidence REAL DEFAULT 1.0, - FOREIGN KEY(source_id) REFERENCES nodes(id), - FOREIGN KEY(target_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_dataflow_source ON dataflow(source_id); - CREATE INDEX IF NOT EXISTS idx_dataflow_target ON dataflow(target_id); - CREATE INDEX IF NOT EXISTS idx_dataflow_kind ON dataflow(kind); - CREATE INDEX IF NOT EXISTS idx_dataflow_source_kind ON dataflow(source_id, kind); - `, - }, - { - version: 11, - up: ` - ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id); - CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id); - CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); - `, - }, - { - version: 12, - up: ` - CREATE TABLE IF NOT EXISTS cfg_blocks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - function_node_id INTEGER NOT NULL, - block_index INTEGER NOT NULL, - block_type TEXT NOT NULL, - start_line INTEGER, - end_line INTEGER, - label TEXT, - FOREIGN KEY(function_node_id) REFERENCES nodes(id), - UNIQUE(function_node_id, block_index) - ); - CREATE INDEX IF NOT EXISTS idx_cfg_blocks_fn ON cfg_blocks(function_node_id); - - CREATE TABLE IF NOT EXISTS cfg_edges ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - function_node_id INTEGER NOT NULL, - source_block_id INTEGER NOT NULL, - target_block_id INTEGER NOT NULL, - kind TEXT NOT NULL, - FOREIGN KEY(function_node_id) REFERENCES nodes(id), - FOREIGN KEY(source_block_id) REFERENCES cfg_blocks(id), - FOREIGN KEY(target_block_id) REFERENCES cfg_blocks(id) - ); - CREATE INDEX IF NOT EXISTS idx_cfg_edges_fn ON cfg_edges(function_node_id); - CREATE INDEX IF NOT EXISTS idx_cfg_edges_src ON cfg_edges(source_block_id); - CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); - `, - }, - { - version: 13, - up: ` - CREATE TABLE IF NOT EXISTS ast_nodes ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file TEXT NOT NULL, - line INTEGER NOT NULL, - kind TEXT NOT NULL, - name TEXT NOT NULL, - text TEXT, - receiver TEXT, - parent_node_id INTEGER, - FOREIGN KEY(parent_node_id) REFERENCES nodes(id) - ); - CREATE INDEX IF NOT EXISTS idx_ast_kind ON ast_nodes(kind); - CREATE INDEX IF NOT EXISTS idx_ast_name ON ast_nodes(name); - CREATE INDEX IF NOT EXISTS idx_ast_file ON ast_nodes(file); - CREATE INDEX IF NOT EXISTS idx_ast_parent ON ast_nodes(parent_node_id); - CREATE INDEX IF NOT EXISTS idx_ast_kind_name ON ast_nodes(kind, name); - `, - }, - { - version: 14, - up: ` - ALTER TABLE nodes ADD COLUMN exported INTEGER DEFAULT 0; - CREATE INDEX IF NOT EXISTS idx_nodes_exported ON nodes(exported); - `, - }, -]; - -export function getBuildMeta(db, key) { - try { - const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); - return row ? row.value : null; - } catch { - return null; - } -} - -export function setBuildMeta(db, entries) { - const upsert = db.prepare('INSERT OR REPLACE INTO build_meta (key, value) VALUES (?, ?)'); - const tx = db.transaction(() => { - for (const [key, value] of Object.entries(entries)) { - upsert.run(key, String(value)); - } - }); - tx(); -} - -export function openDb(dbPath) { - const dir = path.dirname(dbPath); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - acquireAdvisoryLock(dbPath); - const db = new Database(dbPath); - db.pragma('journal_mode = WAL'); - db.pragma('busy_timeout = 5000'); - db.__lockPath = `${dbPath}.lock`; - return db; -} - -export function closeDb(db) { - db.close(); - if (db.__lockPath) releaseAdvisoryLock(db.__lockPath); -} - -function isProcessAlive(pid) { - try { - process.kill(pid, 0); - return true; - } catch { - return false; - } -} - -function acquireAdvisoryLock(dbPath) { - const lockPath = `${dbPath}.lock`; - try { - if (fs.existsSync(lockPath)) { - const content = fs.readFileSync(lockPath, 'utf-8').trim(); - const pid = Number(content); - if (pid && pid !== process.pid && isProcessAlive(pid)) { - warn(`Another process (PID ${pid}) may be using this database. Proceeding with caution.`); - } - } - } catch { - /* ignore read errors */ - } - try { - fs.writeFileSync(lockPath, String(process.pid), 'utf-8'); - } catch { - /* best-effort */ - } -} - -function releaseAdvisoryLock(lockPath) { - try { - const content = fs.readFileSync(lockPath, 'utf-8').trim(); - if (Number(content) === process.pid) { - fs.unlinkSync(lockPath); - } - } catch { - /* ignore */ - } -} - -export function initSchema(db) { - db.exec(`CREATE TABLE IF NOT EXISTS schema_version (version INTEGER NOT NULL DEFAULT 0)`); - - const row = db.prepare('SELECT version FROM schema_version').get(); - let currentVersion = row ? row.version : 0; - - if (!row) { - db.prepare('INSERT INTO schema_version (version) VALUES (0)').run(); - } - - for (const migration of MIGRATIONS) { - if (migration.version > currentVersion) { - debug(`Running migration v${migration.version}`); - db.exec(migration.up); - db.prepare('UPDATE schema_version SET version = ?').run(migration.version); - currentVersion = migration.version; - } - } - - try { - db.exec('ALTER TABLE nodes ADD COLUMN end_line INTEGER'); - } catch { - /* already exists */ - } - try { - db.exec('ALTER TABLE edges ADD COLUMN confidence REAL DEFAULT 1.0'); - } catch { - /* already exists */ - } - try { - db.exec('ALTER TABLE edges ADD COLUMN dynamic INTEGER DEFAULT 0'); - } catch { - /* already exists */ - } - try { - db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); - } catch { - /* already exists */ - } - try { - db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); - } catch { - /* already exists */ - } - try { - db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); - } catch { - /* already exists */ - } -} - -export function findDbPath(customPath) { - if (customPath) return path.resolve(customPath); - let dir = process.cwd(); - while (true) { - const candidate = path.join(dir, '.codegraph', 'graph.db'); - if (fs.existsSync(candidate)) return candidate; - const parent = path.dirname(dir); - if (parent === dir) break; - dir = parent; - } - return path.join(process.cwd(), '.codegraph', 'graph.db'); -} - -/** - * Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. - */ -export function openReadonlyOrFail(customPath) { - const dbPath = findDbPath(customPath); - if (!fs.existsSync(dbPath)) { - console.error( - `No codegraph database found at ${dbPath}.\n` + - `Run "codegraph build" first to analyze your codebase.`, - ); - process.exit(1); - } - return new Database(dbPath, { readonly: true }); -} +// Barrel re-export — keeps all existing `import { ... } from './db.js'` working. +export { closeDb, findDbPath, openDb, openReadonlyOrFail } from './db/connection.js'; +export { getBuildMeta, initSchema, MIGRATIONS, setBuildMeta } from './db/migrations.js'; +export { + fanInJoinSQL, + fanOutJoinSQL, + kindInClause, + NodeQuery, + testFilterSQL, +} from './db/query-builder.js'; +export { + countEdges, + countFiles, + countNodes, + findNodesForTriage, + findNodesWithFanIn, + iterateFunctionNodes, + listFunctionNodes, +} from './db/repository.js'; diff --git a/src/db/connection.js b/src/db/connection.js new file mode 100644 index 00000000..beffdc41 --- /dev/null +++ b/src/db/connection.js @@ -0,0 +1,88 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { warn } from '../logger.js'; + +function isProcessAlive(pid) { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +function acquireAdvisoryLock(dbPath) { + const lockPath = `${dbPath}.lock`; + try { + if (fs.existsSync(lockPath)) { + const content = fs.readFileSync(lockPath, 'utf-8').trim(); + const pid = Number(content); + if (pid && pid !== process.pid && isProcessAlive(pid)) { + warn(`Another process (PID ${pid}) may be using this database. Proceeding with caution.`); + } + } + } catch { + /* ignore read errors */ + } + try { + fs.writeFileSync(lockPath, String(process.pid), 'utf-8'); + } catch { + /* best-effort */ + } +} + +function releaseAdvisoryLock(lockPath) { + try { + const content = fs.readFileSync(lockPath, 'utf-8').trim(); + if (Number(content) === process.pid) { + fs.unlinkSync(lockPath); + } + } catch { + /* ignore */ + } +} + +export function openDb(dbPath) { + const dir = path.dirname(dbPath); + if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); + acquireAdvisoryLock(dbPath); + const db = new Database(dbPath); + db.pragma('journal_mode = WAL'); + db.pragma('busy_timeout = 5000'); + db.__lockPath = `${dbPath}.lock`; + return db; +} + +export function closeDb(db) { + db.close(); + if (db.__lockPath) releaseAdvisoryLock(db.__lockPath); +} + +export function findDbPath(customPath) { + if (customPath) return path.resolve(customPath); + let dir = process.cwd(); + while (true) { + const candidate = path.join(dir, '.codegraph', 'graph.db'); + if (fs.existsSync(candidate)) return candidate; + const parent = path.dirname(dir); + if (parent === dir) break; + dir = parent; + } + return path.join(process.cwd(), '.codegraph', 'graph.db'); +} + +/** + * Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. + */ +export function openReadonlyOrFail(customPath) { + const dbPath = findDbPath(customPath); + if (!fs.existsSync(dbPath)) { + console.error( + `No codegraph database found at ${dbPath}.\n` + + `Run "codegraph build" first to analyze your codebase.`, + ); + process.exit(1); + } + return new Database(dbPath, { readonly: true }); +} diff --git a/src/db/migrations.js b/src/db/migrations.js new file mode 100644 index 00000000..3f0d60ce --- /dev/null +++ b/src/db/migrations.js @@ -0,0 +1,312 @@ +import { debug } from '../logger.js'; + +// ─── Schema Migrations ───────────────────────────────────────────────── +export const MIGRATIONS = [ + { + version: 1, + up: ` + CREATE TABLE IF NOT EXISTS nodes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + kind TEXT NOT NULL, + file TEXT NOT NULL, + line INTEGER, + end_line INTEGER, + UNIQUE(name, kind, file, line) + ); + CREATE TABLE IF NOT EXISTS edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id INTEGER NOT NULL, + target_id INTEGER NOT NULL, + kind TEXT NOT NULL, + confidence REAL DEFAULT 1.0, + dynamic INTEGER DEFAULT 0, + FOREIGN KEY(source_id) REFERENCES nodes(id), + FOREIGN KEY(target_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name); + CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(file); + CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind); + CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id); + CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id); + CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind); + CREATE TABLE IF NOT EXISTS node_metrics ( + node_id INTEGER PRIMARY KEY, + line_count INTEGER, + symbol_count INTEGER, + import_count INTEGER, + export_count INTEGER, + fan_in INTEGER, + fan_out INTEGER, + cohesion REAL, + file_count INTEGER, + FOREIGN KEY(node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_node_metrics_node ON node_metrics(node_id); + `, + }, + { + version: 2, + up: ` + CREATE INDEX IF NOT EXISTS idx_nodes_name_kind_file ON nodes(name, kind, file); + CREATE INDEX IF NOT EXISTS idx_nodes_file_kind ON nodes(file, kind); + CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_id, kind); + CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_id, kind); + `, + }, + { + version: 3, + up: ` + CREATE TABLE IF NOT EXISTS file_hashes ( + file TEXT PRIMARY KEY, + hash TEXT NOT NULL, + mtime INTEGER NOT NULL + ); + `, + }, + { + version: 4, + up: `ALTER TABLE file_hashes ADD COLUMN size INTEGER DEFAULT 0;`, + }, + { + version: 5, + up: ` + CREATE TABLE IF NOT EXISTS co_changes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_a TEXT NOT NULL, + file_b TEXT NOT NULL, + commit_count INTEGER NOT NULL, + jaccard REAL NOT NULL, + last_commit_epoch INTEGER, + UNIQUE(file_a, file_b) + ); + CREATE INDEX IF NOT EXISTS idx_co_changes_file_a ON co_changes(file_a); + CREATE INDEX IF NOT EXISTS idx_co_changes_file_b ON co_changes(file_b); + CREATE INDEX IF NOT EXISTS idx_co_changes_jaccard ON co_changes(jaccard DESC); + CREATE TABLE IF NOT EXISTS co_change_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + `, + }, + { + version: 6, + up: ` + CREATE TABLE IF NOT EXISTS file_commit_counts ( + file TEXT PRIMARY KEY, + commit_count INTEGER NOT NULL DEFAULT 0 + ); + `, + }, + { + version: 7, + up: ` + CREATE TABLE IF NOT EXISTS build_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + `, + }, + { + version: 8, + up: ` + CREATE TABLE IF NOT EXISTS function_complexity ( + node_id INTEGER PRIMARY KEY, + cognitive INTEGER NOT NULL, + cyclomatic INTEGER NOT NULL, + max_nesting INTEGER NOT NULL, + FOREIGN KEY(node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_fc_cognitive ON function_complexity(cognitive DESC); + CREATE INDEX IF NOT EXISTS idx_fc_cyclomatic ON function_complexity(cyclomatic DESC); + `, + }, + { + version: 9, + up: ` + ALTER TABLE function_complexity ADD COLUMN loc INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN sloc INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN comment_lines INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_n1 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_n2 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_big_n1 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_big_n2 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_vocabulary INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_length INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_volume REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_difficulty REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_effort REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_bugs REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN maintainability_index REAL DEFAULT 0; + CREATE INDEX IF NOT EXISTS idx_fc_mi ON function_complexity(maintainability_index ASC); + `, + }, + { + version: 10, + up: ` + CREATE TABLE IF NOT EXISTS dataflow ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id INTEGER NOT NULL, + target_id INTEGER NOT NULL, + kind TEXT NOT NULL, + param_index INTEGER, + expression TEXT, + line INTEGER, + confidence REAL DEFAULT 1.0, + FOREIGN KEY(source_id) REFERENCES nodes(id), + FOREIGN KEY(target_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_dataflow_source ON dataflow(source_id); + CREATE INDEX IF NOT EXISTS idx_dataflow_target ON dataflow(target_id); + CREATE INDEX IF NOT EXISTS idx_dataflow_kind ON dataflow(kind); + CREATE INDEX IF NOT EXISTS idx_dataflow_source_kind ON dataflow(source_id, kind); + `, + }, + { + version: 11, + up: ` + ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id); + CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id); + CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); + `, + }, + { + version: 12, + up: ` + CREATE TABLE IF NOT EXISTS cfg_blocks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + block_index INTEGER NOT NULL, + block_type TEXT NOT NULL, + start_line INTEGER, + end_line INTEGER, + label TEXT, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + UNIQUE(function_node_id, block_index) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_blocks_fn ON cfg_blocks(function_node_id); + + CREATE TABLE IF NOT EXISTS cfg_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + source_block_id INTEGER NOT NULL, + target_block_id INTEGER NOT NULL, + kind TEXT NOT NULL, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + FOREIGN KEY(source_block_id) REFERENCES cfg_blocks(id), + FOREIGN KEY(target_block_id) REFERENCES cfg_blocks(id) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_fn ON cfg_edges(function_node_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_src ON cfg_edges(source_block_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); + `, + }, + { + version: 13, + up: ` + CREATE TABLE IF NOT EXISTS ast_nodes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file TEXT NOT NULL, + line INTEGER NOT NULL, + kind TEXT NOT NULL, + name TEXT NOT NULL, + text TEXT, + receiver TEXT, + parent_node_id INTEGER, + FOREIGN KEY(parent_node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_ast_kind ON ast_nodes(kind); + CREATE INDEX IF NOT EXISTS idx_ast_name ON ast_nodes(name); + CREATE INDEX IF NOT EXISTS idx_ast_file ON ast_nodes(file); + CREATE INDEX IF NOT EXISTS idx_ast_parent ON ast_nodes(parent_node_id); + CREATE INDEX IF NOT EXISTS idx_ast_kind_name ON ast_nodes(kind, name); + `, + }, + { + version: 14, + up: ` + ALTER TABLE nodes ADD COLUMN exported INTEGER DEFAULT 0; + CREATE INDEX IF NOT EXISTS idx_nodes_exported ON nodes(exported); + `, + }, +]; + +export function getBuildMeta(db, key) { + try { + const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); + return row ? row.value : null; + } catch { + return null; + } +} + +export function setBuildMeta(db, entries) { + const upsert = db.prepare('INSERT OR REPLACE INTO build_meta (key, value) VALUES (?, ?)'); + const tx = db.transaction(() => { + for (const [key, value] of Object.entries(entries)) { + upsert.run(key, String(value)); + } + }); + tx(); +} + +export function initSchema(db) { + db.exec(`CREATE TABLE IF NOT EXISTS schema_version (version INTEGER NOT NULL DEFAULT 0)`); + + const row = db.prepare('SELECT version FROM schema_version').get(); + let currentVersion = row ? row.version : 0; + + if (!row) { + db.prepare('INSERT INTO schema_version (version) VALUES (0)').run(); + } + + for (const migration of MIGRATIONS) { + if (migration.version > currentVersion) { + debug(`Running migration v${migration.version}`); + db.exec(migration.up); + db.prepare('UPDATE schema_version SET version = ?').run(migration.version); + currentVersion = migration.version; + } + } + + try { + db.exec('ALTER TABLE nodes ADD COLUMN end_line INTEGER'); + } catch { + /* already exists */ + } + try { + db.exec('ALTER TABLE edges ADD COLUMN confidence REAL DEFAULT 1.0'); + } catch { + /* already exists */ + } + try { + db.exec('ALTER TABLE edges ADD COLUMN dynamic INTEGER DEFAULT 0'); + } catch { + /* already exists */ + } + try { + db.exec('ALTER TABLE nodes ADD COLUMN role TEXT'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)'); + } catch { + /* already exists */ + } + try { + db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)'); + } catch { + /* already exists */ + } + try { + db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)'); + } catch { + /* already exists */ + } +} diff --git a/src/db/query-builder.js b/src/db/query-builder.js new file mode 100644 index 00000000..29b87686 --- /dev/null +++ b/src/db/query-builder.js @@ -0,0 +1,280 @@ +import { EVERY_EDGE_KIND } from '../kinds.js'; + +// ─── Validation Helpers ───────────────────────────────────────────── + +const SAFE_ALIAS_RE = /^[a-z_][a-z0-9_]*$/i; +const SAFE_COLUMN_RE = /^[a-z_][a-z0-9_]*(?:\.[a-z_][a-z0-9_]*)?$/i; +// Matches: column, table.column, column ASC, table.column DESC +const SAFE_ORDER_TERM_RE = /^[a-z_][a-z0-9_]*(?:\.[a-z_][a-z0-9_]*)?\s*(?:asc|desc)?$/i; +// Matches safe SELECT expressions: column refs, *, table.*, COALESCE(...) AS alias +const SAFE_SELECT_TOKEN_RE = + /^(?:[a-z_][a-z0-9_]*(?:\.[a-z_*][a-z0-9_]*)?\s*(?:as\s+[a-z_][a-z0-9_]*)?|[a-z_]+\([^)]*\)\s*(?:as\s+[a-z_][a-z0-9_]*)?)$/i; + +function validateAlias(alias) { + if (!SAFE_ALIAS_RE.test(alias)) { + throw new Error(`Invalid SQL alias: ${alias}`); + } +} + +function validateColumn(column) { + if (!SAFE_COLUMN_RE.test(column)) { + throw new Error(`Invalid SQL column: ${column}`); + } +} + +function validateOrderBy(clause) { + const terms = clause.split(',').map((t) => t.trim()); + for (const term of terms) { + if (!SAFE_ORDER_TERM_RE.test(term)) { + throw new Error(`Invalid ORDER BY term: ${term}`); + } + } +} + +function splitTopLevelCommas(str) { + const parts = []; + let depth = 0; + let start = 0; + for (let i = 0; i < str.length; i++) { + if (str[i] === '(') depth++; + else if (str[i] === ')') depth--; + else if (str[i] === ',' && depth === 0) { + parts.push(str.slice(start, i).trim()); + start = i + 1; + } + } + parts.push(str.slice(start).trim()); + return parts; +} + +function validateSelectCols(cols) { + const tokens = splitTopLevelCommas(cols); + for (const token of tokens) { + if (!SAFE_SELECT_TOKEN_RE.test(token)) { + throw new Error(`Invalid SELECT expression: ${token}`); + } + } +} + +function validateEdgeKind(edgeKind) { + if (!EVERY_EDGE_KIND.includes(edgeKind)) { + throw new Error( + `Invalid edge kind: ${edgeKind} (expected one of ${EVERY_EDGE_KIND.join(', ')})`, + ); + } +} + +// ─── Standalone Helpers ────────────────────────────────────────────── + +/** + * Return a SQL AND clause that excludes test/spec/stories files. + * Returns empty string when disabled. + * @param {string} [column='n.file'] - Column to filter on + * @param {boolean} [enabled=true] - No-op when false + */ +export function testFilterSQL(column = 'n.file', enabled = true) { + if (!enabled) return ''; + validateColumn(column); + return `AND ${column} NOT LIKE '%.test.%' + AND ${column} NOT LIKE '%.spec.%' + AND ${column} NOT LIKE '%__test__%' + AND ${column} NOT LIKE '%__tests__%' + AND ${column} NOT LIKE '%.stories.%'`; +} + +/** + * Build IN (?, ?, ?) placeholders and params array for a kind filter. + * @param {string[]} kinds + * @returns {{ placeholders: string, params: string[] }} + */ +export function kindInClause(kinds) { + return { + placeholders: kinds.map(() => '?').join(', '), + params: [...kinds], + }; +} + +/** + * Return a LEFT JOIN subquery for fan-in (incoming edge count). + * @param {string} [edgeKind='calls'] - Edge kind to count + * @param {string} [alias='fi'] - Subquery alias + */ +export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi') { + validateEdgeKind(edgeKind); + validateAlias(alias); + return `LEFT JOIN ( + SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = '${edgeKind}' GROUP BY target_id + ) ${alias} ON ${alias}.target_id = n.id`; +} + +/** + * Return a LEFT JOIN subquery for fan-out (outgoing edge count). + * @param {string} [edgeKind='calls'] - Edge kind to count + * @param {string} [alias='fo'] - Subquery alias + */ +export function fanOutJoinSQL(edgeKind = 'calls', alias = 'fo') { + validateEdgeKind(edgeKind); + validateAlias(alias); + return `LEFT JOIN ( + SELECT source_id, COUNT(*) AS cnt FROM edges WHERE kind = '${edgeKind}' GROUP BY source_id + ) ${alias} ON ${alias}.source_id = n.id`; +} + +// ─── NodeQuery Fluent Builder ──────────────────────────────────────── + +/** + * Fluent builder for the common `SELECT ... FROM nodes n WHERE ...` pattern. + * Not an ORM — complex queries (BFS, correlated subqueries) stay as raw SQL. + */ +export class NodeQuery { + #selectCols = 'n.*'; + #joins = []; + #conditions = []; + #params = []; + #orderByClause = ''; + #limitValue = null; + + /** Set SELECT columns (default: `n.*`). */ + select(cols) { + validateSelectCols(cols); + this.#selectCols = cols; + return this; + } + + /** WHERE n.kind IN (?, ?, ...) */ + kinds(kindArray) { + if (!kindArray || kindArray.length === 0) return this; + const { placeholders, params } = kindInClause(kindArray); + this.#conditions.push(`n.kind IN (${placeholders})`); + this.#params.push(...params); + return this; + } + + /** Add 5 NOT LIKE conditions to exclude test files. No-op when enabled is falsy. */ + excludeTests(enabled) { + if (!enabled) return this; + this.#conditions.push( + `n.file NOT LIKE '%.test.%'`, + `n.file NOT LIKE '%.spec.%'`, + `n.file NOT LIKE '%__test__%'`, + `n.file NOT LIKE '%__tests__%'`, + `n.file NOT LIKE '%.stories.%'`, + ); + return this; + } + + /** WHERE n.file LIKE ? (no-op if falsy). */ + fileFilter(file) { + if (!file) return this; + this.#conditions.push('n.file LIKE ?'); + this.#params.push(`%${file}%`); + return this; + } + + /** WHERE n.kind = ? (no-op if falsy). */ + kindFilter(kind) { + if (!kind) return this; + this.#conditions.push('n.kind = ?'); + this.#params.push(kind); + return this; + } + + /** WHERE n.role = ? (no-op if falsy). */ + roleFilter(role) { + if (!role) return this; + this.#conditions.push('n.role = ?'); + this.#params.push(role); + return this; + } + + /** WHERE n.name LIKE ? (no-op if falsy). */ + nameLike(pattern) { + if (!pattern) return this; + this.#conditions.push('n.name LIKE ?'); + this.#params.push(`%${pattern}%`); + return this; + } + + /** Raw WHERE condition escape hatch. */ + where(sql, ...params) { + this.#conditions.push(sql); + this.#params.push(...params); + return this; + } + + /** Add fan-in LEFT JOIN subquery. */ + withFanIn(edgeKind = 'calls') { + return this._join(fanInJoinSQL(edgeKind)); + } + + /** Add fan-out LEFT JOIN subquery. */ + withFanOut(edgeKind = 'calls') { + return this._join(fanOutJoinSQL(edgeKind)); + } + + /** LEFT JOIN function_complexity. */ + withComplexity() { + return this._join('LEFT JOIN function_complexity fc ON fc.node_id = n.id'); + } + + /** LEFT JOIN file_commit_counts. */ + withChurn() { + return this._join('LEFT JOIN file_commit_counts fcc ON n.file = fcc.file'); + } + + /** @private Raw JOIN — internal use only; external callers should use withFanIn/withFanOut/withComplexity/withChurn. */ + _join(sql) { + this.#joins.push(sql); + return this; + } + + /** ORDER BY clause. */ + orderBy(clause) { + validateOrderBy(clause); + this.#orderByClause = clause; + return this; + } + + /** LIMIT ?. */ + limit(n) { + if (n == null) return this; + this.#limitValue = n; + return this; + } + + /** Build the SQL and params without executing. */ + build() { + const joins = this.#joins.length > 0 ? `\n ${this.#joins.join('\n ')}` : ''; + const where = + this.#conditions.length > 0 ? `\n WHERE ${this.#conditions.join(' AND ')}` : ''; + const orderBy = this.#orderByClause ? `\n ORDER BY ${this.#orderByClause}` : ''; + + let limitClause = ''; + const params = [...this.#params]; + if (this.#limitValue != null) { + limitClause = '\n LIMIT ?'; + params.push(this.#limitValue); + } + + const sql = `SELECT ${this.#selectCols}\n FROM nodes n${joins}${where}${orderBy}${limitClause}`; + return { sql, params }; + } + + /** Execute and return all rows. */ + all(db) { + const { sql, params } = this.build(); + return db.prepare(sql).all(...params); + } + + /** Execute and return first row. */ + get(db) { + const { sql, params } = this.build(); + return db.prepare(sql).get(...params); + } + + /** Execute and return an iterator. */ + iterate(db) { + const { sql, params } = this.build(); + return db.prepare(sql).iterate(...params); + } +} diff --git a/src/db/repository.js b/src/db/repository.js new file mode 100644 index 00000000..d63edaf4 --- /dev/null +++ b/src/db/repository.js @@ -0,0 +1,134 @@ +import { EVERY_SYMBOL_KIND, VALID_ROLES } from '../kinds.js'; +import { NodeQuery } from './query-builder.js'; + +/** + * Find nodes matching a name pattern, with fan-in count. + * Used by findMatchingNodes in queries.js. + * + * @param {object} db - Database instance + * @param {string} namePattern - LIKE pattern (already wrapped with %) + * @param {object} [opts] + * @param {string[]} [opts.kinds] - Node kinds to match + * @param {string} [opts.file] - File filter (partial match) + * @returns {object[]} + */ +export function findNodesWithFanIn(db, namePattern, opts = {}) { + const q = new NodeQuery() + .select('n.*, COALESCE(fi.cnt, 0) AS fan_in') + .withFanIn() + .where('n.name LIKE ?', namePattern); + + if (opts.kinds) { + q.kinds(opts.kinds); + } + if (opts.file) { + q.fileFilter(opts.file); + } + + return q.all(db); +} + +/** + * Fetch nodes for triage scoring: fan-in + complexity + churn. + * Used by triageData in triage.js. + * + * @param {object} db + * @param {object} [opts] + * @returns {object[]} + */ +export function findNodesForTriage(db, opts = {}) { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + throw new Error(`Invalid kind: ${opts.kind} (expected one of ${EVERY_SYMBOL_KIND.join(', ')})`); + } + if (opts.role && !VALID_ROLES.includes(opts.role)) { + throw new Error(`Invalid role: ${opts.role} (expected one of ${VALID_ROLES.join(', ')})`); + } + + const kindsToUse = opts.kind ? [opts.kind] : ['function', 'method', 'class']; + const q = new NodeQuery() + .select( + `n.id, n.name, n.kind, n.file, n.line, n.end_line, n.role, + COALESCE(fi.cnt, 0) AS fan_in, + COALESCE(fc.cognitive, 0) AS cognitive, + COALESCE(fc.maintainability_index, 0) AS mi, + COALESCE(fc.cyclomatic, 0) AS cyclomatic, + COALESCE(fc.max_nesting, 0) AS max_nesting, + COALESCE(fcc.commit_count, 0) AS churn`, + ) + .kinds(kindsToUse) + .withFanIn() + .withComplexity() + .withChurn() + .excludeTests(opts.noTests) + .fileFilter(opts.file) + .roleFilter(opts.role) + .orderBy('n.file, n.line'); + + return q.all(db); +} + +/** + * Shared query builder for function/method/class node listing. + * @param {object} [opts] + * @returns {NodeQuery} + */ +function _functionNodeQuery(opts = {}) { + return new NodeQuery() + .select('name, kind, file, line, end_line, role') + .kinds(['function', 'method', 'class']) + .fileFilter(opts.file) + .nameLike(opts.pattern) + .excludeTests(opts.noTests) + .orderBy('file, line'); +} + +/** + * List function/method/class nodes with basic info. + * Used by listFunctionsData in queries.js. + * + * @param {object} db + * @param {object} [opts] + * @returns {object[]} + */ +export function listFunctionNodes(db, opts = {}) { + return _functionNodeQuery(opts).all(db); +} + +/** + * Iterator version of listFunctionNodes for memory efficiency. + * Used by iterListFunctions in queries.js. + * + * @param {object} db + * @param {object} [opts] + * @returns {IterableIterator} + */ +export function iterateFunctionNodes(db, opts = {}) { + return _functionNodeQuery(opts).iterate(db); +} + +/** + * Count total nodes. + * @param {object} db + * @returns {number} + */ +export function countNodes(db) { + return db.prepare('SELECT COUNT(*) AS cnt FROM nodes').get().cnt; +} + +/** + * Count total edges. + * @param {object} db + * @returns {number} + */ +export function countEdges(db) { + return db.prepare('SELECT COUNT(*) AS cnt FROM edges').get().cnt; +} + +/** + * Count distinct files. + * @param {object} db + * @returns {number} + */ +export function countFiles(db) { + return db.prepare('SELECT COUNT(DISTINCT file) AS cnt FROM nodes').get().cnt; +} diff --git a/src/index.js b/src/index.js index e6d72100..d47d9cac 100644 --- a/src/index.js +++ b/src/index.js @@ -77,12 +77,24 @@ export { } from './dataflow.js'; // Database utilities export { + countEdges, + countFiles, + countNodes, + fanInJoinSQL, + fanOutJoinSQL, findDbPath, + findNodesForTriage, + findNodesWithFanIn, getBuildMeta, initSchema, + iterateFunctionNodes, + kindInClause, + listFunctionNodes, + NodeQuery, openDb, openReadonlyOrFail, setBuildMeta, + testFilterSQL, } from './db.js'; // Embeddings export { diff --git a/src/kinds.js b/src/kinds.js new file mode 100644 index 00000000..60d363fc --- /dev/null +++ b/src/kinds.js @@ -0,0 +1,49 @@ +// ── Symbol kind constants ─────────────────────────────────────────── +// Original 10 kinds — used as default query scope +export const CORE_SYMBOL_KINDS = [ + 'function', + 'method', + 'class', + 'interface', + 'type', + 'struct', + 'enum', + 'trait', + 'record', + 'module', +]; + +// Sub-declaration kinds (Phase 1) +export const EXTENDED_SYMBOL_KINDS = [ + 'parameter', + 'property', + 'constant', + // Phase 2 (reserved, not yet extracted): + // 'constructor', 'namespace', 'decorator', 'getter', 'setter', +]; + +// Full set for --kind validation and MCP enum +export const EVERY_SYMBOL_KIND = [...CORE_SYMBOL_KINDS, ...EXTENDED_SYMBOL_KINDS]; + +// Backward compat: ALL_SYMBOL_KINDS stays as the core 10 +export const ALL_SYMBOL_KINDS = CORE_SYMBOL_KINDS; + +// ── Edge kind constants ───────────────────────────────────────────── +// Core edge kinds — coupling and dependency relationships +export const CORE_EDGE_KINDS = [ + 'imports', + 'imports-type', + 'reexports', + 'calls', + 'extends', + 'implements', + 'contains', +]; + +// Structural edge kinds — parent/child and type relationships +export const STRUCTURAL_EDGE_KINDS = ['parameter_of', 'receiver']; + +// Full set for MCP enum and validation +export const EVERY_EDGE_KIND = [...CORE_EDGE_KINDS, ...STRUCTURAL_EDGE_KINDS]; + +export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; diff --git a/src/queries.js b/src/queries.js index f6eeb64e..4db6678a 100644 --- a/src/queries.js +++ b/src/queries.js @@ -5,7 +5,15 @@ import { evaluateBoundaries } from './boundaries.js'; import { coChangeForFiles } from './cochange.js'; import { loadConfig } from './config.js'; import { findCycles } from './cycles.js'; -import { findDbPath, openReadonlyOrFail } from './db.js'; +import { + findDbPath, + findNodesWithFanIn, + iterateFunctionNodes, + listFunctionNodes, + openReadonlyOrFail, + testFilterSQL, +} from './db.js'; +import { ALL_SYMBOL_KINDS } from './kinds.js'; import { debug } from './logger.js'; import { ownersForFiles } from './owners.js'; import { paginateResult } from './paginate.js'; @@ -60,54 +68,17 @@ export const FALSE_POSITIVE_CALLER_THRESHOLD = 20; const FUNCTION_KINDS = ['function', 'method', 'class']; -// Original 10 kinds — used as default query scope -export const CORE_SYMBOL_KINDS = [ - 'function', - 'method', - 'class', - 'interface', - 'type', - 'struct', - 'enum', - 'trait', - 'record', - 'module', -]; - -// Sub-declaration kinds (Phase 1) -export const EXTENDED_SYMBOL_KINDS = [ - 'parameter', - 'property', - 'constant', - // Phase 2 (reserved, not yet extracted): - // 'constructor', 'namespace', 'decorator', 'getter', 'setter', -]; - -// Full set for --kind validation and MCP enum -export const EVERY_SYMBOL_KIND = [...CORE_SYMBOL_KINDS, ...EXTENDED_SYMBOL_KINDS]; - -// Backward compat: ALL_SYMBOL_KINDS stays as the core 10 -export const ALL_SYMBOL_KINDS = CORE_SYMBOL_KINDS; - -// ── Edge kind constants ───────────────────────────────────────────── -// Core edge kinds — coupling and dependency relationships -export const CORE_EDGE_KINDS = [ - 'imports', - 'imports-type', - 'reexports', - 'calls', - 'extends', - 'implements', - 'contains', -]; - -// Structural edge kinds — parent/child and type relationships -export const STRUCTURAL_EDGE_KINDS = ['parameter_of', 'receiver']; - -// Full set for MCP enum and validation -export const EVERY_EDGE_KIND = [...CORE_EDGE_KINDS, ...STRUCTURAL_EDGE_KINDS]; - -export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf']; +// Re-export kind/edge constants from kinds.js (canonical source) +export { + ALL_SYMBOL_KINDS, + CORE_EDGE_KINDS, + CORE_SYMBOL_KINDS, + EVERY_EDGE_KIND, + EVERY_SYMBOL_KIND, + EXTENDED_SYMBOL_KINDS, + STRUCTURAL_EDGE_KINDS, + VALID_ROLES, +} from './kinds.js'; /** * Get all ancestor class names for a given class using extends edges. @@ -165,25 +136,8 @@ function resolveMethodViaHierarchy(db, methodName) { */ export function findMatchingNodes(db, name, opts = {}) { const kinds = opts.kind ? [opts.kind] : FUNCTION_KINDS; - const placeholders = kinds.map(() => '?').join(', '); - const params = [`%${name}%`, ...kinds]; - let fileCondition = ''; - if (opts.file) { - fileCondition = ' AND n.file LIKE ?'; - params.push(`%${opts.file}%`); - } - - const rows = db - .prepare(` - SELECT n.*, COALESCE(fi.cnt, 0) AS fan_in - FROM nodes n - LEFT JOIN ( - SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id - ) fi ON fi.target_id = n.id - WHERE n.name LIKE ? AND n.kind IN (${placeholders})${fileCondition} - `) - .all(...params); + const rows = findNodesWithFanIn(db, `%${name}%`, { kinds, file: opts.file }); const nodes = opts.noTests ? rows.filter((n) => !isTestFile(n.file)) : rows; @@ -355,13 +309,7 @@ export function moduleMapData(customDbPath, limit = 20, opts = {}) { const db = openReadonlyOrFail(customDbPath); const noTests = opts.noTests || false; - const testFilter = noTests - ? `AND n.file NOT LIKE '%.test.%' - AND n.file NOT LIKE '%.spec.%' - AND n.file NOT LIKE '%__test__%' - AND n.file NOT LIKE '%__tests__%' - AND n.file NOT LIKE '%.stories.%'` - : ''; + const testFilter = testFilterSQL('n.file', noTests); const nodes = db .prepare(` @@ -1153,26 +1101,8 @@ export function diffImpactMermaid(customDbPath, opts = {}) { export function listFunctionsData(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); const noTests = opts.noTests || false; - const kinds = ['function', 'method', 'class']; - const placeholders = kinds.map(() => '?').join(', '); - const conditions = [`kind IN (${placeholders})`]; - const params = [...kinds]; - - if (opts.file) { - conditions.push('file LIKE ?'); - params.push(`%${opts.file}%`); - } - if (opts.pattern) { - conditions.push('name LIKE ?'); - params.push(`%${opts.pattern}%`); - } - - let rows = db - .prepare( - `SELECT name, kind, file, line, end_line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, - ) - .all(...params); + let rows = listFunctionNodes(db, { file: opts.file, pattern: opts.pattern }); if (noTests) rows = rows.filter((r) => !isTestFile(r.file)); @@ -1196,25 +1126,8 @@ export function* iterListFunctions(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { const noTests = opts.noTests || false; - const kinds = ['function', 'method', 'class']; - const placeholders = kinds.map(() => '?').join(', '); - - const conditions = [`kind IN (${placeholders})`]; - const params = [...kinds]; - - if (opts.file) { - conditions.push('file LIKE ?'); - params.push(`%${opts.file}%`); - } - if (opts.pattern) { - conditions.push('name LIKE ?'); - params.push(`%${opts.pattern}%`); - } - const stmt = db.prepare( - `SELECT name, kind, file, line, end_line, role FROM nodes WHERE ${conditions.join(' AND ')} ORDER BY file, line`, - ); - for (const row of stmt.iterate(...params)) { + for (const row of iterateFunctionNodes(db, { file: opts.file, pattern: opts.pattern })) { if (noTests && isTestFile(row.file)) continue; yield { name: row.name, @@ -1405,13 +1318,7 @@ export function statsData(customDbPath, opts = {}) { const fnCycles = findCycles(db, { fileLevel: false, noTests }); // Top 5 coupling hotspots (fan-in + fan-out, file nodes) - const testFilter = noTests - ? `AND n.file NOT LIKE '%.test.%' - AND n.file NOT LIKE '%.spec.%' - AND n.file NOT LIKE '%__test__%' - AND n.file NOT LIKE '%__tests__%' - AND n.file NOT LIKE '%.stories.%'` - : ''; + const testFilter = testFilterSQL('n.file', noTests); const hotspotRows = db .prepare(` SELECT n.file, diff --git a/src/structure.js b/src/structure.js index ec56685d..e6dbe299 100644 --- a/src/structure.js +++ b/src/structure.js @@ -1,6 +1,6 @@ import path from 'node:path'; import { normalizePath } from './constants.js'; -import { openReadonlyOrFail } from './db.js'; +import { openReadonlyOrFail, testFilterSQL } from './db.js'; import { debug } from './logger.js'; import { paginateResult } from './paginate.js'; import { isTestFile } from './test-filter.js'; @@ -536,14 +536,7 @@ export function hotspotsData(customDbPath, opts = {}) { const kind = level === 'directory' ? 'directory' : 'file'; - const testFilter = - noTests && kind === 'file' - ? `AND n.name NOT LIKE '%.test.%' - AND n.name NOT LIKE '%.spec.%' - AND n.name NOT LIKE '%__test__%' - AND n.name NOT LIKE '%__tests__%' - AND n.name NOT LIKE '%.stories.%'` - : ''; + const testFilter = testFilterSQL('n.name', noTests && kind === 'file'); const HOTSPOT_QUERIES = { 'fan-in': db.prepare(` diff --git a/src/triage.js b/src/triage.js index 000397d0..56b49179 100644 --- a/src/triage.js +++ b/src/triage.js @@ -1,4 +1,4 @@ -import { openReadonlyOrFail } from './db.js'; +import { findNodesForTriage, openReadonlyOrFail } from './db.js'; import { warn } from './logger.js'; import { paginateResult } from './paginate.js'; import { outputResult } from './result-formatter.js'; @@ -55,50 +55,14 @@ export function triageData(customDbPath, opts = {}) { const sort = opts.sort || 'risk'; const weights = { ...DEFAULT_WEIGHTS, ...(opts.weights || {}) }; - // Build WHERE clause - let where = "WHERE n.kind IN ('function','method','class')"; - const params = []; - - if (noTests) { - where += ` AND n.file NOT LIKE '%.test.%' - AND n.file NOT LIKE '%.spec.%' - AND n.file NOT LIKE '%__test__%' - AND n.file NOT LIKE '%__tests__%' - AND n.file NOT LIKE '%.stories.%'`; - } - if (fileFilter) { - where += ' AND n.file LIKE ?'; - params.push(`%${fileFilter}%`); - } - if (kindFilter) { - where += ' AND n.kind = ?'; - params.push(kindFilter); - } - if (roleFilter) { - where += ' AND n.role = ?'; - params.push(roleFilter); - } - let rows; try { - rows = db - .prepare( - `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line, n.role, - COALESCE(fi.cnt, 0) AS fan_in, - COALESCE(fc.cognitive, 0) AS cognitive, - COALESCE(fc.maintainability_index, 0) AS mi, - COALESCE(fc.cyclomatic, 0) AS cyclomatic, - COALESCE(fc.max_nesting, 0) AS max_nesting, - COALESCE(fcc.commit_count, 0) AS churn - FROM nodes n - LEFT JOIN (SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind='calls' GROUP BY target_id) fi - ON n.id = fi.target_id - LEFT JOIN function_complexity fc ON fc.node_id = n.id - LEFT JOIN file_commit_counts fcc ON n.file = fcc.file - ${where} - ORDER BY n.file, n.line`, - ) - .all(...params); + rows = findNodesForTriage(db, { + noTests, + file: fileFilter, + kind: kindFilter, + role: roleFilter, + }); } catch (err) { warn(`triage query failed: ${err.message}`); db.close(); diff --git a/tests/unit/query-builder.test.js b/tests/unit/query-builder.test.js new file mode 100644 index 00000000..e53c5e70 --- /dev/null +++ b/tests/unit/query-builder.test.js @@ -0,0 +1,274 @@ +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initSchema } from '../../src/db/migrations.js'; +import { + fanInJoinSQL, + fanOutJoinSQL, + kindInClause, + NodeQuery, + testFilterSQL, +} from '../../src/db/query-builder.js'; + +// ─── testFilterSQL ─────────────────────────────────────────────────── + +describe('testFilterSQL', () => { + it('returns 5 NOT LIKE conditions with default column', () => { + const sql = testFilterSQL(); + expect(sql).toContain("n.file NOT LIKE '%.test.%'"); + expect(sql).toContain("n.file NOT LIKE '%.spec.%'"); + expect(sql).toContain("n.file NOT LIKE '%__test__%'"); + expect(sql).toContain("n.file NOT LIKE '%__tests__%'"); + expect(sql).toContain("n.file NOT LIKE '%.stories.%'"); + }); + + it('uses custom column', () => { + const sql = testFilterSQL('n.name'); + expect(sql).toContain("n.name NOT LIKE '%.test.%'"); + expect(sql).not.toContain('n.file'); + }); + + it('returns empty string when disabled', () => { + expect(testFilterSQL('n.file', false)).toBe(''); + }); + + it('rejects malicious column names', () => { + expect(() => testFilterSQL('1; DROP TABLE nodes --')).toThrow('Invalid SQL column'); + expect(() => testFilterSQL('n.file OR 1=1 --')).toThrow('Invalid SQL column'); + }); +}); + +// ─── kindInClause ──────────────────────────────────────────────────── + +describe('kindInClause', () => { + it('returns correct placeholders and params', () => { + const result = kindInClause(['function', 'method', 'class']); + expect(result.placeholders).toBe('?, ?, ?'); + expect(result.params).toEqual(['function', 'method', 'class']); + }); + + it('handles single kind', () => { + const result = kindInClause(['function']); + expect(result.placeholders).toBe('?'); + expect(result.params).toEqual(['function']); + }); +}); + +// ─── fanInJoinSQL / fanOutJoinSQL ──────────────────────────────────── + +describe('fanInJoinSQL', () => { + it('returns LEFT JOIN with default alias and kind', () => { + const sql = fanInJoinSQL(); + expect(sql).toContain('LEFT JOIN'); + expect(sql).toContain('target_id'); + expect(sql).toContain("kind = 'calls'"); + expect(sql).toContain('fi'); + }); + + it('accepts custom edge kind and alias', () => { + const sql = fanInJoinSQL('imports', 'imp'); + expect(sql).toContain("kind = 'imports'"); + expect(sql).toContain('imp'); + }); +}); + +describe('fanOutJoinSQL', () => { + it('returns LEFT JOIN with source_id', () => { + const sql = fanOutJoinSQL(); + expect(sql).toContain('LEFT JOIN'); + expect(sql).toContain('source_id'); + expect(sql).toContain("kind = 'calls'"); + expect(sql).toContain('fo'); + }); + + it('accepts custom edge kind and alias', () => { + const sql = fanOutJoinSQL('imports', 'imp'); + expect(sql).toContain("kind = 'imports'"); + expect(sql).toContain('imp'); + }); +}); + +// ─── NodeQuery ─────────────────────────────────────────────────────── + +describe('NodeQuery', () => { + let db; + + beforeEach(() => { + db = new Database(':memory:'); + initSchema(db); + // Seed test data + const insert = db.prepare( + 'INSERT INTO nodes (name, kind, file, line, role) VALUES (?, ?, ?, ?, ?)', + ); + insert.run('foo', 'function', 'src/foo.js', 1, 'core'); + insert.run('bar', 'method', 'src/bar.js', 10, 'utility'); + insert.run('Baz', 'class', 'src/baz.js', 20, 'entry'); + insert.run('testHelper', 'function', 'src/foo.test.js', 1, null); + insert.run('specHelper', 'function', 'src/bar.spec.js', 1, null); + + // Add an edge for fan-in + const fooId = db.prepare("SELECT id FROM nodes WHERE name = 'foo'").get().id; + const barId = db.prepare("SELECT id FROM nodes WHERE name = 'bar'").get().id; + db.prepare('INSERT INTO edges (source_id, target_id, kind) VALUES (?, ?, ?)').run( + barId, + fooId, + 'calls', + ); + }); + + afterEach(() => { + db.close(); + }); + + it('.build() returns sql and params', () => { + const { sql, params } = new NodeQuery().kinds(['function']).build(); + expect(sql).toContain('SELECT n.*'); + expect(sql).toContain('FROM nodes n'); + expect(sql).toContain('n.kind IN (?)'); + expect(params).toEqual(['function']); + }); + + it('.select() changes columns', () => { + const { sql } = new NodeQuery().select('n.name, n.kind').build(); + expect(sql).toContain('SELECT n.name, n.kind'); + }); + + it('.kinds() filters by kind', () => { + const rows = new NodeQuery().kinds(['function']).all(db); + expect(rows.every((r) => r.kind === 'function')).toBe(true); + }); + + it('.excludeTests() filters test files', () => { + const all = new NodeQuery().all(db); + const noTests = new NodeQuery().excludeTests(true).all(db); + expect(all.length).toBeGreaterThan(noTests.length); + expect(noTests.every((r) => !r.file.includes('.test.') && !r.file.includes('.spec.'))).toBe( + true, + ); + }); + + it('.excludeTests(false) is a no-op', () => { + const all = new NodeQuery().all(db); + const noOp = new NodeQuery().excludeTests(false).all(db); + expect(noOp.length).toBe(all.length); + }); + + it('.fileFilter() filters by file', () => { + const rows = new NodeQuery().fileFilter('foo').all(db); + expect(rows.every((r) => r.file.includes('foo'))).toBe(true); + }); + + it('.kindFilter() filters by exact kind', () => { + const rows = new NodeQuery().kindFilter('class').all(db); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('Baz'); + }); + + it('.roleFilter() filters by role', () => { + const rows = new NodeQuery().roleFilter('core').all(db); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('foo'); + }); + + it('.nameLike() filters by name pattern', () => { + const rows = new NodeQuery().nameLike('ba').all(db); + expect(rows.every((r) => r.name.toLowerCase().includes('ba'))).toBe(true); + }); + + it('.where() adds raw condition', () => { + const rows = new NodeQuery().where('n.line > ?', 5).all(db); + expect(rows.every((r) => r.line > 5)).toBe(true); + }); + + it('.withFanIn() adds fan-in join', () => { + const rows = new NodeQuery() + .select('n.name, COALESCE(fi.cnt, 0) AS fan_in') + .withFanIn() + .where("n.name = 'foo'") + .all(db); + expect(rows[0].fan_in).toBe(1); + }); + + it('.withComplexity() adds complexity join', () => { + const { sql } = new NodeQuery().withComplexity().build(); + expect(sql).toContain('function_complexity'); + }); + + it('.withChurn() adds churn join', () => { + const { sql } = new NodeQuery().withChurn().build(); + expect(sql).toContain('file_commit_counts'); + }); + + it('._join() adds raw join (internal API)', () => { + const { sql } = new NodeQuery()._join('JOIN node_metrics nm ON n.id = nm.node_id').build(); + expect(sql).toContain('JOIN node_metrics nm ON n.id = nm.node_id'); + }); + + it('does not expose a public .join() method', () => { + const q = new NodeQuery(); + expect(typeof q.join).toBe('undefined'); + }); + + it('.orderBy() adds ORDER BY', () => { + const { sql } = new NodeQuery().orderBy('n.file, n.line').build(); + expect(sql).toContain('ORDER BY n.file, n.line'); + }); + + it('.orderBy() accepts ASC/DESC modifiers', () => { + const { sql } = new NodeQuery().orderBy('n.file ASC, n.line DESC').build(); + expect(sql).toContain('ORDER BY n.file ASC, n.line DESC'); + }); + + it('.orderBy() rejects SQL injection', () => { + expect(() => new NodeQuery().orderBy('n.file; DROP TABLE nodes --')).toThrow( + 'Invalid ORDER BY term', + ); + expect(() => new NodeQuery().orderBy('1=1 --')).toThrow('Invalid ORDER BY term'); + }); + + it('.select() rejects SQL injection', () => { + expect(() => new NodeQuery().select('*; DROP TABLE nodes --')).toThrow( + 'Invalid SELECT expression', + ); + expect(() => new NodeQuery().select('1 UNION SELECT * FROM edges')).toThrow( + 'Invalid SELECT expression', + ); + }); + + it('.select() accepts COALESCE expressions', () => { + const { sql } = new NodeQuery().select('n.name, COALESCE(fi.cnt, 0) AS fan_in').build(); + expect(sql).toContain('SELECT n.name, COALESCE(fi.cnt, 0) AS fan_in'); + }); + + it('.limit() adds LIMIT param', () => { + const { sql, params } = new NodeQuery().limit(10).build(); + expect(sql).toContain('LIMIT ?'); + expect(params).toContain(10); + }); + + it('.get() returns first row', () => { + const row = new NodeQuery().where("n.name = 'foo'").get(db); + expect(row.name).toBe('foo'); + }); + + it('.iterate() returns an iterator', () => { + const iter = new NodeQuery().kinds(['function']).excludeTests(true).iterate(db); + const rows = [...iter]; + expect(rows.length).toBeGreaterThan(0); + expect(rows.every((r) => r.kind === 'function')).toBe(true); + }); + + it('chaining composes multiple conditions with AND', () => { + const { sql, params } = new NodeQuery() + .kinds(['function', 'method']) + .fileFilter('src') + .roleFilter('core') + .build(); + expect(sql).toContain('n.kind IN (?, ?)'); + expect(sql).toContain('n.file LIKE ?'); + expect(sql).toContain('n.role = ?'); + // All connected with AND + const whereClause = sql.split('WHERE')[1]; + expect(whereClause.match(/AND/g).length).toBe(2); + expect(params).toEqual(['function', 'method', '%src%', 'core']); + }); +}); diff --git a/tests/unit/repository.test.js b/tests/unit/repository.test.js new file mode 100644 index 00000000..aed2fabd --- /dev/null +++ b/tests/unit/repository.test.js @@ -0,0 +1,179 @@ +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initSchema } from '../../src/db/migrations.js'; +import { + countEdges, + countFiles, + countNodes, + findNodesForTriage, + findNodesWithFanIn, + iterateFunctionNodes, + listFunctionNodes, +} from '../../src/db/repository.js'; + +describe('repository', () => { + let db; + + beforeEach(() => { + db = new Database(':memory:'); + initSchema(db); + + const insertNode = db.prepare( + 'INSERT INTO nodes (name, kind, file, line, role) VALUES (?, ?, ?, ?, ?)', + ); + insertNode.run('foo', 'function', 'src/foo.js', 1, 'core'); + insertNode.run('bar', 'method', 'src/bar.js', 10, 'utility'); + insertNode.run('Baz', 'class', 'src/baz.js', 20, 'entry'); + insertNode.run('qux', 'interface', 'src/qux.js', 30, null); + insertNode.run('testFn', 'function', 'tests/foo.test.js', 1, null); + + // Edges + const fooId = db.prepare("SELECT id FROM nodes WHERE name = 'foo'").get().id; + const barId = db.prepare("SELECT id FROM nodes WHERE name = 'bar'").get().id; + const bazId = db.prepare("SELECT id FROM nodes WHERE name = 'Baz'").get().id; + db.prepare('INSERT INTO edges (source_id, target_id, kind) VALUES (?, ?, ?)').run( + barId, + fooId, + 'calls', + ); + db.prepare('INSERT INTO edges (source_id, target_id, kind) VALUES (?, ?, ?)').run( + bazId, + fooId, + 'calls', + ); + + // Complexity + db.prepare( + 'INSERT INTO function_complexity (node_id, cognitive, cyclomatic, max_nesting) VALUES (?, ?, ?, ?)', + ).run(fooId, 5, 3, 2); + }); + + afterEach(() => { + db.close(); + }); + + describe('findNodesWithFanIn', () => { + it('returns nodes with fan-in count', () => { + const rows = findNodesWithFanIn(db, '%foo%'); + const foo = rows.find((r) => r.name === 'foo'); + expect(foo).toBeDefined(); + expect(foo.fan_in).toBe(2); + }); + + it('filters by kinds', () => { + const rows = findNodesWithFanIn(db, '%foo%', { kinds: ['method'] }); + expect(rows.length).toBe(0); + }); + + it('filters by file', () => { + const rows = findNodesWithFanIn(db, '%foo%', { file: 'src' }); + expect(rows.every((r) => r.file.includes('src'))).toBe(true); + }); + }); + + describe('findNodesForTriage', () => { + it('returns function/method/class nodes with signals', () => { + const rows = findNodesForTriage(db); + expect(rows.length).toBe(4); // foo, bar, Baz, testFn + const foo = rows.find((r) => r.name === 'foo'); + expect(foo.fan_in).toBe(2); + expect(foo.cognitive).toBe(5); + }); + + it('excludes test files when noTests is set', () => { + const rows = findNodesForTriage(db, { noTests: true }); + expect(rows.every((r) => !r.file.includes('.test.'))).toBe(true); + }); + + it('filters by kind', () => { + const rows = findNodesForTriage(db, { kind: 'class' }); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('Baz'); + }); + + it('filters by role', () => { + const rows = findNodesForTriage(db, { role: 'core' }); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('foo'); + }); + + it('filters by file', () => { + const rows = findNodesForTriage(db, { file: 'bar' }); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('bar'); + }); + + it('throws on invalid role', () => { + expect(() => findNodesForTriage(db, { role: 'supervisor' })).toThrow('Invalid role'); + }); + }); + + describe('listFunctionNodes', () => { + it('returns function/method/class nodes', () => { + const rows = listFunctionNodes(db); + expect(rows.length).toBe(4); // foo, bar, Baz, testFn + expect(rows.every((r) => ['function', 'method', 'class'].includes(r.kind))).toBe(true); + }); + + it('filters by file', () => { + const rows = listFunctionNodes(db, { file: 'foo' }); + expect(rows.every((r) => r.file.includes('foo'))).toBe(true); + }); + + it('filters by pattern', () => { + const rows = listFunctionNodes(db, { pattern: 'Baz' }); + expect(rows.length).toBe(1); + expect(rows[0].name).toBe('Baz'); + }); + + it('excludes test files when noTests is set', () => { + const rows = listFunctionNodes(db, { noTests: true }); + expect(rows.every((r) => !r.file.includes('.test.'))).toBe(true); + expect(rows.length).toBe(3); // foo, bar, Baz — excludes testFn + }); + + it('orders by file, line', () => { + const rows = listFunctionNodes(db); + for (let i = 1; i < rows.length; i++) { + const prev = `${rows[i - 1].file}:${String(rows[i - 1].line).padStart(6, '0')}`; + const curr = `${rows[i].file}:${String(rows[i].line).padStart(6, '0')}`; + expect(prev <= curr).toBe(true); + } + }); + }); + + describe('iterateFunctionNodes', () => { + it('returns an iterator over function nodes', () => { + const iter = iterateFunctionNodes(db); + const rows = [...iter]; + expect(rows.length).toBe(4); + expect(rows.every((r) => ['function', 'method', 'class'].includes(r.kind))).toBe(true); + }); + + it('filters by file and pattern', () => { + const rows = [...iterateFunctionNodes(db, { file: 'foo', pattern: 'foo' })]; + expect(rows.length).toBeGreaterThan(0); + expect(rows.every((r) => r.file.includes('foo'))).toBe(true); + }); + + it('excludes test files when noTests is set', () => { + const rows = [...iterateFunctionNodes(db, { noTests: true })]; + expect(rows.every((r) => !r.file.includes('.test.'))).toBe(true); + expect(rows.length).toBe(3); + }); + }); + + describe('countNodes / countEdges / countFiles', () => { + it('countNodes returns total', () => { + expect(countNodes(db)).toBe(5); + }); + + it('countEdges returns total', () => { + expect(countEdges(db)).toBe(2); + }); + + it('countFiles returns distinct file count', () => { + expect(countFiles(db)).toBe(5); + }); + }); +});