Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions generated/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ Consumers receive an engine object and call methods on it. They never branch on

**Current state:** The entire build pipeline is synchronous batch processing. Parse all files → insert all nodes → build all edges. The watcher does per-file updates but reimplements the pipeline in a simpler form.

**Problem:** For large repos (10K+ files), the user waits for the entire pipeline to complete before seeing anything. There's no progress reporting during parsing. There's no way to cancel a build mid-flight. The watcher's simplified pipeline diverges from the main build path (different code, different edge cases).
**Problem:** For large repos (10K+ files), the user waits for the entire pipeline to complete before seeing anything. There's no progress reporting during parsing. There's no way to cancel a build mid-flight. The watcher's simplified pipeline diverges from the main build path (different code, different edge cases). *(Note: two concrete edge cases — concurrent file edits causing EBUSY/EACCES/EPERM during read, and symlink loops causing infinite recursion in `collectFiles` — have been fixed. `readFileSafe` retries on transient OS errors and is shared between `builder.js` and `watcher.js`. `collectFiles` tracks visited real paths to break symlink cycles.)*

**Ideal architecture — event-driven pipeline:**

Expand Down Expand Up @@ -473,6 +473,8 @@ This is a simple LRU or TTL cache that sits between the analysis layer and the r

**Problem:** Bug fixes to edge building in `builder.js` must be separately applied to `watcher.js`. The watcher's edge building is simpler (no barrel resolution, simpler confidence) which means watch-mode graphs are subtly different from full-build graphs.

**Partial progress:** `readFileSafe` (exported from `builder.js`, imported by `watcher.js`) is the first shared utility between the two modules. It retries on transient OS errors (EBUSY/EACCES/EPERM) that occur when editors perform non-atomic saves, replacing bare `readFileSync` calls in both code paths. This is a small step toward the shared-stages goal.

**Ideal fix:** The pipeline architecture from point #4 eliminates this entirely. Watch mode uses the same pipeline stages, just triggered per-file instead of per-project. The `insertNodes` and `buildEdges` stages are literally the same functions.

---
Expand Down Expand Up @@ -583,7 +585,7 @@ Consumers can only import from the documented entry points. Internal modules are
| 9 | Transitive import-aware confidence | Low-Medium | Accuracy |
| 14 | Query result caching | Low | Performance |
| 8 | Config profiles for monorepos | Low | Feature |
| 15 | Unify watcher/builder code paths | Low | Falls out of #4 |
| 15 | Unify watcher/builder code paths | Low | Falls out of #4 (partial: `readFileSafe` shared) |

Items 1–4 and 6 are foundational — they restructure the core and everything else becomes easier after them. Items 13 and 7 are the most impactful feature-level changes. Items 14–15 are natural consequences of earlier changes.

Expand Down
36 changes: 33 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 47 additions & 5 deletions src/builder.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,28 @@ const BUILTIN_RECEIVERS = new Set([
'require',
]);

export function collectFiles(dir, files = [], config = {}, directories = null) {
export function collectFiles(
dir,
files = [],
config = {},
directories = null,
_visited = new Set(),
) {
const trackDirs = directories !== null;

// Resolve real path to detect symlink loops
let realDir;
try {
realDir = fs.realpathSync(dir);
} catch {
return trackDirs ? { files, directories } : files;
}
if (_visited.has(realDir)) {
warn(`Symlink loop detected, skipping: ${dir}`);
return trackDirs ? { files, directories } : files;
}
_visited.add(realDir);

let entries;
try {
entries = fs.readdirSync(dir, { withFileTypes: true });
Expand All @@ -67,7 +87,7 @@ export function collectFiles(dir, files = [], config = {}, directories = null) {

const full = path.join(dir, entry.name);
if (entry.isDirectory()) {
collectFiles(full, files, config, directories);
collectFiles(full, files, config, directories, _visited);
} else if (EXTENSIONS.has(path.extname(entry.name))) {
files.push(full);
hasFiles = true;
Expand Down Expand Up @@ -125,6 +145,28 @@ function fileStat(filePath) {
}
}

/**
 * Error codes treated as transient by `readFileSafe`. Editors performing
 * non-atomic saves can cause these while a file is mid-write.
 */
const TRANSIENT_CODES = new Set(['EBUSY', 'EACCES', 'EPERM']);

/** Pause between read attempts, in milliseconds. */
const RETRY_DELAY_MS = 50;

/**
 * Synchronously read a file as UTF-8, retrying on transient errors
 * (EBUSY/EACCES/EPERM).
 *
 * Any error whose `code` is not in `TRANSIENT_CODES` is rethrown immediately.
 * A transient error is rethrown once `retries` additional attempts have been
 * used up, so the file is read at most `retries + 1` times.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {number} [retries=2] - Extra attempts allowed after the first failure.
 * @returns {string} The file contents decoded as UTF-8.
 * @throws {Error} The last error raised by `fs.readFileSync`.
 */
export function readFileSafe(filePath, retries = 2) {
  for (let attempt = 0; ; attempt++) {
    try {
      return fs.readFileSync(filePath, 'utf-8');
    } catch (err) {
      if (attempt >= retries || !TRANSIENT_CODES.has(err.code)) {
        throw err;
      }
      // Block this thread for the retry delay without spinning the CPU:
      // waiting on a private, never-notified buffer simply times out.
      Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, RETRY_DELAY_MS);
    }
  }
}

/**
* Determine which files have changed since last build.
* Three-tier cascade:
Expand Down Expand Up @@ -193,7 +235,7 @@ function getChangedFiles(db, allFiles, rootDir) {

let content;
try {
content = fs.readFileSync(absPath, 'utf-8');
content = readFileSafe(absPath);
} catch {
continue;
}
Expand Down Expand Up @@ -256,7 +298,7 @@ function getChangedFiles(db, allFiles, rootDir) {
for (const item of needsHash) {
let content;
try {
content = fs.readFileSync(item.file, 'utf-8');
content = readFileSafe(item.file);
} catch {
continue;
}
Expand Down Expand Up @@ -459,7 +501,7 @@ export async function buildGraph(rootDir, opts = {}) {
const absPath = path.join(rootDir, relPath);
let code;
try {
code = fs.readFileSync(absPath, 'utf-8');
code = readFileSafe(absPath);
} catch {
code = null;
}
Expand Down
3 changes: 2 additions & 1 deletion src/watcher.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import fs from 'node:fs';
import path from 'node:path';
import { readFileSafe } from './builder.js';
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
import { initSchema, openDb } from './db.js';
import { appendJournalEntries } from './journal.js';
Expand Down Expand Up @@ -35,7 +36,7 @@ async function updateFile(_db, rootDir, filePath, stmts, engineOpts, cache) {

let code;
try {
code = fs.readFileSync(filePath, 'utf-8');
code = readFileSafe(filePath);
} catch (err) {
warn(`Cannot read ${relPath}: ${err.message}`);
return null;
Expand Down