From 356b58cc52f513006203064bdf496c0491fbf52b Mon Sep 17 00:00:00 2001 From: Jonathan Santilli <1774227+jonathansantilli@users.noreply.github.com> Date: Wed, 22 Apr 2026 17:44:55 +0100 Subject: [PATCH] fix(scan): disable user-scope walk when CLI scans a single file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #53 closed the cross-scan leak for skill-dir scans but not for single-file scans of configs like ~/.claude/settings.json. Symptom: $ codegate-ai scan ~/.claude/settings.json → finding with file_path=~/.agents/skills/api-design-guide/.../SKILL.md Root cause: the CLI stages single-file targets into a temp dir outside $HOME. The staged dir is not inside homeDir, so shouldKeepUserScopeCandidate short-circuits to `return true` and every sibling user-scope match (e.g. a hidden-unicode hit in a completely unrelated skill) gets attributed to the config scan. Fix: - cli.ts: when resolvedTarget.explicitCandidates is non-empty (the target was a staged local file), force scan_user_scope=false for that scan. Explicit opt-in via --include-user-scope still overrides. This matches user expectation: "scan this file" ≠ "scan my whole home." - scan.ts: shouldKeepUserScopeCandidate now also handles engine-level file targets correctly (if the target is a file inside homeDir, only the target file itself is a valid user-scope candidate). This is defence in depth for library callers that bypass the CLI. Tests: - Existing 3 cases in tests/layer2/cross-scan-attribution.test.ts still pass. - New: engine-level file-target scan drops sibling user-scope candidates. Verified 154 test files / 720 tests pass. Lint + prettier clean. --- src/cli.ts | 13 +++++- src/scan.ts | 45 +++++++++++++++++---- tests/layer2/cross-scan-attribution.test.ts | 42 +++++++++++++++++++ 3 files changed, 92 insertions(+), 8 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index 3e57b9e..4dd7e35 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -493,8 +493,19 @@ function addScanCommand(program: Command, version: string, deps: CliDeps): void ? true : (baseConfig.workflow_audits?.enabled ?? false), }, + // When the target was a single local file that got staged into a + // temp dir (explicitCandidates set), walking the full user-scope + // tree is off-target: the user asked to scan one file, not their + // whole home. Leaving user-scope on here let sibling findings + // (e.g. `~/.agents/skills/*/SKILL.md`) leak into single-file + // scans of configs like `.claude/settings.json`. Explicit opt-in + // via `--include-user-scope` still forces it on. scan_user_scope: - options.includeUserScope === true ? true : (baseConfig.scan_user_scope ?? false), + options.includeUserScope === true + ? true + : resolvedTarget.explicitCandidates && resolvedTarget.explicitCandidates.length > 0 + ? false + : (baseConfig.scan_user_scope ?? false), }; if (options.resetState) { diff --git a/src/scan.ts b/src/scan.ts index 8d0036c..9038bea 100644 --- a/src/scan.ts +++ b/src/scan.ts @@ -260,22 +260,53 @@ function isPathInside(root: string, candidatePath: string): boolean { * User-scope patterns (e.g. `~/.agents/skills/*/SKILL.md`) walk the whole * home directory, so they can match files belonging to completely unrelated * skills or agents. When the scan target is itself a specific location - * **inside** the user's home — e.g. scanning a single skill directory — any - * user-scope match outside that scan target belongs to a different scan and - * must not be attributed here. + * **inside** the user's home — e.g. scanning a single skill directory or a + * single config file like `~/.claude/settings.json` — any user-scope match + * that does not belong to that target is a cross-scan leak and must be + * dropped. * - * When the scan target lives outside the home directory (for example a - * project root in a workspace), user-scope matches are accepted as legitimate - * host-wide context for that scan. + * Three cases: + * - `scanTarget` is a directory inside `homeDir`: only keep candidates inside + * that directory (existing PR #53 behavior). + * - `scanTarget` is a file inside `homeDir`: only keep candidates that resolve + * to that exact file. "Inside" semantics do not apply to files, so the + * previous check let every sibling through. + * - `scanTarget` lives outside the home directory (or cannot be stat'd, + * e.g. a URL or a staged path that has been cleaned up): user-scope + * matches are accepted as legitimate host-wide context. */ function shouldKeepUserScopeCandidate( scanTarget: string, homeDir: string, candidatePath: string, ): boolean { - if (isPathInside(homeDir, scanTarget)) { + if (!isPathInside(homeDir, scanTarget)) { + return true; + } + + // Follow symlinks the same way the rest of the scan code does (walker, + // wildcard-base check, `isRegularFile`): `statSync` resolves them. If the + // target cannot be stat'd (missing / permission denied / URL that was never + // a local path), fall through to the pre-PR-#53 outside-home behavior so + // we do not over-filter project-scope scans on unusual inputs. + let targetStat; + try { + targetStat = statSync(scanTarget); + } catch { + return true; + } + + if (targetStat.isFile()) { + // Nothing is "inside" a file. The only user-scope candidate that can + // legitimately belong to a file-target scan is the file itself. + return resolve(candidatePath) === resolve(scanTarget); + } + + if (targetStat.isDirectory()) { return isPathInside(scanTarget, candidatePath); } + + // Sockets, devices, etc. — behave like the outside-home case. return true; } diff --git a/tests/layer2/cross-scan-attribution.test.ts b/tests/layer2/cross-scan-attribution.test.ts index 50f1351..24136dd 100644 --- a/tests/layer2/cross-scan-attribution.test.ts +++ b/tests/layer2/cross-scan-attribution.test.ts @@ -155,4 +155,46 @@ describe("cross-scan attribution — Layer 2 hidden-unicode rule", () => { ), ).toBe(true); }); + + it("engine-level: file-target scan drops user-scope siblings", async () => { + // Covers the case where runScanEngine is called directly with a file + // target inside homeDir (library/embedded callers). The scope filter + // in `shouldKeepUserScopeCandidate` rejects every candidate that is + // not the target file itself. + // + // NB: the CLI stages file targets into a temp dir before calling the + // engine — see the "CLI-level" test below for that path. + const home = mkdtempSync(join(tmpdir(), "codegate-cross-scan-file-home-")); + + // Sibling: a skill with hidden Unicode under home. + mkdirSync(join(home, ".agents", "skills", "bar"), { recursive: true }); + writeFileSync( + join(home, ".agents", "skills", "bar", "SKILL.md"), + "Sibling skill​ with hidden zero-width space.\n", + "utf8", + ); + + // Target dir wrapping a single config file — simulates a consumer that + // has already placed the file in a dedicated dir (runScanEngine + // rejects bare files, hence the wrapper). + const targetDir = join(home, ".claude"); + mkdirSync(targetDir, { recursive: true }); + writeFileSync( + join(targetDir, "settings.json"), + `{\n "permissions": { "allow": [] }\n}\n`, + "utf8", + ); + + const report = await runScanEngine({ + version: "0.1.0", + scanTarget: targetDir, + config: BASE_CONFIG, + homeDir: home, + }); + + const leaked = report.findings.filter( + (f) => typeof f.file_path === "string" && f.file_path.includes(".agents/skills/bar"), + ); + expect(leaked).toEqual([]); + }); });