diff --git a/src/cli.ts b/src/cli.ts
index 4dd7e35..4bc7e66 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -493,17 +493,23 @@ function addScanCommand(program: Command, version: string, deps: CliDeps): void
               ? true
               : (baseConfig.workflow_audits?.enabled ?? false),
           },
-          // When the target was a single local file that got staged into a
-          // temp dir (explicitCandidates set), walking the full user-scope
-          // tree is off-target: the user asked to scan one file, not their
-          // whole home. Leaving user-scope on here let sibling findings
-          // (e.g. `~/.agents/skills/*/SKILL.md`) leak into single-file
-          // scans of configs like `.claude/settings.json`. Explicit opt-in
-          // via `--include-user-scope` still forces it on.
+          // When the raw input was a single local file (now staged into a
+          // temp dir), walking the full user-scope tree is off-target — the
+          // user asked to scan one file, not their whole home. Without
+          // this guard, sibling findings (e.g. `~/.agents/skills/*/SKILL.md`)
+          // leak into scans of files like `.claude/settings.json` or
+          // `.idea/workspace.xml`.
+          //
+          // Earlier we gated on `explicitCandidates.length > 0`, but that
+          // check fails for files whose extension is not in the
+          // text-like format list (XML, binary-ish configs, etc.) — those
+          // produce zero explicit candidates, so the guard never fired.
+          // Using `stagedFromLocalFile` is the reliable signal.
+          // Explicit opt-in via `--include-user-scope` still forces it on.
           scan_user_scope:
             options.includeUserScope === true
               ? true
-              : resolvedTarget.explicitCandidates && resolvedTarget.explicitCandidates.length > 0
+              : resolvedTarget.stagedFromLocalFile === true
                 ? false
                 : (baseConfig.scan_user_scope ?? false),
         };
diff --git a/src/scan-target/staging.ts b/src/scan-target/staging.ts
index 685b333..eaf1acd 100644
--- a/src/scan-target/staging.ts
+++ b/src/scan-target/staging.ts
@@ -121,6 +121,7 @@ export function stageLocalFile(absolutePath: string): ResolvedScanTarget {
       scanTarget: tempRoot,
       displayTarget: absolutePath,
       explicitCandidates: collectExplicitCandidates(tempRoot),
+      stagedFromLocalFile: true,
       cleanup: () => cleanupTempDir(tempRoot),
     };
   }
@@ -134,6 +135,7 @@ export function stageLocalFile(absolutePath: string): ResolvedScanTarget {
     scanTarget: tempRoot,
     displayTarget: absolutePath,
     explicitCandidates: collectExplicitCandidates(tempRoot),
+    stagedFromLocalFile: true,
     cleanup: () => cleanupTempDir(tempRoot),
   };
 }
diff --git a/src/scan-target/types.ts b/src/scan-target/types.ts
index 0d9dbda..7912d96 100644
--- a/src/scan-target/types.ts
+++ b/src/scan-target/types.ts
@@ -12,6 +12,14 @@ export interface ResolvedScanTarget {
   scanTarget: string;
   displayTarget: string;
   explicitCandidates?: ExplicitScanCandidate[];
+  /**
+   * `true` when the raw input was a local file that got staged into a
+   * temp directory. This is the signal the CLI uses to disable the
+   * user-scope walk, regardless of whether `explicitCandidates` could be
+   * inferred for the file (an XML / binary / unrecognised extension
+   * would still benefit from the scope guard).
+   */
+  stagedFromLocalFile?: boolean;
   cleanup?: () => Promise<void> | void;
 }
 
diff --git a/tests/scan-target.test.ts b/tests/scan-target.test.ts
index 90f0322..b85d8dc 100644
--- a/tests/scan-target.test.ts
+++ b/tests/scan-target.test.ts
@@ -1,8 +1,9 @@
-import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
+import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, describe, expect, it, vi } from "vitest";
 import { createScanDiscoveryContext } from "../src/scan";
-import { cloneGitRepo } from "../src/scan-target/staging";
+import { cloneGitRepo, stageLocalFile } from "../src/scan-target/staging";
 
 const { cloneMock } = vi.hoisted(() => ({
   cloneMock: vi.fn((_: string, args: string[]) => {
@@ -282,3 +283,60 @@ describe("scan target resolver", () => {
     expect(existsSync(destination ?? "")).toBe(false);
   });
 });
+
+describe("stageLocalFile — stagedFromLocalFile flag", () => {
+  // Regression: PR #54 gated the CLI's user-scope guard on
+  // `explicitCandidates.length > 0`. For file types not in
+  // `inferTextLikeFormat` (e.g. `.xml`, `.idea/workspace.xml`), that
+  // list is empty and the guard never fired → sibling findings leaked
+  // into single-file scans. `stagedFromLocalFile` is the reliable
+  // signal regardless of what the file contains.
+
+  // Fixture directories and staged-copy cleanups registered by each
+  // test; released in `afterEach` so that even a failed assertion does
+  // not leave directories behind in the OS temp dir.
+  const fixtureDirs: string[] = [];
+  const stagedCleanups: Array<() => unknown> = [];
+
+  afterEach(async () => {
+    for (const cleanup of stagedCleanups.splice(0)) {
+      await cleanup();
+    }
+    for (const dir of fixtureDirs.splice(0)) {
+      rmSync(dir, { recursive: true, force: true });
+    }
+  });
+
+  it("flags staged local files even when the extension is unsupported for explicit-candidate inference", () => {
+    const home = mkdtempSync(join(tmpdir(), "codegate-stage-home-"));
+    fixtureDirs.push(home);
+    const xmlPath = join(home, ".idea", "workspace.xml");
+    mkdirSync(join(home, ".idea"), { recursive: true });
+    writeFileSync(xmlPath, `\n\n`, "utf8");
+
+    const resolved = stageLocalFile(xmlPath);
+    stagedCleanups.push(() => resolved.cleanup?.());
+
+    expect(resolved.stagedFromLocalFile).toBe(true);
+    expect(resolved.displayTarget).toBe(xmlPath);
+    expect(resolved.scanTarget).not.toBe(xmlPath);
+    // .xml is not in the text-like format list; this used to silently
+    // return [] and defeat PR #54's scope guard.
+    expect(resolved.explicitCandidates ?? []).toEqual([]);
+  });
+
+  it("flags staged local files for known formats too (no regression in the supported path)", () => {
+    const home = mkdtempSync(join(tmpdir(), "codegate-stage-home-json-"));
+    fixtureDirs.push(home);
+    const jsonPath = join(home, ".claude", "settings.json");
+    mkdirSync(join(home, ".claude"), { recursive: true });
+    writeFileSync(jsonPath, `{"hooks": {}}\n`, "utf8");
+
+    const resolved = stageLocalFile(jsonPath);
+    stagedCleanups.push(() => resolved.cleanup?.());
+
+    expect(resolved.stagedFromLocalFile).toBe(true);
+    expect(resolved.displayTarget).toBe(jsonPath);
+    expect(resolved.explicitCandidates?.length ?? 0).toBeGreaterThan(0);
+  });
+});