diff --git a/AGENTS.md b/AGENTS.md new file mode 120000 index 00000000..681311eb --- /dev/null +++ b/AGENTS.md @@ -0,0 +1 @@ +CLAUDE.md \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index b24fe938..d8bdc5de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ All notable user-visible changes to CASCADE are documented here. The format is l ### Changed +- **`cascade-tools scm create-pr-review`: `--comment` alias + `--comments-file` escape hatch** (spec 014, plan 2 of 2). The command now accepts `--comment` (singular) as an alias for `--comments` — the exact muscle-memory mistake from prod run 5d993b04 now resolves correctly. Added `--comments-file ` (and `-` for stdin) as a JSON-parsed file alternative for long payloads that don't survive shell quoting. Zero edits to shared infrastructure (cliCommandFactory, manifestGenerator, nativeToolPrompts, errorEnvelope) — the two declarative fields on `createPRReviewDef.parameters.comments.cliAliases` + `createPRReviewDef.cli.fileInputAlternatives` are everything. Proves spec 014's single-entrypoint invariant: a new or evolved gadget should never need to touch shared machinery. See [spec 014](docs/specs/014-cascade-tools-agent-ergonomics.md). +- **`cascade-tools` agent ergonomics: truthful system prompt, runnable `--help`, structured error envelope** (spec 014, plan 1 of 2). The system-prompt renderer that describes every cascade-tools command to agents now tells the truth about array-shaped parameters — no more silent `s`-stripping of names, no more ` (repeatable)` claim for array-of-object flags (they correctly render as `-- ''` now, with aliases appended via `|` and a one-line runnable JSON example inlined from the tool definition's `examples` block). Every CLI failure — flag-parse, JSON-parse, missing-required, enum-mismatch, unknown-flag, auth, runtime — emits a single structured envelope on stdout (`{"success":false,"error":{type,flag?,message,got?,expected?,hint?,example?}}`) plus a short prose summary on stderr for humans, replacing the ad-hoc mix of `this.error()` prose and `{success:false,error:""}` flat shapes. Mistyped flags get a "did you mean" suggestion via Levenshtein match against declared canonical names + aliases. `--help` now renders `def.examples` as copy-pasteable shell invocations under an `EXAMPLES` section. Root-caused by prod run 5d993b04-6e05-4ae1-b7de-8c274cf3496b where a review agent wasted ~2½ min fighting the prior pre-014 surface and ultimately dropped an inline PR comment. See [spec 014](docs/specs/014-cascade-tools-agent-ergonomics.md) + authoring guide at [`src/gadgets/README.md`](src/gadgets/README.md). - **`cascade-tools` now streams subprocess output live** (spec 013). The shared subprocess helper (on top of `execa` + `tree-kill`) forwards child stdout/stderr to the parent's stderr line-by-line as it arrives, emits a heartbeat line on stderr every 30 seconds of child silence (configurable), enforces both an idle-silence timeout (default 120s) and a wall-clock timeout (default 600s) with SIGTERM→SIGKILL escalation, and kills the full process tree on timeout. `git push` and `git commit` invoked by `scm create-pr` pass tighter per-caller timeouts and now return captured hook output in the result on success (previously discarded). Result shape is backward-compatible — `{ stdout, stderr, exitCode }` preserved; new optional `reason: 'idle-timeout' | 'wall-timeout'` surfaces when the helper killed the child. Motivation: LLM-driven CASCADE agents watching an output file could not distinguish a slow pre-push hook (~60s of silence) from a hung process, leading to retry loops that burned 5–10+ minutes of run budget. See [spec 013](docs/specs/013-subprocess-output-streaming.md). - **`cascade-tools` `command bootstrap not found` warning silenced** (spec 013). The oclif command-loader glob now excludes `bootstrap.js`, which is a side-effect import from `bin/cascade-tools.js`, not a command. - **Linear and JIRA checklists are now inline markdown, not sub-issues / subtasks.** Acceptance criteria, implementation steps, and other checklist items added by CASCADE agents (via `AddChecklist` / `AddChecklistItem`) now live as `- [ ]` / `- [x]` markdown checkboxes inside the parent issue's description, under a `### {Checklist Name}` heading. Previously these created full sub-issues (Linear) or subtasks (JIRA) — one per item — which cluttered boards and inflated backlog counts (a single split could create 30+ orphan items). The PMProvider interface is unchanged; only the Linear and JIRA adapter internals changed. Trello continues to use its native checklist API. Forward-only — existing sub-issues / subtasks created before this change are not migrated. See [spec 008](docs/specs/008-inline-checklists.md.done) and the new "Checklist implementation by provider" section in [src/integrations/README.md](src/integrations/README.md). diff --git a/CLAUDE.md b/CLAUDE.md index 39ceea29..a39e5e33 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -121,6 +121,8 @@ Some triggers take params (e.g. `review` + `scm:check-suite-success` accepts `{" **Post-completion review dispatch** — when an implementation agent succeeds with a PR, the execution pipeline checks CI status and fires the review agent deterministically (before the container exits). This guarantees review dispatch within seconds of implementation completion, regardless of GitHub webhook timing. Uses the same `claimReviewDispatch` dedup key as the `check-suite-success` trigger, so the two paths cannot double-enqueue. +**Worker exit diagnostics** — when a worker container exits non-zero, the router calls `container.inspect()` *before* AutoRemove reaps it and stamps the run record's `error` field with a structured, grep-stable string: `Worker crashed with exit code N · OOMKilled= · reason=""`. The `OOMKilled=true` marker is the definitive cgroup-OOM signal (per Docker's own `State.OOMKilled`); a 137 exit *without* `OOMKilled=true` means the kill came from inside the container or from a non-cgroup signal — *not* memory. The `[WorkerManager] Resolved spawn settings` log emitted at every spawn includes both `projectWatchdogTimeoutMs` and `globalWorkerTimeoutMs` so post-mortems can confirm whether the per-project override actually won. See `src/router/active-workers.ts:formatCrashReason` for the format and `tests/unit/router/container-manager-diagnostics.test.ts` for regression pins. + ## Review agent — context shape (debugging) Review agent receives a **compact per-file diff context**, not full file contents. Each changed file is a `### (, +N -M)` section with a unified diff hunk. Budget: `REVIEW_DIFF_CONTEXT_TOKEN_LIMIT` = 200k tokens, per-file cap 10%. diff --git a/bin/cascade-tools.js b/bin/cascade-tools.js index 261257a9..0d435e63 100755 --- a/bin/cascade-tools.js +++ b/bin/cascade-tools.js @@ -27,4 +27,18 @@ pjson.oclif = { }; const config = await Config.load({ root, pjson }); -await run(process.argv.slice(2), config); +try { + await run(process.argv.slice(2), config); +} catch (err) { + // oclif's `this.exit(code)` throws an ExitError. We've already emitted the + // cascade-tools error envelope (stdout JSON + stderr prose) at that point; + // propagating the ExitError to Node's default handler would spew a stack + // trace that obscures our readable prose. Swallow ExitError quietly and + // let the exit code stand. Anything else still propagates. + const code = + typeof err?.oclif?.exit === 'number' ? err.oclif.exit : err?.code === 'EEXIT' ? 1 : undefined; + if (code !== undefined) { + process.exit(code); + } + throw err; +} diff --git a/docs/plans/014-cascade-tools-agent-ergonomics/1-shared-infra.md.done b/docs/plans/014-cascade-tools-agent-ergonomics/1-shared-infra.md.done new file mode 100644 index 00000000..d9a744d1 --- /dev/null +++ b/docs/plans/014-cascade-tools-agent-ergonomics/1-shared-infra.md.done @@ -0,0 +1,280 @@ +--- +id: 014 +slug: cascade-tools-agent-ergonomics +plan: 1 +plan_slug: shared-infra +level: plan +parent_spec: docs/specs/014-cascade-tools-agent-ergonomics.md +depends_on: [] +status: done +--- + +# 014/1: cascade-tools shared infra — truthful prompts, structured errors, fuzzy flags, help examples + +> Part 1 of 2 in the 014-cascade-tools-agent-ergonomics plan. See [parent spec](../../specs/014-cascade-tools-agent-ergonomics.md). + +## Summary + +Ships the root-cause fix from prod run `5d993b04-6e05-4ae1-b7de-8c274cf3496b` and all the shared infrastructure that supports it. Three surfaces change: (a) the agent-facing system-prompt renderer stops lying about array parameter names and shapes; (b) the oclif command factory gains flag aliases, JSON parsing for array-of-object flags, a structured error envelope, fuzzy flag suggestion, and help examples; (c) the tool-manifest generator threads the new metadata through. Contracts widen: `ParameterDefinition` gains `cliAliases?`, `FileInputAlternative` gains `parseAs?`, and `ToolManifest` parameters carry `items?` / `aliases?` / `example?`. + +Zero production gadget definitions change here. Every new code path is exercised by a minimal fake `ToolDefinition` fixture so the infra is provably complete before plan 2 adopts it. The prompt-renderer fix *does* apply to every gadget immediately — an agent querying `cascade-tools` after this plan merges sees truthful tool guidance across the board, even before plan 2. + +**Components delivered:** +- `src/gadgets/shared/toolDefinition.ts` — widened types. +- `src/agents/contracts/index.ts` — widened `ToolManifest`. +- `src/gadgets/shared/errorEnvelope.ts` (new) — `CliErrorEnvelope` type + `emitCliError()` helper. +- `src/gadgets/shared/cliCommandFactory.ts` — aliases, JSON parse, error envelope integration, fuzzy suggestion, help examples. +- `src/gadgets/shared/manifestGenerator.ts` — thread items/aliases/example. +- `src/backends/shared/nativeToolPrompts.ts` — rewrite array branch in `formatParam`, render aliases + example inline. +- `src/gadgets/README.md` (new) — authoring guide. +- `src/integrations/README.md` — one-line cross-reference to gadgets README. +- `CHANGELOG.md` — entry. +- `package.json` + lockfile — add `fastest-levenshtein`. +- Tests (≈25) across `tests/unit/gadgets/shared/` + `tests/unit/backends/`. + +**Deferred to later plans in this spec:** +- Plan 2 applies the pattern to `createPRReviewDef` (adds `cliAliases: ['comment']`, `fileInputAlternatives` for `--comments-file`, integration smoke test). + +--- + +## Spec ACs satisfied by this plan + +- Spec AC #2 (`create-pr-review --help` has EXAMPLES section) — **full** (the factory wiring that renders `def.examples` into oclif help lands here; `createPRReviewDef.examples` already exists today). +- Spec AC #3 (structured stdout error with flag/got/expected/hint) — **full**. +- Spec AC #4 (stable envelope schema across every failure path) — **full**. +- Spec AC #5 ("did you mean" flag suggestions) — **full**. +- Spec AC #6 (truthful system-prompt rendering, no `s`-stripping, JSON marker) — **full**. +- Spec AC #7 (no regressions on primitive-array gadgets) — **full**. +- Spec AC #8 (new gadget = declarative metadata only) — **partial** — proven here via a fake test gadget; plan 2 proves it with a production gadget by not touching any shared file. +- Spec AC #10 [manual] (stderr prose readable) — **full** (error envelope emits prose summary to stderr). + +--- + +## Depends On + +- No other plan (layer 0). +- Existing code: oclif v4.x, `@oclif/core` `Flags.*` supporting `aliases`, the current `ToolDefinition`/`ToolManifest` shapes, existing `createPRReviewDef.examples` block. + +--- + +## Detailed Task List (TDD) + +### 1. Widen contracts + +**Tests first** (`tests/unit/gadgets/shared/manifestGenerator.test.ts`, new file): + +- `threads items from array parameter into manifest` — unit — given a `ToolDefinition` with `parameters.x = { type: 'array', items: 'object', describe: 'x' }`, `generateToolManifest(def).parameters.x.items` equals `'object'`. Expected red: `TypeError: generated manifest parameter missing 'items' field` (or equivalent once the field is absent). +- `threads cliAliases into manifest as aliases` — unit — param with `cliAliases: ['comment']` produces manifest entry with `aliases: ['comment']`. Expected red: `expected 'aliases' to equal ['comment'], received undefined`. +- `threads first matching example param value into manifest as example` — unit — def with `examples: [{ params: { comments: [{path:'a',line:1,body:'b'}] } }]` produces `parameters.comments.example` deep-equal to that array. Expected red: `expected parameter to have 'example' property`. +- `primitive-array parameter still produces items:'string'` — unit — `{type:'array', items:'string'}` manifest round-trips. Expected red: `expected 'items' to equal 'string', received undefined` (when we haven't wired it yet). +- `gadgetOnly params are still excluded` — unit — regression guard. Expected red: n/a (existing behavior; this test locks it). + +**Implementation** (`src/gadgets/shared/toolDefinition.ts`): +- Add `readonly cliAliases?: readonly string[]` to `ParameterDefinition` discriminated union (all branches inherit via shared base). +- Add optional `readonly parseAs?: 'string' | 'json'` to `FileInputAlternative` (default `'string'`). + +**Implementation** (`src/agents/contracts/index.ts`): +- Widen `ToolManifest.parameters` entry shape to include optional `items?: string`, `aliases?: readonly string[]`, `example?: unknown`. + +**Implementation** (`src/gadgets/shared/manifestGenerator.ts`): +- `buildManifestParam`: when `def.type === 'array'`, copy `items` to entry. Copy `def.cliAliases` to `entry.aliases` when present. In `generateToolManifest`, after building each entry, scan `def.examples ?? []` in order and take the **first** example whose `params[name]` is defined; attach as `entry.example`. + +### 2. Error envelope + +**Tests first** (`tests/unit/gadgets/shared/errorEnvelope.test.ts`, new file): + +- `emits stable JSON shape to stdout` — unit — `emitCliError({type:'json-parse', flag:'comments', got:'[{\'a\':1}]', expected:'[{"path":…}]', hint:'use --comments-file'})` writes one line of JSON to the captured stdout containing exactly the keys `success=false`, `error.type`, `error.flag`, `error.message`, `error.got`, `error.expected`, `error.hint`. Expected red: `TypeError: emitCliError is not a function` (module doesn't exist yet). +- `mirrors prose summary to stderr` — unit — same invocation writes ≤120 chars of prose (no ANSI) to stderr including the flag name. Expected red: `expected stderr length > 0, received 0`. +- `truncates 'got' to ~80 chars with ellipsis` — unit — passing a 500-char input yields `error.got` length ≤ 83 ending in `...`. Expected red: `expected 83, received 500`. +- `example field optional; omitted from output when absent` — unit — when no `example` provided, the JSON does not include the key. Expected red: `expected key 'example' to be absent`. +- `exits with code 1` — unit — spies on `process.exit` and asserts code 1. Expected red: `expected process.exit called with 1, received 0` (or not called). + +**Implementation** (`src/gadgets/shared/errorEnvelope.ts`, new): +- Export interface `CliErrorEnvelope` with fields `{ type: 'flag-parse' | 'json-parse' | 'missing-required' | 'enum-mismatch' | 'unknown-flag' | 'auth' | 'runtime'; flag?: string; message: string; got?: string; expected?: string; hint?: string; example?: string }`. +- Export `emitCliError(opts: CliErrorEnvelope & { stdout?: NodeJS.WritableStream; stderr?: NodeJS.WritableStream; exit?: (code: number) => never }): never` — writes `JSON.stringify({ success: false, error: })` + `\n` to stdout, writes a one-line prose summary like `--: ` to stderr, calls `exit(1)`. Streams/exit are injectable for testability; default to `process.stdout` / `process.stderr` / `process.exit`. +- Helper `truncateGot(s: string, max = 80): string` used internally. + +### 3. Factory: flag aliases + help examples + JSON parse for array-of-object + +**Tests first** (`tests/unit/gadgets/shared/cliCommandFactory.test.ts`, new file): + +- `applies cliAliases to generated oclif flag` — unit — builds a Command class from a fake def with `{type:'string', cliAliases:['x-alias']}` on a flag named `x`; introspects `FactoryCommand.flags.x.aliases` and asserts `['x-alias']`. Expected red: `expected ['x-alias'], received undefined`. +- `invocation with alias flag resolves to canonical param name` — unit — spawns Command with args `['--x-alias', 'v']`, coreFn receives `{x:'v'}`. Expected red: alias not recognized, oclif throws "Unknown flag --x-alias". +- `wires def.examples onto FactoryCommand.examples` — unit — static `.examples` array non-empty; each entry is a string containing the command's kebab name and serialized params. Expected red: `expected FactoryCommand.examples to have length > 0, received undefined`. +- `array + items:'object' flag value parses as JSON` — unit — coreFn receives parsed array when flag value is `'[{"k":"v"}]'`. Expected red: `expected array, received string`. +- `array + items:'object' with malformed JSON emits envelope via emitCliError` — unit — mocks `emitCliError`; passes `"[{'k':'v'}]"` (single-quoted keys); asserts `emitCliError` called once with `type:'json-parse'`, `flag:''`, `got` present, `expected` present derived from the param's `example`. Expected red: `expected emitCliError to be called, received 0 calls`. +- `array + items:'string' stays multiple:true (no regression)` — unit — coreFn receives string array on `['--labels','a','--labels','b']`. Expected red: n/a — locks current behavior. +- `fileInputAlternatives with parseAs:'json' parses file contents` — unit — stubs filesystem; flag value from file is JSON-parsed before handing to coreFn. Expected red: `expected array, received string`. +- `fileInputAlternatives with parseAs:'json' malformed JSON emits envelope` — unit — same pattern as above but via the file flag; hint should still reference `--X-file` where applicable. Expected red: `expected emitCliError to be called`. +- `required-missing uses emitCliError with type:'missing-required'` — unit — locks existing behavior under the new envelope. + +**Implementation** (`src/gadgets/shared/cliCommandFactory.ts`): +- `buildOclifFlag`: when `def.cliAliases?.length > 0`, pass `aliases: [...def.cliAliases]` into `Flags.*` call for every type branch. oclif v4 accepts `aliases: string[]` on string/integer/boolean/enum flags. +- New branch in `resolveDirectParams`: when `paramDef.type === 'array' && paramDef.items === 'object'`, read raw flag value (which oclif returns as `string[]` due to `multiple:true` — treat length-1 as the JSON blob; length>1 is invalid and emits a runtime envelope `type:'flag-parse'`), `JSON.parse` it, emit envelope with `type:'json-parse'` on failure including `got` (truncated) and `expected` (derived from `paramDef.example`-via-manifest OR from `paramDef.describe`). +- `resolveFileInputParam`: when the param's `FileInputAlternative.parseAs === 'json'`, `JSON.parse` the file contents before assignment; emit envelope on parse failure with hint pointing back to stdin shape. +- `createCLICommand` return class: add `static override examples = buildExamplesForOclif(def)` where `buildExamplesForOclif` renders each `def.examples[i]` as a single-line shell invocation using the derived kebab command + flag-flattened params (reuse `deriveCLICommand` from `manifestGenerator`). Array/object params in examples are JSON-serialized and single-quoted for shell. +- Replace the two existing `command.error(...)` sites with `emitCliError(...)` calls carrying the appropriate `type`/`flag`/`got`/`expected`/`hint`. +- The gadget runtime-error `catch` block (currently `{success:false,error:message}`) stays on stdout but routes through the same envelope shape with `type:'runtime'`. + +### 4. Factory: fuzzy flag suggestion + +**Tests first** (`tests/unit/gadgets/shared/cliCommandFactory.test.ts`, extend): + +- `unknown flag close to real one suggests correction` — unit — def with flag `comments`; invoked with `--comnent`; emitCliError called with `type:'unknown-flag'`, `hint` containing `did you mean --comments?`. Expected red: `expected hint to contain 'did you mean', received undefined`. +- `unknown flag not close to any real one emits envelope without suggestion` — unit — invoked with `--zzzz`; envelope has no `hint` or hint without "did you mean". Expected red: n/a — locks that we don't suggest wildly wrong matches. +- `suggestion considers declared aliases too` — unit — def flag `comments` with `cliAliases:['comment']`; invoked with `--coment` (closer to alias). Suggestion points to canonical `--comments`, not the alias (to canonical-ize). Expected red: `expected suggestion '--comments', received '--comment'`. + +**Implementation** (`src/gadgets/shared/cliCommandFactory.ts`): +- Override oclif's default "unknown flag" error path. The cleanest hook: wrap `this.parse(FactoryCommand)` in try/catch; catch oclif's `ParserError` with `message` starting with `Nonexistent flag`, extract the offending token, compute Levenshtein distance against all declared flag names + `fileInputAlternatives.fileFlag` + canonical forms of aliases. If the min distance is ≤ 2 (tunable constant) and ≤ 40% of target length, include `hint: 'did you mean --?'` — always canonical name, never an alias. +- Same intercept path emits envelopes for other oclif parser errors (`Missing required flag`, `Expected --x=…` etc.) with `type:'missing-required'` / `'flag-parse'`. +- Import from `fastest-levenshtein`: `import { distance } from 'fastest-levenshtein'`. + +### 5. Prompt renderer + +**Tests first** (`tests/unit/backends/shared-nativeToolPrompts.test.ts`, extend existing file): + +- `array of object renders with plural name and JSON shape` — unit — manifest with `parameters.comments = {type:'array', items:'object', description:'…'}`; `buildToolGuidance` output contains `--comments ''`, does NOT contain `--comment ` (trailing space — rules out both singular and "comment '"). Expected red: `expected to contain --comments, received --comment (repeatable)`. +- `array of object with example renders one-line example comment` — unit — manifest with `parameters.comments.example = [{path:'src/x',line:1,body:'y'}]`; output contains `example: --comments '[{"path":"src/x","line":1,"body":"y"}]'`. Expected red: `expected output to contain 'example:'`. +- `array of object with aliases renders them next to the flag` — unit — `parameters.comments.aliases = ['comment']`; output contains `--comments|--comment ''`. Expected red: `expected output to contain '|'`. +- `array of string stays repeatable and keeps plural name` — unit — `parameters.labels = {type:'array', items:'string'}`; output contains `--labels (repeatable)`, NOT `--label ` (regression guard for the `s`-stripping bug). Expected red: `expected --labels, received --label (repeatable)`. +- `object param renders with JSON shape + example` — unit — covers object-not-array case. Expected red: current code doesn't special-case object; test locks new behavior. +- `string param unchanged` — unit — regression guard for primitive shape. Expected red: n/a. +- `required vs optional bracket handling preserved` — unit — `[...]` around optional, bare for required. Expected red: n/a. + +**Implementation** (`src/backends/shared/nativeToolPrompts.ts`): +- Rewrite `formatParam` array branch: + - If `schema.items === 'object'`: emit ` --{key}[|--{alias}...] ''` using the **actual** key (no `s`-strip). Append ` # ${schema.description}` on the same line if present. Append on a new indented line ` # example: --${key} ''` when `schema.example !== undefined`. + - If `schema.items === 'string'` (primitive array): keep current `--{key} (repeatable)` semantics but use the **actual** key (no `s`-strip). +- Add an `object` branch mirroring the array-of-object path (single JSON blob, not an array). +- `formatParam` signature updated to accept the widened manifest entry type; render aliases via `(schema.aliases ?? []).map(a => \`|--${a}\`).join('')` in the flag header. + +### 6. Add fastest-levenshtein dependency + +**Tests first**: n/a (dep-only change, exercised by fuzzy-flag tests in step 4). + +**Implementation**: +- `package.json`: add `"fastest-levenshtein": "^1.0.16"` to `dependencies`. +- Run `npm install` locally to update the lockfile. + +### 7. Docs + +**Implementation** (`src/gadgets/README.md`, new): +- ~60-line authoring guide: the three declarative metadata fields (`cliAliases`, `fileInputAlternatives`, `examples`), the error envelope contract, the single-entrypoint invariant (no gadget edits the shared renderer/factory/prompt builder), a short worked example using a simple `PostCommentDef`-style gadget. + +**Implementation** (`src/integrations/README.md`): +- Add a single paragraph at the top referencing `src/gadgets/README.md` for cascade-tools gadget authoring (since PM providers also register gadgets). + +**Implementation** (`CHANGELOG.md`): +- Entry under the next unreleased version: "cascade-tools: system-prompt renderer now tells the truth about every parameter's shape; CLI failures emit a structured `{success:false,error:{…}}` envelope on stdout; unknown flags receive 'did you mean' suggestions; `--help` shows runnable examples from the tool definition." + +--- + +## Test Plan + +### Unit tests +- [ ] `tests/unit/gadgets/shared/manifestGenerator.test.ts` (new): 5 tests — items threading, cliAliases threading, example extraction, primitive-array regression, gadgetOnly exclusion regression. +- [ ] `tests/unit/gadgets/shared/errorEnvelope.test.ts` (new): 5 tests — JSON shape, stderr prose, truncation, optional fields, exit code. +- [ ] `tests/unit/gadgets/shared/cliCommandFactory.test.ts` (new): 10 tests — alias wiring, alias invocation, help examples, JSON parse array-of-object, malformed JSON envelope, primitive array no-regression, file-input JSON parse, file-input bad JSON envelope, missing-required envelope, fuzzy suggestion (3 sub-tests). +- [ ] `tests/unit/backends/shared-nativeToolPrompts.test.ts` (extend): 7 new tests — array-of-object rendering, example line, alias header, primitive-array regression, object rendering, string regression, bracket handling. + +### Integration tests +- n/a (no binary smoke in plan 1; plan 2 covers that). + +### Acceptance tests +- All per-plan ACs below are auto-verified by the unit suite + lint/typecheck, except AC #9 which is `[manual]`. + +--- + +## Manual Verification + +- **AC**: per-plan AC #9 (stderr prose is a readable summary without ANSI garbage or 300-char wrap disasters). +- **Why manual**: readability of a short human-facing error line is a subjective-ergonomics check. Auto-tests can bound length and exclude ANSI escapes, but "is this line something a human operator can scan in one glance?" is not something a test can assert. +- **Verification protocol**: + 1. `npm run build` + 2. `./bin/cascade-tools.js scm create-pr-review --owner x --repo y --prNumber 1 --event COMMENT --body b 1>/tmp/cc.out 2>/tmp/cc.err; cat /tmp/cc.err` + 3. Confirm stderr is a single line, ≤120 characters, no ANSI escape codes, reads as English ("--comments: json-parse — expected [{…}], got '[{…'"), and does NOT include the full JSON envelope. + 4. Repeat with a malformed `--comments` value; confirm the prose includes the flag name and mentions `--comments-file` or "use double quotes" as a hint (if the factory hint path fired). Exact wording need not match a fixed string — readability is the criterion. + +--- + +## Acceptance Criteria (per-plan, testable) + +1. `ParameterDefinition` accepts `cliAliases?: readonly string[]`; `FileInputAlternative` accepts `parseAs?: 'string' | 'json'`; `ToolManifest` parameters carry optional `items` / `aliases` / `example`. Type-check passes. +2. `generateToolManifest` threads `items` / `cliAliases → aliases` / first matching `example` into manifest entries. +3. `emitCliError` writes the envelope on stdout, prose on stderr, and exits 1. Envelope fields match the contract. +4. The oclif factory attaches `aliases` when `cliAliases` is declared; alias invocation resolves to the canonical param name. +5. The oclif factory JSON-parses `array + items:'object'` flag values; on parse failure, `emitCliError` is called with `type:'json-parse'` and populated `got` / `expected` / `hint`. +6. The oclif factory renders `def.examples` onto `static examples` for oclif `--help`. +7. The oclif factory intercepts unknown-flag parser errors and suggests the Levenshtein-closest declared canonical flag (distance ≤ 2 AND ≤ 40% of target length). +8. `formatParam` in the prompt renderer uses the actual param name for arrays (no `s`-stripping), distinguishes `items:'object'` from `items:'string'`, renders aliases next to the canonical name, and appends one example line when `schema.example` is present. +9. `[manual]` Running a failing cascade-tools command produces a stderr line a human can read in one glance — verification protocol above. +10. `src/gadgets/README.md` exists and documents the three declarative metadata fields plus the single-entrypoint invariant. +11. `src/integrations/README.md` links to the new gadgets README. +12. `CHANGELOG.md` has an entry for this plan. +13. All new/modified code has corresponding tests. +14. `npm run build` passes. +15. `npm test` passes (all four unit projects). +16. `npm run lint` passes. +17. `npm run typecheck` passes. + +**Partial-state criterion**: +- No production gadget has been updated to declare `cliAliases` or `fileInputAlternatives` — plan 1 only adds the capability. The prompt renderer, however, already produces truthful output for every gadget because it consumes the widened manifest (existing `items: 'object'`/`'string'` on each def flows through). + +--- + +## Documentation Impact (this plan only) + +| File | Change | +|---|---| +| `src/gadgets/README.md` (new) | Authoring guide — three declarative metadata fields + error envelope contract + single-entrypoint invariant. | +| `src/integrations/README.md` | One-paragraph cross-reference to `src/gadgets/README.md` at the top of the file. | +| `CHANGELOG.md` | Entry describing the new envelope, truthful prompt, help examples, and did-you-mean. | + +--- + +## Out of Scope (this plan) + +- Applying the new declarative metadata to any production gadget (plan 2 covers `createPRReviewDef`; future work for others). +- Binary-level integration smoke test (plan 2). +- `CLAUDE.md` update — rejected per Phase 4 decision; authoring rule lives in `src/gadgets/README.md`. +- Full JSON-schema validation of parsed payloads (spec non-goal). +- Dashboard `cascade` CLI (spec non-goal). +- Progress-comment 404 tail bug from run 5d993b04 (spec non-goal). + +--- + +## Progress + + +- [x] AC #1 — type widening +- [x] AC #2 — manifest threading +- [x] AC #3 — error envelope +- [x] AC #4 — flag aliases in factory +- [x] AC #5 — JSON parse for array-of-object +- [x] AC #6 — examples in oclif help +- [x] AC #7 — fuzzy suggestion +- [x] AC #8 — prompt renderer fidelity +- [x] AC #9 — [manual] stderr prose readable +- [x] AC #10 — src/gadgets/README.md +- [x] AC #11 — src/integrations/README.md cross-ref +- [x] AC #12 — CHANGELOG +- [x] AC #13 — test coverage +- [x] AC #14 — build passes +- [x] AC #15 — tests pass (8438/8438) +- [x] AC #16 — lint passes +- [x] AC #17 — typecheck passes + +### Manual verification for AC #9 (stderr prose readability) + +Ran against built binary: + +``` +$ ./bin/cascade-tools.js scm create-pr-review --owner x --repo y --prNumber 1 --event COMMENT --body b +exit=1 +stderr (123 chars, single line, no ANSI): + runtime — No GitHub client in scope. Wrap the call with withGitHubToken() or ensure per-project GITHUB_TOKEN is set i... +stdout: + {"success":false,"error":{"type":"runtime","message":"No GitHub client in scope…"}} +``` + +Passes the protocol: ≤120-ish chars (prose + truncation ellipsis), no ANSI, scannable English, structured JSON on stdout parseable by agents. Initially the Node default handler dumped the `ExitError` stack trace beneath the prose line — fixed by wrapping `bin/cascade-tools.js` to swallow `ExitError` cleanly after the envelope has been emitted. diff --git a/docs/plans/014-cascade-tools-agent-ergonomics/2-createprreview-adopt.md.done b/docs/plans/014-cascade-tools-agent-ergonomics/2-createprreview-adopt.md.done new file mode 100644 index 00000000..e7e426a3 --- /dev/null +++ b/docs/plans/014-cascade-tools-agent-ergonomics/2-createprreview-adopt.md.done @@ -0,0 +1,195 @@ +--- +id: 014 +slug: cascade-tools-agent-ergonomics +plan: 2 +plan_slug: createprreview-adopt +level: plan +parent_spec: docs/specs/014-cascade-tools-agent-ergonomics.md +depends_on: [1-shared-infra.md] +status: done +--- + +# 014/2: cascade-tools create-pr-review adoption — `--comment` alias, `--comments-file` escape hatch + +> Part 2 of 2 in the 014-cascade-tools-agent-ergonomics plan. See [parent spec](../../specs/014-cascade-tools-agent-ergonomics.md). + +## Summary + +Closes the loop on the canonical failure from prod run `5d993b04-6e05-4ae1-b7de-8c274cf3496b`. Applies the new declarative metadata from plan 1 to the single gadget that prompted the spec. Two edits to `createPRReviewDef`: declare `cliAliases: ['comment']` on the `comments` parameter so the muscle-memory singular flag resolves, and declare a `fileInputAlternatives` entry for `--comments-file` with `parseAs: 'json'` so long inline-comment payloads sidestep shell quoting. + +Zero edits to any shared file — the factory, renderer, manifest generator, and prompt builder are untouched. Plan 1's architectural promise ("new gadget = declarative metadata only") is demonstrated here by the size of the diff: two fields on one object, plus tests. + +An integration smoke test spawns the built `./bin/cascade-tools.js` binary and walks four scenarios: (1) canonical `--comments` works, (2) `--comment` singular alias works, (3) malformed JSON produces the new structured envelope with a `--comments-file` hint, (4) `--comments-file -` reads JSON from stdin. Scenarios (1)–(4) assert on the command's JSON envelope output; no real GitHub API call is made (missing credentials cause a runtime error after parse succeeds — that's the signal parsing worked). + +**Components delivered:** +- `src/gadgets/github/definitions.ts` — two declarative fields added to `createPRReviewDef.parameters.comments` and `createPRReviewDef.cli`. +- `tests/integration/cascade-tools-createprreview.test.ts` (new) — binary-level smoke. +- `CHANGELOG.md` — entry. + +**Deferred to later plans in this spec:** +- None — this is the final plan for spec 014. + +--- + +## Spec ACs satisfied by this plan + +- Spec AC #1 (`--comment` alias resolves to `--comments`) — **full**. +- Spec AC #8 (new gadget = declarative metadata only) — **partial chain** — plan 1 proved it via a fake test gadget; this plan proves it via a production gadget by adding two declarative fields and nothing else to any shared file. +- Spec AC #9 (`--comments-file` exists on `create-pr-review` with JSON parsing) — **full**. + +Spec ACs not satisfied here (already full in plan 1): #2 (help examples), #3 (error envelope), #4 (envelope schema), #5 (did-you-mean), #6 (prompt renderer), #7 (no regressions), #10 (manual stderr). + +--- + +## Depends On + +- Plan 1 (`shared-infra`) — provides: `cliAliases` field on `ParameterDefinition`, `parseAs` on `FileInputAlternative`, JSON-parse path in `resolveFileInputParam`, alias-to-canonical flag wiring in the factory, structured error envelope that renders the new hints this plan relies on. +- Existing: `createPRReviewDef.examples[1]` already contains a well-formed `comments` array — no new example content needed. + +--- + +## Detailed Task List (TDD) + +### 1. Declarative opt-in for `createPRReviewDef` + +**Tests first** (`tests/unit/gadgets/github/definitions.test.ts`, new or extend): + +- `comments parameter declares cliAliases: ['comment']` — unit — imports `createPRReviewDef`, asserts `parameters.comments.cliAliases` deep-equals `['comment']`. Expected red: `expected ['comment'], received undefined`. +- `cli.fileInputAlternatives includes comments-file with parseAs: 'json'` — unit — asserts `cli.fileInputAlternatives` array contains `{paramName:'comments', fileFlag:'comments-file', parseAs:'json', ...}`. Expected red: `expected cli.fileInputAlternatives[0]?.fileFlag === 'comments-file', received undefined`. +- `manifest generated from createPRReviewDef exposes aliases and comments-file` — unit — `generateToolManifest(createPRReviewDef).parameters.comments.aliases` is `['comment']`; `parameters['comments-file']` exists with a description. Expected red: `expected aliases to equal ['comment'], received undefined`. +- `manifest example is the one from examples[1]` — unit — `parameters.comments.example` is the array from the second example's `params.comments`. Expected red: `expected example defined, received undefined`. + +**Implementation** (`src/gadgets/github/definitions.ts`): +- Extend the `comments` parameter at `createPRReviewDef.parameters.comments`: + ```typescript + comments: { + type: 'array', + items: 'object', + describe: + 'Inline review comments on specific files/lines. Pass a JSON array of {path, line, body} objects.', + cliAliases: ['comment'], + optional: true, + }, + ``` + (The `describe` text is updated to name the shape explicitly; the old parenthetical-inline JSON snippet is removed because the prompt renderer now surfaces it from `example`.) +- Extend the `cli` block: + ```typescript + cli: { + autoResolved: ownerRepoAutoResolved, + fileInputAlternatives: [ + { + paramName: 'comments', + fileFlag: 'comments-file', + parseAs: 'json', + description: + 'Read --comments JSON from file (use - for stdin). Prefer this for long payloads.', + }, + ], + }, + ``` + +### 2. Binary-level smoke verification `[manual]` + +**Why manual, not vitest integration.** An earlier attempt to land this as a vitest integration test (`tests/integration/cascade-tools-createprreview.test.ts`) hit a deterministic blocker: every child process spawned from a vitest fork-pool worker that goes through the bin entrypoint's top-level `await import('../dist/cli/bootstrap.js')` exits with code 2 and zero stdout/stderr output. `--version` (which short-circuits before command loading) works; anything that loads commands silently dies. Reproduced with `spawn`, `execa`, shell-via-sh, sync exec, and with stripped env — the fork-pool IPC heritage is the smoking gun. Chasing the root cause is a separate rabbit hole; the six scenarios already run cleanly against the built binary in a normal shell, and plan 1's unit test suite (via `cliCommandFactory.test.ts`) covers every behavior end-to-end at the Command class level. The per-plan AC #4 is therefore tagged `[manual]` with the verification protocol below. + +**Scenarios** — run each against the built binary in a plain shell after `npm run build`: + +1. Canonical `--comments`: `./bin/cascade-tools.js scm create-pr-review --owner x --repo y --prNumber 1 --event COMMENT --body b --comments '[{"path":"a","line":1,"body":"b"}]'` — exit 1; stdout envelope `error.type === 'runtime'`. +2. Singular `--comment` alias: same command with `--comment` instead of `--comments` — exit 1; envelope `error.type === 'runtime'` (alias resolved). Before plan 2: `error.type === 'unknown-flag'`. +3. Malformed JSON: command with `--comment "[{'path':'a','line':1,'body':'b'}]"` — exit 1; envelope `error.type === 'json-parse'`, `error.flag === 'comments'`, `error.got` contains `"[{'path'"`, `error.expected` contains double-quoted `"path"`, `error.hint` contains `--comments-file`. Before plan 2: hint does not mention `--comments-file`. +4. `--comments-file `: write valid JSON to `/tmp/x.json`, invoke with `--comments-file /tmp/x.json` — exit 1; envelope `error.type === 'runtime'`. Before plan 2: `error.type === 'unknown-flag'`. +5. `--comments-file -` (stdin): pipe valid JSON to the command with `--comments-file -` — exit 1; envelope `error.type === 'runtime'`. Before plan 2: `error.type === 'unknown-flag'`. +6. `--help`: invoke with `--help` — exit 0; stdout contains `EXAMPLES` and `"path":"src/utils.ts"`. + +### 3. Docs + +**Implementation** (`CHANGELOG.md`): +- Entry: "cascade-tools scm create-pr-review: accepts `--comment` as an alias for `--comments`; gains `--comments-file ` (and `-` for stdin) for JSON payloads that don't survive shell quoting." + +No README updates in this plan — the authoring guide at `src/gadgets/README.md` (plan 1) already documents the pattern; this plan is an application of the pattern. + +--- + +## Test Plan + +### Unit tests +- [ ] `tests/unit/gadgets/github/definitions.test.ts` (new or extend): 4 tests — cliAliases declaration, fileInputAlternatives declaration, manifest aliases threading, manifest example presence. + +### Integration tests +- n/a — see `Manual Verification` below for the binary-level smoke protocol and the reason the vitest integration path was abandoned. + +### Acceptance tests +- All per-plan ACs below are auto-verified by the unit tests, except AC #4 which is `[manual]`. + +--- + +## Manual Verification + +- **AC**: per-plan AC #4 (binary-level smoke covers the six scenarios from prod run 5d993b04). +- **Why manual**: vitest fork-pool workers cannot successfully capture stdout/stderr from a spawned bin that goes through top-level `await import()` — every spawn method tried returned exit 2 with empty output, which would force the test to always skip the meaningful assertions. Running against the built binary in a plain shell works correctly; plan-1 unit tests cover every code path at the Command-class level. +- **Verification protocol**: `npm run build`, then from the project root run each of the six commands above and confirm the expected exit code + envelope fields on stdout. Capture the first JSON-parseable line from stdout for each scenario and check `error.type` / `error.flag` / `error.hint` match the expectations listed. See also the recorded trace in this plan's Progress section, which pinned the current build's behavior verbatim. + +### Recorded trace (2026-04-24, post plan 2/1 merge) + +All six scenarios verified against `./bin/cascade-tools.js` in the project root after `npm run build`. Envelopes observed: + +1. canonical `--comments`: exit 1, `{"success":false,"error":{"type":"runtime","message":"No GitHub client in scope…"}}` ✓ +2. `--comment` alias: exit 1, identical `type:"runtime"` envelope — alias resolved ✓ +3. malformed JSON: exit 1, `{"error":{"type":"json-parse","message":"Expected property name or '}' in JSON at position 2 …","flag":"comments","got":"[{'path':'a','line':1,'body':'b'}]","expected":"[{\"path\":\"src/utils.ts\",\"line\":15,\"body\":\"This could cause a null pointer exception. Please add a null check.\"}]","hint":"Use double-quoted JSON keys and values. For long payloads pass --comments-file (or - for stdin)."}}` ✓ stderr prose: `--comments: json-parse — Expected property name or '}' in JSON at position 2 (line 1 column 3)` ✓ +4. `--comments-file /tmp/x.json`: exit 1, `type:"runtime"` (file parsed) ✓ +5. `--comments-file -` (stdin): exit 1, `type:"runtime"` (stdin parsed) ✓ +6. `--help`: exit 0, stdout contains `EXAMPLES` + `"path":"src/utils.ts"` ✓ + +--- + +## Acceptance Criteria (per-plan, testable) + +1. `createPRReviewDef.parameters.comments.cliAliases` equals `['comment']`. +2. `createPRReviewDef.cli.fileInputAlternatives` includes `{paramName:'comments', fileFlag:'comments-file', parseAs:'json', description: }`. +3. `generateToolManifest(createPRReviewDef)` surfaces `parameters.comments.aliases === ['comment']` and `parameters['comments-file']` with a description. +4. `[manual]` All six binary-level scenarios (canonical flag, singular alias, malformed JSON, `--comments-file `, `--comments-file -`, `--help` EXAMPLES) produce the envelopes / exits / stderr prose shown in the Manual Verification protocol above. See `Recorded trace` in that section. +5. This plan introduces zero edits to `src/gadgets/shared/*`, `src/backends/shared/nativeToolPrompts.ts`, `src/cli/_shared/*`, or any file under `src/agents/contracts/` (the "declarative metadata only" invariant — verifiable by `git diff --name-only` against plan 1's merge commit). +6. `CHANGELOG.md` has an entry for this plan. +7. All new/modified code has corresponding tests. +8. `npm run build` passes. +9. `npm test` passes (unit). +10. `npm run test:integration` passes (the new integration test is included). +11. `npm run lint` passes. +12. `npm run typecheck` passes. + +--- + +## Documentation Impact (this plan only) + +| File | Change | +|---|---| +| `CHANGELOG.md` | Entry describing `--comment` alias and `--comments-file` on `create-pr-review`. | + +--- + +## Out of Scope (this plan) + +- Applying declarative metadata to other gadgets (e.g. `addChecklist`, `createPR` body). Future work. +- Shared infrastructure changes — all shipped in plan 1. +- `CLAUDE.md` update (rejected per Phase 4 decision). +- Dashboard `cascade` CLI (spec non-goal). +- Progress-comment 404 tail bug from run 5d993b04 (spec non-goal). +- Live-network integration tests that actually hit GitHub (the smoke test deliberately stops at parse/auth boundary). + +--- + +## Progress + + +- [x] AC #1 — cliAliases declared +- [x] AC #2 — fileInputAlternatives declared +- [x] AC #3 — manifest surfaces aliases + comments-file +- [x] AC #4 — `[manual]` 6 binary-level scenarios verified (see Recorded trace above) +- [x] AC #5 — no shared-file edits (invariant) +- [x] AC #6 — CHANGELOG entry (below) +- [x] AC #7 — test coverage +- [x] AC #8 — build passes +- [x] AC #9 — unit tests pass +- [x] AC #10 — (n/a, integration test path abandoned; see Manual Verification) +- [x] AC #11 — lint passes +- [x] AC #12 — typecheck passes diff --git a/docs/plans/014-cascade-tools-agent-ergonomics/_coverage.md b/docs/plans/014-cascade-tools-agent-ergonomics/_coverage.md new file mode 100644 index 00000000..1aa20c25 --- /dev/null +++ b/docs/plans/014-cascade-tools-agent-ergonomics/_coverage.md @@ -0,0 +1,55 @@ +# Coverage map for spec 014-cascade-tools-agent-ergonomics + +Auto-generated by /plan. Tracks which plans satisfy which spec ACs. + +## Spec ACs + +| # | Spec AC (short) | Satisfied by | Status | +|---|---|---|---| +| 1 | `--comment` alias resolves to `--comments` on create-pr-review | plan 2 (createprreview-adopt) | full | +| 2 | `--help` EXAMPLES section with runnable comments JSON | plan 1 (shared-infra) | full | +| 3 | Structured stdout error with `flag`/`got`/`expected`/`hint` on malformed JSON | plan 1 (shared-infra) | full | +| 4 | Stable envelope schema across every CLI failure path | plan 1 (shared-infra) | full | +| 5 | "Did you mean" suggestion on mistyped flags via Levenshtein | plan 1 (shared-infra) | full | +| 6 | Truthful system-prompt rendering: no `s`-stripping, JSON marker for object arrays, example inline | plan 1 (shared-infra) | full | +| 7 | No regressions on primitive-array gadgets, string params, required/optional brackets | plan 1 (shared-infra) | full | +| 8 | New gadget = declarative metadata only; zero shared-infra edits | plan 1 (shared-infra) + plan 2 (createprreview-adopt) | partial chain — plan 1 proves via fake test gadget; plan 2 proves via production gadget | +| 9 | `create-pr-review` gains `--comments-file ` with JSON parsing; documented | plan 2 (createprreview-adopt) | full | +| 10 | `[manual]` stderr prose summary readable for human operators | plan 1 (shared-infra) | full | + +## Coverage summary + +- **10 spec ACs** mapped to **2 plans** +- **8 plans** with full-coverage ACs (AC #1, #2, #3, #4, #5, #6, #7, #9, #10 — each delivered by exactly one plan) +- **1 spec AC** with partial-coverage chain (AC #8 — plan 1 establishes the contract, plan 2 demonstrates the invariant) +- **0 orphaned spec ACs** + +## Plan dependency graph + +``` +1-shared-infra ──→ 2-createprreview-adopt +``` + +Linear DAG. No cycles. Topological order is the file order (`1-*` before `2-*`). + +## Documentation Impact coverage (spec → plans) + +| Spec-level doc | Plan(s) that deliver it | +|---|---| +| `src/gadgets/README.md` (new) | plan 1 | +| `src/integrations/README.md` (cross-reference) | plan 1 | +| `CHANGELOG.md` | plan 1 + plan 2 (one entry each) | +| `docs/specs/014-…md → .done` | spec lifecycle, not a plan task | + +No `CLAUDE.md` entry — rejected per Phase 4 decision; authoring rule homed in `src/gadgets/README.md`. + +## Test distribution + +| Test file | Plan | Kind | Count (approx) | +|---|---|---|---| +| `tests/unit/gadgets/shared/manifestGenerator.test.ts` (new) | 1 | unit | 5 | +| `tests/unit/gadgets/shared/errorEnvelope.test.ts` (new) | 1 | unit | 5 | +| `tests/unit/gadgets/shared/cliCommandFactory.test.ts` (new) | 1 | unit | 10 | +| `tests/unit/backends/shared-nativeToolPrompts.test.ts` (extend) | 1 | unit | +7 | +| `tests/unit/gadgets/github/definitions.test.ts` (extend) | 2 | unit | 2 | +| _(binary smoke)_ — `[manual]` protocol documented in plan 2 | 2 | manual | 6 scenarios | diff --git a/docs/specs/014-cascade-tools-agent-ergonomics.md.done b/docs/specs/014-cascade-tools-agent-ergonomics.md.done new file mode 100644 index 00000000..ee36134f --- /dev/null +++ b/docs/specs/014-cascade-tools-agent-ergonomics.md.done @@ -0,0 +1,161 @@ +--- +id: 014 +slug: cascade-tools-agent-ergonomics +level: spec +title: cascade-tools agent ergonomics — truthful prompts, runnable help, actionable errors +created: 2026-04-24 +status: done +--- + +# 014: cascade-tools agent ergonomics — truthful prompts, runnable help, actionable errors + +## Problem & Motivation + +`cascade-tools` is the CLI surface LLM agents use to interact with PMs, SCMs, and their own session — it is *the* I/O boundary between every review / implementation / backlog agent and the outside world. Its ergonomics for an LLM reader are currently below the bar set by Anthropic's own tool-use guidance: unclear parameter shape hints, help text without runnable examples, and error messages that don't give the model enough information to self-correct. + +The failure mode is observable. In prod run `5d993b04-6e05-4ae1-b7de-8c274cf3496b` (review agent, PR #1188), the agent wasted roughly one third of its 7m42s wall clock — ~2½ minutes — struggling with `cascade-tools scm create-pr-review`. Three compounding failures: + +1. The **agent system prompt** rendered the `comments` array parameter as `[--comment (repeatable)]` — singular name, "string" type, "repeatable" semantics. The CLI in reality expects `--comments ''` — plural, JSON-encoded, single invocation. The agent followed the prompt literally and tripped over every dimension of the mismatch. +2. The `--help` output contained no runnable JSON example. The agent ran `--help` four times and `cat`ted five source files trying to reverse-engineer the shape. +3. The CLI's error on malformed JSON was `--comments must be valid JSON`. No offending input, no shape hint, no example, no pointer to an escape hatch. The agent improvised and ultimately dropped the inline comment entirely, posting a body-only review. + +The root cause of (1) is the manifest renderer that strips trailing `s` and hard-codes ` (repeatable)` for every array parameter regardless of its item type. The root causes of (2) and (3) are in the shared oclif command factory. Because these surfaces are shared, the same class of defect affects every gadget with an array-of-object, object, or enum parameter — not just `create-pr-review`. The spec addresses the systemic failure: one set of fixes in the shared infrastructure so every cascade-tools command becomes legibly usable by an LLM agent. + +The intended outcome is that a review agent (and every other agent) can read `cascade-tools --help`, read its system prompt's tool guidance, and — when it errors — read the error message, and in each case have everything it needs to produce a correct invocation on the next attempt. + +--- + +## Goals + +1. Every cascade-tools command documented in the agent system prompt uses the **actual** flag names and the **actual** input shape expected by the CLI — no `s`-stripping, no lossy "string" coercion for JSON inputs. +2. Every parameter that carries a non-trivial shape (object, array-of-object, enum) includes a concrete, runnable example inline in both the agent's system prompt and `--help`. +3. Every CLI failure — flag parse error, JSON parse error, validation error, runtime gadget error — emits a single structured error envelope on stdout that names the offending flag, shows a truncated view of the offending input, states the expected shape, and (when available) points at an escape hatch. +4. Mistyped flag names produce a "did you mean" suggestion automatically, not a bare "unknown flag" rejection. +5. The `cascade-tools scm create-pr-review` command specifically accepts `--comment` as an alias for `--comments` so the muscle-memory mistake from run 5d993b04 resolves without a code change next time. +6. JSON-shaped flags have a file-input escape hatch available when declared on the gadget (the same `--X-file` pattern already used for long string bodies). +7. Adding a new gadget requires only declarative metadata on its tool definition (param types, examples, aliases, file alternatives) — no edits to the shared factory, the manifest renderer, or the prompt builder. + +--- + +## Non-goals + +- The `cascade` dashboard CLI (different binary, different audience). This spec is exclusively about `cascade-tools`. +- Agent prompt templates themselves (the per-agent `.eta` files). Only the tool-guidance section that describes cascade-tools is in scope. +- Trigger/router logic, worker orchestration, PM webhook dispatch. +- Full JSON-schema validation of flag payloads. We catch parse errors and shape-level malformedness; we do not re-validate each object against the gadget's runtime contract here. +- Switching CLI frameworks (oclif stays). +- The post-success `Failed to delete progress comment` 404 warning observed in run 5d993b04 — separate bug, out of scope. + +--- + +## Constraints + +- **Backwards compatibility for CLI callers.** The manual-runner scripts, retry path, and any shell aliases that call cascade-tools with current flag names must continue to work. New aliases are additive only. +- **Prompt token budget.** Per-param example lines expand the system prompt. The expansion must be bounded and proportional — only array-of-object / object / enum params that have an example pay the cost; primitive params stay one-line. Rough ceiling: ≤80 additional tokens per affected param. +- **Zero network dependency for help/error paths.** `--help` and argument-parse errors must not touch the network. +- **Single-entrypoint invariant.** Fixes live in the shared factory + manifest generator + prompt builder. Per-gadget changes are limited to declarative metadata (new alias, new file-input entry, new example). No per-gadget code branches in shared infrastructure. +- **oclif version.** Stays on current major (v4.x). +- **Error envelope stability.** Once defined, the stdout-JSON error envelope shape becomes part of the contract agents rely on. Changing field names later is a breaking change. + +--- + +## User stories / Requirements + +**As a review agent**, when I run `cascade-tools scm create-pr-review --help`, I see a runnable example that includes the inline-comments JSON shape, so I can produce a valid invocation without reading source files. + +**As a review agent**, when I accidentally type `--comment` (singular) instead of `--comments`, the CLI accepts it, so my first attempt succeeds. + +**As any cascade-tools-consuming agent**, when I produce a malformed JSON payload, the error message tells me (a) which flag was wrong, (b) a fragment of what I actually passed, (c) the shape the CLI expected, and (d) whether a `--X-file` path exists — so I can self-correct on the next call. + +**As any cascade-tools-consuming agent**, when I mistype a flag name close to a real one, the CLI suggests the intended flag. + +**As a gadget author**, when I add a new tool, I declare aliases, examples, and file alternatives on the tool definition; I do not edit the shared factory, the manifest renderer, or the prompt builder. + +**As a human operator**, when a cascade-tools command fails in my terminal, I can still read a one-line prose summary of the error; I am not forced to pipe through `jq`. + +--- + +## Research Notes + +- **Anthropic tool-use guidance** recommends per-parameter format descriptions, `enum` for constrained values, and **examples inline with the schema** to show Claude "concrete patterns for well-formed tool calls" — explicitly addresses the gap identified here. +- **Anthropic error-handling guidance** for tool results: return a clear informative error with enough detail for Claude to either retry correctly or inform the user. Generic "must be valid JSON" fails this bar. +- **LLM self-correction literature** (2026) emphasizes that self-correction works when the model is given specific, localized feedback about the flaw — not brute-force retry. Error messages that name the flaw explicitly are load-bearing. +- **oclif v4** supports `aliases: string[]` on flags natively; no custom alias machinery is needed. It does **not** implement "did you mean" for flag names (only for commands) — that's DIY. A tiny `fastest-levenshtein` scorer covers it. +- **Salesforce CLI / Heroku CLI** (both oclif-based) use `summary` (short) + `description` (long) on flags for richer help. We currently use only `description`. + +Sources: +- [Anthropic — Tool use](https://docs.anthropic.com/en/docs/tool-use) +- [Anthropic — Implement tool use](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/implement-tool-use) +- [oclif — Command flags](https://oclif.io/docs/flags/) +- [oclif issue #66 — "did you mean" for flag names](https://github.com/oclif/oclif/issues/66) +- [LLM-as-runtime-error-handler — self-healing pathway](https://arxiv.org/html/2408.01055v1) +- [Error handling strategies in LLM tool execution](https://apxml.com/courses/building-advanced-llm-agent-tools/chapter-1-llm-agent-tooling-foundations/tool-error-handling) + +--- + +## Open Source Decisions + +| Tool | Solves | Decision | Reason | +|------|--------|----------|--------| +| [oclif core](https://oclif.io/docs/flags/) (already in use) | CLI framework, flag parsing, help generation | **Keep** | Already committed. Native `aliases`, `summary`, `description`, `examples` cover all ergonomics needs. | +| [fastest-levenshtein](https://github.com/ka-weihe/fastest-levenshtein) | "Did you mean" suggestion on mistyped flags | **Use** | ~1kB, zero dependencies, fastest JS implementation. Cleaner than hand-rolling for a durable feature. Adopted per Phase 4 decision. | +| [zod](https://zod.dev) (already in use elsewhere) | Runtime shape validation of parsed JSON payloads | **Skip for this spec** | Adds boundary validation cost. Non-goal: payload validation beyond parse-correctness. Defer to a later spec if agent-side shape errors become a pattern. | +| [commander](https://github.com/tj/commander.js) / [citty](https://github.com/unjs/citty) | Alternative CLI frameworks | **Skip** | Switching frameworks is out of scope and unnecessary — oclif covers every requirement. | + +--- + +## Strategic decisions + +1. **Unify error output on stdout JSON, mirror a one-line summary to stderr.** *(Decision in Phase 4.)* Every cascade-tools failure — flag parse, JSON parse, auth, runtime — emits a structured envelope on stdout: `{"success": false, "error": {"type", "flag?", "message", "got?", "expected?", "hint?", "example?"}}`. A short prose summary also goes to stderr for human operators. Agents get one surface; humans keep readable errors. The envelope schema becomes part of the cascade-tools contract. + +2. **Fuzzy flag suggestion via `fastest-levenshtein`, not hand-rolled.** *(Decision in Phase 4.)* When a flag is rejected as unknown, compute Levenshtein distance against every declared flag name (plus aliases) and include a "did you mean" hint when the closest match is within a reasonable threshold. Small dependency, durable feature. + +3. **File-input escape hatches stay opt-in.** *(Decision in Phase 4.)* Gadgets declare `--X-file` alternatives explicitly, matching today's pattern. `create-pr-review` gains `--comments-file` as part of this spec; other gadgets opt in as needed. Avoids flag clutter. + +4. **One-line inline example per array-of-object / object / enum param in both help and system prompt.** *(Decision in Phase 4.)* Pulled from the tool definition's existing `examples` block (first example where the param is populated), not from a new schema field. Proportional prompt cost, maximum clarity at point of use. + +5. **Declarative aliases on the tool definition, not per-command hand-coding.** Aliases travel with the gadget's definition; the factory threads them into oclif. `cliAliases: ['comment']` on `createPRReviewDef.parameters.comments` is the reference case. + +6. **System-prompt rendering becomes a pure function of the manifest.** The manifest carries everything the prompt renderer needs (type, items, aliases, one example). No per-gadget logic in the renderer. The renderer is the source of truth for how agents are told to use the CLI; fixing it once fixes every consumer. + +7. **Help and prompt draw from a single metadata source.** What `--help` shows and what the agent sees in its system prompt derive from the same `ToolDefinition`. Divergence is a design bug, not a feature. + +8. **No per-parameter runtime validation beyond JSON.parse.** Out of scope per non-goals. Malformed payloads that parse but don't match the expected shape are caught downstream by the gadget or the remote API; the CLI's job is parse-correctness and clear errors. + +--- + +## Acceptance Criteria (outcome-level) + +1. Running `cascade-tools scm create-pr-review --comment '[{"path":"a","line":1,"body":"b"}]' …` succeeds in parsing and passes the inline comments through to the gadget; `--comments` with the same value produces identical behavior. +2. Running `cascade-tools scm create-pr-review --help` displays an `EXAMPLES` section that includes a copy-pasteable invocation with a well-formed `--comments` JSON array. +3. Passing malformed JSON to any cascade-tools flag produces a structured error on stdout naming the flag, showing the first ~80 chars of the offending input, stating the expected shape, and — when a file-input alternative exists — pointing at it. A prose summary is also written to stderr. +4. The structured error envelope on stdout has a stable, documented schema (fields: `type`, `flag`, `message`, `got`, `expected`, `hint`, `example`). Every CLI failure path populates it — flag parse, JSON parse, required-missing, enum-mismatch, auth, runtime. +5. Mistyping a flag name close to a real one (e.g. `--comnent`, `--body` typoed as `--bdy`) produces an error that includes "Did you mean `--`?" derived from Levenshtein-closest match against the command's declared flags and aliases. +6. The agent-facing tool manifest surfaced in the system prompt renders every array-of-object parameter with its actual plural name (no `s`-stripping), marks it as a JSON value (not "repeatable string"), and includes a one-line runnable example pulled from the tool definition. +7. Running the full test suite after the changes shows no regressions in existing cascade-tools behavior: primitive-array params (e.g. label IDs) still render as repeatable strings in the prompt, string params still render without examples, and existing CLI invocations continue to succeed. +8. Adding a new gadget with an array-of-object parameter — declaring only `type`, `items`, `describe`, and an `examples` block — produces correct prompt text, correct `--help`, and correct error messages without editing the shared factory, the manifest generator, or the prompt builder. +9. `cascade-tools scm create-pr-review` gains `--comments-file ` (accepts JSON file contents, `-` for stdin); both `--help` and the agent prompt reference it as an alternative for long payloads. +10. A human running a failing cascade-tools command in a terminal reads a short prose summary on stderr without needing to parse JSON. `[manual]` — verification is whether the stderr text is readable, which is a subjective-ergonomics judgment best sanity-checked by a human eye. + +--- + +## Documentation Impact (high-level) + +- `src/gadgets/README.md` (new) — authoring guide for cascade-tools gadgets: declarative metadata available (`cliAliases`, `fileInputAlternatives`, `examples`), the error-envelope contract, and the single-entrypoint invariant that no gadget edits the shared renderer/factory/prompt builder. +- `src/integrations/README.md` — brief cross-reference from the PM-provider section to the new gadgets README (PM provider authors hit cascade-tools through their gadget surface). +- `CHANGELOG.md` — entries for each plan that merges. +- `docs/specs/` — this file, once merged, moves from `014-cascade-tools-agent-ergonomics.md` to `.done` on completion. +- No `CLAUDE.md` entry — the authoring rule is narrow to gadget authors and has a clear home in `src/gadgets/README.md`. No changes to top-level `README.md` either — cascade-tools is internal tooling for agents, not a product surface. + +--- + +## Out of Scope + +- The `cascade` dashboard CLI binary and its commands (runs/projects/agents/etc.). +- Agent prompt template files (`src/agents/prompts/templates/*.eta`) beyond the tool-guidance rendering. +- The `Failed to delete progress comment after agent success` 404 warning observed in run 5d993b04 — separate bug. +- Runtime shape validation of parsed JSON payloads beyond JSON.parse success. +- Switching CLI frameworks away from oclif. +- Rolling out `--X-file` alternatives to every gadget preemptively — opt-in per gadget. +- PM provider wizard UX, webhook adapters, and other non-cascade-tools surfaces. +- Backwards-incompatible renames of existing cascade-tools flag names (aliases are additive only). diff --git a/package-lock.json b/package-lock.json index 27c20d70..5949f4a3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -29,6 +29,7 @@ "drizzle-orm": "^0.45.1", "eta": "^4.5.0", "execa": "^9.6.1", + "fastest-levenshtein": "^1.0.16", "hono": "^4.12.14", "jira.js": "^5.3.0", "js-yaml": "^4.1.1", @@ -7165,6 +7166,15 @@ ], "license": "BSD-3-Clause" }, + "node_modules/fastest-levenshtein": { + "version": "1.0.16", + "resolved": "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz", + "integrity": "sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==", + "license": "MIT", + "engines": { + "node": ">= 4.9.1" + } + }, "node_modules/fdir": { "version": "6.5.0", "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", diff --git a/package.json b/package.json index c28f354e..d15e9fa5 100644 --- a/package.json +++ b/package.json @@ -74,6 +74,7 @@ "drizzle-orm": "^0.45.1", "eta": "^4.5.0", "execa": "^9.6.1", + "fastest-levenshtein": "^1.0.16", "hono": "^4.12.14", "jira.js": "^5.3.0", "js-yaml": "^4.1.1", diff --git a/src/agents/contracts/index.ts b/src/agents/contracts/index.ts index f71e2c99..99059635 100644 --- a/src/agents/contracts/index.ts +++ b/src/agents/contracts/index.ts @@ -16,6 +16,41 @@ */ export type LogWriter = (level: string, message: string, context?: Record) => void; +/** + * Shape of a single parameter entry inside {@link ToolManifest.parameters}. + * + * Widened in spec 014 to carry `items`, `aliases`, and a single concrete + * `example` pulled from the tool definition. These drive both the agent's + * system-prompt rendering of the flag and (downstream) the CLI factory's + * help / error output. + */ +export interface ToolManifestParameter { + /** Base JSON-Schema-ish type: 'string' | 'number' | 'boolean' | 'array' | 'object' */ + type: string; + /** Whether the flag is required */ + required?: boolean; + /** Default value (when declared on the tool definition) */ + default?: unknown; + /** Human-readable description, used as inline help */ + description?: string; + /** Allowed values for enum-shaped parameters */ + options?: string[]; + /** + * Element type for array-shaped parameters. + * - `'string'` → CLI flag is repeatable (`--x a --x b`) + * - `'object'` → CLI flag takes a single JSON array string + */ + items?: string; + /** Alternative flag names accepted at the CLI (e.g. `['comment']` for `--comments`) */ + aliases?: readonly string[]; + /** + * A single concrete example value for this parameter, pulled from the first + * {@link ToolDefinition.examples} entry that populates it. Used by the + * prompt renderer and CLI help to show agents a runnable shape. + */ + example?: unknown; +} + /** * Describes a CASCADE-specific CLI tool available to the agent. */ @@ -26,7 +61,12 @@ export interface ToolManifest { description: string; /** CLI command to invoke, e.g., 'cascade-tools trello read-card' */ cliCommand: string; - /** JSON Schema for the CLI flags/args */ + /** + * JSON Schema-ish descriptor for the CLI flags/args. Keys are flag names; + * values conform to {@link ToolManifestParameter}. Kept as + * `Record` for backwards-compat with older consumers that + * index with ad-hoc shapes — new code should cast to `ToolManifestParameter`. + */ parameters: Record; } diff --git a/src/agents/utils/setup.ts b/src/agents/utils/setup.ts index 3fd1ac52..d08fb0cd 100644 --- a/src/agents/utils/setup.ts +++ b/src/agents/utils/setup.ts @@ -1,4 +1,4 @@ -import { existsSync, readFileSync } from 'node:fs'; +import { existsSync, lstatSync, readFileSync, readlinkSync } from 'node:fs'; import { join } from 'node:path'; import { runCommand as execCommand } from '../../utils/repo.js'; @@ -32,6 +32,38 @@ export interface ContextFile { content: string; } +/** + * Checks whether two context files are duplicates — either because one is a + * symlink pointing at the other, or because their trimmed content is identical. + * + * The symlink check is a fast path: `lstat` does NOT follow symlinks, so it + * reliably detects the symlink target. The content comparison is the fallback + * for copy-paste duplicates or cases where `lstat`/`readlink` fail. + */ +function areDuplicateContextFiles(cwd: string, a: ContextFile, b: ContextFile): boolean { + // Fast path: check if one file is a symlink pointing to the other + try { + const aPath = join(cwd, a.path); + const bPath = join(cwd, b.path); + const aStat = lstatSync(aPath); + const bStat = lstatSync(bPath); + + if (aStat.isSymbolicLink()) { + const target = readlinkSync(aPath); + if (target === b.path || join(cwd, target) === bPath) return true; + } + if (bStat.isSymbolicLink()) { + const target = readlinkSync(bPath); + if (target === a.path || join(cwd, target) === aPath) return true; + } + } catch { + // Fall through to content comparison on permission errors or race conditions + } + + // Fallback: compare trimmed content strings + return a.content === b.content; +} + export async function readContextFiles(cwd: string): Promise { const files = ['CLAUDE.md', 'AGENTS.md']; const results: ContextFile[] = []; @@ -47,6 +79,12 @@ export async function readContextFiles(cwd: string): Promise { } } + // Deduplicate: when both files exist and have identical content (or one is a + // symlink of the other), keep only the CLAUDE.md entry (canonical reference). + if (results.length === 2 && areDuplicateContextFiles(cwd, results[0], results[1])) { + return [results[0]]; + } + return results; } diff --git a/src/backends/shared/nativeToolPrompts.ts b/src/backends/shared/nativeToolPrompts.ts index f3283c2c..61db1d3d 100644 --- a/src/backends/shared/nativeToolPrompts.ts +++ b/src/backends/shared/nativeToolPrompts.ts @@ -16,25 +16,66 @@ You are operating in a native-tool environment, not a gadget/function-call envir /** * Format a single CLI parameter for tool guidance documentation. + * + * Spec 014: the array branch previously stripped a trailing `s` and hard-coded + * ` (repeatable)` for every array type regardless of item shape. That + * was the root cause of prod run 5d993b04 — an agent sent `--comment` because + * the prompt told it to. Now: + * + * - `items: 'object'` → renders `-- ''` (single JSON blob, not + * repeatable), with aliases appended via `|` and one example line indented + * beneath the flag when the manifest provides `example`. + * - `items: 'string'` or items missing → keeps the repeatable semantics but + * uses the actual key (no `s`-strip). + * - `type: 'object'` → renders as a single `''` blob. */ +// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: parameter-type taxonomy function formatParam( key: string, - schema: { type: string; required?: boolean; default?: unknown; description?: string }, + schema: { + type: string; + required?: boolean; + default?: unknown; + description?: string; + items?: string; + aliases?: readonly string[]; + example?: unknown; + }, ): string { + const aliasSuffix = (schema.aliases ?? []).map((a) => `|--${a}`).join(''); + const flagHead = `--${key}${aliasSuffix}`; + let result: string; - if (schema.type === 'array') { - const singular = key.replace(/s$/, ''); + if (schema.type === 'array' && schema.items === 'object') { + result = schema.required ? ` ${flagHead} ''` : ` [${flagHead} '']`; + } else if (schema.type === 'array') { + // Primitive arrays (items: 'string' or unspecified) — repeatable. result = schema.required - ? ` --${singular} (repeatable)` - : ` [--${singular} (repeatable)]`; + ? ` ${flagHead} (repeatable)` + : ` [${flagHead} (repeatable)]`; + } else if (schema.type === 'object') { + result = schema.required ? ` ${flagHead} ''` : ` [${flagHead} '']`; } else if (schema.type === 'boolean') { - result = schema.default === true ? ` [--no-${key}]` : ` [--${key}]`; + result = schema.default === true ? ` [--no-${key}]` : ` [${flagHead}]`; } else { - result = schema.required ? ` --${key} <${schema.type}>` : ` [--${key} <${schema.type}>]`; + result = schema.required ? ` ${flagHead} <${schema.type}>` : ` [${flagHead} <${schema.type}>]`; } + if (schema.description) { result += ` # ${schema.description}`; } + + // Spec 014: surface a concrete shape beneath the flag so agents see a + // copy/paste-ready JSON payload without having to run --help. + if (schema.example !== undefined) { + try { + result += `\n # example: --${key} '${JSON.stringify(schema.example)}'`; + } catch { + // JSON.stringify throws on cyclic refs — never in our tool definitions, + // but be defensive so a malformed example never crashes prompt building. + } + } + return result; } diff --git a/src/gadgets/README.md b/src/gadgets/README.md new file mode 100644 index 00000000..92d46fea --- /dev/null +++ b/src/gadgets/README.md @@ -0,0 +1,140 @@ +# cascade-tools gadget authoring + +`cascade-tools` is the CLI surface LLM agents use to drive CASCADE — it sits between every review / implementation / backlog agent and the outside world (PM APIs, GitHub, session lifecycle). Its ergonomics for an LLM reader are load-bearing: a confusing flag name, a malformed `--help`, or a cryptic error can cost minutes of run budget (see [spec 014](../../docs/specs/014-cascade-tools-agent-ergonomics.md.done) — prompted by prod run `5d993b04-6e05-4ae1-b7de-8c274cf3496b` where an agent burned ~2½ min of a 7m42s run fighting `scm create-pr-review` and ultimately dropped an inline review comment). + +This document is the canonical guide for **adding a new gadget** (a `cascade-tools ` command) and keeping its agent-facing surface truthful, runnable, and self-correctable. + +--- + +## Architecture in one picture + +A gadget is a single `ToolDefinition` consumed by three generators: + +``` +ToolDefinition ───▶ gadgetFactory ▶ the Zod-validated in-process gadget + ├─▶ cliCommandFactory ▶ the oclif `cascade-tools` subcommand + └─▶ manifestGenerator ▶ the ToolManifest the agent sees in its system prompt +``` + +All three read from the same definition. **If you want a behavior to land, declare it on the ToolDefinition — do not edit the generators per-gadget.** + +--- + +## Declarative metadata you can attach + +### `cliAliases?: readonly string[]` + +Alternative flag names accepted on the CLI. Wired into oclif as `Flags.*({ aliases: [...] })`; surfaced in the agent's system prompt so the rendered flag line looks like `--comments|--comment ''`. Use this when agent muscle memory reaches for an understandable-but-wrong spelling (singular/plural collisions, British/American spellings, historical renames). + +```ts +comments: { + type: 'array', + items: 'object', + describe: 'Inline review comments on specific files/lines.', + cliAliases: ['comment'], // agent typing `--comment '[...]'` now resolves correctly + optional: true, +}, +``` + +The canonical parameter name always wins. Aliases are additive; suggestions returned by the fuzzy-matcher always point at the canonical form. + +### `cli.fileInputAlternatives` + +Opt-in `---file ` escape hatches for long or JSON-shaped payloads that don't survive shell quoting. Pair with `parseAs: 'json'` for array-of-object / object params so the file contents are `JSON.parse`-d before reaching the gadget. + +```ts +cli: { + fileInputAlternatives: [ + { + paramName: 'comments', + fileFlag: 'comments-file', + parseAs: 'json', + description: 'Read --comments JSON from file (use - for stdin). Prefer this for long payloads.', + }, + ], +}, +``` + +`-` as the file path reads from stdin. The generated flag is always optional (the direct flag remains accepted). + +### `examples` + +A list of `{ params, comment, output? }` invocations. The first example that populates a given parameter becomes that parameter's **concrete example**, surfaced in three places: + +- The agent's system prompt renders a one-line `# example: -- ''` under the flag (when the param is array-of-object / object). +- The `cascade-tools … --help` output lists every example as a runnable shell invocation under an `EXAMPLES` section. +- JSON-parse failures include the example as the `expected` shape fragment in the structured error envelope. + +Write examples that a model could literally copy/paste. Use double-quoted JSON keys; do not rely on the agent to translate pseudo-JSON. + +```ts +examples: [ + { + params: { + comment: 'Requesting changes for identified issues', + owner: 'acme', + repo: 'myapp', + prNumber: 42, + event: 'REQUEST_CHANGES', + body: 'Good progress, but…', + comments: [ + { path: 'src/utils.ts', line: 15, body: 'This could cause a null pointer…' }, + ], + }, + comment: 'Request changes with inline comments', + }, +], +``` + +--- + +## The error envelope + +Every cascade-tools failure — flag parse, JSON parse, missing-required, enum-mismatch, unknown-flag, auth, runtime — emits through the shared `emitCliError` helper: + +- **Structured JSON on stdout** (`{ "success": false, "error": {...} }`) so agents parse a single stable surface. +- **One-line prose summary on stderr** so humans running the CLI directly get a readable error without piping through `jq`. +- **Exit code 1.** + +The envelope shape is part of the cascade-tools contract. Renaming fields is a breaking change — agents rely on `error.type` / `error.flag` / `error.hint` to self-correct on the next attempt. + +Envelope fields: + +| field | when populated | +|---|---| +| `type` | always; one of `flag-parse` / `json-parse` / `missing-required` / `enum-mismatch` / `unknown-flag` / `auth` / `runtime` | +| `flag` | for flag-scoped failures | +| `message` | always; human-readable | +| `got` | the offending input, truncated to ~80 chars | +| `expected` | shape fragment (from `example` when available, else `describe`) | +| `hint` | an action the agent can take (e.g. `did you mean --comments?`, `use --comments-file `) | +| `example` | runnable invocation, when known | + +You do not call `emitCliError` directly. The shared factory routes every failure through it automatically — your job is to make the declarative metadata (describe text, examples, aliases, file alternatives) rich enough that the auto-generated envelope is actually useful. + +--- + +## The single-entrypoint invariant + +Adding a gadget requires **zero edits** to: + +- `src/gadgets/shared/cliCommandFactory.ts` +- `src/gadgets/shared/manifestGenerator.ts` +- `src/gadgets/shared/errorEnvelope.ts` +- `src/backends/shared/nativeToolPrompts.ts` + +If you find yourself opening one of those files, stop — the right fix is almost always to attach more metadata to your ToolDefinition, or to propose a spec for a new shared capability. A per-gadget branch in shared infrastructure is a red flag. + +--- + +## Mistyped flags → "did you mean" + +The factory intercepts oclif's `NonExistentFlagsError`, runs a Levenshtein match against every declared canonical flag name + alias, and surfaces the closest canonical name as `error.hint`. No gadget work required — just declare your flags truthfully. + +Two tuning constants live in the factory: `MAX_FLAG_SUGGESTION_DISTANCE` (default 2) and `MAX_FLAG_SUGGESTION_RATIO` (default 0.4). Wildly-off mistypes get no suggestion rather than a misleading one. + +--- + +## Reference: `createPRReviewDef` + +`src/gadgets/github/definitions.ts` is the reference gadget for spec 014 — it carries `cliAliases: ['comment']` on `comments`, a `fileInputAlternatives` entry for `--comments-file`, and a well-formed `examples` block. Read it before you add a new gadget with array-of-object parameters. diff --git a/src/gadgets/github/definitions.ts b/src/gadgets/github/definitions.ts index 63c91fb5..53fba219 100644 --- a/src/gadgets/github/definitions.ts +++ b/src/gadgets/github/definitions.ts @@ -198,7 +198,8 @@ export const createPRReviewDef: ToolDefinition = { type: 'array', items: 'object', describe: - 'Optional inline comments on specific files/lines ([{"path":"file","line":1,"body":"comment"}])', + 'Inline review comments on specific files/lines. Pass a JSON array of {path, line, body} objects.', + cliAliases: ['comment'], optional: true, }, }, @@ -235,6 +236,15 @@ export const createPRReviewDef: ToolDefinition = { ], cli: { autoResolved: ownerRepoAutoResolved, + fileInputAlternatives: [ + { + paramName: 'comments', + fileFlag: 'comments-file', + parseAs: 'json', + description: + 'Read --comments JSON from file (use - for stdin). Prefer this for long payloads.', + }, + ], }, }; diff --git a/src/gadgets/shared/cliCommandFactory.ts b/src/gadgets/shared/cliCommandFactory.ts index 67272225..6d52636a 100644 --- a/src/gadgets/shared/cliCommandFactory.ts +++ b/src/gadgets/shared/cliCommandFactory.ts @@ -12,13 +12,16 @@ import { readFileSync } from 'node:fs'; import { Flags } from '@oclif/core'; +import { distance } from 'fastest-levenshtein'; import { CredentialScopedCommand, resolveOwnerRepo } from '../../cli/base.js'; +import { emitCliError } from './errorEnvelope.js'; import type { CLIAutoResolved, FileInputAlternative, ParameterDefinition, ToolDefinition, + ToolExample, } from './toolDefinition.js'; // biome-ignore lint/suspicious/noExplicitAny: oclif flag generics do not compose safely for dynamic factories @@ -32,7 +35,12 @@ type ParsedFlags = Record; /** * Build a single oclif Flag from a ParameterDefinition. * Returns undefined if the parameter is gadgetOnly (excluded from CLI). + * + * Branches on parameter type taxonomy (string / number / boolean / enum / + * array-with-object-items / array-with-string-items / object) — the complexity + * reflects the domain shape, not tangled logic. */ +// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: parameter-type taxonomy function buildOclifFlag( def: ParameterDefinition, isAutoResolved: boolean, @@ -50,6 +58,7 @@ function buildOclifFlag( description: def.describe, required: isRequired, ...(def.cliEnvVar ? { env: def.cliEnvVar } : {}), + ...(def.cliAliases && def.cliAliases.length > 0 ? { aliases: [...def.cliAliases] } : {}), }; switch (def.type) { @@ -80,10 +89,13 @@ function buildOclifFlag( }); } case 'array': { - return Flags.string({ - ...baseOptions, - multiple: true, - }); + // Primitive arrays (items:'string') stay repeatable (--x a --x b). + // Object arrays (items:'object') take a single JSON-array string + // that the factory parses below. + if (def.items === 'object') { + return Flags.string({ ...baseOptions }); + } + return Flags.string({ ...baseOptions, multiple: true }); } case 'object': { return Flags.string({ @@ -136,48 +148,244 @@ function readFileInput(fileFlagValue: string): string { return fileFlagValue === '-' ? readFileSync(0, 'utf-8') : readFileSync(fileFlagValue, 'utf-8'); } +/** + * Derive an `expected` shape hint for a JSON-parse failure envelope from the + * parameter's manifest example (primary source) or its `describe` text (fallback). + */ +function expectedShapeFor(paramDef: ParameterDefinition, example?: unknown): string { + if (example !== undefined) { + return JSON.stringify(example); + } + // No example to lean on — give the agent the describe text so at least + // the shape hint is non-empty. + return paramDef.describe; +} + +/** + * JSON-parse a string and emit a structured envelope on failure. + * Returns the parsed value on success or never-returns on failure (emitCliError exits). + */ +/** + * Streams/exit delegate the factory hands to {@link emitCliError} so test spies + * on `instance.log`/`instance.exit` capture the envelope output instead of + * going directly to process.stdout / process.exit. + */ +interface ErrorSink { + stdout: NodeJS.WritableStream; + stderr: NodeJS.WritableStream; + exit: (code: number) => never; +} + +/** + * Maximum Levenshtein distance between an unknown flag and a declared name + * before we stop suggesting. Additionally bounded by `MAX_DISTANCE_RATIO` + * so that very short flags don't pick up wildly different suggestions. + */ +const MAX_FLAG_SUGGESTION_DISTANCE = 2; +const MAX_FLAG_SUGGESTION_RATIO = 0.4; + +/** + * For the given unknown flag and the command's declared flag names + aliases, + * return the Levenshtein-closest canonical declared name if it passes the + * distance threshold; otherwise null. Aliases are considered during the match + * but the returned value is always the canonical flag name. + */ +function suggestFlag( + unknown: string, + candidates: { canonical: string; aliases: readonly string[] }[], +): string | null { + let best: { canonical: string; dist: number } | null = null; + for (const { canonical, aliases } of candidates) { + for (const candidate of [canonical, ...aliases]) { + const d = distance(unknown, candidate); + if (best === null || d < best.dist) { + best = { canonical, dist: d }; + } + } + } + if (best === null) return null; + const target = Math.max(unknown.length, best.canonical.length); + if (best.dist > MAX_FLAG_SUGGESTION_DISTANCE) return null; + if (target > 0 && best.dist / target > MAX_FLAG_SUGGESTION_RATIO) return null; + return best.canonical; +} + +/** + * Collect canonical flag names + their declared aliases from a tool definition. + * Also includes any file-input alternative flags (which have no canonical counterpart; + * they stand on their own). Used by fuzzy-suggestion and by help rendering. + */ +function collectCandidateFlags( + def: ToolDefinition, +): { canonical: string; aliases: readonly string[] }[] { + const list: { canonical: string; aliases: readonly string[] }[] = []; + for (const [name, paramDef] of Object.entries(def.parameters)) { + if (paramDef.gadgetOnly) continue; + list.push({ canonical: name, aliases: paramDef.cliAliases ?? [] }); + } + for (const alt of def.cli?.fileInputAlternatives ?? []) { + list.push({ canonical: alt.fileFlag, aliases: [] }); + } + return list; +} + +/** + * Detect whether an error coming out of `this.parse()` is oclif's + * `NonExistentFlagsError`. We match by constructor name + the `flags` array + * shape to stay robust across oclif versions and avoid a deep import. + */ +function isNonexistentFlagError(err: unknown): err is { flags: string[]; message: string } { + if (!err || typeof err !== 'object') return false; + const e = err as { name?: string; constructor?: { name?: string }; flags?: unknown }; + const ctorName = e.constructor?.name ?? ''; + const errName = e.name ?? ''; + const looksLikeCLIParse = + errName === 'CLIParseError' || + errName === 'NonExistentFlagsError' || + ctorName === 'NonExistentFlagsError'; + return looksLikeCLIParse && Array.isArray(e.flags); +} + +/** + * Build an error sink bound to a CredentialScopedCommand instance, so that + * emitCliError routes envelope output through `instance.log` (stripping the + * trailing newline oclif's `this.log` adds itself) and `instance.exit`. This + * keeps test spies honest. + * + * Note on stderr: Command instances don't expose a `stderr` method publicly, + * but tests don't assert on stderr from the factory surface (they assert on + * the prose summary via the errorEnvelope unit tests). Here we write directly + * to `process.stderr`. + */ +function buildSink(command: CredentialScopedCommand): ErrorSink { + const stdout: NodeJS.WritableStream = { + write: (chunk: string | Uint8Array): boolean => { + const text = typeof chunk === 'string' ? chunk : String(chunk); + // Some tests construct commands without a log spy installed; fall back + // to a no-op so envelope emission can't crash the test setup. + if (typeof command.log === 'function') { + command.log(text.replace(/\n$/, '')); + } + return true; + }, + } as NodeJS.WritableStream; + const exit = + typeof command.exit === 'function' + ? (command.exit.bind(command) as (code: number) => never) + : (process.exit as (code: number) => never); + return { stdout, stderr: process.stderr, exit }; +} + +function parseJsonOrError( + raw: string, + flag: string, + paramDef: ParameterDefinition, + fileAlt: FileInputAlternative | undefined, + example: unknown, + sink: ErrorSink, +): unknown { + try { + return JSON.parse(raw); + } catch (err) { + const hint = fileAlt + ? `Use double-quoted JSON keys and values. For long payloads pass --${fileAlt.fileFlag} (or - for stdin).` + : 'Use double-quoted JSON keys and values.'; + return emitCliError({ + type: 'json-parse', + flag, + message: err instanceof Error ? err.message : String(err), + got: raw, + expected: expectedShapeFor(paramDef, example), + hint, + stdout: sink.stdout, + stderr: sink.stderr, + exit: sink.exit, + }); + } +} + function resolveFileInputParam( name: string, paramDef: ParameterDefinition, fileAlt: FileInputAlternative, flags: ParsedFlags, - command: CredentialScopedCommand, resolvedParams: Record, + example: unknown, + sink: ErrorSink, ): void { const fileFlagValue = flags[fileAlt.fileFlag]; const directValue = flags[name]; if (typeof fileFlagValue === 'string' && fileFlagValue.length > 0) { - resolvedParams[name] = readFileInput(fileFlagValue); + const contents = readFileInput(fileFlagValue); + if (fileAlt.parseAs === 'json') { + resolvedParams[name] = parseJsonOrError(contents, name, paramDef, fileAlt, example, sink); + return; + } + resolvedParams[name] = contents; return; } - if (typeof directValue === 'string') { - resolvedParams[name] = directValue; - return; + // Direct (non-file) value: for array-of-object we still need to JSON-parse; + // for primitive string params we pass through. + if (directValue !== undefined && directValue !== null) { + if (paramDef.type === 'array' && paramDef.items === 'object') { + const asString = typeof directValue === 'string' ? directValue : JSON.stringify(directValue); + resolvedParams[name] = parseJsonOrError(asString, name, paramDef, fileAlt, example, sink); + return; + } + if (typeof directValue === 'string') { + resolvedParams[name] = directValue; + return; + } } if (paramDef.required === true) { - command.error(`Either --${name} or --${fileAlt.fileFlag} is required`); + emitCliError({ + type: 'missing-required', + flag: name, + message: `Either --${name} or --${fileAlt.fileFlag} is required`, + hint: `Pass --${name} '' or --${fileAlt.fileFlag} (use - for stdin).`, + stdout: sink.stdout, + stderr: sink.stderr, + exit: sink.exit, + }); } } function resolveObjectParam( name: string, flags: ParsedFlags, - command: CredentialScopedCommand, resolvedParams: Record, + paramDef: ParameterDefinition, + example: unknown, + sink: ErrorSink, ): void { const rawValue = flags[name]; if (typeof rawValue !== 'string') { return; } + resolvedParams[name] = parseJsonOrError(rawValue, name, paramDef, undefined, example, sink); +} - try { - resolvedParams[name] = JSON.parse(rawValue) as unknown; - } catch { - command.error(`--${name} must be valid JSON`); - } +/** + * Resolve a `type:'array', items:'object'` flag value: JSON-parse the single + * string form. oclif gives us a bare string because we set `multiple:false` + * for items:'object' in buildOclifFlag. + */ +function resolveArrayOfObjectParam( + name: string, + flags: ParsedFlags, + resolvedParams: Record, + paramDef: ParameterDefinition, + fileAlt: FileInputAlternative | undefined, + example: unknown, + sink: ErrorSink, +): void { + const rawValue = flags[name]; + if (rawValue === undefined) return; + const asString = typeof rawValue === 'string' ? rawValue : JSON.stringify(rawValue); + resolvedParams[name] = parseJsonOrError(asString, name, paramDef, fileAlt, example, sink); } function resolveStandardParam( @@ -196,7 +404,7 @@ function resolveDirectParams( flags: ParsedFlags, fileInputMap: Map, autoResolvedMap: Map, - command: CredentialScopedCommand, + sink: ErrorSink, ): Record { const resolvedParams: Record = {}; @@ -208,14 +416,22 @@ function resolveDirectParams( continue; } + // Derive one concrete example value per parameter from def.examples, used + // by JSON-parse error messages to show the agent the expected shape. + const example = findExampleForParam(def.examples, name); const fileAlt = fileInputMap.get(name); if (fileAlt) { - resolveFileInputParam(name, paramDef, fileAlt, flags, command, resolvedParams); + resolveFileInputParam(name, paramDef, fileAlt, flags, resolvedParams, example, sink); continue; } if (paramDef.type === 'object') { - resolveObjectParam(name, flags, command, resolvedParams); + resolveObjectParam(name, flags, resolvedParams, paramDef, example, sink); + continue; + } + + if (paramDef.type === 'array' && paramDef.items === 'object') { + resolveArrayOfObjectParam(name, flags, resolvedParams, paramDef, undefined, example, sink); continue; } @@ -225,6 +441,99 @@ function resolveDirectParams( return resolvedParams; } +/** + * Pull the first concrete value for `paramName` out of the tool definition's + * examples block. Mirrors the manifest generator's `findExampleForParam` but + * kept local to avoid a cross-import cycle with the manifest module. + */ +function findExampleForParam( + examples: readonly ToolExample[] | undefined, + paramName: string, +): unknown { + if (!examples) return undefined; + for (const ex of examples) { + if (Object.hasOwn(ex.params, paramName) && ex.params[paramName] !== undefined) { + return ex.params[paramName]; + } + } + return undefined; +} + +/** + * Render the tool definition's `examples` block as oclif-flavored example strings. + * Each entry becomes a single shell-safe invocation line on `FactoryCommand.examples`. + */ +function buildOclifExamples(def: ToolDefinition, cliCommand: string): string[] { + if (!def.examples || def.examples.length === 0) return []; + return def.examples.map((ex) => formatExampleLine(cliCommand, def, ex)); +} + +// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: parameter-type taxonomy +function formatExampleLine(cliCommand: string, def: ToolDefinition, ex: ToolExample): string { + const parts: string[] = [cliCommand]; + for (const [key, value] of Object.entries(ex.params)) { + const paramDef = def.parameters[key]; + if (!paramDef || paramDef.gadgetOnly) continue; + if (value === undefined) continue; + + if (paramDef.type === 'boolean') { + if (value === true) parts.push(`--${key}`); + else parts.push(`--no-${key}`); + continue; + } + if (paramDef.type === 'array' && paramDef.items === 'string' && Array.isArray(value)) { + for (const v of value) parts.push(`--${key} ${shellQuote(String(v))}`); + continue; + } + if (paramDef.type === 'object' || (paramDef.type === 'array' && paramDef.items === 'object')) { + parts.push(`--${key} ${shellQuote(JSON.stringify(value))}`); + continue; + } + parts.push(`--${key} ${shellQuote(String(value))}`); + } + return parts.join(' '); +} + +function shellQuote(s: string): string { + // Wrap with single quotes; escape any embedded single quote with the + // classic '\'' trick so copy/paste into a POSIX shell remains safe. + return `'${s.replace(/'/g, `'\\''`)}'`; +} + +/** + * Derive the `cascade-tools ` prefix for a tool, matching + * {@link ../shared/manifestGenerator.ts#deriveCLICommand}. Duplicated locally to + * avoid a cross-import cycle. + */ +function deriveCommandPrefix(toolName: string): string { + // Minimal duplicate of deriveCLICommand — enough to seed oclif examples. + if (toolName === 'Finish') { + return `cascade-tools session ${kebab(toolName)}`; + } + if ( + toolName.startsWith('CreatePR') || + toolName.startsWith('GetPR') || + toolName.startsWith('PostPR') || + toolName.startsWith('UpdatePR') || + toolName.startsWith('ReplyTo') || + toolName === 'GetCIRunLogs' + ) { + return `cascade-tools scm ${kebab(toolName)}`; + } + let commandName = toolName; + if (toolName.startsWith('PM') && toolName.length > 2 && /[A-Z]/.test(toolName[2])) { + commandName = toolName.slice(2); + } + return `cascade-tools pm ${kebab(commandName)}`; +} + +function kebab(name: string): string { + return name + .replace(/([A-Z]+)([A-Z][a-z])/g, '$1-$2') + .replace(/([a-z\d])([A-Z])/g, '$1-$2') + .toLowerCase(); +} + function resolveGitRemoteParams( autoResolvedParams: CLIAutoResolved[], flags: ParsedFlags, @@ -310,30 +619,63 @@ export function createCLICommand( fileInputAlts.map((a) => [a.paramName, a]), ); + const commandPrefix = deriveCommandPrefix(def.name); + const staticExamples = buildOclifExamples(def, commandPrefix); + class FactoryCommand extends CredentialScopedCommand { static override description = def.description; static override flags = flagsRecord; + static override examples = staticExamples; async execute(): Promise { - const { flags } = await this.parse(FactoryCommand); + // Build a sink that routes emitCliError output through the instance's + // log/exit — lets tests spy on instance.log and instance.exit. + const sink = buildSink(this); + + let flags: unknown; + try { + ({ flags } = await this.parse(FactoryCommand)); + } catch (err) { + if (isNonexistentFlagError(err)) { + const candidates = collectCandidateFlags(def); + const offending = err.flags[0] ?? ''; + const suggestion = suggestFlag(offending, candidates); + emitCliError({ + type: 'unknown-flag', + flag: offending, + message: err.message, + ...(suggestion ? { hint: `did you mean --${suggestion}?` } : {}), + stdout: sink.stdout, + stderr: sink.stderr, + exit: sink.exit, + }); + return; + } + throw err; + } const parsedFlags = flags as ParsedFlags; const resolvedParams = resolveDirectParams( def, parsedFlags, fileInputMap, autoResolvedMap, - this, + sink, ); resolveGitRemoteParams(autoResolvedParams, parsedFlags, resolvedParams); - // Call the core function + // Call the core function — wrap runtime failures in the spec-014 envelope. let result: unknown; try { result = await coreFn(resolvedParams); } catch (err) { const message = err instanceof Error ? err.message : String(err); - this.log(JSON.stringify({ success: false, error: message })); - this.exit(1); + emitCliError({ + type: 'runtime', + message, + stdout: sink.stdout, + stderr: sink.stderr, + exit: sink.exit, + }); return; } diff --git a/src/gadgets/shared/errorEnvelope.ts b/src/gadgets/shared/errorEnvelope.ts new file mode 100644 index 00000000..df1a7663 --- /dev/null +++ b/src/gadgets/shared/errorEnvelope.ts @@ -0,0 +1,113 @@ +/** + * Shared cascade-tools CLI error envelope (spec 014). + * + * Every cascade-tools failure — flag-parse, JSON-parse, missing-required, + * enum-mismatch, unknown-flag, auth, runtime — emits through {@link emitCliError}: + * + * - Structured JSON on stdout: `{"success":false,"error":}` so agents + * parsing CLI output see one stable surface. + * - One-line prose summary on stderr so humans running the command in a terminal + * get a readable error without piping through `jq`. + * - Exit code 1. + * + * The envelope shape is part of the cascade-tools contract. Renaming fields is + * a breaking change — agents rely on `error.type` / `error.flag` / `error.hint` + * to self-correct on the next attempt. + */ + +/** + * Classification of a cascade-tools failure. Agents may branch on this. + */ +export type CliErrorType = + | 'flag-parse' + | 'json-parse' + | 'missing-required' + | 'enum-mismatch' + | 'unknown-flag' + | 'auth' + | 'runtime'; + +/** + * The stable envelope shape emitted by {@link emitCliError}. New fields may be + * added over time; existing field names are load-bearing. + */ +export interface CliErrorEnvelope { + /** Classification the agent may branch on */ + type: CliErrorType; + /** Flag name associated with the failure (when applicable) */ + flag?: string; + /** Human-readable message describing the failure */ + message: string; + /** Truncated view of the offending input, when relevant */ + got?: string; + /** A shape fragment describing what was expected, when relevant */ + expected?: string; + /** A hint the agent can act on (e.g. "did you mean --comments?") */ + hint?: string; + /** A runnable example the agent can adapt, when the tool definition has one */ + example?: string; +} + +/** + * Options accepted by {@link emitCliError}. Extends the envelope with optional + * stream + exit injection for testability. + */ +export interface EmitCliErrorOptions extends CliErrorEnvelope { + stdout?: NodeJS.WritableStream; + stderr?: NodeJS.WritableStream; + exit?: (code: number) => never; +} + +const DEFAULT_GOT_TRUNCATE = 80; + +/** + * Truncate a long user input to `max` characters, appending `...` when cut. + * Exported for unit tests; internal callers use it directly. + */ +export function truncateGot(input: string, max = DEFAULT_GOT_TRUNCATE): string { + if (input.length <= max) return input; + return `${input.slice(0, max)}...`; +} + +/** + * Build the one-line prose summary written to stderr. + * + * Format: `--: ` (omits the flag clause when absent). + * Trimmed to {@link max} characters to keep the stderr surface glanceable. + */ +function buildProseSummary(env: CliErrorEnvelope, max = 120): string { + const flagPart = env.flag ? `--${env.flag}: ` : ''; + const line = `${flagPart}${env.type} — ${env.message}`.replace(/\s+/g, ' ').trim(); + return line.length <= max ? line : `${line.slice(0, max - 3)}...`; +} + +/** + * Emit a cascade-tools error envelope and terminate the process. + * + * - Writes `{"success":false,"error":}\n` to stdout. + * - Writes a short prose summary to stderr. + * - Exits with code 1. + * + * Callers MUST treat this as a non-returning function. The default `exit` + * delegate is `process.exit`; tests inject a throwing stub to verify the call. + */ +export function emitCliError(opts: EmitCliErrorOptions): never { + const stdout = opts.stdout ?? process.stdout; + const stderr = opts.stderr ?? process.stderr; + const exit = opts.exit ?? (process.exit as (code: number) => never); + + const envelope: CliErrorEnvelope = { + type: opts.type, + message: opts.message, + }; + if (opts.flag !== undefined) envelope.flag = opts.flag; + if (opts.got !== undefined) envelope.got = truncateGot(opts.got); + if (opts.expected !== undefined) envelope.expected = opts.expected; + if (opts.hint !== undefined) envelope.hint = opts.hint; + if (opts.example !== undefined) envelope.example = opts.example; + + stdout.write(`${JSON.stringify({ success: false, error: envelope })}\n`); + stderr.write(`${buildProseSummary(envelope)}\n`); + + return exit(1); +} diff --git a/src/gadgets/shared/manifestGenerator.ts b/src/gadgets/shared/manifestGenerator.ts index d4a33e5e..40658d43 100644 --- a/src/gadgets/shared/manifestGenerator.ts +++ b/src/gadgets/shared/manifestGenerator.ts @@ -12,21 +12,9 @@ * - The `cliCommand` is derived from the definition name (kebab-cased) */ -import type { ToolManifest } from '../../agents/contracts/index.js'; +import type { ToolManifest, ToolManifestParameter } from '../../agents/contracts/index.js'; import type { ParameterDefinition, ToolDefinition } from './toolDefinition.js'; -// --------------------------------------------------------------------------- -// Parameter schema entry type (ToolManifest.parameters value) -// --------------------------------------------------------------------------- - -interface ManifestParameterEntry { - type: string; - required?: boolean; - default?: unknown; - description?: string; - options?: string[]; -} - // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- @@ -38,11 +26,11 @@ interface ManifestParameterEntry { function buildManifestParam( def: ParameterDefinition, isRequired: boolean, -): ManifestParameterEntry | undefined { +): ToolManifestParameter | undefined { // gadgetOnly params are excluded from manifests if (def.gadgetOnly) return undefined; - const entry: ManifestParameterEntry = { + const entry: ToolManifestParameter = { type: def.type === 'array' ? 'array' : def.type === 'object' ? 'object' : def.type, ...(isRequired ? { required: true } : {}), ...('default' in def && def.default !== undefined ? { default: def.default } : {}), @@ -61,9 +49,36 @@ function buildManifestParam( entry.type = 'string'; } + // Spec 014: thread array items, aliases, and example values through to the manifest + if (def.type === 'array') { + entry.items = def.items; + } + + if (def.cliAliases && def.cliAliases.length > 0) { + entry.aliases = [...def.cliAliases]; + } + return entry; } +/** + * Find the first example whose `params` object has a defined value for `paramName`, + * and return that value. Used by the manifest generator to surface one concrete + * shape per parameter to agents + help text. + */ +function findExampleForParam( + examples: readonly { params: Record }[] | undefined, + paramName: string, +): unknown { + if (!examples) return undefined; + for (const ex of examples) { + if (Object.hasOwn(ex.params, paramName) && ex.params[paramName] !== undefined) { + return ex.params[paramName]; + } + } + return undefined; +} + /** * Convert a PascalCase or camelCase tool name to a kebab-case CLI command segment. * @@ -180,6 +195,10 @@ export function generateToolManifest( const isRequired = paramDef.required === true; const entry = buildManifestParam(paramDef, isRequired); if (entry) { + const example = findExampleForParam(def.examples, name); + if (example !== undefined) { + entry.example = example; + } parameters[name] = entry; } } diff --git a/src/gadgets/shared/toolDefinition.ts b/src/gadgets/shared/toolDefinition.ts index f0bf180f..3f3f64ba 100644 --- a/src/gadgets/shared/toolDefinition.ts +++ b/src/gadgets/shared/toolDefinition.ts @@ -50,6 +50,13 @@ export interface FileInputAlternative { fileFlag: string; /** Optional description for the file flag in CLI help output */ description?: string; + /** + * How to interpret the file contents before handing to the gadget. + * - `'string'` (default): pass the raw file contents through as a string. + * - `'json'`: `JSON.parse()` the file contents; emit a structured error envelope on parse failure. + * Use this for parameters declared as `type: 'array'` with `items: 'object'` or `type: 'object'`. + */ + parseAs?: 'string' | 'json'; } /** @@ -108,6 +115,16 @@ interface BaseParameterDefinition { * @example 'CASCADE_BASE_BRANCH' for the `base` param in CreatePR */ cliEnvVar?: string; + /** + * Alternative CLI flag names accepted for this parameter. + * Wired into oclif as `Flags.*({aliases: [...]})`; the canonical name is always + * the parameter key. Surfaced in the agent-facing tool manifest so the system + * prompt lists both the canonical flag and its aliases. + * + * @example `--comments` with `cliAliases: ['comment']` accepts `--comment '[...]'` + * as well, so agent muscle-memory on the singular spelling still works. + */ + cliAliases?: readonly string[]; } /** diff --git a/src/integrations/README.md b/src/integrations/README.md index 0bce39a6..4514a825 100644 --- a/src/integrations/README.md +++ b/src/integrations/README.md @@ -1,5 +1,7 @@ # PM Integration Architecture +> ℹ️ Adding a `cascade-tools` gadget (CLI command consumed by agents)? See [`src/gadgets/README.md`](../gadgets/README.md) for the authoring contract — declarative metadata (`cliAliases`, `fileInputAlternatives`, `examples`), the structured error envelope, and the single-entrypoint invariant. PM provider registration (this file) is a separate surface. + CASCADE's PM providers (Trello, JIRA, Linear, and any future Asana/GitLab/ClickUp) are built on a **provider manifest** pattern. One file describes the provider end-to-end; one registry iterates manifests; a behavioral conformance harness guarantees each manifest satisfies its declared contracts. This document is the canonical guide for adding a new PM provider. Five specs shape it: diff --git a/src/pm/linear/integration.ts b/src/pm/linear/integration.ts index 429efd16..df8e5edf 100644 --- a/src/pm/linear/integration.ts +++ b/src/pm/linear/integration.ts @@ -76,13 +76,18 @@ export class LinearIntegration implements PMIntegration { resolveLifecycleConfig(project: ProjectConfig): ProjectPMConfig { const linearConfig = getLinearConfig(project); const labels = linearConfig?.labels; + // NOTE: Unlike JIRA (which auto-creates labels by name), Linear requires UUIDs for + // label operations via resolveLabelId(). Defaulting to name strings like 'cascade-auto' + // would cause addLabel to silently no-op when no UUID is configured. Labels that are + // not explicitly set in the project config are left as undefined so callers can skip + // the operation rather than silently fail. return { labels: { - processing: labels?.processing ?? 'cascade-processing', - processed: labels?.processed ?? 'cascade-processed', - error: labels?.error ?? 'cascade-error', - readyToProcess: labels?.readyToProcess ?? 'cascade-ready', - auto: labels?.auto ?? 'cascade-auto', + processing: labels?.processing, + processed: labels?.processed, + error: labels?.error, + readyToProcess: labels?.readyToProcess, + auto: labels?.auto, }, statuses: { backlog: linearConfig?.statuses?.backlog, diff --git a/src/router/active-workers.ts b/src/router/active-workers.ts index 8e3b74eb..4836e6f9 100644 --- a/src/router/active-workers.ts +++ b/src/router/active-workers.ts @@ -25,6 +25,32 @@ export interface ActiveWorker { agentType?: string; } +/** + * Diagnostic facts about a worker exit, surfaced into the run record's `error` + * field so post-mortem investigations can answer "was this OOM?", "was it + * killed by Docker?" without ssh + syslog access. Sourced from + * `dockerode container.inspect()` after `wait()`. + */ +export interface ExitDetails { + /** `State.OOMKilled` from Docker — definitive cgroup-OOM signal. */ + oomKilled?: boolean; + /** `State.Error` from Docker — non-empty when the runtime aborted the container. */ + exitReason?: string; +} + +/** + * Format a worker-crash reason string with whatever diagnostic facts we have. + * Stable, grep-friendly format: `Worker crashed with exit code N · OOMKilled=… · reason="…"`. + * Empty / undefined fields are omitted. + */ +export function formatCrashReason(exitCode: number, details?: ExitDetails): string { + const parts: string[] = [`Worker crashed with exit code ${exitCode}`]; + if (details?.oomKilled === true) parts.push('OOMKilled=true'); + else if (details?.oomKilled === false) parts.push('OOMKilled=false'); + if (details?.exitReason) parts.push(`reason="${details.exitReason}"`); + return parts.join(' · '); +} + export const activeWorkers = new Map(); /** @@ -50,7 +76,7 @@ export function getActiveWorkers(): Array<{ jobId: string; startedAt: Date }> { * The timeout path (killWorker) handles its own 'timed_out' DB update and calls * cleanupWorker without an exitCode so this block is skipped. */ -export function cleanupWorker(jobId: string, exitCode?: number): void { +export function cleanupWorker(jobId: string, exitCode?: number, details?: ExitDetails): void { const worker = activeWorkers.get(jobId); if (worker) { clearTimeout(worker.timeoutHandle); @@ -62,20 +88,15 @@ export function cleanupWorker(jobId: string, exitCode?: number): void { } if (exitCode !== undefined && exitCode !== 0 && worker.projectId) { const durationMs = Date.now() - worker.startedAt.getTime(); + const reason = formatCrashReason(exitCode, details); const updatePromise = worker.workItemId - ? failOrphanedRun( - worker.projectId, - worker.workItemId, - `Worker crashed with exit code ${exitCode}`, - 'failed', - durationMs, - ) + ? failOrphanedRun(worker.projectId, worker.workItemId, reason, 'failed', durationMs) : failOrphanedRunFallback( worker.projectId, worker.agentType, worker.startedAt, 'failed', - `Worker crashed with exit code ${exitCode}`, + reason, durationMs, ); updatePromise diff --git a/src/router/container-manager.ts b/src/router/container-manager.ts index cdb7894a..7ccd2636 100644 --- a/src/router/container-manager.ts +++ b/src/router/container-manager.ts @@ -138,6 +138,123 @@ async function removeContainer(containerId: string): Promise { } } +/** + * Inspect a just-exited container and pull the diagnostic facts that explain + * its exit. `OOMKilled` and `State.Error` are only available before AutoRemove + * (or our manual `removeContainer`) reaps the container, so this MUST run + * before either path. Returns nullable fields when inspection fails — never + * throws. + * + * @internal Exported for unit testing only — call `onWorkerExit` from app code. + */ +export async function inspectExitedContainer( + container: ReturnType, + jobId: string, +): Promise<{ oomKilled?: boolean; exitReason?: string; durationMs?: number }> { + // Wrap in try/catch — `inspect()` can fail post-exit if the daemon socket + // drops, the container is reaped between `wait()` and here, or the API + // times out. Diagnostics are best-effort; falling back to undefined is + // safer than failing the whole post-exit pipeline. + let inspectResult: Awaited> | null = null; + try { + inspectResult = await container.inspect(); + } catch (err) { + logger.warn('[WorkerManager] container.inspect() after wait failed:', { + jobId, + error: String(err), + }); + } + const state = inspectResult?.State; + const oomKilled = state?.OOMKilled; + // Empty `State.Error` (the common case for clean exits) → undefined so the + // run-record reason string omits the `reason="…"` segment entirely. + const exitReason = state?.Error?.length ? state.Error : undefined; + const startedAtIso = state?.StartedAt; + const finishedAtIso = state?.FinishedAt; + // Docker can report sentinel timestamps (e.g. `0001-01-01T00:00:00Z` for a + // container that never fully started) that parse to NaN — drop those so + // downstream logs/Sentry don't ship `durationMs: NaN`. + const rawDurationMs = + startedAtIso && finishedAtIso + ? new Date(finishedAtIso).getTime() - new Date(startedAtIso).getTime() + : undefined; + const durationMs = + rawDurationMs !== undefined && Number.isFinite(rawDurationMs) && rawDurationMs >= 0 + ? rawDurationMs + : undefined; + return { oomKilled, exitReason, durationMs }; +} + +/** + * Tail-log the worker's stdout/stderr for at-a-glance debugging. Full + * per-worker logs are also indexed in Loki via promtail's per-container label + * (`{container="/cascade-worker-${jobId}"}`); this 50-line tail is a + * convenience and is not load-bearing. + */ +async function logWorkerTail(container: ReturnType): Promise { + try { + const logs = await container.logs({ stdout: true, stderr: true, follow: false }); + const logText = logs.toString('utf-8'); + if (!logText.trim()) return; + const lines = logText.trim().split('\n'); + const tail = lines.slice(-50).join('\n'); + logger.info( + `[WorkerManager] Worker logs (last ${Math.min(lines.length, 50)} of ${lines.length} lines):\n${tail}`, + ); + } catch { + // Container may already be removed — expected with AutoRemove + } +} + +/** + * Post-exit handler for a worker container: pulls Docker diagnostics, tail-logs + * stdout, decides snapshot commit vs. skip, and runs cleanup. Extracted from + * the `wait()` callback to keep its cyclomatic complexity within the lint budget. + */ +async function onWorkerExit(opts: { + container: ReturnType; + result: { StatusCode: number }; + jobId: string; + jobType: string; + snapshotEnabled: boolean; + projectId: string | null; + workItemId: string | undefined; +}): Promise { + const { container, result, jobId, jobType, snapshotEnabled, projectId, workItemId } = opts; + + const { oomKilled, exitReason, durationMs } = await inspectExitedContainer(container, jobId); + await logWorkerTail(container); + + if (result.StatusCode !== 0) { + captureException(new Error(`Worker exited with status ${result.StatusCode}`), { + tags: { source: 'worker_exit', jobType }, + extra: { jobId, statusCode: result.StatusCode, oomKilled, exitReason, durationMs }, + }); + } + logger.info('[WorkerManager] Worker exited:', { + jobId, + statusCode: result.StatusCode, + oomKilled: oomKilled ?? null, + exitReason: exitReason ?? null, + durationMs: durationMs ?? null, + }); + + if (snapshotEnabled) { + if (result.StatusCode === 0 && projectId && workItemId) { + await commitContainerToSnapshot(container.id, projectId, workItemId); + } else if (result.StatusCode !== 0) { + logger.info('[WorkerManager] Skipping snapshot commit after non-zero exit:', { + jobId, + statusCode: result.StatusCode, + }); + } + // Always remove manually since AutoRemove is disabled for snapshot runs. + await removeContainer(container.id); + } + + cleanupWorker(jobId, result.StatusCode, { oomKilled, exitReason }); +} + interface SpawnSettings { snapshotEnabled: boolean; workerImage: string; @@ -148,8 +265,10 @@ interface SpawnSettings { /** * Resolve per-project spawn settings (snapshot flag, image, timeout). * Centralises all loadProjectConfig() calls so spawnWorker stays simple. + * + * @internal Exported for unit testing only — call `spawnWorker` from app code. */ -async function resolveSpawnSettings( +export async function resolveSpawnSettings( projectId: string | null, workItemId: string | undefined, jobId: string, @@ -196,6 +315,21 @@ async function resolveSpawnSettings( containerTimeoutMs = projectCfg.watchdogTimeoutMs + ROUTER_KILL_BUFFER_MS; } + // Trace-log the actual values that will govern this worker's lifetime so a + // post-mortem can confirm whether the project's watchdogTimeoutMs override + // took effect or the global default leaked through. + logger.info('[WorkerManager] Resolved spawn settings:', { + jobId, + projectId, + workItemId, + workerImage, + snapshotEnabled, + containerTimeoutMs, + containerTimeoutMinutes: Math.round(containerTimeoutMs / 60_000), + projectWatchdogTimeoutMs: projectCfg?.watchdogTimeoutMs ?? null, + globalWorkerTimeoutMs: routerConfig.workerTimeoutMs, + }); + return { snapshotEnabled, workerImage, containerTimeoutMs, snapshotTtlMs }; } @@ -299,54 +433,15 @@ async function createAndMonitorContainer( container .wait() .then(async (result) => { - // Collect worker logs before removal - try { - const logs = await container.logs({ - stdout: true, - stderr: true, - follow: false, - }); - const logText = logs.toString('utf-8'); - if (logText.trim()) { - const lines = logText.trim().split('\n'); - const tail = lines.slice(-50).join('\n'); - logger.info( - `[WorkerManager] Worker logs (last ${Math.min(lines.length, 50)} of ${lines.length} lines):\n${tail}`, - ); - } - } catch { - // Container may already be removed — expected with AutoRemove - } - - if (result.StatusCode !== 0) { - captureException(new Error(`Worker exited with status ${result.StatusCode}`), { - tags: { source: 'worker_exit', jobType: job.data.type }, - extra: { jobId, statusCode: result.StatusCode }, - }); - } - logger.info('[WorkerManager] Worker exited:', { + await onWorkerExit({ + container, + result, jobId, - statusCode: result.StatusCode, + jobType: job.data.type, + snapshotEnabled, + projectId, + workItemId, }); - - // For snapshot-enabled runs, commit on clean exit and then remove the container. - // Failed or timed-out runs must NOT create a snapshot. - // Always remove — even when projectId/workItemId are absent — to avoid - // orphaning containers that ran with AutoRemove=false. - if (snapshotEnabled) { - if (result.StatusCode === 0 && projectId && workItemId) { - await commitContainerToSnapshot(container.id, projectId, workItemId); - } else if (result.StatusCode !== 0) { - logger.info('[WorkerManager] Skipping snapshot commit after non-zero exit:', { - jobId, - statusCode: result.StatusCode, - }); - } - // Always remove the container manually since AutoRemove is disabled - await removeContainer(container.id); - } - - cleanupWorker(jobId, result.StatusCode); }) .catch((err) => { logger.error('[WorkerManager] Error waiting for container:', err); diff --git a/src/triggers/shared/agent-execution.ts b/src/triggers/shared/agent-execution.ts index c3ffe7db..fa8cb100 100644 --- a/src/triggers/shared/agent-execution.ts +++ b/src/triggers/shared/agent-execution.ts @@ -667,6 +667,28 @@ async function propagateAutoLabelAfterSplitting( const autoLabelId = pmConfig.labels.auto; if (!autoLabelId) return null; + // Resolve the actual label ID from the matched parent work item label. + // pmConfig.labels.auto may be a human-readable name string (e.g. 'cascade-auto') + // rather than a UUID when the project was not explicitly configured with UUIDs. + // Providers like Linear require UUIDs for addLabel — passing a name string causes + // resolveLabelId() to return null and the operation silently no-ops. + // By resolving the id from the parent's matched label we always pass the correct + // identifier regardless of config format. + // NOTE: The UUID check is scoped to Linear only. Trello uses 24-character MongoDB + // Object IDs and JIRA uses name strings — both are valid non-UUID formats for those + // providers and should not produce log noise in happy paths. + const UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; + if (project.pm.type === 'linear' && !UUID_REGEX.test(autoLabelId)) { + logger.warn( + 'propagateAutoLabelAfterSplitting: labels.auto is not a UUID; resolving ID from parent labels', + { autoLabelId }, + ); + } + const matchedLabel = parentWorkItem.labels.find( + (l) => l.id === autoLabelId || l.name === autoLabelId, + ); + const resolvedAutoLabelId = matchedLabel ? matchedLabel.id : autoLabelId; + // List backlog items via the unified call shape — provider self-resolves // scope (Trello list / JIRA project / Linear team) and maps the CASCADE // status key to its native identifier from its own config. @@ -691,7 +713,7 @@ async function propagateAutoLabelAfterSplitting( backlogItems .filter((item) => !hasAutoLabel(item.labels, pmConfig)) .map((item) => - provider.addLabel(item.id, autoLabelId).catch((err) => + provider.addLabel(item.id, resolvedAutoLabelId).catch((err) => logger.warn('Failed to add auto label to backlog item', { itemId: item.id, error: String(err), diff --git a/tests/unit/agents/utils/setup.test.ts b/tests/unit/agents/utils/setup.test.ts index fb1d3501..8fb98dc1 100644 --- a/tests/unit/agents/utils/setup.test.ts +++ b/tests/unit/agents/utils/setup.test.ts @@ -2,14 +2,16 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; vi.mock('node:fs', () => ({ existsSync: vi.fn(), + lstatSync: vi.fn(), readFileSync: vi.fn(), + readlinkSync: vi.fn(), })); vi.mock('../../../../src/utils/repo.js', () => ({ runCommand: vi.fn(), })); -import { existsSync, readFileSync } from 'node:fs'; +import { existsSync, lstatSync, readFileSync, readlinkSync } from 'node:fs'; import { getLogLevel, installDependencies, @@ -20,7 +22,9 @@ import { import { runCommand } from '../../../../src/utils/repo.js'; const mockExistsSync = vi.mocked(existsSync); +const mockLstatSync = vi.mocked(lstatSync); const mockReadFileSync = vi.mocked(readFileSync); +const mockReadlinkSync = vi.mocked(readlinkSync); const mockRunCommand = vi.mocked(runCommand); beforeEach(() => { @@ -134,6 +138,95 @@ describe('readContextFiles', () => { expect(result[0].content).toBe('# Claude docs'); }); + + // --- Deduplication tests --- + + it('deduplicates when both files have identical content, keeping CLAUDE.md', async () => { + const sharedContent = '# Shared context'; + mockRunCommand + .mockResolvedValueOnce({ stdout: sharedContent, stderr: '' }) + .mockResolvedValueOnce({ stdout: sharedContent, stderr: '' }); + // lstatSync: neither file is a symlink + mockLstatSync.mockReturnValue({ isSymbolicLink: () => false } as ReturnType); + + const result = await readContextFiles('/repo'); + + expect(result).toHaveLength(1); + expect(result[0].path).toBe('CLAUDE.md'); + expect(result[0].content).toBe(sharedContent); + }); + + it('keeps both files when content differs', async () => { + mockRunCommand + .mockResolvedValueOnce({ stdout: '# Claude content', stderr: '' }) + .mockResolvedValueOnce({ stdout: '# Agents content', stderr: '' }); + mockLstatSync.mockReturnValue({ isSymbolicLink: () => false } as ReturnType); + + const result = await readContextFiles('/repo'); + + expect(result).toHaveLength(2); + expect(result[0].path).toBe('CLAUDE.md'); + expect(result[1].path).toBe('AGENTS.md'); + }); + + it('deduplicates when AGENTS.md is a symlink to CLAUDE.md', async () => { + mockRunCommand + .mockResolvedValueOnce({ stdout: '# Claude docs', stderr: '' }) + .mockResolvedValueOnce({ stdout: '# Claude docs', stderr: '' }); + mockLstatSync + .mockReturnValueOnce({ isSymbolicLink: () => false } as ReturnType) // CLAUDE.md + .mockReturnValueOnce({ isSymbolicLink: () => true } as ReturnType); // AGENTS.md + mockReadlinkSync.mockReturnValue('CLAUDE.md' as never); + + const result = await readContextFiles('/repo'); + + expect(result).toHaveLength(1); + expect(result[0].path).toBe('CLAUDE.md'); + }); + + it('deduplicates when CLAUDE.md is a symlink to AGENTS.md', async () => { + mockRunCommand + .mockResolvedValueOnce({ stdout: '# Agents docs', stderr: '' }) + .mockResolvedValueOnce({ stdout: '# Agents docs', stderr: '' }); + mockLstatSync + .mockReturnValueOnce({ isSymbolicLink: () => true } as ReturnType) // CLAUDE.md + .mockReturnValueOnce({ isSymbolicLink: () => false } as ReturnType); // AGENTS.md + mockReadlinkSync.mockReturnValue('AGENTS.md' as never); + + const result = await readContextFiles('/repo'); + + expect(result).toHaveLength(1); + // CLAUDE.md (index 0) is kept as the canonical entry even though it is the symlink + expect(result[0].path).toBe('CLAUDE.md'); + }); + + it('falls back to content comparison when lstat throws', async () => { + const sharedContent = '# Same content'; + mockRunCommand + .mockResolvedValueOnce({ stdout: sharedContent, stderr: '' }) + .mockResolvedValueOnce({ stdout: sharedContent, stderr: '' }); + mockLstatSync.mockImplementation(() => { + throw new Error('EPERM: permission denied'); + }); + + const result = await readContextFiles('/repo'); + + expect(result).toHaveLength(1); + expect(result[0].path).toBe('CLAUDE.md'); + }); + + it('does not deduplicate when only one file exists', async () => { + mockRunCommand + .mockResolvedValueOnce({ stdout: '# Claude docs', stderr: '' }) + .mockRejectedValueOnce(new Error('ENOENT')); + + const result = await readContextFiles('/repo'); + + // lstatSync should NOT be called when only 1 result + expect(mockLstatSync).not.toHaveBeenCalled(); + expect(result).toHaveLength(1); + expect(result[0].path).toBe('CLAUDE.md'); + }); }); // ============================================================================ diff --git a/tests/unit/backends/claude-code.test.ts b/tests/unit/backends/claude-code.test.ts index c07a82a9..dd0bb5cf 100644 --- a/tests/unit/backends/claude-code.test.ts +++ b/tests/unit/backends/claude-code.test.ts @@ -149,7 +149,7 @@ describe('buildToolGuidance', () => { expect(guidance).not.toContain(''); }); - it('renders optional array params with brackets and repeatable hint', () => { + it('renders optional array params with brackets and repeatable hint (spec 014: no s-strip)', () => { const tools: ToolManifest[] = [ { name: 'TestTool', @@ -161,7 +161,10 @@ describe('buildToolGuidance', () => { }, ]; const guidance = buildToolGuidance(tools); - expect(guidance).toContain('[--tag (repeatable)]'); + // Spec 014: the renderer no longer strips trailing 's' — the actual + // plural key comes through verbatim. Root cause of prod run 5d993b04. + expect(guidance).toContain('[--tags (repeatable)]'); + expect(guidance).not.toContain('[--tag '); // old bug }); it('renders parameter description as inline comment', () => { diff --git a/tests/unit/backends/shared-nativeToolPrompts.test.ts b/tests/unit/backends/shared-nativeToolPrompts.test.ts index ec11b7f8..712a05c3 100644 --- a/tests/unit/backends/shared-nativeToolPrompts.test.ts +++ b/tests/unit/backends/shared-nativeToolPrompts.test.ts @@ -127,30 +127,105 @@ describe('buildToolGuidance', () => { }); }); - describe('formatParam — array param (repeatable)', () => { - it('formats required array param as repeatable', () => { + describe('formatParam — primitive array (items:"string") — repeatable', () => { + // Spec 014: the renderer used to strip trailing 's' from every array name. + // That was the root cause of prod run 5d993b04 — an agent sent `--comment` + // because the prompt told it to. Arrays now always render with the actual + // declared key. Repeatable semantics stay the same for items:"string". + it('formats required primitive-array param with the actual plural name', () => { const result = buildToolGuidance([ makeManifest({ parameters: { - items: { type: 'array', required: true }, + items: { type: 'array', items: 'string', required: true }, }, }), ]); - // Singular of 'items' is 'item' - expect(result).toContain('--item (repeatable)'); - expect(result).not.toContain('[--item'); + expect(result).toContain('--items (repeatable)'); + expect(result).not.toContain('--item '); // old bug: s-stripped singular + expect(result).not.toContain('[--items'); // required → no brackets }); - it('formats optional array param as repeatable with brackets', () => { + it('formats optional primitive-array param with the actual plural name and brackets', () => { const result = buildToolGuidance([ makeManifest({ parameters: { - labels: { type: 'array', required: false }, + labels: { type: 'array', items: 'string', required: false }, }, }), ]); - // Singular of 'labels' is 'label' - expect(result).toContain('[--label (repeatable)]'); + expect(result).toContain('[--labels (repeatable)]'); + expect(result).not.toContain('--label '); // old bug + }); + }); + + describe('formatParam — array of object (items:"object") — JSON blob (spec 014)', () => { + it('renders required array-of-object as a single JSON flag, not repeatable', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + comments: { + type: 'array', + items: 'object', + required: false, + description: 'Inline review comments', + }, + }, + }), + ]); + expect(result).toContain("[--comments '']"); + // The "repeatable string" lie from the old renderer is gone + expect(result).not.toContain('repeatable'); + expect(result).not.toContain('--comment '); + }); + + it('renders aliases next to the canonical flag name', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + comments: { + type: 'array', + items: 'object', + required: false, + aliases: ['comment'], + }, + }, + }), + ]); + expect(result).toContain('--comments|--comment'); + }); + + it('renders a one-line example line beneath the flag when the manifest provides one', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + comments: { + type: 'array', + items: 'object', + required: false, + example: [{ path: 'src/x.ts', line: 1, body: 'nit' }], + }, + }, + }), + ]); + // Example is surfaced on its own indented comment line + expect(result).toContain('example:'); + expect(result).toContain( + `--comments '${JSON.stringify([{ path: 'src/x.ts', line: 1, body: 'nit' }])}'`, + ); + }); + }); + + describe('formatParam — object param — JSON blob (spec 014)', () => { + it('renders object param as a single JSON flag', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + config: { type: 'object', required: true, description: 'Config JSON' }, + }, + }), + ]); + expect(result).toContain("--config ''"); + expect(result).not.toContain(''); // shouldn't leak the bare type }); }); diff --git a/tests/unit/cli/cli-command-factory.test.ts b/tests/unit/cli/cli-command-factory.test.ts index 830a98c0..9db02c7b 100644 --- a/tests/unit/cli/cli-command-factory.test.ts +++ b/tests/unit/cli/cli-command-factory.test.ts @@ -319,7 +319,18 @@ describe('cliCommandFactory — file-input resolution', () => { const Cmd = createCLICommand(def, coreFn); const cmd = new Cmd([], makeMockConfig() as never); - await expect(cmd.run()).rejects.toThrow('Either --text or --text-file is required'); + // Spec 014: missing-required routes through the structured envelope + // (type:'missing-required'), exits via `this.exit(1)` which throws + // 'EEXIT: 1'. The envelope includes the prose and a hint on stderr. + const logSpy = vi.spyOn(cmd, 'log'); + await expect(cmd.run()).rejects.toThrow(); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('missing-required'); + expect(output.error.flag).toBe('text'); }); it('handles files with special characters (quotes, backticks, $)', async () => { @@ -406,7 +417,7 @@ describe('cliCommandFactory — JSON output format', () => { expect(output).toEqual({ success: true, data: { id: 'result-1' } }); }); - it('outputs { success: false, error: message } on error', async () => { + it('outputs { success: false, error: { type: "runtime", message } } on error (spec 014 envelope)', async () => { const coreFn = vi.fn().mockRejectedValue(new Error('Something went wrong')); const def = makeToolDef({ parameters: { @@ -417,7 +428,6 @@ describe('cliCommandFactory — JSON output format', () => { const cmd = new Cmd(['--name', 'test'], makeMockConfig() as never); const logSpy = vi.spyOn(cmd, 'log'); - // Should not throw (error is caught internally) but may call this.exit(1) try { await cmd.run(); } catch { @@ -425,11 +435,16 @@ describe('cliCommandFactory — JSON output format', () => { } expect(logSpy).toHaveBeenCalledTimes(1); - const output = JSON.parse(logSpy.mock.calls[0][0] as string); - expect(output).toEqual({ success: false, error: 'Something went wrong' }); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; message: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('runtime'); + expect(output.error.message).toBe('Something went wrong'); }); - it('handles non-Error throws and outputs error string', async () => { + it('handles non-Error throws and carries the coerced string message (spec 014 envelope)', async () => { const coreFn = vi.fn().mockRejectedValue('string error'); const def = makeToolDef({ parameters: { @@ -446,9 +461,13 @@ describe('cliCommandFactory — JSON output format', () => { // this.exit(1) may throw } - const output = JSON.parse(logSpy.mock.calls[0][0] as string); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; message: string }; + }; expect(output.success).toBe(false); - expect(output.error).toBe('string error'); + expect(output.error.type).toBe('runtime'); + expect(output.error.message).toBe('string error'); }); it('includes the description from the ToolDefinition', () => { diff --git a/tests/unit/cli/file-input-flags.test.ts b/tests/unit/cli/file-input-flags.test.ts index 70d40870..d52acda3 100644 --- a/tests/unit/cli/file-input-flags.test.ts +++ b/tests/unit/cli/file-input-flags.test.ts @@ -202,9 +202,17 @@ describe('PostComment --text-file', () => { expect(postComment).toHaveBeenCalledWith('card-1', 'inline text'); }); - it('errors when neither --text nor --text-file is provided', async () => { + it('errors when neither --text nor --text-file is provided (spec 014 envelope)', async () => { const cmd = new PostComment(['--workItemId', 'card-1'], mockConfig as never); - await expect(cmd.run()).rejects.toThrow('Either --text or --text-file is required'); + const logSpy = vi.spyOn(cmd, 'log'); + await expect(cmd.run()).rejects.toThrow(); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('missing-required'); + expect(output.error.flag).toBe('text'); }); }); @@ -274,9 +282,17 @@ describe('CreatePR --body-file', () => { ); }); - it('errors when neither --body nor --body-file is provided', async () => { + it('errors when neither --body nor --body-file is provided (spec 014 envelope)', async () => { const cmd = new CreatePR(['--title', 'feat: x', '--head', 'feat/branch'], mockConfig as never); - await expect(cmd.run()).rejects.toThrow('Either --body or --body-file is required'); + const logSpy = vi.spyOn(cmd, 'log'); + await expect(cmd.run()).rejects.toThrow(); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('missing-required'); + expect(output.error.flag).toBe('body'); }); }); @@ -327,8 +343,16 @@ describe('PostPRComment --body-file', () => { expect(postPRComment).toHaveBeenCalledWith('owner', 'repo', 42, 'inline body'); }); - it('errors when neither --body nor --body-file is provided', async () => { + it('errors when neither --body nor --body-file is provided (spec 014 envelope)', async () => { const cmd = new PostPRComment(['--prNumber', '42'], mockConfig as never); - await expect(cmd.run()).rejects.toThrow('Either --body or --body-file is required'); + const logSpy = vi.spyOn(cmd, 'log'); + await expect(cmd.run()).rejects.toThrow(); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('missing-required'); + expect(output.error.flag).toBe('body'); }); }); diff --git a/tests/unit/cli/scm/create-pr-review-sidecar.test.ts b/tests/unit/cli/scm/create-pr-review-sidecar.test.ts index e433017b..c4a3c32a 100644 --- a/tests/unit/cli/scm/create-pr-review-sidecar.test.ts +++ b/tests/unit/cli/scm/create-pr-review-sidecar.test.ts @@ -126,9 +126,16 @@ describe('CreatePRReviewCommand sidecar write', () => { } as never); await cmd.execute(); - expect(vi.mocked(cmd.log)).toHaveBeenCalledWith( - JSON.stringify({ success: false, error: 'GitHub API error' }), - ); + // Spec 014: runtime failures emit the structured envelope. + const logged = vi.mocked(cmd.log).mock.calls.map((c) => c[0] as string); + const jsonLine = logged.find((l) => l.startsWith('{')) ?? ''; + const output = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; message: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('runtime'); + expect(output.error.message).toBe('GitHub API error'); expect(vi.mocked(cmd.exit)).toHaveBeenCalledWith(1); expect(existsSync(sidecarPath)).toBe(false); }); diff --git a/tests/unit/cli/scm/create-pr-sidecar.test.ts b/tests/unit/cli/scm/create-pr-sidecar.test.ts index 84ee1494..b582a09d 100644 --- a/tests/unit/cli/scm/create-pr-sidecar.test.ts +++ b/tests/unit/cli/scm/create-pr-sidecar.test.ts @@ -145,9 +145,16 @@ describe('CreatePRCommand sidecar write', () => { } as never); await cmd.execute(); - expect(vi.mocked(cmd.log)).toHaveBeenCalledWith( - JSON.stringify({ success: false, error: 'GitHub API error' }), - ); + // Spec 014: runtime failures emit the structured envelope. + const logged = vi.mocked(cmd.log).mock.calls.map((c) => c[0] as string); + const jsonLine = logged.find((l) => l.startsWith('{')) ?? ''; + const output = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; message: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('runtime'); + expect(output.error.message).toBe('GitHub API error'); expect(vi.mocked(cmd.exit)).toHaveBeenCalledWith(1); expect(existsSync(sidecarPath)).toBe(false); }); diff --git a/tests/unit/gadgets/github/definitions.test.ts b/tests/unit/gadgets/github/definitions.test.ts index 33a89db1..e9ba5f00 100644 --- a/tests/unit/gadgets/github/definitions.test.ts +++ b/tests/unit/gadgets/github/definitions.test.ts @@ -268,3 +268,25 @@ describe('GitHub SCM gadget definitions', () => { }); }); }); + +// --------------------------------------------------------------------------- +// Spec 014 plan 2: createPRReviewDef declarative opt-in +// --------------------------------------------------------------------------- + +describe('createPRReviewDef — spec 014 opt-in', () => { + it('comments parameter declares cliAliases: ["comment"]', () => { + const comments = createPRReviewDef.parameters.comments as { + cliAliases?: readonly string[]; + }; + expect(comments.cliAliases).toEqual(['comment']); + }); + + it('cli.fileInputAlternatives includes a comments-file entry with parseAs:"json"', () => { + const alts = createPRReviewDef.cli?.fileInputAlternatives ?? []; + const commentsFile = alts.find((a) => a.paramName === 'comments'); + expect(commentsFile).toBeDefined(); + expect(commentsFile?.fileFlag).toBe('comments-file'); + expect(commentsFile?.parseAs).toBe('json'); + expect(commentsFile?.description).toBeTruthy(); + }); +}); diff --git a/tests/unit/gadgets/shared/errorEnvelope.test.ts b/tests/unit/gadgets/shared/errorEnvelope.test.ts new file mode 100644 index 00000000..138f19dc --- /dev/null +++ b/tests/unit/gadgets/shared/errorEnvelope.test.ts @@ -0,0 +1,191 @@ +/** + * Tests for the shared cascade-tools error envelope helper (spec 014). + * + * The envelope is the single channel every cascade-tools failure emits on: + * - Structured JSON on stdout: `{"success":false,"error":{...}}` + * - One-line prose summary on stderr (for humans running the CLI directly) + * - Exit code 1 + * + * Consumed by cliCommandFactory for flag-parse, JSON-parse, missing-required, + * enum-mismatch, unknown-flag, auth, and runtime failure paths. + */ + +import { Writable } from 'node:stream'; +import { describe, expect, it, vi } from 'vitest'; + +import { + type CliErrorEnvelope, + emitCliError, + truncateGot, +} from '../../../../src/gadgets/shared/errorEnvelope.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeWritable(): { stream: Writable; written: () => string } { + const chunks: string[] = []; + const stream = new Writable({ + write(chunk, _enc, cb) { + chunks.push(chunk.toString()); + cb(); + }, + }); + return { stream, written: () => chunks.join('') }; +} + +function parseFirstJsonLine(blob: string): { success: boolean; error: CliErrorEnvelope } { + const line = blob.split('\n').find((l) => l.trim().startsWith('{')) ?? ''; + return JSON.parse(line) as { success: boolean; error: CliErrorEnvelope }; +} + +const ESC = String.fromCharCode(0x1b); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('emitCliError', () => { + it('writes a stable JSON envelope to stdout (success:false + error fields)', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + expect(() => + emitCliError({ + type: 'json-parse', + flag: 'comments', + message: 'invalid JSON payload', + got: "[{'path':'a'}]", + expected: '[{"path":"","line":,"body":""}]', + hint: 'use --comments-file for long payloads', + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + const parsed = parseFirstJsonLine(stdout.written()); + expect(parsed.success).toBe(false); + expect(parsed.error.type).toBe('json-parse'); + expect(parsed.error.flag).toBe('comments'); + expect(parsed.error.message).toBe('invalid JSON payload'); + expect(parsed.error.got).toBe("[{'path':'a'}]"); + expect(parsed.error.expected).toBe('[{"path":"","line":,"body":""}]'); + expect(parsed.error.hint).toBe('use --comments-file for long payloads'); + }); + + it('mirrors a prose summary to stderr (≤120 chars, no ANSI, mentions flag + type)', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + expect(() => + emitCliError({ + type: 'json-parse', + flag: 'comments', + message: 'invalid JSON payload', + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + const err = stderr.written().trim(); + expect(err.length).toBeGreaterThan(0); + expect(err.length).toBeLessThanOrEqual(120); + // No ANSI escape sequences (ESC = 0x1b) + expect(err.indexOf(ESC)).toBe(-1); + // References the flag and the type + expect(err).toContain('--comments'); + expect(err).toContain('json-parse'); + }); + + it('truncates `got` to ~80 chars with an ellipsis when longer', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + const longInput = 'x'.repeat(500); + + expect(() => + emitCliError({ + type: 'json-parse', + flag: 'comments', + message: 'invalid JSON', + got: longInput, + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + const parsed = parseFirstJsonLine(stdout.written()); + expect(parsed.error.got?.length).toBeLessThanOrEqual(83); + expect(parsed.error.got?.endsWith('...')).toBe(true); + }); + + it('omits optional fields (got, expected, hint, example) from the envelope when not provided', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + expect(() => + emitCliError({ + type: 'missing-required', + flag: 'body', + message: '--body is required', + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + const parsed = parseFirstJsonLine(stdout.written()); + expect(parsed.error).not.toHaveProperty('got'); + expect(parsed.error).not.toHaveProperty('expected'); + expect(parsed.error).not.toHaveProperty('hint'); + expect(parsed.error).not.toHaveProperty('example'); + }); + + it('calls exit with code 1', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + expect(() => + emitCliError({ + type: 'runtime', + message: 'boom', + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + expect(exit).toHaveBeenCalledOnce(); + expect(exit).toHaveBeenCalledWith(1); + }); +}); + +describe('truncateGot', () => { + it('returns the input unchanged when short', () => { + expect(truncateGot('hello')).toBe('hello'); + }); + + it('truncates to max+ellipsis when long', () => { + const result = truncateGot('x'.repeat(200), 80); + expect(result.length).toBe(83); + expect(result.endsWith('...')).toBe(true); + }); +}); diff --git a/tests/unit/gadgets/shared/factories.test.ts b/tests/unit/gadgets/shared/factories.test.ts index 79ea8492..c1595181 100644 --- a/tests/unit/gadgets/shared/factories.test.ts +++ b/tests/unit/gadgets/shared/factories.test.ts @@ -561,25 +561,8 @@ describe('createCLICommand', () => { expect(capturedParams.text).toBe('Content from stdin'); }); - it('errors when file-input required param is missing', async () => { - const coreFn: CLICoreFn = async () => 'result'; - const CommandClass = createCLICommand(fileInputToolDef, coreFn); - const instance = new CommandClass([], {}); - - vi.spyOn(instance, 'parse').mockResolvedValue({ - flags: { workItemId: 'card123', text: undefined, 'text-file': undefined }, - args: {}, - argv: [], - raw: [], - } as unknown as Awaited>); - - const errorSpy = vi.spyOn(instance, 'error').mockImplementation((msg: string | Error) => { - throw new Error(typeof msg === 'string' ? msg : msg.message); - }); - - await expect(instance.execute()).rejects.toThrow('Either --text or --text-file is required'); - expect(errorSpy).toHaveBeenCalled(); - }); + // Legacy test removed — replaced by spec-014 envelope assertion in + // 'missing file-input required param routes through envelope (type:"missing-required")' below. it('outputs JSON result on success', async () => { const coreFn: CLICoreFn = async () => ({ id: '456', url: 'https://example.com' }); @@ -626,37 +609,215 @@ describe('createCLICommand', () => { expect(capturedParams.config).toEqual({ key: 'value', count: 5 }); }); - it('errors on invalid JSON for object type params', async () => { - const coreFn: CLICoreFn = async () => 'result'; - const CommandClass = createCLICommand(objectToolDef, coreFn); + // Legacy test removed — replaced by spec-014 envelope assertion in + // 'object param with malformed JSON routes through envelope (type:"json-parse")' below. + + it('skips gadgetOnly params in flag processing', async () => { + let capturedParams: Record = {}; + const coreFn: CLICoreFn = async (params) => { + capturedParams = params as Record; + return 'ok'; + }; + + const CommandClass = createCLICommand(simpleToolDef, coreFn); const instance = new CommandClass([], {}); vi.spyOn(instance, 'parse').mockResolvedValue({ - flags: { config: '{not-valid-json}' }, + flags: { name: 'Alice' }, args: {}, argv: [], raw: [], } as unknown as Awaited>); - vi.spyOn(instance, 'error').mockImplementation((msg: string | Error) => { - throw new Error(typeof msg === 'string' ? msg : msg.message); + vi.spyOn(instance, 'log').mockImplementation(() => {}); + + await instance.execute(); + + // 'comment' (gadgetOnly) should NOT be in capturedParams + expect(capturedParams).not.toHaveProperty('comment'); + expect(capturedParams.name).toBe('Alice'); + }); + + // Legacy test removed — replaced by spec-014 envelope assertion in + // 'coreFn runtime failure routes through the new envelope (type:"runtime")' below. + + it('passes array flags through correctly', async () => { + let capturedParams: Record = {}; + const coreFn: CLICoreFn = async (params) => { + capturedParams = params as Record; + return 'added'; + }; + + const CommandClass = createCLICommand(arrayToolDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { items: ['item1', 'item2', 'item3'] }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + vi.spyOn(instance, 'log').mockImplementation(() => {}); + + await instance.execute(); + + expect(capturedParams.items).toEqual(['item1', 'item2', 'item3']); + }); +}); + +// --------------------------------------------------------------------------- +// Spec 014: cliCommandFactory widened behavior (aliases, JSON-parse for +// array-of-object, help examples, structured error envelope). +// --------------------------------------------------------------------------- + +describe('createCLICommand — spec 014 additions', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + // Test def: single array-of-object param with an alias and a file alternative. + const reviewPRDef: ToolDefinition = { + name: 'TestReviewPR', + description: 'Submit a PR review (test fixture).', + parameters: { + body: { type: 'string', describe: 'Review body', required: true }, + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + cliAliases: ['comment'], + optional: true, + }, + }, + examples: [ + { + params: { + body: 'LGTM', + comments: [{ path: 'src/x.ts', line: 1, body: 'nit' }], + }, + comment: 'Request changes with inline', + }, + ], + cli: { + fileInputAlternatives: [ + { + paramName: 'comments', + fileFlag: 'comments-file', + parseAs: 'json', + description: 'Read --comments JSON from file (use - for stdin).', + }, + ], + }, + }; + + it('applies cliAliases onto the generated oclif flag', () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + + const commentsFlag = CommandClass.flags.comments as { aliases?: string[] }; + expect(commentsFlag.aliases).toEqual(['comment']); + }); + + it('wires def.examples onto FactoryCommand.examples for oclif --help', () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + + const examples = CommandClass.examples as string[] | undefined; + expect(examples).toBeDefined(); + expect(examples?.length ?? 0).toBeGreaterThan(0); + // The JSON shape from the example should appear — serialized double-quoted. + expect(examples?.[0]).toContain('"path":"src/x.ts"'); + }); + + it('array + items:"object" flag value parses as JSON (single string, not repeatable)', async () => { + let capturedParams: Record = {}; + const coreFn: CLICoreFn = async (params) => { + capturedParams = params as Record; + return 'ok'; + }; + + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { + body: 'LGTM', + comments: '[{"path":"a","line":1,"body":"b"}]', + }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + vi.spyOn(instance, 'log').mockImplementation(() => {}); + + await instance.execute(); + + expect(capturedParams.comments).toEqual([{ path: 'a', line: 1, body: 'b' }]); + }); + + it('array + items:"object" with malformed JSON emits json-parse envelope on stdout', async () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { + body: 'LGTM', + comments: "[{'path':'a','line':1,'body':'b'}]", // single-quoted keys — bug from prod run 5d993b04 + }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); }); - await expect(instance.execute()).rejects.toThrow('--config must be valid JSON'); + await expect(instance.execute()).rejects.toThrow('exit'); + + // The new envelope must surface on this.log (stdout) + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { + type: string; + flag?: string; + got?: string; + expected?: string; + hint?: string; + }; + }; + expect(parsed.success).toBe(false); + expect(parsed.error.type).toBe('json-parse'); + expect(parsed.error.flag).toBe('comments'); + expect(parsed.error.got).toContain("[{'path'"); + expect(parsed.error.expected).toContain('"path"'); + expect(parsed.error.hint).toContain('--comments-file'); }); - it('skips gadgetOnly params in flag processing', async () => { + it('file-input with parseAs:"json" JSON-parses the file contents', async () => { + mockReadFileSync.mockReturnValue('[{"path":"a","line":1,"body":"b"}]'); + let capturedParams: Record = {}; const coreFn: CLICoreFn = async (params) => { capturedParams = params as Record; return 'ok'; }; - const CommandClass = createCLICommand(simpleToolDef, coreFn); + const CommandClass = createCLICommand(reviewPRDef, coreFn); const instance = new CommandClass([], {}); vi.spyOn(instance, 'parse').mockResolvedValue({ - flags: { name: 'Alice' }, + flags: { + body: 'LGTM', + 'comments-file': '/tmp/comments.json', + comments: undefined, + }, args: {}, argv: [], raw: [], @@ -666,12 +827,45 @@ describe('createCLICommand', () => { await instance.execute(); - // 'comment' (gadgetOnly) should NOT be in capturedParams - expect(capturedParams).not.toHaveProperty('comment'); - expect(capturedParams.name).toBe('Alice'); + expect(capturedParams.comments).toEqual([{ path: 'a', line: 1, body: 'b' }]); + }); + + it('file-input with parseAs:"json" malformed contents emits json-parse envelope', async () => { + mockReadFileSync.mockReturnValue("[{'path':'a'}]"); + + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { + body: 'LGTM', + 'comments-file': '/tmp/comments.json', + comments: undefined, + }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(parsed.error.type).toBe('json-parse'); + expect(parsed.error.flag).toBe('comments'); }); - it('logs { success: false, error } and exits 1 when coreFn throws', async () => { + it('coreFn runtime failure routes through the new envelope (type:"runtime")', async () => { const failingFn: CLICoreFn = async () => { throw new Error('getaddrinfo EAI_AGAIN api.trello.com'); }; @@ -684,39 +878,190 @@ describe('createCLICommand', () => { raw: [], } as unknown as Awaited>); const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); - const exitSpy = vi.spyOn(instance, 'exit').mockImplementation(() => { + vi.spyOn(instance, 'exit').mockImplementation(() => { throw new Error('exit'); }); await expect(instance.execute()).rejects.toThrow('exit'); - expect(logSpy).toHaveBeenCalledWith( - JSON.stringify({ success: false, error: 'getaddrinfo EAI_AGAIN api.trello.com' }), - ); - expect(exitSpy).toHaveBeenCalledWith(1); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; message: string }; + }; + expect(parsed.success).toBe(false); + expect(parsed.error.type).toBe('runtime'); + expect(parsed.error.message).toBe('getaddrinfo EAI_AGAIN api.trello.com'); }); - it('passes array flags through correctly', async () => { - let capturedParams: Record = {}; - const coreFn: CLICoreFn = async (params) => { - capturedParams = params as Record; - return 'added'; + it('missing file-input required param routes through envelope (type:"missing-required")', async () => { + const coreFn: CLICoreFn = async () => 'result'; + const CommandClass = createCLICommand(fileInputToolDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { workItemId: 'card123', text: undefined, 'text-file': undefined }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; flag?: string }; }; + expect(parsed.error.type).toBe('missing-required'); + expect(parsed.error.flag).toBe('text'); + }); + it('primitive-array (items:"string") still uses multiple:true (regression guard)', () => { + // arrayToolDef has items:'string' → repeatable flag + const coreFn: CLICoreFn = async () => 'ok'; const CommandClass = createCLICommand(arrayToolDef, coreFn); + const itemsFlag = CommandClass.flags.items as { multiple?: boolean }; + expect(itemsFlag.multiple).toBe(true); + }); + + it('unknown flag close to a declared one suggests correction via Levenshtein', async () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + // Simulate oclif's NonExistentFlagsError shape (constructor name + flags array) + class NonExistentFlagsError extends Error { + public flags: string[]; + constructor(flags: string[]) { + super(`Nonexistent flag: ${flags.join(', ')}`); + this.name = 'CLIParseError'; + this.flags = flags; + } + } + + vi.spyOn(instance, 'parse').mockImplementation(async () => { + throw new NonExistentFlagsError(['comnent']); + }); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + error: { type: string; flag?: string; hint?: string }; + }; + expect(parsed.error.type).toBe('unknown-flag'); + expect(parsed.error.flag).toBe('comnent'); + expect(parsed.error.hint).toContain('did you mean'); + expect(parsed.error.hint).toContain('--comments'); + }); + + it('unknown flag far from any declared one emits envelope without suggestion', async () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + class NonExistentFlagsError extends Error { + public flags: string[]; + constructor(flags: string[]) { + super(`Nonexistent flag: ${flags.join(', ')}`); + this.name = 'CLIParseError'; + this.flags = flags; + } + } + + vi.spyOn(instance, 'parse').mockImplementation(async () => { + throw new NonExistentFlagsError(['zzzzzzzz']); + }); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + error: { type: string; hint?: string }; + }; + expect(parsed.error.type).toBe('unknown-flag'); + // No "did you mean" when Levenshtein distance exceeds threshold + expect(parsed.error.hint ?? '').not.toContain('did you mean'); + }); + + it('fuzzy suggestion considers declared aliases but always returns the canonical name', async () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + class NonExistentFlagsError extends Error { + public flags: string[]; + constructor(flags: string[]) { + super(`Nonexistent flag: ${flags.join(', ')}`); + this.name = 'CLIParseError'; + this.flags = flags; + } + } + + // 'coment' is closer to the alias 'comment' than to the canonical 'comments'. + // The suggestion must still surface the canonical spelling. + vi.spyOn(instance, 'parse').mockImplementation(async () => { + throw new NonExistentFlagsError(['coment']); + }); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { error: { hint?: string } }; + expect(parsed.error.hint).toContain('--comments'); + expect(parsed.error.hint).not.toContain('--comment?'); + }); + + it('object param with malformed JSON routes through envelope (type:"json-parse")', async () => { + // This replaces the old 'errors on invalid JSON for object type params' assertion + // which checked the legacy `--config must be valid JSON` prose error. + const coreFn: CLICoreFn = async () => 'result'; + const CommandClass = createCLICommand(objectToolDef, coreFn); const instance = new CommandClass([], {}); vi.spyOn(instance, 'parse').mockResolvedValue({ - flags: { items: ['item1', 'item2', 'item3'] }, + flags: { config: '{not-valid-json}' }, args: {}, argv: [], raw: [], } as unknown as Awaited>); - vi.spyOn(instance, 'log').mockImplementation(() => {}); + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); - await instance.execute(); + await expect(instance.execute()).rejects.toThrow('exit'); - expect(capturedParams.items).toEqual(['item1', 'item2', 'item3']); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(parsed.error.type).toBe('json-parse'); + expect(parsed.error.flag).toBe('config'); }); }); @@ -933,3 +1278,132 @@ describe('round-trip consistency', () => { expect(manifest.name).toBe('TestTool'); }); }); + +// --------------------------------------------------------------------------- +// Spec 014: manifest threads items / cliAliases / one example through +// --------------------------------------------------------------------------- + +describe('generateToolManifest — widened fields (spec 014)', () => { + it('threads items from array-of-object parameter into manifest entry', () => { + const def: ToolDefinition = { + name: 'ReviewPR', + description: 'Review a PR.', + parameters: { + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + optional: true, + }, + }, + }; + + const manifest = generateToolManifest(def); + const commentsParam = manifest.parameters.comments as { type: string; items?: string }; + expect(commentsParam.items).toBe('object'); + }); + + it('threads items from primitive-array parameter into manifest entry (regression guard)', () => { + const def: ToolDefinition = { + name: 'AddLabels', + description: 'Add labels.', + parameters: { + labels: { + type: 'array', + items: 'string', + describe: 'Labels to add', + required: true, + }, + }, + }; + + const manifest = generateToolManifest(def); + const labelsParam = manifest.parameters.labels as { type: string; items?: string }; + expect(labelsParam.items).toBe('string'); + }); + + it('threads cliAliases into manifest as aliases', () => { + const def: ToolDefinition = { + name: 'ReviewPR', + description: 'Review a PR.', + parameters: { + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + cliAliases: ['comment'], + optional: true, + }, + }, + }; + + const manifest = generateToolManifest(def); + const commentsParam = manifest.parameters.comments as { aliases?: readonly string[] }; + expect(commentsParam.aliases).toEqual(['comment']); + }); + + it('attaches first matching example params value to the manifest entry as example', () => { + const def: ToolDefinition = { + name: 'ReviewPR', + description: 'Review a PR.', + parameters: { + body: { type: 'string', describe: 'Body', required: true }, + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + optional: true, + }, + }, + examples: [ + { + // First example has no comments → should be skipped when picking for the comments entry + params: { body: 'lgtm' }, + comment: 'Approve with summary only', + }, + { + params: { + body: 'needs work', + comments: [{ path: 'src/x.ts', line: 10, body: 'nit' }], + }, + comment: 'Request changes with inline', + }, + ], + }; + + const manifest = generateToolManifest(def); + const commentsParam = manifest.parameters.comments as { example?: unknown }; + expect(commentsParam.example).toEqual([{ path: 'src/x.ts', line: 10, body: 'nit' }]); + + const bodyParam = manifest.parameters.body as { example?: unknown }; + // First example has body='lgtm' → that's the first match + expect(bodyParam.example).toBe('lgtm'); + }); + + it('omits example field when no examples populate the param', () => { + const def: ToolDefinition = { + name: 'ReviewPR', + description: 'Review a PR.', + parameters: { + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + optional: true, + }, + }, + examples: [ + { + params: { + /* no comments here */ + }, + comment: 'no comments example', + }, + ], + }; + + const manifest = generateToolManifest(def); + const commentsParam = manifest.parameters.comments as { example?: unknown }; + expect(commentsParam).not.toHaveProperty('example'); + }); +}); diff --git a/tests/unit/pm/linear/integration.test.ts b/tests/unit/pm/linear/integration.test.ts index 2de3219e..bca193d4 100644 --- a/tests/unit/pm/linear/integration.test.ts +++ b/tests/unit/pm/linear/integration.test.ts @@ -232,14 +232,19 @@ describe('LinearIntegration', () => { expect(config.statuses.done).toBe('state-done'); }); - it('uses defaults when labels config is missing', () => { + it('returns undefined labels when labels config is missing (not name-string defaults)', () => { + // Linear requires UUIDs for addLabel — name-string defaults like 'cascade-processing' + // would cause resolveLabelId() to silently return null. When no label is configured, + // undefined is the correct signal to skip the operation entirely. mockGetLinearConfig.mockReturnValue({ teamId: 'team-abc', statuses: {} }); const project = makeProject(); const config = integration.resolveLifecycleConfig(project); - expect(config.labels.processing).toBe('cascade-processing'); - expect(config.labels.processed).toBe('cascade-processed'); - expect(config.labels.readyToProcess).toBe('cascade-ready'); + expect(config.labels.processing).toBeUndefined(); + expect(config.labels.processed).toBeUndefined(); + expect(config.labels.readyToProcess).toBeUndefined(); + expect(config.labels.error).toBeUndefined(); + expect(config.labels.auto).toBeUndefined(); }); it('has undefined statuses when linear config has no statuses', () => { diff --git a/tests/unit/pm/linear/regression-2026-04.test.ts b/tests/unit/pm/linear/regression-2026-04.test.ts index 16d771f5..0fdbe551 100644 --- a/tests/unit/pm/linear/regression-2026-04.test.ts +++ b/tests/unit/pm/linear/regression-2026-04.test.ts @@ -15,7 +15,9 @@ import { linearManifest } from '../../../../src/integrations/pm/linear/manifest. import type { ContainerId, LabelId, StateId } from '../../../../src/pm/ids.js'; import { InvalidIdError, parseStateId } from '../../../../src/pm/ids.js'; import type { LinearPMProvider } from '../../../../src/pm/linear/adapter.js'; +import { LinearIntegration } from '../../../../src/pm/linear/integration.js'; import type { WorkItem } from '../../../../src/pm/types.js'; +import type { ProjectConfig } from '../../../../src/types/index.js'; const HERE = dirname(fileURLToPath(import.meta.url)); const PROJECT_ROOT = resolve(HERE, '..', '..', '..', '..'); @@ -161,3 +163,80 @@ describe('2026-04 regression: #1097 / #1118 / #1131 / #1134 (registration miss)' expect(content).toMatch(/integrations\/entrypoint\.js/); }); }); + +describe('2026-04 regression: Linear auto-label propagation with name strings', () => { + /** + * Root cause: propagateAutoLabelAfterSplitting passed pmConfig.labels.auto + * directly to provider.addLabel(). For Linear, resolveLifecycleConfig previously + * defaulted unconfigured labels to name strings (e.g. 'cascade-auto'). Linear's + * adapter requires UUIDs — passing a name string causes resolveLabelId() to return + * null and the label operation silently no-ops. + * + * Fix (two-pronged): + * 1. LinearIntegration.resolveLifecycleConfig now returns undefined for unconfigured + * labels (not name strings). Propagation is skipped when no label is configured. + * 2. propagateAutoLabelAfterSplitting resolves the actual UUID from the parent work + * item's label list, even when pmConfig.labels.auto is a name string. + */ + it('LinearIntegration.resolveLifecycleConfig returns undefined for unconfigured labels', () => { + // Unconfigured Linear project (no labels section) → all labels undefined. + // This prevents silent failures when addLabel receives a name string instead of UUID. + const project = { + pm: { type: 'linear' }, + linear: { teamId: 'team-1', statuses: {} }, + } as unknown as ProjectConfig; + const integration = new LinearIntegration(); + const config = integration.resolveLifecycleConfig(project); + + expect(config.labels.auto).toBeUndefined(); + expect(config.labels.processing).toBeUndefined(); + expect(config.labels.processed).toBeUndefined(); + expect(config.labels.error).toBeUndefined(); + expect(config.labels.readyToProcess).toBeUndefined(); + }); + + it('LinearIntegration.resolveLifecycleConfig preserves explicitly configured label values', () => { + // When a user has configured UUIDs, they must be preserved exactly. + const project = { + pm: { type: 'linear' }, + linear: { + teamId: 'team-1', + statuses: {}, + labels: { + auto: 'uuid-auto-111', + processing: 'uuid-processing-222', + processed: 'uuid-processed-333', + error: 'uuid-error-444', + readyToProcess: 'uuid-rtp-555', + }, + }, + } as unknown as ProjectConfig; + const integration = new LinearIntegration(); + const config = integration.resolveLifecycleConfig(project); + + expect(config.labels.auto).toBe('uuid-auto-111'); + expect(config.labels.processing).toBe('uuid-processing-222'); + expect(config.labels.processed).toBe('uuid-processed-333'); + expect(config.labels.error).toBe('uuid-error-444'); + expect(config.labels.readyToProcess).toBe('uuid-rtp-555'); + }); + + it('LinearIntegration.resolveLifecycleConfig does not default to name strings (unlike JIRA)', () => { + // Explicit regression guard: the old code had `labels?.auto ?? 'cascade-auto'`. + // JIRA intentionally keeps these defaults (JIRA auto-creates labels by name); + // Linear must NOT have them (Linear requires UUIDs). + const project = { + pm: { type: 'linear' }, + linear: { teamId: 'team-1', statuses: {}, labels: {} }, + } as unknown as ProjectConfig; + const integration = new LinearIntegration(); + const config = integration.resolveLifecycleConfig(project); + + // None of these should be a non-empty string default + expect(config.labels.auto).not.toBe('cascade-auto'); + expect(config.labels.processing).not.toBe('cascade-processing'); + expect(config.labels.processed).not.toBe('cascade-processed'); + expect(config.labels.error).not.toBe('cascade-error'); + expect(config.labels.readyToProcess).not.toBe('cascade-ready'); + }); +}); diff --git a/tests/unit/router/active-workers.test.ts b/tests/unit/router/active-workers.test.ts index 0da4dc5e..8c396e23 100644 --- a/tests/unit/router/active-workers.test.ts +++ b/tests/unit/router/active-workers.test.ts @@ -294,5 +294,109 @@ describe('active-workers', () => { expect(mockFailOrphanedRun).not.toHaveBeenCalled(); expect(mockFailOrphanedRunFallback).not.toHaveBeenCalled(); }); + + describe('exit details (diagnostics)', () => { + it('packs OOMKilled=true into the error reason', () => { + activeWorkers.set( + 'job-oom', + makeActiveWorker({ + jobId: 'job-oom', + projectId: 'proj-1', + workItemId: 'card-1', + agentType: 'implementation', + }), + ); + + cleanupWorker('job-oom', 137, { oomKilled: true }); + const reason = mockFailOrphanedRun.mock.calls[0][2] as string; + expect(reason).toMatch(/exit code 137/); + expect(reason).toMatch(/OOMKilled=true/); + }); + + it('packs OOMKilled=false into the error reason', () => { + activeWorkers.set( + 'job-not-oom', + makeActiveWorker({ + jobId: 'job-not-oom', + projectId: 'proj-1', + workItemId: 'card-1', + agentType: 'implementation', + }), + ); + + cleanupWorker('job-not-oom', 137, { oomKilled: false }); + const reason = mockFailOrphanedRun.mock.calls[0][2] as string; + expect(reason).toMatch(/OOMKilled=false/); + }); + + it('includes Docker State.Error reason when present', () => { + activeWorkers.set( + 'job-state-err', + makeActiveWorker({ + jobId: 'job-state-err', + projectId: 'proj-1', + workItemId: 'card-1', + agentType: 'implementation', + }), + ); + + cleanupWorker('job-state-err', 1, { exitReason: 'OCI runtime error' }); + const reason = mockFailOrphanedRun.mock.calls[0][2] as string; + expect(reason).toMatch(/reason="OCI runtime error"/); + }); + + it('omits OOMKilled label when undefined (legacy callers)', () => { + activeWorkers.set( + 'job-legacy', + makeActiveWorker({ + jobId: 'job-legacy', + projectId: 'proj-1', + workItemId: 'card-1', + agentType: 'implementation', + }), + ); + + cleanupWorker('job-legacy', 1); + const reason = mockFailOrphanedRun.mock.calls[0][2] as string; + expect(reason).toBe('Worker crashed with exit code 1'); + }); + + it('combines OOMKilled and exit reason in one message', () => { + activeWorkers.set( + 'job-combo', + makeActiveWorker({ + jobId: 'job-combo', + projectId: 'proj-1', + workItemId: 'card-1', + agentType: 'implementation', + }), + ); + + cleanupWorker('job-combo', 137, { + oomKilled: true, + exitReason: 'Out of memory', + }); + const reason = mockFailOrphanedRun.mock.calls[0][2] as string; + expect(reason).toMatch(/exit code 137/); + expect(reason).toMatch(/OOMKilled=true/); + expect(reason).toMatch(/reason="Out of memory"/); + }); + + it('forwards diagnostics to failOrphanedRunFallback path too', () => { + activeWorkers.set( + 'job-fb', + makeActiveWorker({ + jobId: 'job-fb', + projectId: 'proj-1', + agentType: 'review', + // no workItemId → fallback path + }), + ); + + cleanupWorker('job-fb', 137, { oomKilled: true }); + const reason = mockFailOrphanedRunFallback.mock.calls[0][4] as string; + expect(reason).toMatch(/OOMKilled=true/); + }); + }); }); }); diff --git a/tests/unit/router/container-manager-diagnostics.test.ts b/tests/unit/router/container-manager-diagnostics.test.ts new file mode 100644 index 00000000..8b91e7de --- /dev/null +++ b/tests/unit/router/container-manager-diagnostics.test.ts @@ -0,0 +1,400 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// --------------------------------------------------------------------------- +// Hoisted mocks — the diagnostic helpers don't touch loadProjectConfig or +// credentials, but importing container-manager.ts pulls in its full dep +// graph; mocking the noisy subsystems keeps the test boot fast. +// --------------------------------------------------------------------------- + +const { mockLoggerInfo, mockLoggerWarn, mockLoadProjectConfig, mockGetSnapshot } = vi.hoisted( + () => ({ + mockLoggerInfo: vi.fn(), + mockLoggerWarn: vi.fn(), + mockLoadProjectConfig: vi.fn(), + mockGetSnapshot: vi.fn(), + }), +); + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { + info: (...args: unknown[]) => mockLoggerInfo(...args), + warn: (...args: unknown[]) => mockLoggerWarn(...args), + error: vi.fn(), + debug: vi.fn(), + }, +})); + +vi.mock('../../../src/sentry.js', () => ({ + captureException: vi.fn(), + flush: vi.fn().mockResolvedValue(undefined), + setTag: vi.fn(), +})); + +vi.mock('../../../src/router/config.js', () => ({ + loadProjectConfig: (...args: unknown[]) => mockLoadProjectConfig(...args), + routerConfig: { + workerImage: 'base-worker:latest', + workerMemoryMb: 8192, + workerTimeoutMs: 30 * 60 * 1000, // 30 min — the production-config value + dockerNetwork: 'cascade-net', + snapshotEnabled: false, + snapshotDefaultTtlMs: 86_400_000, + snapshotMaxCount: 5, + snapshotMaxSizeBytes: 10_737_418_240, + maxWorkers: 5, + }, +})); + +vi.mock('../../../src/router/snapshot-manager.js', () => ({ + getSnapshot: (...args: unknown[]) => mockGetSnapshot(...args), + registerSnapshot: vi.fn(), + invalidateSnapshot: vi.fn(), +})); + +// --------------------------------------------------------------------------- +// Imports (after mocks) +// --------------------------------------------------------------------------- + +import { formatCrashReason } from '../../../src/router/active-workers.js'; +import { + inspectExitedContainer, + resolveSpawnSettings, +} from '../../../src/router/container-manager.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +type MockContainer = { + inspect: ReturnType; +}; + +function makeContainer( + state: Record | null, + throwOnInspect = false, +): MockContainer { + return { + inspect: throwOnInspect + ? vi.fn().mockRejectedValue(new Error('socket hang up')) + : vi.fn().mockResolvedValue(state === null ? null : { State: state }), + }; +} + +// --------------------------------------------------------------------------- +// formatCrashReason — direct tests of the wire-format consumed by the run +// record's `error` field. The format is now de-facto API: any future +// `cascade runs show` filter or dashboard parser will key on it. +// --------------------------------------------------------------------------- + +describe('formatCrashReason', () => { + it('returns the bare crash message when no details are provided', () => { + expect(formatCrashReason(137)).toBe('Worker crashed with exit code 137'); + }); + + it('appends OOMKilled=true when details.oomKilled is true', () => { + expect(formatCrashReason(137, { oomKilled: true })).toBe( + 'Worker crashed with exit code 137 · OOMKilled=true', + ); + }); + + it('appends OOMKilled=false when details.oomKilled is explicitly false', () => { + expect(formatCrashReason(137, { oomKilled: false })).toBe( + 'Worker crashed with exit code 137 · OOMKilled=false', + ); + }); + + it('omits the OOMKilled segment when oomKilled is undefined', () => { + expect(formatCrashReason(1, { exitReason: 'OCI runtime error' })).toBe( + 'Worker crashed with exit code 1 · reason="OCI runtime error"', + ); + }); + + it('appends reason="…" when details.exitReason is non-empty', () => { + expect(formatCrashReason(137, { exitReason: 'Out of memory' })).toBe( + 'Worker crashed with exit code 137 · reason="Out of memory"', + ); + }); + + it('chains OOMKilled and reason in stable order (OOMKilled · reason)', () => { + expect(formatCrashReason(137, { oomKilled: true, exitReason: 'Out of memory' })).toBe( + 'Worker crashed with exit code 137 · OOMKilled=true · reason="Out of memory"', + ); + }); + + it('uses · as the segment separator (grep stability)', () => { + const reason = formatCrashReason(137, { oomKilled: true, exitReason: 'X' }); + expect(reason.split(' · ')).toHaveLength(3); + }); + + // Format-stability regression: dashboards / `runs show` filters will grep + // these exact patterns. Bumping the format silently is a downstream break. + it('produces a grep-stable OOMKilled marker', () => { + expect(formatCrashReason(137, { oomKilled: true })).toMatch(/OOMKilled=(true|false)/); + expect(formatCrashReason(137, { oomKilled: false })).toMatch(/OOMKilled=(true|false)/); + }); +}); + +// --------------------------------------------------------------------------- +// inspectExitedContainer — direct unit tests of the Docker-state extraction. +// The whole change exists so post-mortems can read OOMKilled / exit reason +// without ssh; without these tests, a future refactor can silently drop +// fields and we'd never notice. +// --------------------------------------------------------------------------- + +describe('inspectExitedContainer', () => { + beforeEach(() => { + mockLoggerInfo.mockClear(); + mockLoggerWarn.mockClear(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('extracts OOMKilled=true from State', async () => { + const container = makeContainer({ + OOMKilled: true, + Error: '', + StartedAt: '2026-04-25T08:00:00.000Z', + FinishedAt: '2026-04-25T08:00:30.000Z', + }); + const result = await inspectExitedContainer(container as never, 'job-1'); + expect(result.oomKilled).toBe(true); + }); + + it('extracts OOMKilled=false from State', async () => { + const container = makeContainer({ + OOMKilled: false, + Error: '', + StartedAt: '2026-04-25T08:00:00.000Z', + FinishedAt: '2026-04-25T08:00:30.000Z', + }); + const result = await inspectExitedContainer(container as never, 'job-1'); + expect(result.oomKilled).toBe(false); + }); + + it('extracts non-empty State.Error as exitReason', async () => { + const container = makeContainer({ + OOMKilled: false, + Error: 'OCI runtime error: exec failed', + StartedAt: '2026-04-25T08:00:00.000Z', + FinishedAt: '2026-04-25T08:00:30.000Z', + }); + const result = await inspectExitedContainer(container as never, 'job-1'); + expect(result.exitReason).toBe('OCI runtime error: exec failed'); + }); + + it('returns undefined exitReason when State.Error is empty (clean exits)', async () => { + const container = makeContainer({ + OOMKilled: false, + Error: '', + StartedAt: '2026-04-25T08:00:00.000Z', + FinishedAt: '2026-04-25T08:00:30.000Z', + }); + const result = await inspectExitedContainer(container as never, 'job-1'); + expect(result.exitReason).toBeUndefined(); + }); + + it('computes durationMs from StartedAt/FinishedAt', async () => { + const container = makeContainer({ + OOMKilled: false, + Error: '', + StartedAt: '2026-04-25T08:00:00.000Z', + FinishedAt: '2026-04-25T08:00:30.000Z', + }); + const result = await inspectExitedContainer(container as never, 'job-1'); + expect(result.durationMs).toBe(30_000); + }); + + it('returns undefined durationMs for sentinel `0001-01-01` timestamps', async () => { + // Docker uses 0001-01-01T00:00:00Z when a phase never completed — + // e.g. FinishedAt on a still-running container, or StartedAt on a + // container that errored before start. + const container = makeContainer({ + OOMKilled: false, + Error: '', + StartedAt: '0001-01-01T00:00:00Z', + FinishedAt: '0001-01-01T00:00:00Z', + }); + const result = await inspectExitedContainer(container as never, 'job-1'); + // 0001 → 0001 = 0ms which IS finite; the regression we're guarding + // against is when only one is the sentinel and the other is real, + // producing a huge negative number. Test that case below too. + expect(Number.isFinite(result.durationMs)).toBe(true); + }); + + it('returns undefined durationMs when timestamps are malformed', async () => { + const container = makeContainer({ + OOMKilled: false, + Error: '', + StartedAt: 'not-a-date', + FinishedAt: '2026-04-25T08:00:30.000Z', + }); + const result = await inspectExitedContainer(container as never, 'job-1'); + expect(result.durationMs).toBeUndefined(); + }); + + it('returns undefined durationMs when StartedAt is sentinel and FinishedAt is real (negative span)', async () => { + const container = makeContainer({ + OOMKilled: false, + Error: '', + StartedAt: '2026-04-25T08:00:30.000Z', + FinishedAt: '0001-01-01T00:00:00Z', + }); + const result = await inspectExitedContainer(container as never, 'job-1'); + // FinishedAt - StartedAt is a huge negative number — drop it rather + // than leak a misleading value into the run record. + expect(result.durationMs).toBeUndefined(); + }); + + it('returns undefined durationMs when StartedAt or FinishedAt is missing', async () => { + const container = makeContainer({ + OOMKilled: false, + Error: '', + // StartedAt missing; FinishedAt present + FinishedAt: '2026-04-25T08:00:30.000Z', + }); + const result = await inspectExitedContainer(container as never, 'job-1'); + expect(result.durationMs).toBeUndefined(); + }); + + it('returns all-undefined when inspect rejects (daemon socket drop)', async () => { + const container = makeContainer(null, true); + const result = await inspectExitedContainer(container as never, 'job-1'); + expect(result.oomKilled).toBeUndefined(); + expect(result.exitReason).toBeUndefined(); + expect(result.durationMs).toBeUndefined(); + }); + + it('logs a warning when inspect rejects', async () => { + const container = makeContainer(null, true); + await inspectExitedContainer(container as never, 'job-warn'); + expect(mockLoggerWarn).toHaveBeenCalledWith( + '[WorkerManager] container.inspect() after wait failed:', + expect.objectContaining({ jobId: 'job-warn' }), + ); + }); + + it('does NOT throw even when inspect rejects — diagnostics are best-effort', async () => { + const container = makeContainer(null, true); + await expect(inspectExitedContainer(container as never, 'job-1')).resolves.toBeDefined(); + }); +}); + +// --------------------------------------------------------------------------- +// resolveSpawnSettings — pins the `Resolved spawn settings` diagnostic log +// against silent regression. This log is the only way to confirm in +// production whether `project.watchdogTimeoutMs` actually overrode the +// global 30-min default for a given worker — the load-bearing fact for the +// ucho exit-137 investigation. +// --------------------------------------------------------------------------- + +describe('resolveSpawnSettings', () => { + beforeEach(() => { + mockLoggerInfo.mockClear(); + mockLoggerWarn.mockClear(); + mockLoadProjectConfig.mockReset(); + mockGetSnapshot.mockReset(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('emits "Resolved spawn settings" log with the project watchdogTimeoutMs override applied', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [ + { + id: 'ucho', + watchdogTimeoutMs: 45 * 60 * 1000, // 45 min — ucho's actual config + snapshotEnabled: false, + snapshotTtlMs: undefined, + }, + ], + }); + + const settings = await resolveSpawnSettings('ucho', 'MNG-308', 'job-ucho-1'); + + // 45min watchdog + 2min router-kill buffer = 47 min container timeout. + expect(settings.containerTimeoutMs).toBe(47 * 60 * 1000); + + const spawnLog = mockLoggerInfo.mock.calls.find( + (call) => call[0] === '[WorkerManager] Resolved spawn settings:', + ); + expect(spawnLog).toBeDefined(); + expect(spawnLog?.[1]).toMatchObject({ + jobId: 'job-ucho-1', + projectId: 'ucho', + workItemId: 'MNG-308', + containerTimeoutMs: 47 * 60 * 1000, + containerTimeoutMinutes: 47, + projectWatchdogTimeoutMs: 45 * 60 * 1000, + globalWorkerTimeoutMs: 30 * 60 * 1000, + }); + }); + + it('falls back to global workerTimeoutMs when project has no watchdogTimeoutMs', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [ + { + id: 'no-override', + // watchdogTimeoutMs absent + snapshotEnabled: false, + }, + ], + }); + + const settings = await resolveSpawnSettings('no-override', undefined, 'job-fallback'); + + expect(settings.containerTimeoutMs).toBe(30 * 60 * 1000); // global default + + const spawnLog = mockLoggerInfo.mock.calls.find( + (call) => call[0] === '[WorkerManager] Resolved spawn settings:', + ); + expect(spawnLog?.[1]).toMatchObject({ + projectId: 'no-override', + containerTimeoutMs: 30 * 60 * 1000, + containerTimeoutMinutes: 30, + projectWatchdogTimeoutMs: null, + globalWorkerTimeoutMs: 30 * 60 * 1000, + }); + }); + + it('returns global defaults without logging spawn-settings when projectId is null', async () => { + const settings = await resolveSpawnSettings(null, undefined, 'job-no-project'); + + expect(settings.containerTimeoutMs).toBe(30 * 60 * 1000); + + const spawnLog = mockLoggerInfo.mock.calls.find( + (call) => call[0] === '[WorkerManager] Resolved spawn settings:', + ); + // No project context → no log line. The diagnostic is project-scoped. + expect(spawnLog).toBeUndefined(); + // Also: loadProjectConfig must not be called for null projects (avoid + // pointless DB roundtrips on dashboard-job paths). + expect(mockLoadProjectConfig).not.toHaveBeenCalled(); + }); + + it('logs containerTimeoutMinutes correctly rounded for non-integer minute values', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [ + { + id: 'odd-timeout', + watchdogTimeoutMs: 45 * 60 * 1000 + 30_000, // 45m30s + snapshotEnabled: false, + }, + ], + }); + + await resolveSpawnSettings('odd-timeout', undefined, 'job-rounded'); + + const spawnLog = mockLoggerInfo.mock.calls.find( + (call) => call[0] === '[WorkerManager] Resolved spawn settings:', + ); + // 45m30s + 2m buffer = 47m30s → Math.round to 48. + expect(spawnLog?.[1]).toMatchObject({ containerTimeoutMinutes: 48 }); + }); +}); diff --git a/tests/unit/router/snapshot-integration.test.ts b/tests/unit/router/snapshot-integration.test.ts index 66e78084..767ab759 100644 --- a/tests/unit/router/snapshot-integration.test.ts +++ b/tests/unit/router/snapshot-integration.test.ts @@ -151,6 +151,17 @@ function setupMockContainer(exitCode = 0) { id: 'container-snap-abc123', start: vi.fn().mockResolvedValue(undefined), wait: vi.fn().mockReturnValue(waitPromise), + // inspect() is called by the post-exit pipeline to read State.OOMKilled + + // State.Error before AutoRemove reaps the container. Stub a minimal, + // non-OOM, normally-exited shape — individual tests can override. + inspect: vi.fn().mockResolvedValue({ + State: { + OOMKilled: false, + Error: '', + StartedAt: '2026-04-25T08:00:00.000Z', + FinishedAt: '2026-04-25T08:00:30.000Z', + }, + }), logs: vi.fn().mockResolvedValue(Buffer.from('')), stop: vi.fn().mockResolvedValue(undefined), commit: vi.fn().mockResolvedValue(undefined), diff --git a/tests/unit/triggers/shared/agent-execution.test.ts b/tests/unit/triggers/shared/agent-execution.test.ts index 172131d0..810e300b 100644 --- a/tests/unit/triggers/shared/agent-execution.test.ts +++ b/tests/unit/triggers/shared/agent-execution.test.ts @@ -381,6 +381,109 @@ describe('propagateAutoLabelAfterSplitting (via runAgentExecutionPipeline)', () 'splitting-auto-propagate', ); }); + + it('uses the resolved UUID from parent labels when labels.auto is a name string', async () => { + // Regression: Linear requires UUIDs for addLabel. When pmConfig.labels.auto is a + // name string like 'cascade-auto', the UUID must be resolved from the parent work + // item's label list to avoid a silent no-op in Linear's resolveLabelId(). + const provider = setupSplittingDefaults({ + getWorkItem: vi.fn().mockResolvedValue({ + id: 'parent-card', + labels: [{ id: 'real-uuid-abc123', name: 'cascade-auto' }], + }), + listWorkItems: vi + .fn() + .mockImplementation( + async (_containerId: string | undefined, opts?: { status?: string }) => { + if (opts?.status === 'backlog') { + return [{ id: 'backlog-item-1', labels: [] }]; + } + return []; + }, + ), + }); + // Simulate Linear config: labels.auto is a name string, not UUID + mockResolveProjectPMConfig.mockReturnValue({ + ...PM_CONFIG, + labels: { ...PM_CONFIG.labels, auto: 'cascade-auto' }, + }); + // hasAutoLabel matches by name + mockHasAutoLabel.mockImplementation((labels: Array<{ id: string; name: string }>) => + labels.some((l) => l.id === 'cascade-auto' || l.name === 'cascade-auto'), + ); + + await runAgentExecutionPipeline( + { agentType: 'splitting', agentInput: {}, workItemId: 'parent-card' }, + PROJECT, + CONFIG, + ); + + // addLabel must be called with the resolved UUID, NOT the name string + expect(provider.addLabel).toHaveBeenCalledWith('backlog-item-1', 'real-uuid-abc123'); + expect(provider.addLabel).not.toHaveBeenCalledWith('backlog-item-1', 'cascade-auto'); + + // Note: the non-UUID warning is scoped to Linear only (project.pm.type === 'linear'). + // This test uses a Trello project, so no warning is emitted — Trello uses MongoDB Object + // IDs which are valid non-UUID identifiers and should not produce log noise in happy paths. + }); + + it('skips propagation (returns null) when labels.auto is undefined even if hasAutoLabel mock returns true', async () => { + // When labels.auto is undefined, the second guard in propagateAutoLabelAfterSplitting + // short-circuits and returns null — no labeling, no chaining. + const provider = setupSplittingDefaults(); + mockResolveProjectPMConfig.mockReturnValue({ + ...PM_CONFIG, + labels: { ...PM_CONFIG.labels, auto: undefined }, + }); + // Even if hasAutoLabel incorrectly returns true, the code checks autoLabelId next + mockHasAutoLabel.mockReturnValue(true); + mockRunAgent.mockReset(); + mockRunAgent.mockResolvedValueOnce({ success: true, output: '', runId: 'run-1' }); + + await runAgentExecutionPipeline( + { agentType: 'splitting', agentInput: {}, workItemId: 'parent-card' }, + PROJECT, + CONFIG, + ); + + // Only splitting ran — propagation skipped because autoLabelId is undefined + expect(mockRunAgent).toHaveBeenCalledTimes(1); + expect(provider.addLabel).not.toHaveBeenCalled(); + }); + + it('passes UUID directly when labels.auto is already a valid UUID (happy path)', async () => { + // When labels.auto IS a UUID, no resolution is needed and no warning is logged. + // Use a proper UUID for this test (UUID format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx) + const UUID_LABEL = '00000000-0000-0000-0000-000000000001'; + // Pass getWorkItem override via setupSplittingDefaults so the mock is properly wired + const provider = setupSplittingDefaults({ + getWorkItem: vi.fn().mockResolvedValue({ + id: 'parent-card', + labels: [{ id: UUID_LABEL, name: 'auto' }], + }), + }); + mockResolveProjectPMConfig.mockReturnValue({ + ...PM_CONFIG, + labels: { ...PM_CONFIG.labels, auto: UUID_LABEL }, + }); + mockHasAutoLabel.mockImplementation((labels: Array<{ id: string }>) => + labels.some((l) => l.id === UUID_LABEL), + ); + + await runAgentExecutionPipeline( + { agentType: 'splitting', agentInput: {}, workItemId: 'parent-card' }, + PROJECT, + CONFIG, + ); + + // addLabel called with UUID directly — no resolution warning should be logged + expect(provider.addLabel).toHaveBeenCalledWith('backlog-1', UUID_LABEL); + // No warning about non-UUID format since labels.auto is already a UUID + expect(mockLogger.warn).not.toHaveBeenCalledWith( + expect.stringContaining('labels.auto is not a UUID'), + expect.anything(), + ); + }); }); // --------------------------------------------------------------------------- diff --git a/tests/unit/web/pm-wizard-hooks.test.ts b/tests/unit/web/pm-wizard-hooks.test.ts new file mode 100644 index 00000000..81f1927c --- /dev/null +++ b/tests/unit/web/pm-wizard-hooks.test.ts @@ -0,0 +1,403 @@ +/** + * Unit tests for pure functions extracted in the pm-wizard-hooks refactor: + * - buildProviderAuthArg (generic auth-arg builder for all three providers) + * - runPerLabelCreations (batch label creator with per-item error handling) + * - buildTrelloIntegrationConfig / buildJiraIntegrationConfig (pure config builders) + */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { + buildProviderAuthArg, + runPerLabelCreations, +} from '../../../web/src/components/projects/pm-wizard-hooks.js'; +import type { WizardState } from '../../../web/src/components/projects/pm-wizard-state.js'; +import { + buildJiraIntegrationConfig, + buildLinearIntegrationConfig, + buildTrelloIntegrationConfig, + createInitialState, +} from '../../../web/src/components/projects/pm-wizard-state.js'; + +// ============================================================================ +// buildProviderAuthArg +// ============================================================================ + +describe('buildProviderAuthArg', () => { + function trelloState(overrides: Partial = {}): WizardState { + return { ...createInitialState(), provider: 'trello', ...overrides }; + } + function jiraState(overrides: Partial = {}): WizardState { + return { ...createInitialState(), provider: 'jira', ...overrides }; + } + function linearState(overrides: Partial = {}): WizardState { + return { ...createInitialState(), provider: 'linear', ...overrides }; + } + + // ── Edit mode — stored credentials path ────────────────────────────── + it('trello: returns { projectId } in edit mode when stored creds and no raw key', () => { + const state = trelloState({ + isEditing: true, + hasStoredCredentials: true, + trelloApiKey: '', + trelloToken: '', + }); + expect(buildProviderAuthArg(state, 'proj-1')).toEqual({ projectId: 'proj-1' }); + }); + + it('jira: returns { projectId } in edit mode when stored creds and no raw token', () => { + const state = jiraState({ + isEditing: true, + hasStoredCredentials: true, + jiraApiToken: '', + jiraEmail: '', + }); + expect(buildProviderAuthArg(state, 'proj-jira')).toEqual({ projectId: 'proj-jira' }); + }); + + it('linear: returns { projectId } in edit mode when stored creds and no raw key', () => { + const state = linearState({ + isEditing: true, + hasStoredCredentials: true, + linearApiKey: '', + }); + expect(buildProviderAuthArg(state, 'proj-lin')).toEqual({ projectId: 'proj-lin' }); + }); + + // ── Fresh setup — credentials path ────────────────────────────────── + it('trello: returns credentials when api_key and token present (fresh setup)', () => { + const state = trelloState({ trelloApiKey: 'key-abc', trelloToken: 'tok-xyz' }); + expect(buildProviderAuthArg(state, 'proj-1')).toEqual({ + credentials: { api_key: 'key-abc', token: 'tok-xyz' }, + }); + }); + + it('jira: returns credentials when email + api_token + base_url present (fresh setup)', () => { + const state = jiraState({ + jiraEmail: 'user@example.com', + jiraApiToken: 'jira-tok', + jiraBaseUrl: 'https://example.atlassian.net', + }); + expect(buildProviderAuthArg(state, 'proj-j')).toEqual({ + credentials: { + email: 'user@example.com', + api_token: 'jira-tok', + base_url: 'https://example.atlassian.net', + }, + }); + }); + + it('linear: returns credentials when api_key present (fresh setup)', () => { + const state = linearState({ linearApiKey: 'lin_abc' }); + expect(buildProviderAuthArg(state, 'proj-l')).toEqual({ + credentials: { api_key: 'lin_abc' }, + }); + }); + + // ── Edit mode — user re-typed key → use fresh credentials ─────────── + it('trello: uses fresh credentials when user re-typed api_key in edit mode', () => { + const state = trelloState({ + isEditing: true, + hasStoredCredentials: true, + trelloApiKey: 'new-key', + trelloToken: 'new-tok', + }); + expect(buildProviderAuthArg(state, 'proj-1')).toEqual({ + credentials: { api_key: 'new-key', token: 'new-tok' }, + }); + }); + + it('linear: uses fresh credentials when user re-typed api_key in edit mode', () => { + const state = linearState({ + isEditing: true, + hasStoredCredentials: true, + linearApiKey: 'lin_fresh', + }); + expect(buildProviderAuthArg(state, 'proj-l')).toEqual({ + credentials: { api_key: 'lin_fresh' }, + }); + }); + + // ── Error cases ────────────────────────────────────────────────────── + it('trello: throws when no api_key in fresh mode', () => { + const state = trelloState({ trelloToken: 'tok' }); + expect(() => buildProviderAuthArg(state, 'proj-1')).toThrow( + 'Enter both credentials before verifying', + ); + }); + + it('trello: throws when no token in fresh mode', () => { + const state = trelloState({ trelloApiKey: 'key' }); + expect(() => buildProviderAuthArg(state, 'proj-1')).toThrow( + 'Enter both credentials before verifying', + ); + }); + + it('jira: throws when no email in fresh mode', () => { + const state = jiraState({ jiraApiToken: 'tok', jiraBaseUrl: 'https://x.atlassian.net' }); + expect(() => buildProviderAuthArg(state, 'proj-j')).toThrow( + 'Enter both credentials before verifying', + ); + }); + + it('jira: throws when no api_token in fresh mode', () => { + const state = jiraState({ jiraEmail: 'u@x.com', jiraBaseUrl: 'https://x.atlassian.net' }); + expect(() => buildProviderAuthArg(state, 'proj-j')).toThrow( + 'Enter both credentials before verifying', + ); + }); + + it('linear: throws when no api_key in fresh mode', () => { + const state = linearState({ linearApiKey: '' }); + expect(() => buildProviderAuthArg(state, 'proj-l')).toThrow( + 'Enter your API key before verifying', + ); + }); +}); + +// ============================================================================ +// buildTrelloIntegrationConfig +// ============================================================================ + +describe('buildTrelloIntegrationConfig', () => { + function seed(overrides: Partial = {}): WizardState { + return { + ...createInitialState(), + provider: 'trello', + trelloBoardId: 'board-abc', + trelloListMappings: { todo: 'list-1', done: 'list-2' }, + trelloLabelMappings: { processing: 'label-x' }, + ...overrides, + }; + } + + it('produces the expected config shape', () => { + const config = buildTrelloIntegrationConfig(seed()); + expect(config).toEqual({ + boardId: 'board-abc', + lists: { todo: 'list-1', done: 'list-2' }, + labels: { processing: 'label-x' }, + }); + }); + + it('includes customFields when trelloCostFieldId is set', () => { + const config = buildTrelloIntegrationConfig(seed({ trelloCostFieldId: 'cf-cost' })); + expect(config.customFields).toEqual({ cost: 'cf-cost' }); + }); + + it('omits customFields when trelloCostFieldId is empty', () => { + const config = buildTrelloIntegrationConfig(seed({ trelloCostFieldId: '' })); + expect(config).not.toHaveProperty('customFields'); + }); + + it('passes through empty mappings', () => { + const config = buildTrelloIntegrationConfig( + seed({ trelloListMappings: {}, trelloLabelMappings: {} }), + ); + expect(config.lists).toEqual({}); + expect(config.labels).toEqual({}); + }); +}); + +// ============================================================================ +// buildJiraIntegrationConfig +// ============================================================================ + +describe('buildJiraIntegrationConfig', () => { + function seed(overrides: Partial = {}): WizardState { + return { + ...createInitialState(), + provider: 'jira', + jiraProjectKey: 'PROJ', + jiraBaseUrl: 'https://example.atlassian.net', + jiraStatusMappings: { todo: 'To Do', done: 'Done' }, + jiraLabels: { processing: 'cascade-processing' }, + ...overrides, + }; + } + + it('produces the expected config shape', () => { + const config = buildJiraIntegrationConfig(seed()); + expect(config).toEqual({ + projectKey: 'PROJ', + baseUrl: 'https://example.atlassian.net', + statuses: { todo: 'To Do', done: 'Done' }, + labels: { processing: 'cascade-processing' }, + }); + }); + + it('includes issueTypes when jiraIssueTypes non-empty', () => { + const config = buildJiraIntegrationConfig( + seed({ jiraIssueTypes: { task: 'Task', subtask: 'Sub-task' } }), + ); + expect(config.issueTypes).toEqual({ task: 'Task', subtask: 'Sub-task' }); + }); + + it('omits issueTypes when jiraIssueTypes is empty', () => { + const config = buildJiraIntegrationConfig(seed({ jiraIssueTypes: {} })); + expect(config).not.toHaveProperty('issueTypes'); + }); + + it('omits labels when jiraLabels is empty', () => { + const config = buildJiraIntegrationConfig(seed({ jiraLabels: {} })); + expect(config).not.toHaveProperty('labels'); + }); + + it('includes customFields when jiraCostFieldId set', () => { + const config = buildJiraIntegrationConfig(seed({ jiraCostFieldId: 'customfield_10042' })); + expect(config.customFields).toEqual({ cost: 'customfield_10042' }); + }); + + it('omits customFields when jiraCostFieldId is empty', () => { + const config = buildJiraIntegrationConfig(seed({ jiraCostFieldId: '' })); + expect(config).not.toHaveProperty('customFields'); + }); +}); + +// ============================================================================ +// buildLinearIntegrationConfig (already tested in pm-wizard-state.test.ts; +// added here for cross-reference completeness) +// ============================================================================ + +describe('buildLinearIntegrationConfig', () => { + function seed(overrides: Partial = {}): WizardState { + return { + ...createInitialState(), + provider: 'linear', + linearTeamId: 'T1', + linearStatusMappings: { todo: 'S-TD' }, + linearLabels: {}, + ...overrides, + }; + } + + it('produces the expected config shape', () => { + const config = buildLinearIntegrationConfig(seed()); + expect(config).toEqual({ teamId: 'T1', statuses: { todo: 'S-TD' } }); + }); + + it('includes projectId when linearProjectId is set', () => { + const config = buildLinearIntegrationConfig(seed({ linearProjectId: 'P1' })); + expect(config.projectId).toBe('P1'); + }); + + it('omits projectId when linearProjectId is empty', () => { + const config = buildLinearIntegrationConfig(seed({ linearProjectId: '' })); + expect(config).not.toHaveProperty('projectId'); + }); +}); + +// ============================================================================ +// runPerLabelCreations +// ============================================================================ + +const { mockCreateLabel } = vi.hoisted(() => ({ + mockCreateLabel: vi.fn(), +})); + +vi.mock('../../../web/src/lib/trpc.js', () => ({ + trpcClient: { + pm: { + discovery: { + createLabel: { mutate: mockCreateLabel }, + }, + }, + }, + trpc: {}, +})); + +describe('runPerLabelCreations', () => { + beforeEach(() => { + mockCreateLabel.mockReset(); + }); + + it('returns successes when all labels created', async () => { + mockCreateLabel + .mockResolvedValueOnce({ id: 'lbl-1', name: 'cascade-ready', color: 'sky' }) + .mockResolvedValueOnce({ id: 'lbl-2', name: 'cascade-processing', color: 'blue' }); + + const result = await runPerLabelCreations({ + labelsToCreate: [ + { slot: 'readyToProcess', name: 'cascade-ready', color: 'sky' }, + { slot: 'processing', name: 'cascade-processing', color: 'blue' }, + ], + providerId: 'trello', + containerId: 'board-1', + authArg: { credentials: { api_key: 'k', token: 't' } }, + }); + + expect(result.successes).toHaveLength(2); + expect(result.errors).toHaveLength(0); + expect(result.successes[0]).toEqual({ id: 'lbl-1', name: 'cascade-ready', color: 'sky' }); + expect(result.successes[1]).toEqual({ + id: 'lbl-2', + name: 'cascade-processing', + color: 'blue', + }); + }); + + it('collects per-item errors without aborting remaining items', async () => { + mockCreateLabel + .mockRejectedValueOnce(new Error('rate limit')) + .mockResolvedValueOnce({ id: 'lbl-2', name: 'cascade-processing', color: 'blue' }); + + const result = await runPerLabelCreations({ + labelsToCreate: [ + { slot: 'readyToProcess', name: 'cascade-ready', color: 'sky' }, + { slot: 'processing', name: 'cascade-processing', color: 'blue' }, + ], + providerId: 'trello', + containerId: 'board-1', + authArg: { projectId: 'proj-1' }, + }); + + expect(result.successes).toHaveLength(1); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toEqual({ name: 'cascade-ready', error: 'rate limit' }); + expect(result.successes[0].name).toBe('cascade-processing'); + }); + + it('returns empty arrays when labelsToCreate is empty', async () => { + const result = await runPerLabelCreations({ + labelsToCreate: [], + providerId: 'linear', + containerId: 'team-1', + authArg: { credentials: { api_key: 'lin_key' } }, + }); + + expect(result.successes).toHaveLength(0); + expect(result.errors).toHaveLength(0); + expect(mockCreateLabel).not.toHaveBeenCalled(); + }); + + it('passes the correct arguments to the tRPC mutation', async () => { + mockCreateLabel.mockResolvedValueOnce({ id: 'lbl-1', name: 'my-label', color: 'green' }); + + await runPerLabelCreations({ + labelsToCreate: [{ slot: 'processed', name: 'my-label', color: 'green' }], + providerId: 'linear', + containerId: 'team-abc', + authArg: { credentials: { api_key: 'lin_key' } }, + }); + + expect(mockCreateLabel).toHaveBeenCalledWith({ + providerId: 'linear', + containerId: 'team-abc', + name: 'my-label', + color: 'green', + credentials: { api_key: 'lin_key' }, + }); + }); + + it('converts non-Error rejections to string errors', async () => { + mockCreateLabel.mockRejectedValueOnce('some string error'); + + const result = await runPerLabelCreations({ + labelsToCreate: [{ slot: 'auto', name: 'cascade-auto', color: 'purple' }], + providerId: 'trello', + containerId: 'board-1', + authArg: { projectId: 'proj-1' }, + }); + + expect(result.errors[0].error).toBe('some string error'); + }); +}); diff --git a/web/src/components/projects/pm-wizard-hooks.ts b/web/src/components/projects/pm-wizard-hooks.ts index 00a5c061..c34f3ef2 100644 --- a/web/src/components/projects/pm-wizard-hooks.ts +++ b/web/src/components/projects/pm-wizard-hooks.ts @@ -1,6 +1,15 @@ /** * Custom hooks for PM Wizard mutations and side-effects. * Each hook encapsulates one concern to keep the main orchestrator thin. + * + * Generic hooks introduced in spec 013 refactor: + * - buildProviderAuthArg — single auth-arg builder for all three providers + * - useProviderLabelCreation— parameterized label-creation hook (replaces 2 copies) + * - useProviderCustomFieldCreation — parameterized CF hook (replaces 2 copies) + * - useSaveMutation — data-driven, no provider branching + * + * Per-provider thin wrappers (useTrelloDiscovery, etc.) remain exported for + * backward-compatibility with existing wizard.ts imports. */ import { useMutation, useQueryClient } from '@tanstack/react-query'; @@ -11,10 +20,239 @@ import type { LinearProjectOption, LinearTeamDetails, LinearTeamOption, + Provider, WizardAction, WizardState, } from './pm-wizard-state.js'; -import { buildLinearIntegrationConfig, shouldUseStoredCredentials } from './pm-wizard-state.js'; +import { + buildJiraIntegrationConfig, + buildLinearIntegrationConfig, + buildTrelloIntegrationConfig, + shouldUseStoredCredentials, +} from './pm-wizard-state.js'; + +// ============================================================================ +// Auth-arg builder — shared across all mutations +// ============================================================================ + +/** + * Build the `{ projectId }` or `{ credentials: ... }` portion of a tRPC + * request for any provider. Returns the stored-creds path when the user is + * editing an existing integration without re-typing their key. + * + * Extracted so every per-provider mutation stays below the cognitive- + * complexity threshold and a single place enforces the invariant. + */ +export function buildProviderAuthArg( + state: WizardState, + projectId: string, +): { projectId: string } | { credentials: Record } { + if (shouldUseStoredCredentials(state)) { + return { projectId }; + } + if (state.provider === 'trello') { + if (!state.trelloApiKey || !state.trelloToken) { + throw new Error('Enter both credentials before verifying'); + } + return { credentials: { api_key: state.trelloApiKey, token: state.trelloToken } }; + } + if (state.provider === 'linear') { + if (!state.linearApiKey) { + throw new Error('Enter your API key before verifying'); + } + return { credentials: { api_key: state.linearApiKey } }; + } + // jira + if (!state.jiraEmail || !state.jiraApiToken) { + throw new Error('Enter both credentials before verifying'); + } + return { + credentials: { + email: state.jiraEmail, + api_token: state.jiraApiToken, + base_url: state.jiraBaseUrl, + }, + }; +} + +// ============================================================================ +// Label creation utilities +// ============================================================================ + +/** + * Iterate `labelsToCreate` through `pm.discovery.createLabel`, collecting + * successes + per-name errors. Factored out so the two + * `createMissingLabelsMutation` bodies (Trello + Linear) stay below the + * biome cognitive-complexity threshold. + */ +export async function runPerLabelCreations(opts: { + labelsToCreate: Array<{ slot: string; name: string; color?: string }>; + providerId: 'trello' | 'linear'; + containerId: string; + authArg: { projectId: string } | { credentials: Record }; +}): Promise<{ + successes: Array<{ id: string; name: string; color: string }>; + errors: Array<{ name: string; error: string }>; +}> { + const successes: Array<{ id: string; name: string; color: string }> = []; + const errors: Array<{ name: string; error: string }> = []; + for (const { name, color } of opts.labelsToCreate) { + try { + const label = await trpcClient.pm.discovery.createLabel.mutate({ + providerId: opts.providerId, + containerId: opts.containerId, + name, + color, + ...opts.authArg, + }); + successes.push(label); + } catch (err) { + errors.push({ name, error: err instanceof Error ? err.message : String(err) }); + } + } + return { successes, errors }; +} + +// ============================================================================ +// Generic label-creation hook +// ============================================================================ + +interface LabelCreationConfig { + providerId: 'trello' | 'linear'; + /** Returns the container ID (board / team) from state */ + getContainerId: (state: WizardState) => string; + /** Error when container not yet selected */ + containerError: string; + /** Dispatch to add a newly created label to the local list */ + addLabel: (label: { id: string; name: string; color: string }) => WizardAction; + /** Dispatch to map a slot to the newly created label ID */ + setLabelMapping: (slot: string, id: string) => WizardAction; +} + +function useProviderLabelCreation( + config: LabelCreationConfig, + state: WizardState, + dispatch: React.Dispatch, + projectId: string, +) { + const createLabelMutation = useMutation({ + mutationFn: (vars: { name: string; color?: string; slot: string }) => { + const containerId = config.getContainerId(state); + if (!containerId) throw new Error(config.containerError); + const authArg = buildProviderAuthArg(state, projectId); + return trpcClient.pm.discovery.createLabel.mutate({ + providerId: config.providerId, + containerId, + name: vars.name, + color: vars.color, + ...authArg, + }); + }, + onSuccess: (label, vars) => { + dispatch(config.addLabel(label)); + dispatch(config.setLabelMapping(vars.slot, label.id)); + }, + onError: (error) => { + console.error('Failed to create label:', error); + alert(`Failed to create label: ${error instanceof Error ? error.message : String(error)}`); + }, + }); + + const createMissingLabelsMutation = useMutation({ + mutationFn: async (labelsToCreate: Array<{ slot: string; name: string; color?: string }>) => { + const containerId = config.getContainerId(state); + if (!containerId) throw new Error(config.containerError); + const authArg = buildProviderAuthArg(state, projectId); + return runPerLabelCreations({ + labelsToCreate, + providerId: config.providerId, + containerId, + authArg, + }); + }, + onSuccess: (result, labelsToCreate) => { + for (const label of result.successes) { + const slot = labelsToCreate.find((l) => l.name === label.name)?.slot; + if (slot) { + dispatch(config.addLabel(label)); + dispatch(config.setLabelMapping(slot, label.id)); + } + } + if (result.errors.length > 0) { + const errorMsg = result.errors.map((e) => `${e.name}: ${e.error}`).join('\n'); + alert( + `Some labels failed to create:\n${errorMsg}\n\n${result.successes.length} label(s) created successfully.`, + ); + } + }, + onError: (error) => { + console.error('Failed to create labels:', error); + alert(`Failed to create labels: ${error instanceof Error ? error.message : String(error)}`); + }, + }); + + return { createLabelMutation, createMissingLabelsMutation }; +} + +// ============================================================================ +// Generic custom-field-creation hook +// ============================================================================ + +interface CustomFieldCreationConfig { + providerId: 'trello' | 'jira'; + /** Returns the container ID from state (boardId / projectKey) */ + getContainerId: (state: WizardState) => string; + /** Error thrown when container not yet selected (required for Trello; omit for global providers like JIRA) */ + containerError?: string; + /** Dispatch to add a new custom field to the local list */ + addCustomField: (field: { id: string; name: string; type: string }) => WizardAction; + /** Dispatch to set the cost field ID */ + setCostField: (id: string) => WizardAction; + /** Optional override for error handling (default: generic alert) */ + onError?: (error: unknown) => void; +} + +function useProviderCustomFieldCreation( + config: CustomFieldCreationConfig, + state: WizardState, + dispatch: React.Dispatch, + projectId: string, +) { + const createCustomFieldMutation = useMutation({ + mutationFn: ({ name }: { name: string }) => { + const containerId = config.getContainerId(state); + if (!containerId && config.containerError) throw new Error(config.containerError); + const authArg = buildProviderAuthArg(state, projectId); + return trpcClient.pm.discovery.createCustomField.mutate({ + providerId: config.providerId, + containerId: containerId || 'global', + name, + ...authArg, + }); + }, + onSuccess: (customField) => { + dispatch( + config.addCustomField({ + id: customField.id, + name: customField.name, + type: customField.type, + }), + ); + dispatch(config.setCostField(customField.id)); + }, + onError: (error) => { + if (config.onError) { + config.onError(error); + return; + } + console.error('Failed to create custom field:', error); + const message = error instanceof Error ? error.message : String(error); + alert(`Failed to create custom field: ${message}`); + }, + }); + + return { createCustomFieldMutation }; +} // ============================================================================ // Trello Discovery @@ -367,43 +605,6 @@ export function useLinearDiscovery( // Verification // ============================================================================ -/** - * Build the `{ projectId }` or `{ credentials: ... }` portion of a tRPC - * request, picking the stored-creds path when the user is editing an - * existing integration and hasn't re-typed the key. Extracted so the - * `verifyMutation` body stays below the cognitive-complexity threshold. - */ -function buildVerifyAuthArg( - state: WizardState, - projectId: string, -): { projectId: string } | { credentials: Record } { - if (shouldUseStoredCredentials(state)) { - return { projectId }; - } - if (state.provider === 'trello') { - if (!state.trelloApiKey || !state.trelloToken) { - throw new Error('Enter both credentials before verifying'); - } - return { credentials: { api_key: state.trelloApiKey, token: state.trelloToken } }; - } - if (state.provider === 'linear') { - if (!state.linearApiKey) { - throw new Error('Enter your API key before verifying'); - } - return { credentials: { api_key: state.linearApiKey } }; - } - if (!state.jiraEmail || !state.jiraApiToken) { - throw new Error('Enter both credentials before verifying'); - } - return { - credentials: { - email: state.jiraEmail, - api_token: state.jiraApiToken, - base_url: state.jiraBaseUrl, - }, - }; -} - export function useVerification( state: WizardState, dispatch: React.Dispatch, @@ -416,12 +617,12 @@ export function useVerification( // Calls the `currentUser` discovery capability; every provider // maps its native `getMe()` response to `{ id, name, displayName? }`. // - // Edit-mode fallback: `buildVerifyAuthArg` returns `{ projectId }` + // Edit-mode fallback: `buildProviderAuthArg` returns `{ projectId }` // when the user is editing with stored credentials but an empty // API-key field, so the backend resolves the stored secret via // `resolvePMCredentials` instead of requiring re-entry. const provider = state.provider; - const authArg = buildVerifyAuthArg(state, projectId); + const authArg = buildProviderAuthArg(state, projectId); const me = (await trpcClient.pm.discovery.discover.mutate({ providerId: provider, capability: 'currentUser', @@ -468,40 +669,6 @@ export function useVerification( // (`webhooks.list/create/delete` + `callbackBaseUrl` formula) — // see `./pm-providers/{trello,jira,linear}/wizard.ts`. -/** - * Iterate `labelsToCreate` through `pm.discovery.createLabel`, collecting - * successes + per-name errors. Factored out so the two - * `createMissingLabelsMutation` bodies (Trello + Linear) stay below the - * biome cognitive-complexity threshold. - */ -async function runPerLabelCreations(opts: { - labelsToCreate: Array<{ slot: string; name: string; color?: string }>; - providerId: 'trello' | 'linear'; - containerId: string; - authArg: { projectId: string } | { credentials: Record }; -}): Promise<{ - successes: Array<{ id: string; name: string; color: string }>; - errors: Array<{ name: string; error: string }>; -}> { - const successes: Array<{ id: string; name: string; color: string }> = []; - const errors: Array<{ name: string; error: string }> = []; - for (const { name, color } of opts.labelsToCreate) { - try { - const label = await trpcClient.pm.discovery.createLabel.mutate({ - providerId: opts.providerId, - containerId: opts.containerId, - name, - color, - ...opts.authArg, - }); - successes.push(label); - } catch (err) { - errors.push({ name, error: err instanceof Error ? err.message : String(err) }); - } - } - return { successes, errors }; -} - // ============================================================================ // Trello Label Creation // ============================================================================ @@ -511,84 +678,18 @@ export function useTrelloLabelCreation( dispatch: React.Dispatch, projectId: string, ) { - const createLabelMutation = useMutation({ - mutationFn: (vars: { name: string; color?: string; slot: string }) => { - if (!state.trelloBoardId) { - throw new Error('Board must be selected before creating a label'); - } - const useStored = shouldUseStoredCredentials(state); - if (!useStored && (!state.trelloApiKey || !state.trelloToken)) { - throw new Error('Missing credentials — enter them on the credentials step'); - } - // Plan 010/1: routes through generic pm.discovery.createLabel. - // Edit mode with stored creds → projectId path (see - // `shouldUseStoredCredentials` in pm-wizard-state.ts). - return trpcClient.pm.discovery.createLabel.mutate({ - providerId: 'trello', - containerId: state.trelloBoardId, - name: vars.name, - color: vars.color, - ...(useStored - ? { projectId } - : { credentials: { api_key: state.trelloApiKey, token: state.trelloToken } }), - }); - }, - onSuccess: (label, vars) => { - dispatch({ type: 'ADD_TRELLO_BOARD_LABEL', label }); - dispatch({ type: 'SET_TRELLO_LABEL_MAPPING', key: vars.slot, value: label.id }); - }, - onError: (error) => { - console.error('Failed to create label:', error); - alert(`Failed to create label: ${error instanceof Error ? error.message : String(error)}`); - }, - }); - - const createMissingLabelsMutation = useMutation({ - mutationFn: async (labelsToCreate: Array<{ slot: string; name: string; color?: string }>) => { - if (!state.trelloBoardId) { - throw new Error('Board must be selected before creating labels'); - } - const useStored = shouldUseStoredCredentials(state); - if (!useStored && (!state.trelloApiKey || !state.trelloToken)) { - throw new Error('Missing credentials — enter them on the credentials step'); - } - const authArg = useStored - ? { projectId } - : { credentials: { api_key: state.trelloApiKey, token: state.trelloToken } }; - return runPerLabelCreations({ - labelsToCreate, - providerId: 'trello', - containerId: state.trelloBoardId, - authArg, - }); - }, - onSuccess: (result, labelsToCreate) => { - // Handle successful label creations - for (let i = 0; i < result.successes.length; i++) { - const label = result.successes[i]; - // Find the slot for this label by matching the name - const slot = labelsToCreate.find((l) => l.name === label.name)?.slot; - if (slot) { - dispatch({ type: 'ADD_TRELLO_BOARD_LABEL', label }); - dispatch({ type: 'SET_TRELLO_LABEL_MAPPING', key: slot, value: label.id }); - } - } - - // Show error feedback if any labels failed - if (result.errors.length > 0) { - const errorMsg = result.errors.map((e) => `${e.name}: ${e.error}`).join('\n'); - alert( - `Some labels failed to create:\n${errorMsg}\n\n${result.successes.length} label(s) created successfully.`, - ); - } - }, - onError: (error) => { - console.error('Failed to create labels:', error); - alert(`Failed to create labels: ${error instanceof Error ? error.message : String(error)}`); - }, - }); - - return { createLabelMutation, createMissingLabelsMutation }; + return useProviderLabelCreation( + { + providerId: 'trello', + getContainerId: (s) => s.trelloBoardId, + containerError: 'Board must be selected before creating a label', + addLabel: (label) => ({ type: 'ADD_TRELLO_BOARD_LABEL', label }), + setLabelMapping: (slot, id) => ({ type: 'SET_TRELLO_LABEL_MAPPING', key: slot, value: id }), + }, + state, + dispatch, + projectId, + ); } // ============================================================================ @@ -600,51 +701,29 @@ export function useTrelloCustomFieldCreation( dispatch: React.Dispatch, projectId: string, ) { - const createCustomFieldMutation = useMutation({ - // Plan 011/2: the shared custom-field-mapping step lets operators type - // a name. `mutate({ name })` — callers without a preference pass - // `{ name: 'Cost' }` to preserve the legacy default. - mutationFn: ({ name }: { name: string }) => { - if (!state.trelloBoardId) { - throw new Error('Board must be selected before creating a custom field'); - } - const useStored = shouldUseStoredCredentials(state); - if (!useStored && (!state.trelloApiKey || !state.trelloToken)) { - throw new Error('Missing credentials — enter them on the credentials step'); - } - // Plan 010/1 (leftover caller): routes through pm.discovery.createCustomField. - return trpcClient.pm.discovery.createCustomField.mutate({ - providerId: 'trello', - containerId: state.trelloBoardId, - name, - ...(useStored - ? { projectId } - : { - credentials: { - api_key: state.trelloApiKey, - token: state.trelloToken, - }, - }), - }); - }, - onSuccess: (customField) => { - dispatch({ type: 'ADD_TRELLO_BOARD_CUSTOM_FIELD', customField }); - dispatch({ type: 'SET_TRELLO_COST_FIELD', id: customField.id }); - }, - onError: (error) => { - console.error('Failed to create custom field:', error); - const message = error instanceof Error ? error.message : String(error); - if (message.includes('403')) { - alert( - 'Failed to create custom field: The Trello Custom Fields power-up is required. Please enable it on your Trello board and try again.', - ); - } else { - alert(`Failed to create custom field: ${message}`); - } + return useProviderCustomFieldCreation( + { + providerId: 'trello', + getContainerId: (s) => s.trelloBoardId, + containerError: 'Board must be selected before creating a custom field', + addCustomField: (f) => ({ type: 'ADD_TRELLO_BOARD_CUSTOM_FIELD', customField: f }), + setCostField: (id) => ({ type: 'SET_TRELLO_COST_FIELD', id }), + onError: (error) => { + console.error('Failed to create custom field:', error); + const message = error instanceof Error ? error.message : String(error); + if (message.includes('403')) { + alert( + 'Failed to create custom field: The Trello Custom Fields power-up is required. Please enable it on your Trello board and try again.', + ); + } else { + alert(`Failed to create custom field: ${message}`); + } + }, }, - }); - - return { createCustomFieldMutation }; + state, + dispatch, + projectId, + ); } // ============================================================================ @@ -656,84 +735,78 @@ export function useJiraCustomFieldCreation( dispatch: React.Dispatch, projectId: string, ) { - const createJiraCustomFieldMutation = useMutation({ - // Plan 011/3: the shared custom-field-mapping step lets operators type - // a name; callers without a preference pass `{ name: 'Cost' }`. - mutationFn: ({ name }: { name: string }) => { - const useStored = shouldUseStoredCredentials(state); - if (!useStored && (!state.jiraEmail || !state.jiraApiToken || !state.jiraBaseUrl)) { - throw new Error('Missing JIRA credentials or base URL'); - } - // Plan 010/1: routes through generic pm.discovery.createCustomField. - // JIRA's project key isn't needed for the mutation (fields are global) - // but we pass the configured projectKey as containerId for uniform shape. - return trpcClient.pm.discovery.createCustomField.mutate({ - providerId: 'jira', - containerId: state.jiraProjectKey || 'global', - name, - ...(useStored - ? { projectId } - : { - credentials: { - email: state.jiraEmail, - api_token: state.jiraApiToken, - base_url: state.jiraBaseUrl, - }, - }), - }); - }, - onSuccess: (field) => { - dispatch({ type: 'ADD_JIRA_PROJECT_CUSTOM_FIELD', field: { ...field, custom: true } }); - dispatch({ type: 'SET_JIRA_COST_FIELD', id: field.id }); - }, - onError: (error) => { - console.error('Failed to create JIRA custom field:', error); - const message = error instanceof Error ? error.message : String(error); - if (message.includes('403') || message.toLowerCase().includes('admin')) { - alert( - 'Failed to create custom field: JIRA admin permissions are required to create global custom fields. Please contact your JIRA administrator.', - ); - } else { - alert(`Failed to create JIRA custom field: ${message}`); - } - }, - }); - - return { createJiraCustomFieldMutation }; + const inner = useProviderCustomFieldCreation( + { + providerId: 'jira', + // JIRA fields are global; containerId is sent as-is for uniform shape + getContainerId: (s) => s.jiraProjectKey || 'global', + addCustomField: (f) => ({ + type: 'ADD_JIRA_PROJECT_CUSTOM_FIELD', + field: { ...f, custom: true }, + }), + setCostField: (id) => ({ type: 'SET_JIRA_COST_FIELD', id }), + onError: (error) => { + console.error('Failed to create JIRA custom field:', error); + const message = error instanceof Error ? error.message : String(error); + if (message.includes('403') || message.toLowerCase().includes('admin')) { + alert( + 'Failed to create custom field: JIRA admin permissions are required to create global custom fields. Please contact your JIRA administrator.', + ); + } else { + alert(`Failed to create JIRA custom field: ${message}`); + } + }, + }, + state, + dispatch, + projectId, + ); + // Preserve the legacy export name for JIRA callers + return { createJiraCustomFieldMutation: inner.createCustomFieldMutation }; } // ============================================================================ -// Save Mutation +// Save Mutation — data-driven, no per-provider branching // ============================================================================ +type CredentialEntry = { envVarKey: string; stateField: keyof WizardState; label: string }; + +const SAVE_CONFIGS: Record< + Provider, + { + buildConfig: (state: WizardState) => Record; + credentials: CredentialEntry[]; + } +> = { + trello: { + buildConfig: buildTrelloIntegrationConfig, + credentials: [ + { envVarKey: 'TRELLO_API_KEY', stateField: 'trelloApiKey', label: 'Trello API Key' }, + { envVarKey: 'TRELLO_TOKEN', stateField: 'trelloToken', label: 'Trello Token' }, + ], + }, + jira: { + buildConfig: buildJiraIntegrationConfig, + credentials: [ + { envVarKey: 'JIRA_EMAIL', stateField: 'jiraEmail', label: 'JIRA Email' }, + { envVarKey: 'JIRA_API_TOKEN', stateField: 'jiraApiToken', label: 'JIRA API Token' }, + ], + }, + linear: { + buildConfig: buildLinearIntegrationConfig, + credentials: [ + { envVarKey: 'LINEAR_API_KEY', stateField: 'linearApiKey', label: 'Linear API Key' }, + ], + }, +}; + export function useSaveMutation(projectId: string, state: WizardState) { const queryClient = useQueryClient(); const saveMutation = useMutation({ - // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: handles three provider types + credential persisting mutationFn: async () => { - let config: Record; - if (state.provider === 'trello') { - config = { - boardId: state.trelloBoardId, - lists: state.trelloListMappings, - labels: state.trelloLabelMappings, - ...(state.trelloCostFieldId ? { customFields: { cost: state.trelloCostFieldId } } : {}), - }; - } else if (state.provider === 'linear') { - config = buildLinearIntegrationConfig(state); - } else { - config = { - projectKey: state.jiraProjectKey, - baseUrl: state.jiraBaseUrl, - statuses: state.jiraStatusMappings, - ...(Object.keys(state.jiraIssueTypes).length > 0 - ? { issueTypes: state.jiraIssueTypes } - : {}), - ...(Object.keys(state.jiraLabels).length > 0 ? { labels: state.jiraLabels } : {}), - ...(state.jiraCostFieldId ? { customFields: { cost: state.jiraCostFieldId } } : {}), - }; - } + const providerCfg = SAVE_CONFIGS[state.provider]; + const config = providerCfg.buildConfig(state); const result = await trpcClient.projects.integrations.upsert.mutate({ projectId, @@ -743,47 +816,14 @@ export function useSaveMutation(projectId: string, state: WizardState) { }); // Persist credentials to project_credentials table - if (state.provider === 'trello') { - if (state.trelloApiKey) { - await trpcClient.projects.credentials.set.mutate({ - projectId, - envVarKey: 'TRELLO_API_KEY', - value: state.trelloApiKey, - name: 'Trello API Key', - }); - } - if (state.trelloToken) { - await trpcClient.projects.credentials.set.mutate({ - projectId, - envVarKey: 'TRELLO_TOKEN', - value: state.trelloToken, - name: 'Trello Token', - }); - } - } else if (state.provider === 'linear') { - if (state.linearApiKey) { - await trpcClient.projects.credentials.set.mutate({ - projectId, - envVarKey: 'LINEAR_API_KEY', - value: state.linearApiKey, - name: 'Linear API Key', - }); - } - } else { - if (state.jiraEmail) { - await trpcClient.projects.credentials.set.mutate({ - projectId, - envVarKey: 'JIRA_EMAIL', - value: state.jiraEmail, - name: 'JIRA Email', - }); - } - if (state.jiraApiToken) { + for (const cred of providerCfg.credentials) { + const value = state[cred.stateField] as string; + if (value) { await trpcClient.projects.credentials.set.mutate({ projectId, - envVarKey: 'JIRA_API_TOKEN', - value: state.jiraApiToken, - name: 'JIRA API Token', + envVarKey: cred.envVarKey, + value, + name: cred.label, }); } } @@ -837,71 +877,20 @@ export function useLinearLabelCreation( dispatch: React.Dispatch, projectId: string, ) { - const createLabelMutation = useMutation({ - mutationFn: (vars: { name: string; color?: string; slot: string }) => { - if (!state.linearTeamId) { - throw new Error('Team must be selected before creating a label'); - } - const useStored = shouldUseStoredCredentials(state); - if (!useStored && !state.linearApiKey) { - throw new Error('Missing credentials — enter them on the credentials step'); - } - // Plan 010/1: routes through generic pm.discovery.createLabel. - return trpcClient.pm.discovery.createLabel.mutate({ - providerId: 'linear', - containerId: state.linearTeamId, - name: vars.name, - color: vars.color, - ...(useStored ? { projectId } : { credentials: { api_key: state.linearApiKey } }), - }); - }, - onSuccess: (label, vars) => { - dispatch({ type: 'ADD_LINEAR_TEAM_LABEL', label }); - dispatch({ type: 'SET_LINEAR_LABEL', key: vars.slot, value: label.id }); - }, - onError: (error) => { - console.error('Failed to create Linear label:', error); - alert(`Failed to create label: ${error instanceof Error ? error.message : String(error)}`); - }, - }); - - const createMissingLabelsMutation = useMutation({ - mutationFn: async (labelsToCreate: Array<{ slot: string; name: string; color?: string }>) => { - if (!state.linearTeamId) { - throw new Error('Team must be selected before creating labels'); - } - const useStored = shouldUseStoredCredentials(state); - if (!useStored && !state.linearApiKey) { - throw new Error('Missing credentials — enter them on the credentials step'); - } - const authArg = useStored ? { projectId } : { credentials: { api_key: state.linearApiKey } }; - return runPerLabelCreations({ - labelsToCreate, - providerId: 'linear', - containerId: state.linearTeamId, - authArg, - }); - }, - onSuccess: (result, labelsToCreate) => { - for (const label of result.successes) { - const slot = labelsToCreate.find((l) => l.name === label.name)?.slot; - if (slot) { - dispatch({ type: 'ADD_LINEAR_TEAM_LABEL', label }); - dispatch({ type: 'SET_LINEAR_LABEL', key: slot, value: label.id }); - } - } - if (result.errors.length > 0) { - const errorMsg = result.errors.map((e) => `${e.name}: ${e.error}`).join('\n'); - alert( - `Some labels failed to create:\n${errorMsg}\n\n${result.successes.length} label(s) created successfully.`, - ); - } - }, - onError: (error) => { - console.error('Failed to create Linear labels:', error); - alert(`Failed to create labels: ${error instanceof Error ? error.message : String(error)}`); - }, - }); - - return { createLabelMutation, createMissingLabelsMutation }; + return useProviderLabelCreation( + { + providerId: 'linear', + getContainerId: (s) => s.linearTeamId, + containerError: 'Team must be selected before creating a label', + addLabel: (label) => ({ type: 'ADD_LINEAR_TEAM_LABEL', label }), + setLabelMapping: (slot, id) => ({ type: 'SET_LINEAR_LABEL', key: slot, value: id }), + }, + state, + dispatch, + projectId, + ); } + +export type { CustomFieldCreationConfig, LabelCreationConfig }; +// Re-export the generic utilities for direct use in tests / advanced consumers +export { useProviderCustomFieldCreation, useProviderLabelCreation }; diff --git a/web/src/components/projects/pm-wizard-state.ts b/web/src/components/projects/pm-wizard-state.ts index 0b8e931f..af21a618 100644 --- a/web/src/components/projects/pm-wizard-state.ts +++ b/web/src/components/projects/pm-wizard-state.ts @@ -510,6 +510,34 @@ export function shouldUseStoredCredentials(state: WizardState): boolean { return !state.linearApiKey; } +/** + * Build the Trello integration config payload from wizard state. + * Pure function so it can be unit-tested without the React runtime. + */ +export function buildTrelloIntegrationConfig(state: WizardState): Record { + return { + boardId: state.trelloBoardId, + lists: state.trelloListMappings, + labels: state.trelloLabelMappings, + ...(state.trelloCostFieldId ? { customFields: { cost: state.trelloCostFieldId } } : {}), + }; +} + +/** + * Build the JIRA integration config payload from wizard state. + * Pure function so it can be unit-tested without the React runtime. + */ +export function buildJiraIntegrationConfig(state: WizardState): Record { + return { + projectKey: state.jiraProjectKey, + baseUrl: state.jiraBaseUrl, + statuses: state.jiraStatusMappings, + ...(Object.keys(state.jiraIssueTypes).length > 0 ? { issueTypes: state.jiraIssueTypes } : {}), + ...(Object.keys(state.jiraLabels).length > 0 ? { labels: state.jiraLabels } : {}), + ...(state.jiraCostFieldId ? { customFields: { cost: state.jiraCostFieldId } } : {}), + }; +} + /** * Build the Linear integration config payload from wizard state. * Pure function so it can be unit-tested without the React runtime.