diff --git a/CHANGELOG.md b/CHANGELOG.md index b24fe938..d8bdc5de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ All notable user-visible changes to CASCADE are documented here. The format is l ### Changed +- **`cascade-tools scm create-pr-review`: `--comment` alias + `--comments-file` escape hatch** (spec 014, plan 2 of 2). The command now accepts `--comment` (singular) as an alias for `--comments` — the exact muscle-memory mistake from prod run 5d993b04 now resolves correctly. Added `--comments-file ` (and `-` for stdin) as a JSON-parsed file alternative for long payloads that don't survive shell quoting. Zero edits to shared infrastructure (cliCommandFactory, manifestGenerator, nativeToolPrompts, errorEnvelope) — the two declarative fields on `createPRReviewDef.parameters.comments.cliAliases` + `createPRReviewDef.cli.fileInputAlternatives` are everything. Proves spec 014's single-entrypoint invariant: a new or evolved gadget should never need to touch shared machinery. See [spec 014](docs/specs/014-cascade-tools-agent-ergonomics.md). +- **`cascade-tools` agent ergonomics: truthful system prompt, runnable `--help`, structured error envelope** (spec 014, plan 1 of 2). The system-prompt renderer that describes every cascade-tools command to agents now tells the truth about array-shaped parameters — no more silent `s`-stripping of names, no more ` (repeatable)` claim for array-of-object flags (they correctly render as `-- ''` now, with aliases appended via `|` and a one-line runnable JSON example inlined from the tool definition's `examples` block). Every CLI failure — flag-parse, JSON-parse, missing-required, enum-mismatch, unknown-flag, auth, runtime — emits a single structured envelope on stdout (`{"success":false,"error":{type,flag?,message,got?,expected?,hint?,example?}}`) plus a short prose summary on stderr for humans, replacing the ad-hoc mix of `this.error()` prose and `{success:false,error:""}` flat shapes. Mistyped flags get a "did you mean" suggestion via Levenshtein match against declared canonical names + aliases. `--help` now renders `def.examples` as copy-pasteable shell invocations under an `EXAMPLES` section. Root-caused by prod run 5d993b04-6e05-4ae1-b7de-8c274cf3496b where a review agent wasted ~2½ min fighting the prior pre-014 surface and ultimately dropped an inline PR comment. See [spec 014](docs/specs/014-cascade-tools-agent-ergonomics.md) + authoring guide at [`src/gadgets/README.md`](src/gadgets/README.md). - **`cascade-tools` now streams subprocess output live** (spec 013). The shared subprocess helper (on top of `execa` + `tree-kill`) forwards child stdout/stderr to the parent's stderr line-by-line as it arrives, emits a heartbeat line on stderr every 30 seconds of child silence (configurable), enforces both an idle-silence timeout (default 120s) and a wall-clock timeout (default 600s) with SIGTERM→SIGKILL escalation, and kills the full process tree on timeout. `git push` and `git commit` invoked by `scm create-pr` pass tighter per-caller timeouts and now return captured hook output in the result on success (previously discarded). Result shape is backward-compatible — `{ stdout, stderr, exitCode }` preserved; new optional `reason: 'idle-timeout' | 'wall-timeout'` surfaces when the helper killed the child. Motivation: LLM-driven CASCADE agents watching an output file could not distinguish a slow pre-push hook (~60s of silence) from a hung process, leading to retry loops that burned 5–10+ minutes of run budget. See [spec 013](docs/specs/013-subprocess-output-streaming.md). - **`cascade-tools` `command bootstrap not found` warning silenced** (spec 013). The oclif command-loader glob now excludes `bootstrap.js`, which is a side-effect import from `bin/cascade-tools.js`, not a command. - **Linear and JIRA checklists are now inline markdown, not sub-issues / subtasks.** Acceptance criteria, implementation steps, and other checklist items added by CASCADE agents (via `AddChecklist` / `AddChecklistItem`) now live as `- [ ]` / `- [x]` markdown checkboxes inside the parent issue's description, under a `### {Checklist Name}` heading. Previously these created full sub-issues (Linear) or subtasks (JIRA) — one per item — which cluttered boards and inflated backlog counts (a single split could create 30+ orphan items). The PMProvider interface is unchanged; only the Linear and JIRA adapter internals changed. Trello continues to use its native checklist API. Forward-only — existing sub-issues / subtasks created before this change are not migrated. See [spec 008](docs/specs/008-inline-checklists.md.done) and the new "Checklist implementation by provider" section in [src/integrations/README.md](src/integrations/README.md). diff --git a/bin/cascade-tools.js b/bin/cascade-tools.js index 261257a9..0d435e63 100755 --- a/bin/cascade-tools.js +++ b/bin/cascade-tools.js @@ -27,4 +27,18 @@ pjson.oclif = { }; const config = await Config.load({ root, pjson }); -await run(process.argv.slice(2), config); +try { + await run(process.argv.slice(2), config); +} catch (err) { + // oclif's `this.exit(code)` throws an ExitError. We've already emitted the + // cascade-tools error envelope (stdout JSON + stderr prose) at that point; + // propagating the ExitError to Node's default handler would spew a stack + // trace that obscures our readable prose. Swallow ExitError quietly and + // let the exit code stand. Anything else still propagates. + const code = + typeof err?.oclif?.exit === 'number' ? err.oclif.exit : err?.code === 'EEXIT' ? 1 : undefined; + if (code !== undefined) { + process.exit(code); + } + throw err; +} diff --git a/docs/plans/014-cascade-tools-agent-ergonomics/1-shared-infra.md.done b/docs/plans/014-cascade-tools-agent-ergonomics/1-shared-infra.md.done new file mode 100644 index 00000000..d9a744d1 --- /dev/null +++ b/docs/plans/014-cascade-tools-agent-ergonomics/1-shared-infra.md.done @@ -0,0 +1,280 @@ +--- +id: 014 +slug: cascade-tools-agent-ergonomics +plan: 1 +plan_slug: shared-infra +level: plan +parent_spec: docs/specs/014-cascade-tools-agent-ergonomics.md +depends_on: [] +status: done +--- + +# 014/1: cascade-tools shared infra — truthful prompts, structured errors, fuzzy flags, help examples + +> Part 1 of 2 in the 014-cascade-tools-agent-ergonomics plan. See [parent spec](../../specs/014-cascade-tools-agent-ergonomics.md). + +## Summary + +Ships the root-cause fix from prod run `5d993b04-6e05-4ae1-b7de-8c274cf3496b` and all the shared infrastructure that supports it. Three surfaces change: (a) the agent-facing system-prompt renderer stops lying about array parameter names and shapes; (b) the oclif command factory gains flag aliases, JSON parsing for array-of-object flags, a structured error envelope, fuzzy flag suggestion, and help examples; (c) the tool-manifest generator threads the new metadata through. Contracts widen: `ParameterDefinition` gains `cliAliases?`, `FileInputAlternative` gains `parseAs?`, and `ToolManifest` parameters carry `items?` / `aliases?` / `example?`. + +Zero production gadget definitions change here. Every new code path is exercised by a minimal fake `ToolDefinition` fixture so the infra is provably complete before plan 2 adopts it. The prompt-renderer fix *does* apply to every gadget immediately — an agent querying `cascade-tools` after this plan merges sees truthful tool guidance across the board, even before plan 2. + +**Components delivered:** +- `src/gadgets/shared/toolDefinition.ts` — widened types. +- `src/agents/contracts/index.ts` — widened `ToolManifest`. +- `src/gadgets/shared/errorEnvelope.ts` (new) — `CliErrorEnvelope` type + `emitCliError()` helper. +- `src/gadgets/shared/cliCommandFactory.ts` — aliases, JSON parse, error envelope integration, fuzzy suggestion, help examples. +- `src/gadgets/shared/manifestGenerator.ts` — thread items/aliases/example. +- `src/backends/shared/nativeToolPrompts.ts` — rewrite array branch in `formatParam`, render aliases + example inline. +- `src/gadgets/README.md` (new) — authoring guide. +- `src/integrations/README.md` — one-line cross-reference to gadgets README. +- `CHANGELOG.md` — entry. +- `package.json` + lockfile — add `fastest-levenshtein`. +- Tests (≈25) across `tests/unit/gadgets/shared/` + `tests/unit/backends/`. + +**Deferred to later plans in this spec:** +- Plan 2 applies the pattern to `createPRReviewDef` (adds `cliAliases: ['comment']`, `fileInputAlternatives` for `--comments-file`, integration smoke test). + +--- + +## Spec ACs satisfied by this plan + +- Spec AC #2 (`create-pr-review --help` has EXAMPLES section) — **full** (the factory wiring that renders `def.examples` into oclif help lands here; `createPRReviewDef.examples` already exists today). +- Spec AC #3 (structured stdout error with flag/got/expected/hint) — **full**. +- Spec AC #4 (stable envelope schema across every failure path) — **full**. +- Spec AC #5 ("did you mean" flag suggestions) — **full**. +- Spec AC #6 (truthful system-prompt rendering, no `s`-stripping, JSON marker) — **full**. +- Spec AC #7 (no regressions on primitive-array gadgets) — **full**. +- Spec AC #8 (new gadget = declarative metadata only) — **partial** — proven here via a fake test gadget; plan 2 proves it with a production gadget by not touching any shared file. +- Spec AC #10 [manual] (stderr prose readable) — **full** (error envelope emits prose summary to stderr). + +--- + +## Depends On + +- No other plan (layer 0). +- Existing code: oclif v4.x, `@oclif/core` `Flags.*` supporting `aliases`, the current `ToolDefinition`/`ToolManifest` shapes, existing `createPRReviewDef.examples` block. + +--- + +## Detailed Task List (TDD) + +### 1. Widen contracts + +**Tests first** (`tests/unit/gadgets/shared/manifestGenerator.test.ts`, new file): + +- `threads items from array parameter into manifest` — unit — given a `ToolDefinition` with `parameters.x = { type: 'array', items: 'object', describe: 'x' }`, `generateToolManifest(def).parameters.x.items` equals `'object'`. Expected red: `TypeError: generated manifest parameter missing 'items' field` (or equivalent once the field is absent). +- `threads cliAliases into manifest as aliases` — unit — param with `cliAliases: ['comment']` produces manifest entry with `aliases: ['comment']`. Expected red: `expected 'aliases' to equal ['comment'], received undefined`. +- `threads first matching example param value into manifest as example` — unit — def with `examples: [{ params: { comments: [{path:'a',line:1,body:'b'}] } }]` produces `parameters.comments.example` deep-equal to that array. Expected red: `expected parameter to have 'example' property`. +- `primitive-array parameter still produces items:'string'` — unit — `{type:'array', items:'string'}` manifest round-trips. Expected red: `expected 'items' to equal 'string', received undefined` (when we haven't wired it yet). +- `gadgetOnly params are still excluded` — unit — regression guard. Expected red: n/a (existing behavior; this test locks it). + +**Implementation** (`src/gadgets/shared/toolDefinition.ts`): +- Add `readonly cliAliases?: readonly string[]` to `ParameterDefinition` discriminated union (all branches inherit via shared base). +- Add optional `readonly parseAs?: 'string' | 'json'` to `FileInputAlternative` (default `'string'`). + +**Implementation** (`src/agents/contracts/index.ts`): +- Widen `ToolManifest.parameters` entry shape to include optional `items?: string`, `aliases?: readonly string[]`, `example?: unknown`. + +**Implementation** (`src/gadgets/shared/manifestGenerator.ts`): +- `buildManifestParam`: when `def.type === 'array'`, copy `items` to entry. Copy `def.cliAliases` to `entry.aliases` when present. In `generateToolManifest`, after building each entry, scan `def.examples ?? []` in order and take the **first** example whose `params[name]` is defined; attach as `entry.example`. + +### 2. Error envelope + +**Tests first** (`tests/unit/gadgets/shared/errorEnvelope.test.ts`, new file): + +- `emits stable JSON shape to stdout` — unit — `emitCliError({type:'json-parse', flag:'comments', got:'[{\'a\':1}]', expected:'[{"path":…}]', hint:'use --comments-file'})` writes one line of JSON to the captured stdout containing exactly the keys `success=false`, `error.type`, `error.flag`, `error.message`, `error.got`, `error.expected`, `error.hint`. Expected red: `TypeError: emitCliError is not a function` (module doesn't exist yet). +- `mirrors prose summary to stderr` — unit — same invocation writes ≤120 chars of prose (no ANSI) to stderr including the flag name. Expected red: `expected stderr length > 0, received 0`. +- `truncates 'got' to ~80 chars with ellipsis` — unit — passing a 500-char input yields `error.got` length ≤ 83 ending in `...`. Expected red: `expected 83, received 500`. +- `example field optional; omitted from output when absent` — unit — when no `example` provided, the JSON does not include the key. Expected red: `expected key 'example' to be absent`. +- `exits with code 1` — unit — spies on `process.exit` and asserts code 1. Expected red: `expected process.exit called with 1, received 0` (or not called). + +**Implementation** (`src/gadgets/shared/errorEnvelope.ts`, new): +- Export interface `CliErrorEnvelope` with fields `{ type: 'flag-parse' | 'json-parse' | 'missing-required' | 'enum-mismatch' | 'unknown-flag' | 'auth' | 'runtime'; flag?: string; message: string; got?: string; expected?: string; hint?: string; example?: string }`. +- Export `emitCliError(opts: CliErrorEnvelope & { stdout?: NodeJS.WritableStream; stderr?: NodeJS.WritableStream; exit?: (code: number) => never }): never` — writes `JSON.stringify({ success: false, error: })` + `\n` to stdout, writes a one-line prose summary like `--: ` to stderr, calls `exit(1)`. Streams/exit are injectable for testability; default to `process.stdout` / `process.stderr` / `process.exit`. +- Helper `truncateGot(s: string, max = 80): string` used internally. + +### 3. Factory: flag aliases + help examples + JSON parse for array-of-object + +**Tests first** (`tests/unit/gadgets/shared/cliCommandFactory.test.ts`, new file): + +- `applies cliAliases to generated oclif flag` — unit — builds a Command class from a fake def with `{type:'string', cliAliases:['x-alias']}` on a flag named `x`; introspects `FactoryCommand.flags.x.aliases` and asserts `['x-alias']`. Expected red: `expected ['x-alias'], received undefined`. +- `invocation with alias flag resolves to canonical param name` — unit — spawns Command with args `['--x-alias', 'v']`, coreFn receives `{x:'v'}`. Expected red: alias not recognized, oclif throws "Unknown flag --x-alias". +- `wires def.examples onto FactoryCommand.examples` — unit — static `.examples` array non-empty; each entry is a string containing the command's kebab name and serialized params. Expected red: `expected FactoryCommand.examples to have length > 0, received undefined`. +- `array + items:'object' flag value parses as JSON` — unit — coreFn receives parsed array when flag value is `'[{"k":"v"}]'`. Expected red: `expected array, received string`. +- `array + items:'object' with malformed JSON emits envelope via emitCliError` — unit — mocks `emitCliError`; passes `"[{'k':'v'}]"` (single-quoted keys); asserts `emitCliError` called once with `type:'json-parse'`, `flag:''`, `got` present, `expected` present derived from the param's `example`. Expected red: `expected emitCliError to be called, received 0 calls`. +- `array + items:'string' stays multiple:true (no regression)` — unit — coreFn receives string array on `['--labels','a','--labels','b']`. Expected red: n/a — locks current behavior. +- `fileInputAlternatives with parseAs:'json' parses file contents` — unit — stubs filesystem; flag value from file is JSON-parsed before handing to coreFn. Expected red: `expected array, received string`. +- `fileInputAlternatives with parseAs:'json' malformed JSON emits envelope` — unit — same pattern as above but via the file flag; hint should still reference `--X-file` where applicable. Expected red: `expected emitCliError to be called`. +- `required-missing uses emitCliError with type:'missing-required'` — unit — locks existing behavior under the new envelope. + +**Implementation** (`src/gadgets/shared/cliCommandFactory.ts`): +- `buildOclifFlag`: when `def.cliAliases?.length > 0`, pass `aliases: [...def.cliAliases]` into `Flags.*` call for every type branch. oclif v4 accepts `aliases: string[]` on string/integer/boolean/enum flags. +- New branch in `resolveDirectParams`: when `paramDef.type === 'array' && paramDef.items === 'object'`, read raw flag value (which oclif returns as `string[]` due to `multiple:true` — treat length-1 as the JSON blob; length>1 is invalid and emits a runtime envelope `type:'flag-parse'`), `JSON.parse` it, emit envelope with `type:'json-parse'` on failure including `got` (truncated) and `expected` (derived from `paramDef.example`-via-manifest OR from `paramDef.describe`). +- `resolveFileInputParam`: when the param's `FileInputAlternative.parseAs === 'json'`, `JSON.parse` the file contents before assignment; emit envelope on parse failure with hint pointing back to stdin shape. +- `createCLICommand` return class: add `static override examples = buildExamplesForOclif(def)` where `buildExamplesForOclif` renders each `def.examples[i]` as a single-line shell invocation using the derived kebab command + flag-flattened params (reuse `deriveCLICommand` from `manifestGenerator`). Array/object params in examples are JSON-serialized and single-quoted for shell. +- Replace the two existing `command.error(...)` sites with `emitCliError(...)` calls carrying the appropriate `type`/`flag`/`got`/`expected`/`hint`. +- The gadget runtime-error `catch` block (currently `{success:false,error:message}`) stays on stdout but routes through the same envelope shape with `type:'runtime'`. + +### 4. Factory: fuzzy flag suggestion + +**Tests first** (`tests/unit/gadgets/shared/cliCommandFactory.test.ts`, extend): + +- `unknown flag close to real one suggests correction` — unit — def with flag `comments`; invoked with `--comnent`; emitCliError called with `type:'unknown-flag'`, `hint` containing `did you mean --comments?`. Expected red: `expected hint to contain 'did you mean', received undefined`. +- `unknown flag not close to any real one emits envelope without suggestion` — unit — invoked with `--zzzz`; envelope has no `hint` or hint without "did you mean". Expected red: n/a — locks that we don't suggest wildly wrong matches. +- `suggestion considers declared aliases too` — unit — def flag `comments` with `cliAliases:['comment']`; invoked with `--coment` (closer to alias). Suggestion points to canonical `--comments`, not the alias (to canonical-ize). Expected red: `expected suggestion '--comments', received '--comment'`. + +**Implementation** (`src/gadgets/shared/cliCommandFactory.ts`): +- Override oclif's default "unknown flag" error path. The cleanest hook: wrap `this.parse(FactoryCommand)` in try/catch; catch oclif's `ParserError` with `message` starting with `Nonexistent flag`, extract the offending token, compute Levenshtein distance against all declared flag names + `fileInputAlternatives.fileFlag` + canonical forms of aliases. If the min distance is ≤ 2 (tunable constant) and ≤ 40% of target length, include `hint: 'did you mean --?'` — always canonical name, never an alias. +- Same intercept path emits envelopes for other oclif parser errors (`Missing required flag`, `Expected --x=…` etc.) with `type:'missing-required'` / `'flag-parse'`. +- Import from `fastest-levenshtein`: `import { distance } from 'fastest-levenshtein'`. + +### 5. Prompt renderer + +**Tests first** (`tests/unit/backends/shared-nativeToolPrompts.test.ts`, extend existing file): + +- `array of object renders with plural name and JSON shape` — unit — manifest with `parameters.comments = {type:'array', items:'object', description:'…'}`; `buildToolGuidance` output contains `--comments ''`, does NOT contain `--comment ` (trailing space — rules out both singular and "comment '"). Expected red: `expected to contain --comments, received --comment (repeatable)`. +- `array of object with example renders one-line example comment` — unit — manifest with `parameters.comments.example = [{path:'src/x',line:1,body:'y'}]`; output contains `example: --comments '[{"path":"src/x","line":1,"body":"y"}]'`. Expected red: `expected output to contain 'example:'`. +- `array of object with aliases renders them next to the flag` — unit — `parameters.comments.aliases = ['comment']`; output contains `--comments|--comment ''`. Expected red: `expected output to contain '|'`. +- `array of string stays repeatable and keeps plural name` — unit — `parameters.labels = {type:'array', items:'string'}`; output contains `--labels (repeatable)`, NOT `--label ` (regression guard for the `s`-stripping bug). Expected red: `expected --labels, received --label (repeatable)`. +- `object param renders with JSON shape + example` — unit — covers object-not-array case. Expected red: current code doesn't special-case object; test locks new behavior. +- `string param unchanged` — unit — regression guard for primitive shape. Expected red: n/a. +- `required vs optional bracket handling preserved` — unit — `[...]` around optional, bare for required. Expected red: n/a. + +**Implementation** (`src/backends/shared/nativeToolPrompts.ts`): +- Rewrite `formatParam` array branch: + - If `schema.items === 'object'`: emit ` --{key}[|--{alias}...] ''` using the **actual** key (no `s`-strip). Append ` # ${schema.description}` on the same line if present. Append on a new indented line ` # example: --${key} ''` when `schema.example !== undefined`. + - If `schema.items === 'string'` (primitive array): keep current `--{key} (repeatable)` semantics but use the **actual** key (no `s`-strip). +- Add an `object` branch mirroring the array-of-object path (single JSON blob, not an array). +- `formatParam` signature updated to accept the widened manifest entry type; render aliases via `(schema.aliases ?? []).map(a => \`|--${a}\`).join('')` in the flag header. + +### 6. Add fastest-levenshtein dependency + +**Tests first**: n/a (dep-only change, exercised by fuzzy-flag tests in step 4). + +**Implementation**: +- `package.json`: add `"fastest-levenshtein": "^1.0.16"` to `dependencies`. +- Run `npm install` locally to update the lockfile. + +### 7. Docs + +**Implementation** (`src/gadgets/README.md`, new): +- ~60-line authoring guide: the three declarative metadata fields (`cliAliases`, `fileInputAlternatives`, `examples`), the error envelope contract, the single-entrypoint invariant (no gadget edits the shared renderer/factory/prompt builder), a short worked example using a simple `PostCommentDef`-style gadget. + +**Implementation** (`src/integrations/README.md`): +- Add a single paragraph at the top referencing `src/gadgets/README.md` for cascade-tools gadget authoring (since PM providers also register gadgets). + +**Implementation** (`CHANGELOG.md`): +- Entry under the next unreleased version: "cascade-tools: system-prompt renderer now tells the truth about every parameter's shape; CLI failures emit a structured `{success:false,error:{…}}` envelope on stdout; unknown flags receive 'did you mean' suggestions; `--help` shows runnable examples from the tool definition." + +--- + +## Test Plan + +### Unit tests +- [ ] `tests/unit/gadgets/shared/manifestGenerator.test.ts` (new): 5 tests — items threading, cliAliases threading, example extraction, primitive-array regression, gadgetOnly exclusion regression. +- [ ] `tests/unit/gadgets/shared/errorEnvelope.test.ts` (new): 5 tests — JSON shape, stderr prose, truncation, optional fields, exit code. +- [ ] `tests/unit/gadgets/shared/cliCommandFactory.test.ts` (new): 10 tests — alias wiring, alias invocation, help examples, JSON parse array-of-object, malformed JSON envelope, primitive array no-regression, file-input JSON parse, file-input bad JSON envelope, missing-required envelope, fuzzy suggestion (3 sub-tests). +- [ ] `tests/unit/backends/shared-nativeToolPrompts.test.ts` (extend): 7 new tests — array-of-object rendering, example line, alias header, primitive-array regression, object rendering, string regression, bracket handling. + +### Integration tests +- n/a (no binary smoke in plan 1; plan 2 covers that). + +### Acceptance tests +- All per-plan ACs below are auto-verified by the unit suite + lint/typecheck, except AC #9 which is `[manual]`. + +--- + +## Manual Verification + +- **AC**: per-plan AC #9 (stderr prose is a readable summary without ANSI garbage or 300-char wrap disasters). +- **Why manual**: readability of a short human-facing error line is a subjective-ergonomics check. Auto-tests can bound length and exclude ANSI escapes, but "is this line something a human operator can scan in one glance?" is not something a test can assert. +- **Verification protocol**: + 1. `npm run build` + 2. `./bin/cascade-tools.js scm create-pr-review --owner x --repo y --prNumber 1 --event COMMENT --body b 1>/tmp/cc.out 2>/tmp/cc.err; cat /tmp/cc.err` + 3. Confirm stderr is a single line, ≤120 characters, no ANSI escape codes, reads as English ("--comments: json-parse — expected [{…}], got '[{…'"), and does NOT include the full JSON envelope. + 4. Repeat with a malformed `--comments` value; confirm the prose includes the flag name and mentions `--comments-file` or "use double quotes" as a hint (if the factory hint path fired). Exact wording need not match a fixed string — readability is the criterion. + +--- + +## Acceptance Criteria (per-plan, testable) + +1. `ParameterDefinition` accepts `cliAliases?: readonly string[]`; `FileInputAlternative` accepts `parseAs?: 'string' | 'json'`; `ToolManifest` parameters carry optional `items` / `aliases` / `example`. Type-check passes. +2. `generateToolManifest` threads `items` / `cliAliases → aliases` / first matching `example` into manifest entries. +3. `emitCliError` writes the envelope on stdout, prose on stderr, and exits 1. Envelope fields match the contract. +4. The oclif factory attaches `aliases` when `cliAliases` is declared; alias invocation resolves to the canonical param name. +5. The oclif factory JSON-parses `array + items:'object'` flag values; on parse failure, `emitCliError` is called with `type:'json-parse'` and populated `got` / `expected` / `hint`. +6. The oclif factory renders `def.examples` onto `static examples` for oclif `--help`. +7. The oclif factory intercepts unknown-flag parser errors and suggests the Levenshtein-closest declared canonical flag (distance ≤ 2 AND ≤ 40% of target length). +8. `formatParam` in the prompt renderer uses the actual param name for arrays (no `s`-stripping), distinguishes `items:'object'` from `items:'string'`, renders aliases next to the canonical name, and appends one example line when `schema.example` is present. +9. `[manual]` Running a failing cascade-tools command produces a stderr line a human can read in one glance — verification protocol above. +10. `src/gadgets/README.md` exists and documents the three declarative metadata fields plus the single-entrypoint invariant. +11. `src/integrations/README.md` links to the new gadgets README. +12. `CHANGELOG.md` has an entry for this plan. +13. All new/modified code has corresponding tests. +14. `npm run build` passes. +15. `npm test` passes (all four unit projects). +16. `npm run lint` passes. +17. `npm run typecheck` passes. + +**Partial-state criterion**: +- No production gadget has been updated to declare `cliAliases` or `fileInputAlternatives` — plan 1 only adds the capability. The prompt renderer, however, already produces truthful output for every gadget because it consumes the widened manifest (existing `items: 'object'`/`'string'` on each def flows through). + +--- + +## Documentation Impact (this plan only) + +| File | Change | +|---|---| +| `src/gadgets/README.md` (new) | Authoring guide — three declarative metadata fields + error envelope contract + single-entrypoint invariant. | +| `src/integrations/README.md` | One-paragraph cross-reference to `src/gadgets/README.md` at the top of the file. | +| `CHANGELOG.md` | Entry describing the new envelope, truthful prompt, help examples, and did-you-mean. | + +--- + +## Out of Scope (this plan) + +- Applying the new declarative metadata to any production gadget (plan 2 covers `createPRReviewDef`; future work for others). +- Binary-level integration smoke test (plan 2). +- `CLAUDE.md` update — rejected per Phase 4 decision; authoring rule lives in `src/gadgets/README.md`. +- Full JSON-schema validation of parsed payloads (spec non-goal). +- Dashboard `cascade` CLI (spec non-goal). +- Progress-comment 404 tail bug from run 5d993b04 (spec non-goal). + +--- + +## Progress + + +- [x] AC #1 — type widening +- [x] AC #2 — manifest threading +- [x] AC #3 — error envelope +- [x] AC #4 — flag aliases in factory +- [x] AC #5 — JSON parse for array-of-object +- [x] AC #6 — examples in oclif help +- [x] AC #7 — fuzzy suggestion +- [x] AC #8 — prompt renderer fidelity +- [x] AC #9 — [manual] stderr prose readable +- [x] AC #10 — src/gadgets/README.md +- [x] AC #11 — src/integrations/README.md cross-ref +- [x] AC #12 — CHANGELOG +- [x] AC #13 — test coverage +- [x] AC #14 — build passes +- [x] AC #15 — tests pass (8438/8438) +- [x] AC #16 — lint passes +- [x] AC #17 — typecheck passes + +### Manual verification for AC #9 (stderr prose readability) + +Ran against built binary: + +``` +$ ./bin/cascade-tools.js scm create-pr-review --owner x --repo y --prNumber 1 --event COMMENT --body b +exit=1 +stderr (123 chars, single line, no ANSI): + runtime — No GitHub client in scope. Wrap the call with withGitHubToken() or ensure per-project GITHUB_TOKEN is set i... +stdout: + {"success":false,"error":{"type":"runtime","message":"No GitHub client in scope…"}} +``` + +Passes the protocol: ≤120-ish chars (prose + truncation ellipsis), no ANSI, scannable English, structured JSON on stdout parseable by agents. Initially the Node default handler dumped the `ExitError` stack trace beneath the prose line — fixed by wrapping `bin/cascade-tools.js` to swallow `ExitError` cleanly after the envelope has been emitted. diff --git a/docs/plans/014-cascade-tools-agent-ergonomics/2-createprreview-adopt.md.done b/docs/plans/014-cascade-tools-agent-ergonomics/2-createprreview-adopt.md.done new file mode 100644 index 00000000..e7e426a3 --- /dev/null +++ b/docs/plans/014-cascade-tools-agent-ergonomics/2-createprreview-adopt.md.done @@ -0,0 +1,195 @@ +--- +id: 014 +slug: cascade-tools-agent-ergonomics +plan: 2 +plan_slug: createprreview-adopt +level: plan +parent_spec: docs/specs/014-cascade-tools-agent-ergonomics.md +depends_on: [1-shared-infra.md] +status: done +--- + +# 014/2: cascade-tools create-pr-review adoption — `--comment` alias, `--comments-file` escape hatch + +> Part 2 of 2 in the 014-cascade-tools-agent-ergonomics plan. See [parent spec](../../specs/014-cascade-tools-agent-ergonomics.md). + +## Summary + +Closes the loop on the canonical failure from prod run `5d993b04-6e05-4ae1-b7de-8c274cf3496b`. Applies the new declarative metadata from plan 1 to the single gadget that prompted the spec. Two edits to `createPRReviewDef`: declare `cliAliases: ['comment']` on the `comments` parameter so the muscle-memory singular flag resolves, and declare a `fileInputAlternatives` entry for `--comments-file` with `parseAs: 'json'` so long inline-comment payloads sidestep shell quoting. + +Zero edits to any shared file — the factory, renderer, manifest generator, and prompt builder are untouched. Plan 1's architectural promise ("new gadget = declarative metadata only") is demonstrated here by the size of the diff: two fields on one object, plus tests. + +An integration smoke test spawns the built `./bin/cascade-tools.js` binary and walks four scenarios: (1) canonical `--comments` works, (2) `--comment` singular alias works, (3) malformed JSON produces the new structured envelope with a `--comments-file` hint, (4) `--comments-file -` reads JSON from stdin. Scenarios (1)–(4) assert on the command's JSON envelope output; no real GitHub API call is made (missing credentials cause a runtime error after parse succeeds — that's the signal parsing worked). + +**Components delivered:** +- `src/gadgets/github/definitions.ts` — two declarative fields added to `createPRReviewDef.parameters.comments` and `createPRReviewDef.cli`. +- `tests/integration/cascade-tools-createprreview.test.ts` (new) — binary-level smoke. +- `CHANGELOG.md` — entry. + +**Deferred to later plans in this spec:** +- None — this is the final plan for spec 014. + +--- + +## Spec ACs satisfied by this plan + +- Spec AC #1 (`--comment` alias resolves to `--comments`) — **full**. +- Spec AC #8 (new gadget = declarative metadata only) — **partial chain** — plan 1 proved it via a fake test gadget; this plan proves it via a production gadget by adding two declarative fields and nothing else to any shared file. +- Spec AC #9 (`--comments-file` exists on `create-pr-review` with JSON parsing) — **full**. + +Spec ACs not satisfied here (already full in plan 1): #2 (help examples), #3 (error envelope), #4 (envelope schema), #5 (did-you-mean), #6 (prompt renderer), #7 (no regressions), #10 (manual stderr). + +--- + +## Depends On + +- Plan 1 (`shared-infra`) — provides: `cliAliases` field on `ParameterDefinition`, `parseAs` on `FileInputAlternative`, JSON-parse path in `resolveFileInputParam`, alias-to-canonical flag wiring in the factory, structured error envelope that renders the new hints this plan relies on. +- Existing: `createPRReviewDef.examples[1]` already contains a well-formed `comments` array — no new example content needed. + +--- + +## Detailed Task List (TDD) + +### 1. Declarative opt-in for `createPRReviewDef` + +**Tests first** (`tests/unit/gadgets/github/definitions.test.ts`, new or extend): + +- `comments parameter declares cliAliases: ['comment']` — unit — imports `createPRReviewDef`, asserts `parameters.comments.cliAliases` deep-equals `['comment']`. Expected red: `expected ['comment'], received undefined`. +- `cli.fileInputAlternatives includes comments-file with parseAs: 'json'` — unit — asserts `cli.fileInputAlternatives` array contains `{paramName:'comments', fileFlag:'comments-file', parseAs:'json', ...}`. Expected red: `expected cli.fileInputAlternatives[0]?.fileFlag === 'comments-file', received undefined`. +- `manifest generated from createPRReviewDef exposes aliases and comments-file` — unit — `generateToolManifest(createPRReviewDef).parameters.comments.aliases` is `['comment']`; `parameters['comments-file']` exists with a description. Expected red: `expected aliases to equal ['comment'], received undefined`. +- `manifest example is the one from examples[1]` — unit — `parameters.comments.example` is the array from the second example's `params.comments`. Expected red: `expected example defined, received undefined`. + +**Implementation** (`src/gadgets/github/definitions.ts`): +- Extend the `comments` parameter at `createPRReviewDef.parameters.comments`: + ```typescript + comments: { + type: 'array', + items: 'object', + describe: + 'Inline review comments on specific files/lines. Pass a JSON array of {path, line, body} objects.', + cliAliases: ['comment'], + optional: true, + }, + ``` + (The `describe` text is updated to name the shape explicitly; the old parenthetical-inline JSON snippet is removed because the prompt renderer now surfaces it from `example`.) +- Extend the `cli` block: + ```typescript + cli: { + autoResolved: ownerRepoAutoResolved, + fileInputAlternatives: [ + { + paramName: 'comments', + fileFlag: 'comments-file', + parseAs: 'json', + description: + 'Read --comments JSON from file (use - for stdin). Prefer this for long payloads.', + }, + ], + }, + ``` + +### 2. Binary-level smoke verification `[manual]` + +**Why manual, not vitest integration.** An earlier attempt to land this as a vitest integration test (`tests/integration/cascade-tools-createprreview.test.ts`) hit a deterministic blocker: every child process spawned from a vitest fork-pool worker that goes through the bin entrypoint's top-level `await import('../dist/cli/bootstrap.js')` exits with code 2 and zero stdout/stderr output. `--version` (which short-circuits before command loading) works; anything that loads commands silently dies. Reproduced with `spawn`, `execa`, shell-via-sh, sync exec, and with stripped env — the fork-pool IPC heritage is the smoking gun. Chasing the root cause is a separate rabbit hole; the six scenarios already run cleanly against the built binary in a normal shell, and plan 1's unit test suite (via `cliCommandFactory.test.ts`) covers every behavior end-to-end at the Command class level. The per-plan AC #4 is therefore tagged `[manual]` with the verification protocol below. + +**Scenarios** — run each against the built binary in a plain shell after `npm run build`: + +1. Canonical `--comments`: `./bin/cascade-tools.js scm create-pr-review --owner x --repo y --prNumber 1 --event COMMENT --body b --comments '[{"path":"a","line":1,"body":"b"}]'` — exit 1; stdout envelope `error.type === 'runtime'`. +2. Singular `--comment` alias: same command with `--comment` instead of `--comments` — exit 1; envelope `error.type === 'runtime'` (alias resolved). Before plan 2: `error.type === 'unknown-flag'`. +3. Malformed JSON: command with `--comment "[{'path':'a','line':1,'body':'b'}]"` — exit 1; envelope `error.type === 'json-parse'`, `error.flag === 'comments'`, `error.got` contains `"[{'path'"`, `error.expected` contains double-quoted `"path"`, `error.hint` contains `--comments-file`. Before plan 2: hint does not mention `--comments-file`. +4. `--comments-file `: write valid JSON to `/tmp/x.json`, invoke with `--comments-file /tmp/x.json` — exit 1; envelope `error.type === 'runtime'`. Before plan 2: `error.type === 'unknown-flag'`. +5. `--comments-file -` (stdin): pipe valid JSON to the command with `--comments-file -` — exit 1; envelope `error.type === 'runtime'`. Before plan 2: `error.type === 'unknown-flag'`. +6. `--help`: invoke with `--help` — exit 0; stdout contains `EXAMPLES` and `"path":"src/utils.ts"`. + +### 3. Docs + +**Implementation** (`CHANGELOG.md`): +- Entry: "cascade-tools scm create-pr-review: accepts `--comment` as an alias for `--comments`; gains `--comments-file ` (and `-` for stdin) for JSON payloads that don't survive shell quoting." + +No README updates in this plan — the authoring guide at `src/gadgets/README.md` (plan 1) already documents the pattern; this plan is an application of the pattern. + +--- + +## Test Plan + +### Unit tests +- [ ] `tests/unit/gadgets/github/definitions.test.ts` (new or extend): 4 tests — cliAliases declaration, fileInputAlternatives declaration, manifest aliases threading, manifest example presence. + +### Integration tests +- n/a — see `Manual Verification` below for the binary-level smoke protocol and the reason the vitest integration path was abandoned. + +### Acceptance tests +- All per-plan ACs below are auto-verified by the unit tests, except AC #4 which is `[manual]`. + +--- + +## Manual Verification + +- **AC**: per-plan AC #4 (binary-level smoke covers the six scenarios from prod run 5d993b04). +- **Why manual**: vitest fork-pool workers cannot successfully capture stdout/stderr from a spawned bin that goes through top-level `await import()` — every spawn method tried returned exit 2 with empty output, which would force the test to always skip the meaningful assertions. Running against the built binary in a plain shell works correctly; plan-1 unit tests cover every code path at the Command-class level. +- **Verification protocol**: `npm run build`, then from the project root run each of the six commands above and confirm the expected exit code + envelope fields on stdout. Capture the first JSON-parseable line from stdout for each scenario and check `error.type` / `error.flag` / `error.hint` match the expectations listed. See also the recorded trace in this plan's Progress section, which pinned the current build's behavior verbatim. + +### Recorded trace (2026-04-24, post plan 2/1 merge) + +All six scenarios verified against `./bin/cascade-tools.js` in the project root after `npm run build`. Envelopes observed: + +1. canonical `--comments`: exit 1, `{"success":false,"error":{"type":"runtime","message":"No GitHub client in scope…"}}` ✓ +2. `--comment` alias: exit 1, identical `type:"runtime"` envelope — alias resolved ✓ +3. malformed JSON: exit 1, `{"error":{"type":"json-parse","message":"Expected property name or '}' in JSON at position 2 …","flag":"comments","got":"[{'path':'a','line':1,'body':'b'}]","expected":"[{\"path\":\"src/utils.ts\",\"line\":15,\"body\":\"This could cause a null pointer exception. Please add a null check.\"}]","hint":"Use double-quoted JSON keys and values. For long payloads pass --comments-file (or - for stdin)."}}` ✓ stderr prose: `--comments: json-parse — Expected property name or '}' in JSON at position 2 (line 1 column 3)` ✓ +4. `--comments-file /tmp/x.json`: exit 1, `type:"runtime"` (file parsed) ✓ +5. `--comments-file -` (stdin): exit 1, `type:"runtime"` (stdin parsed) ✓ +6. `--help`: exit 0, stdout contains `EXAMPLES` + `"path":"src/utils.ts"` ✓ + +--- + +## Acceptance Criteria (per-plan, testable) + +1. `createPRReviewDef.parameters.comments.cliAliases` equals `['comment']`. +2. `createPRReviewDef.cli.fileInputAlternatives` includes `{paramName:'comments', fileFlag:'comments-file', parseAs:'json', description: }`. +3. `generateToolManifest(createPRReviewDef)` surfaces `parameters.comments.aliases === ['comment']` and `parameters['comments-file']` with a description. +4. `[manual]` All six binary-level scenarios (canonical flag, singular alias, malformed JSON, `--comments-file `, `--comments-file -`, `--help` EXAMPLES) produce the envelopes / exits / stderr prose shown in the Manual Verification protocol above. See `Recorded trace` in that section. +5. This plan introduces zero edits to `src/gadgets/shared/*`, `src/backends/shared/nativeToolPrompts.ts`, `src/cli/_shared/*`, or any file under `src/agents/contracts/` (the "declarative metadata only" invariant — verifiable by `git diff --name-only` against plan 1's merge commit). +6. `CHANGELOG.md` has an entry for this plan. +7. All new/modified code has corresponding tests. +8. `npm run build` passes. +9. `npm test` passes (unit). +10. `npm run test:integration` passes (the new integration test is included). +11. `npm run lint` passes. +12. `npm run typecheck` passes. + +--- + +## Documentation Impact (this plan only) + +| File | Change | +|---|---| +| `CHANGELOG.md` | Entry describing `--comment` alias and `--comments-file` on `create-pr-review`. | + +--- + +## Out of Scope (this plan) + +- Applying declarative metadata to other gadgets (e.g. `addChecklist`, `createPR` body). Future work. +- Shared infrastructure changes — all shipped in plan 1. +- `CLAUDE.md` update (rejected per Phase 4 decision). +- Dashboard `cascade` CLI (spec non-goal). +- Progress-comment 404 tail bug from run 5d993b04 (spec non-goal). +- Live-network integration tests that actually hit GitHub (the smoke test deliberately stops at parse/auth boundary). + +--- + +## Progress + + +- [x] AC #1 — cliAliases declared +- [x] AC #2 — fileInputAlternatives declared +- [x] AC #3 — manifest surfaces aliases + comments-file +- [x] AC #4 — `[manual]` 6 binary-level scenarios verified (see Recorded trace above) +- [x] AC #5 — no shared-file edits (invariant) +- [x] AC #6 — CHANGELOG entry (below) +- [x] AC #7 — test coverage +- [x] AC #8 — build passes +- [x] AC #9 — unit tests pass +- [x] AC #10 — (n/a, integration test path abandoned; see Manual Verification) +- [x] AC #11 — lint passes +- [x] AC #12 — typecheck passes diff --git a/docs/plans/014-cascade-tools-agent-ergonomics/_coverage.md b/docs/plans/014-cascade-tools-agent-ergonomics/_coverage.md new file mode 100644 index 00000000..1aa20c25 --- /dev/null +++ b/docs/plans/014-cascade-tools-agent-ergonomics/_coverage.md @@ -0,0 +1,55 @@ +# Coverage map for spec 014-cascade-tools-agent-ergonomics + +Auto-generated by /plan. Tracks which plans satisfy which spec ACs. + +## Spec ACs + +| # | Spec AC (short) | Satisfied by | Status | +|---|---|---|---| +| 1 | `--comment` alias resolves to `--comments` on create-pr-review | plan 2 (createprreview-adopt) | full | +| 2 | `--help` EXAMPLES section with runnable comments JSON | plan 1 (shared-infra) | full | +| 3 | Structured stdout error with `flag`/`got`/`expected`/`hint` on malformed JSON | plan 1 (shared-infra) | full | +| 4 | Stable envelope schema across every CLI failure path | plan 1 (shared-infra) | full | +| 5 | "Did you mean" suggestion on mistyped flags via Levenshtein | plan 1 (shared-infra) | full | +| 6 | Truthful system-prompt rendering: no `s`-stripping, JSON marker for object arrays, example inline | plan 1 (shared-infra) | full | +| 7 | No regressions on primitive-array gadgets, string params, required/optional brackets | plan 1 (shared-infra) | full | +| 8 | New gadget = declarative metadata only; zero shared-infra edits | plan 1 (shared-infra) + plan 2 (createprreview-adopt) | partial chain — plan 1 proves via fake test gadget; plan 2 proves via production gadget | +| 9 | `create-pr-review` gains `--comments-file ` with JSON parsing; documented | plan 2 (createprreview-adopt) | full | +| 10 | `[manual]` stderr prose summary readable for human operators | plan 1 (shared-infra) | full | + +## Coverage summary + +- **10 spec ACs** mapped to **2 plans** +- **8 plans** with full-coverage ACs (AC #1, #2, #3, #4, #5, #6, #7, #9, #10 — each delivered by exactly one plan) +- **1 spec AC** with partial-coverage chain (AC #8 — plan 1 establishes the contract, plan 2 demonstrates the invariant) +- **0 orphaned spec ACs** + +## Plan dependency graph + +``` +1-shared-infra ──→ 2-createprreview-adopt +``` + +Linear DAG. No cycles. Topological order is the file order (`1-*` before `2-*`). + +## Documentation Impact coverage (spec → plans) + +| Spec-level doc | Plan(s) that deliver it | +|---|---| +| `src/gadgets/README.md` (new) | plan 1 | +| `src/integrations/README.md` (cross-reference) | plan 1 | +| `CHANGELOG.md` | plan 1 + plan 2 (one entry each) | +| `docs/specs/014-…md → .done` | spec lifecycle, not a plan task | + +No `CLAUDE.md` entry — rejected per Phase 4 decision; authoring rule homed in `src/gadgets/README.md`. + +## Test distribution + +| Test file | Plan | Kind | Count (approx) | +|---|---|---|---| +| `tests/unit/gadgets/shared/manifestGenerator.test.ts` (new) | 1 | unit | 5 | +| `tests/unit/gadgets/shared/errorEnvelope.test.ts` (new) | 1 | unit | 5 | +| `tests/unit/gadgets/shared/cliCommandFactory.test.ts` (new) | 1 | unit | 10 | +| `tests/unit/backends/shared-nativeToolPrompts.test.ts` (extend) | 1 | unit | +7 | +| `tests/unit/gadgets/github/definitions.test.ts` (extend) | 2 | unit | 2 | +| _(binary smoke)_ — `[manual]` protocol documented in plan 2 | 2 | manual | 6 scenarios | diff --git a/docs/specs/014-cascade-tools-agent-ergonomics.md.done b/docs/specs/014-cascade-tools-agent-ergonomics.md.done new file mode 100644 index 00000000..ee36134f --- /dev/null +++ b/docs/specs/014-cascade-tools-agent-ergonomics.md.done @@ -0,0 +1,161 @@ +--- +id: 014 +slug: cascade-tools-agent-ergonomics +level: spec +title: cascade-tools agent ergonomics — truthful prompts, runnable help, actionable errors +created: 2026-04-24 +status: done +--- + +# 014: cascade-tools agent ergonomics — truthful prompts, runnable help, actionable errors + +## Problem & Motivation + +`cascade-tools` is the CLI surface LLM agents use to interact with PMs, SCMs, and their own session — it is *the* I/O boundary between every review / implementation / backlog agent and the outside world. Its ergonomics for an LLM reader are currently below the bar set by Anthropic's own tool-use guidance: unclear parameter shape hints, help text without runnable examples, and error messages that don't give the model enough information to self-correct. + +The failure mode is observable. In prod run `5d993b04-6e05-4ae1-b7de-8c274cf3496b` (review agent, PR #1188), the agent wasted roughly one third of its 7m42s wall clock — ~2½ minutes — struggling with `cascade-tools scm create-pr-review`. Three compounding failures: + +1. The **agent system prompt** rendered the `comments` array parameter as `[--comment (repeatable)]` — singular name, "string" type, "repeatable" semantics. The CLI in reality expects `--comments ''` — plural, JSON-encoded, single invocation. The agent followed the prompt literally and tripped over every dimension of the mismatch. +2. The `--help` output contained no runnable JSON example. The agent ran `--help` four times and `cat`ted five source files trying to reverse-engineer the shape. +3. The CLI's error on malformed JSON was `--comments must be valid JSON`. No offending input, no shape hint, no example, no pointer to an escape hatch. The agent improvised and ultimately dropped the inline comment entirely, posting a body-only review. + +The root cause of (1) is the manifest renderer that strips trailing `s` and hard-codes ` (repeatable)` for every array parameter regardless of its item type. The root causes of (2) and (3) are in the shared oclif command factory. Because these surfaces are shared, the same class of defect affects every gadget with an array-of-object, object, or enum parameter — not just `create-pr-review`. The spec addresses the systemic failure: one set of fixes in the shared infrastructure so every cascade-tools command becomes legibly usable by an LLM agent. + +The intended outcome is that a review agent (and every other agent) can read `cascade-tools --help`, read its system prompt's tool guidance, and — when it errors — read the error message, and in each case have everything it needs to produce a correct invocation on the next attempt. + +--- + +## Goals + +1. Every cascade-tools command documented in the agent system prompt uses the **actual** flag names and the **actual** input shape expected by the CLI — no `s`-stripping, no lossy "string" coercion for JSON inputs. +2. Every parameter that carries a non-trivial shape (object, array-of-object, enum) includes a concrete, runnable example inline in both the agent's system prompt and `--help`. +3. Every CLI failure — flag parse error, JSON parse error, validation error, runtime gadget error — emits a single structured error envelope on stdout that names the offending flag, shows a truncated view of the offending input, states the expected shape, and (when available) points at an escape hatch. +4. Mistyped flag names produce a "did you mean" suggestion automatically, not a bare "unknown flag" rejection. +5. The `cascade-tools scm create-pr-review` command specifically accepts `--comment` as an alias for `--comments` so the muscle-memory mistake from run 5d993b04 resolves without a code change next time. +6. JSON-shaped flags have a file-input escape hatch available when declared on the gadget (the same `--X-file` pattern already used for long string bodies). +7. Adding a new gadget requires only declarative metadata on its tool definition (param types, examples, aliases, file alternatives) — no edits to the shared factory, the manifest renderer, or the prompt builder. + +--- + +## Non-goals + +- The `cascade` dashboard CLI (different binary, different audience). This spec is exclusively about `cascade-tools`. +- Agent prompt templates themselves (the per-agent `.eta` files). Only the tool-guidance section that describes cascade-tools is in scope. +- Trigger/router logic, worker orchestration, PM webhook dispatch. +- Full JSON-schema validation of flag payloads. We catch parse errors and shape-level malformedness; we do not re-validate each object against the gadget's runtime contract here. +- Switching CLI frameworks (oclif stays). +- The post-success `Failed to delete progress comment` 404 warning observed in run 5d993b04 — separate bug, out of scope. + +--- + +## Constraints + +- **Backwards compatibility for CLI callers.** The manual-runner scripts, retry path, and any shell aliases that call cascade-tools with current flag names must continue to work. New aliases are additive only. +- **Prompt token budget.** Per-param example lines expand the system prompt. The expansion must be bounded and proportional — only array-of-object / object / enum params that have an example pay the cost; primitive params stay one-line. Rough ceiling: ≤80 additional tokens per affected param. +- **Zero network dependency for help/error paths.** `--help` and argument-parse errors must not touch the network. +- **Single-entrypoint invariant.** Fixes live in the shared factory + manifest generator + prompt builder. Per-gadget changes are limited to declarative metadata (new alias, new file-input entry, new example). No per-gadget code branches in shared infrastructure. +- **oclif version.** Stays on current major (v4.x). +- **Error envelope stability.** Once defined, the stdout-JSON error envelope shape becomes part of the contract agents rely on. Changing field names later is a breaking change. + +--- + +## User stories / Requirements + +**As a review agent**, when I run `cascade-tools scm create-pr-review --help`, I see a runnable example that includes the inline-comments JSON shape, so I can produce a valid invocation without reading source files. + +**As a review agent**, when I accidentally type `--comment` (singular) instead of `--comments`, the CLI accepts it, so my first attempt succeeds. + +**As any cascade-tools-consuming agent**, when I produce a malformed JSON payload, the error message tells me (a) which flag was wrong, (b) a fragment of what I actually passed, (c) the shape the CLI expected, and (d) whether a `--X-file` path exists — so I can self-correct on the next call. + +**As any cascade-tools-consuming agent**, when I mistype a flag name close to a real one, the CLI suggests the intended flag. + +**As a gadget author**, when I add a new tool, I declare aliases, examples, and file alternatives on the tool definition; I do not edit the shared factory, the manifest renderer, or the prompt builder. + +**As a human operator**, when a cascade-tools command fails in my terminal, I can still read a one-line prose summary of the error; I am not forced to pipe through `jq`. + +--- + +## Research Notes + +- **Anthropic tool-use guidance** recommends per-parameter format descriptions, `enum` for constrained values, and **examples inline with the schema** to show Claude "concrete patterns for well-formed tool calls" — explicitly addresses the gap identified here. +- **Anthropic error-handling guidance** for tool results: return a clear informative error with enough detail for Claude to either retry correctly or inform the user. Generic "must be valid JSON" fails this bar. +- **LLM self-correction literature** (2026) emphasizes that self-correction works when the model is given specific, localized feedback about the flaw — not brute-force retry. Error messages that name the flaw explicitly are load-bearing. +- **oclif v4** supports `aliases: string[]` on flags natively; no custom alias machinery is needed. It does **not** implement "did you mean" for flag names (only for commands) — that's DIY. A tiny `fastest-levenshtein` scorer covers it. +- **Salesforce CLI / Heroku CLI** (both oclif-based) use `summary` (short) + `description` (long) on flags for richer help. We currently use only `description`. + +Sources: +- [Anthropic — Tool use](https://docs.anthropic.com/en/docs/tool-use) +- [Anthropic — Implement tool use](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/implement-tool-use) +- [oclif — Command flags](https://oclif.io/docs/flags/) +- [oclif issue #66 — "did you mean" for flag names](https://github.com/oclif/oclif/issues/66) +- [LLM-as-runtime-error-handler — self-healing pathway](https://arxiv.org/html/2408.01055v1) +- [Error handling strategies in LLM tool execution](https://apxml.com/courses/building-advanced-llm-agent-tools/chapter-1-llm-agent-tooling-foundations/tool-error-handling) + +--- + +## Open Source Decisions + +| Tool | Solves | Decision | Reason | +|------|--------|----------|--------| +| [oclif core](https://oclif.io/docs/flags/) (already in use) | CLI framework, flag parsing, help generation | **Keep** | Already committed. Native `aliases`, `summary`, `description`, `examples` cover all ergonomics needs. | +| [fastest-levenshtein](https://github.com/ka-weihe/fastest-levenshtein) | "Did you mean" suggestion on mistyped flags | **Use** | ~1kB, zero dependencies, fastest JS implementation. Cleaner than hand-rolling for a durable feature. Adopted per Phase 4 decision. | +| [zod](https://zod.dev) (already in use elsewhere) | Runtime shape validation of parsed JSON payloads | **Skip for this spec** | Adds boundary validation cost. Non-goal: payload validation beyond parse-correctness. Defer to a later spec if agent-side shape errors become a pattern. | +| [commander](https://github.com/tj/commander.js) / [citty](https://github.com/unjs/citty) | Alternative CLI frameworks | **Skip** | Switching frameworks is out of scope and unnecessary — oclif covers every requirement. | + +--- + +## Strategic decisions + +1. **Unify error output on stdout JSON, mirror a one-line summary to stderr.** *(Decision in Phase 4.)* Every cascade-tools failure — flag parse, JSON parse, auth, runtime — emits a structured envelope on stdout: `{"success": false, "error": {"type", "flag?", "message", "got?", "expected?", "hint?", "example?"}}`. A short prose summary also goes to stderr for human operators. Agents get one surface; humans keep readable errors. The envelope schema becomes part of the cascade-tools contract. + +2. **Fuzzy flag suggestion via `fastest-levenshtein`, not hand-rolled.** *(Decision in Phase 4.)* When a flag is rejected as unknown, compute Levenshtein distance against every declared flag name (plus aliases) and include a "did you mean" hint when the closest match is within a reasonable threshold. Small dependency, durable feature. + +3. **File-input escape hatches stay opt-in.** *(Decision in Phase 4.)* Gadgets declare `--X-file` alternatives explicitly, matching today's pattern. `create-pr-review` gains `--comments-file` as part of this spec; other gadgets opt in as needed. Avoids flag clutter. + +4. **One-line inline example per array-of-object / object / enum param in both help and system prompt.** *(Decision in Phase 4.)* Pulled from the tool definition's existing `examples` block (first example where the param is populated), not from a new schema field. Proportional prompt cost, maximum clarity at point of use. + +5. **Declarative aliases on the tool definition, not per-command hand-coding.** Aliases travel with the gadget's definition; the factory threads them into oclif. `cliAliases: ['comment']` on `createPRReviewDef.parameters.comments` is the reference case. + +6. **System-prompt rendering becomes a pure function of the manifest.** The manifest carries everything the prompt renderer needs (type, items, aliases, one example). No per-gadget logic in the renderer. The renderer is the source of truth for how agents are told to use the CLI; fixing it once fixes every consumer. + +7. **Help and prompt draw from a single metadata source.** What `--help` shows and what the agent sees in its system prompt derive from the same `ToolDefinition`. Divergence is a design bug, not a feature. + +8. **No per-parameter runtime validation beyond JSON.parse.** Out of scope per non-goals. Malformed payloads that parse but don't match the expected shape are caught downstream by the gadget or the remote API; the CLI's job is parse-correctness and clear errors. + +--- + +## Acceptance Criteria (outcome-level) + +1. Running `cascade-tools scm create-pr-review --comment '[{"path":"a","line":1,"body":"b"}]' …` succeeds in parsing and passes the inline comments through to the gadget; `--comments` with the same value produces identical behavior. +2. Running `cascade-tools scm create-pr-review --help` displays an `EXAMPLES` section that includes a copy-pasteable invocation with a well-formed `--comments` JSON array. +3. Passing malformed JSON to any cascade-tools flag produces a structured error on stdout naming the flag, showing the first ~80 chars of the offending input, stating the expected shape, and — when a file-input alternative exists — pointing at it. A prose summary is also written to stderr. +4. The structured error envelope on stdout has a stable, documented schema (fields: `type`, `flag`, `message`, `got`, `expected`, `hint`, `example`). Every CLI failure path populates it — flag parse, JSON parse, required-missing, enum-mismatch, auth, runtime. +5. Mistyping a flag name close to a real one (e.g. `--comnent`, `--body` typoed as `--bdy`) produces an error that includes "Did you mean `--`?" derived from Levenshtein-closest match against the command's declared flags and aliases. +6. The agent-facing tool manifest surfaced in the system prompt renders every array-of-object parameter with its actual plural name (no `s`-stripping), marks it as a JSON value (not "repeatable string"), and includes a one-line runnable example pulled from the tool definition. +7. Running the full test suite after the changes shows no regressions in existing cascade-tools behavior: primitive-array params (e.g. label IDs) still render as repeatable strings in the prompt, string params still render without examples, and existing CLI invocations continue to succeed. +8. Adding a new gadget with an array-of-object parameter — declaring only `type`, `items`, `describe`, and an `examples` block — produces correct prompt text, correct `--help`, and correct error messages without editing the shared factory, the manifest generator, or the prompt builder. +9. `cascade-tools scm create-pr-review` gains `--comments-file ` (accepts JSON file contents, `-` for stdin); both `--help` and the agent prompt reference it as an alternative for long payloads. +10. A human running a failing cascade-tools command in a terminal reads a short prose summary on stderr without needing to parse JSON. `[manual]` — verification is whether the stderr text is readable, which is a subjective-ergonomics judgment best sanity-checked by a human eye. + +--- + +## Documentation Impact (high-level) + +- `src/gadgets/README.md` (new) — authoring guide for cascade-tools gadgets: declarative metadata available (`cliAliases`, `fileInputAlternatives`, `examples`), the error-envelope contract, and the single-entrypoint invariant that no gadget edits the shared renderer/factory/prompt builder. +- `src/integrations/README.md` — brief cross-reference from the PM-provider section to the new gadgets README (PM provider authors hit cascade-tools through their gadget surface). +- `CHANGELOG.md` — entries for each plan that merges. +- `docs/specs/` — this file, once merged, moves from `014-cascade-tools-agent-ergonomics.md` to `.done` on completion. +- No `CLAUDE.md` entry — the authoring rule is narrow to gadget authors and has a clear home in `src/gadgets/README.md`. No changes to top-level `README.md` either — cascade-tools is internal tooling for agents, not a product surface. + +--- + +## Out of Scope + +- The `cascade` dashboard CLI binary and its commands (runs/projects/agents/etc.). +- Agent prompt template files (`src/agents/prompts/templates/*.eta`) beyond the tool-guidance rendering. +- The `Failed to delete progress comment after agent success` 404 warning observed in run 5d993b04 — separate bug. +- Runtime shape validation of parsed JSON payloads beyond JSON.parse success. +- Switching CLI frameworks away from oclif. +- Rolling out `--X-file` alternatives to every gadget preemptively — opt-in per gadget. +- PM provider wizard UX, webhook adapters, and other non-cascade-tools surfaces. +- Backwards-incompatible renames of existing cascade-tools flag names (aliases are additive only). diff --git a/package-lock.json b/package-lock.json index 27c20d70..5949f4a3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -29,6 +29,7 @@ "drizzle-orm": "^0.45.1", "eta": "^4.5.0", "execa": "^9.6.1", + "fastest-levenshtein": "^1.0.16", "hono": "^4.12.14", "jira.js": "^5.3.0", "js-yaml": "^4.1.1", @@ -7165,6 +7166,15 @@ ], "license": "BSD-3-Clause" }, + "node_modules/fastest-levenshtein": { + "version": "1.0.16", + "resolved": "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz", + "integrity": "sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==", + "license": "MIT", + "engines": { + "node": ">= 4.9.1" + } + }, "node_modules/fdir": { "version": "6.5.0", "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", diff --git a/package.json b/package.json index c28f354e..d15e9fa5 100644 --- a/package.json +++ b/package.json @@ -74,6 +74,7 @@ "drizzle-orm": "^0.45.1", "eta": "^4.5.0", "execa": "^9.6.1", + "fastest-levenshtein": "^1.0.16", "hono": "^4.12.14", "jira.js": "^5.3.0", "js-yaml": "^4.1.1", diff --git a/src/agents/contracts/index.ts b/src/agents/contracts/index.ts index f71e2c99..99059635 100644 --- a/src/agents/contracts/index.ts +++ b/src/agents/contracts/index.ts @@ -16,6 +16,41 @@ */ export type LogWriter = (level: string, message: string, context?: Record) => void; +/** + * Shape of a single parameter entry inside {@link ToolManifest.parameters}. + * + * Widened in spec 014 to carry `items`, `aliases`, and a single concrete + * `example` pulled from the tool definition. These drive both the agent's + * system-prompt rendering of the flag and (downstream) the CLI factory's + * help / error output. + */ +export interface ToolManifestParameter { + /** Base JSON-Schema-ish type: 'string' | 'number' | 'boolean' | 'array' | 'object' */ + type: string; + /** Whether the flag is required */ + required?: boolean; + /** Default value (when declared on the tool definition) */ + default?: unknown; + /** Human-readable description, used as inline help */ + description?: string; + /** Allowed values for enum-shaped parameters */ + options?: string[]; + /** + * Element type for array-shaped parameters. + * - `'string'` → CLI flag is repeatable (`--x a --x b`) + * - `'object'` → CLI flag takes a single JSON array string + */ + items?: string; + /** Alternative flag names accepted at the CLI (e.g. `['comment']` for `--comments`) */ + aliases?: readonly string[]; + /** + * A single concrete example value for this parameter, pulled from the first + * {@link ToolDefinition.examples} entry that populates it. Used by the + * prompt renderer and CLI help to show agents a runnable shape. + */ + example?: unknown; +} + /** * Describes a CASCADE-specific CLI tool available to the agent. */ @@ -26,7 +61,12 @@ export interface ToolManifest { description: string; /** CLI command to invoke, e.g., 'cascade-tools trello read-card' */ cliCommand: string; - /** JSON Schema for the CLI flags/args */ + /** + * JSON Schema-ish descriptor for the CLI flags/args. Keys are flag names; + * values conform to {@link ToolManifestParameter}. Kept as + * `Record` for backwards-compat with older consumers that + * index with ad-hoc shapes — new code should cast to `ToolManifestParameter`. + */ parameters: Record; } diff --git a/src/backends/shared/nativeToolPrompts.ts b/src/backends/shared/nativeToolPrompts.ts index f3283c2c..61db1d3d 100644 --- a/src/backends/shared/nativeToolPrompts.ts +++ b/src/backends/shared/nativeToolPrompts.ts @@ -16,25 +16,66 @@ You are operating in a native-tool environment, not a gadget/function-call envir /** * Format a single CLI parameter for tool guidance documentation. + * + * Spec 014: the array branch previously stripped a trailing `s` and hard-coded + * ` (repeatable)` for every array type regardless of item shape. That + * was the root cause of prod run 5d993b04 — an agent sent `--comment` because + * the prompt told it to. Now: + * + * - `items: 'object'` → renders `-- ''` (single JSON blob, not + * repeatable), with aliases appended via `|` and one example line indented + * beneath the flag when the manifest provides `example`. + * - `items: 'string'` or items missing → keeps the repeatable semantics but + * uses the actual key (no `s`-strip). + * - `type: 'object'` → renders as a single `''` blob. */ +// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: parameter-type taxonomy function formatParam( key: string, - schema: { type: string; required?: boolean; default?: unknown; description?: string }, + schema: { + type: string; + required?: boolean; + default?: unknown; + description?: string; + items?: string; + aliases?: readonly string[]; + example?: unknown; + }, ): string { + const aliasSuffix = (schema.aliases ?? []).map((a) => `|--${a}`).join(''); + const flagHead = `--${key}${aliasSuffix}`; + let result: string; - if (schema.type === 'array') { - const singular = key.replace(/s$/, ''); + if (schema.type === 'array' && schema.items === 'object') { + result = schema.required ? ` ${flagHead} ''` : ` [${flagHead} '']`; + } else if (schema.type === 'array') { + // Primitive arrays (items: 'string' or unspecified) — repeatable. result = schema.required - ? ` --${singular} (repeatable)` - : ` [--${singular} (repeatable)]`; + ? ` ${flagHead} (repeatable)` + : ` [${flagHead} (repeatable)]`; + } else if (schema.type === 'object') { + result = schema.required ? ` ${flagHead} ''` : ` [${flagHead} '']`; } else if (schema.type === 'boolean') { - result = schema.default === true ? ` [--no-${key}]` : ` [--${key}]`; + result = schema.default === true ? ` [--no-${key}]` : ` [${flagHead}]`; } else { - result = schema.required ? ` --${key} <${schema.type}>` : ` [--${key} <${schema.type}>]`; + result = schema.required ? ` ${flagHead} <${schema.type}>` : ` [${flagHead} <${schema.type}>]`; } + if (schema.description) { result += ` # ${schema.description}`; } + + // Spec 014: surface a concrete shape beneath the flag so agents see a + // copy/paste-ready JSON payload without having to run --help. + if (schema.example !== undefined) { + try { + result += `\n # example: --${key} '${JSON.stringify(schema.example)}'`; + } catch { + // JSON.stringify throws on cyclic refs — never in our tool definitions, + // but be defensive so a malformed example never crashes prompt building. + } + } + return result; } diff --git a/src/gadgets/README.md b/src/gadgets/README.md new file mode 100644 index 00000000..92d46fea --- /dev/null +++ b/src/gadgets/README.md @@ -0,0 +1,140 @@ +# cascade-tools gadget authoring + +`cascade-tools` is the CLI surface LLM agents use to drive CASCADE — it sits between every review / implementation / backlog agent and the outside world (PM APIs, GitHub, session lifecycle). Its ergonomics for an LLM reader are load-bearing: a confusing flag name, a malformed `--help`, or a cryptic error can cost minutes of run budget (see [spec 014](../../docs/specs/014-cascade-tools-agent-ergonomics.md.done) — prompted by prod run `5d993b04-6e05-4ae1-b7de-8c274cf3496b` where an agent burned ~2½ min of a 7m42s run fighting `scm create-pr-review` and ultimately dropped an inline review comment). + +This document is the canonical guide for **adding a new gadget** (a `cascade-tools ` command) and keeping its agent-facing surface truthful, runnable, and self-correctable. + +--- + +## Architecture in one picture + +A gadget is a single `ToolDefinition` consumed by three generators: + +``` +ToolDefinition ───▶ gadgetFactory ▶ the Zod-validated in-process gadget + ├─▶ cliCommandFactory ▶ the oclif `cascade-tools` subcommand + └─▶ manifestGenerator ▶ the ToolManifest the agent sees in its system prompt +``` + +All three read from the same definition. **If you want a behavior to land, declare it on the ToolDefinition — do not edit the generators per-gadget.** + +--- + +## Declarative metadata you can attach + +### `cliAliases?: readonly string[]` + +Alternative flag names accepted on the CLI. Wired into oclif as `Flags.*({ aliases: [...] })`; surfaced in the agent's system prompt so the rendered flag line looks like `--comments|--comment ''`. Use this when agent muscle memory reaches for an understandable-but-wrong spelling (singular/plural collisions, British/American spellings, historical renames). + +```ts +comments: { + type: 'array', + items: 'object', + describe: 'Inline review comments on specific files/lines.', + cliAliases: ['comment'], // agent typing `--comment '[...]'` now resolves correctly + optional: true, +}, +``` + +The canonical parameter name always wins. Aliases are additive; suggestions returned by the fuzzy-matcher always point at the canonical form. + +### `cli.fileInputAlternatives` + +Opt-in `---file ` escape hatches for long or JSON-shaped payloads that don't survive shell quoting. Pair with `parseAs: 'json'` for array-of-object / object params so the file contents are `JSON.parse`-d before reaching the gadget. + +```ts +cli: { + fileInputAlternatives: [ + { + paramName: 'comments', + fileFlag: 'comments-file', + parseAs: 'json', + description: 'Read --comments JSON from file (use - for stdin). Prefer this for long payloads.', + }, + ], +}, +``` + +`-` as the file path reads from stdin. The generated flag is always optional (the direct flag remains accepted). + +### `examples` + +A list of `{ params, comment, output? }` invocations. The first example that populates a given parameter becomes that parameter's **concrete example**, surfaced in three places: + +- The agent's system prompt renders a one-line `# example: -- ''` under the flag (when the param is array-of-object / object). +- The `cascade-tools … --help` output lists every example as a runnable shell invocation under an `EXAMPLES` section. +- JSON-parse failures include the example as the `expected` shape fragment in the structured error envelope. + +Write examples that a model could literally copy/paste. Use double-quoted JSON keys; do not rely on the agent to translate pseudo-JSON. + +```ts +examples: [ + { + params: { + comment: 'Requesting changes for identified issues', + owner: 'acme', + repo: 'myapp', + prNumber: 42, + event: 'REQUEST_CHANGES', + body: 'Good progress, but…', + comments: [ + { path: 'src/utils.ts', line: 15, body: 'This could cause a null pointer…' }, + ], + }, + comment: 'Request changes with inline comments', + }, +], +``` + +--- + +## The error envelope + +Every cascade-tools failure — flag parse, JSON parse, missing-required, enum-mismatch, unknown-flag, auth, runtime — emits through the shared `emitCliError` helper: + +- **Structured JSON on stdout** (`{ "success": false, "error": {...} }`) so agents parse a single stable surface. +- **One-line prose summary on stderr** so humans running the CLI directly get a readable error without piping through `jq`. +- **Exit code 1.** + +The envelope shape is part of the cascade-tools contract. Renaming fields is a breaking change — agents rely on `error.type` / `error.flag` / `error.hint` to self-correct on the next attempt. + +Envelope fields: + +| field | when populated | +|---|---| +| `type` | always; one of `flag-parse` / `json-parse` / `missing-required` / `enum-mismatch` / `unknown-flag` / `auth` / `runtime` | +| `flag` | for flag-scoped failures | +| `message` | always; human-readable | +| `got` | the offending input, truncated to ~80 chars | +| `expected` | shape fragment (from `example` when available, else `describe`) | +| `hint` | an action the agent can take (e.g. `did you mean --comments?`, `use --comments-file `) | +| `example` | runnable invocation, when known | + +You do not call `emitCliError` directly. The shared factory routes every failure through it automatically — your job is to make the declarative metadata (describe text, examples, aliases, file alternatives) rich enough that the auto-generated envelope is actually useful. + +--- + +## The single-entrypoint invariant + +Adding a gadget requires **zero edits** to: + +- `src/gadgets/shared/cliCommandFactory.ts` +- `src/gadgets/shared/manifestGenerator.ts` +- `src/gadgets/shared/errorEnvelope.ts` +- `src/backends/shared/nativeToolPrompts.ts` + +If you find yourself opening one of those files, stop — the right fix is almost always to attach more metadata to your ToolDefinition, or to propose a spec for a new shared capability. A per-gadget branch in shared infrastructure is a red flag. + +--- + +## Mistyped flags → "did you mean" + +The factory intercepts oclif's `NonExistentFlagsError`, runs a Levenshtein match against every declared canonical flag name + alias, and surfaces the closest canonical name as `error.hint`. No gadget work required — just declare your flags truthfully. + +Two tuning constants live in the factory: `MAX_FLAG_SUGGESTION_DISTANCE` (default 2) and `MAX_FLAG_SUGGESTION_RATIO` (default 0.4). Wildly-off mistypes get no suggestion rather than a misleading one. + +--- + +## Reference: `createPRReviewDef` + +`src/gadgets/github/definitions.ts` is the reference gadget for spec 014 — it carries `cliAliases: ['comment']` on `comments`, a `fileInputAlternatives` entry for `--comments-file`, and a well-formed `examples` block. Read it before you add a new gadget with array-of-object parameters. diff --git a/src/gadgets/github/definitions.ts b/src/gadgets/github/definitions.ts index 63c91fb5..53fba219 100644 --- a/src/gadgets/github/definitions.ts +++ b/src/gadgets/github/definitions.ts @@ -198,7 +198,8 @@ export const createPRReviewDef: ToolDefinition = { type: 'array', items: 'object', describe: - 'Optional inline comments on specific files/lines ([{"path":"file","line":1,"body":"comment"}])', + 'Inline review comments on specific files/lines. Pass a JSON array of {path, line, body} objects.', + cliAliases: ['comment'], optional: true, }, }, @@ -235,6 +236,15 @@ export const createPRReviewDef: ToolDefinition = { ], cli: { autoResolved: ownerRepoAutoResolved, + fileInputAlternatives: [ + { + paramName: 'comments', + fileFlag: 'comments-file', + parseAs: 'json', + description: + 'Read --comments JSON from file (use - for stdin). Prefer this for long payloads.', + }, + ], }, }; diff --git a/src/gadgets/shared/cliCommandFactory.ts b/src/gadgets/shared/cliCommandFactory.ts index 67272225..6d52636a 100644 --- a/src/gadgets/shared/cliCommandFactory.ts +++ b/src/gadgets/shared/cliCommandFactory.ts @@ -12,13 +12,16 @@ import { readFileSync } from 'node:fs'; import { Flags } from '@oclif/core'; +import { distance } from 'fastest-levenshtein'; import { CredentialScopedCommand, resolveOwnerRepo } from '../../cli/base.js'; +import { emitCliError } from './errorEnvelope.js'; import type { CLIAutoResolved, FileInputAlternative, ParameterDefinition, ToolDefinition, + ToolExample, } from './toolDefinition.js'; // biome-ignore lint/suspicious/noExplicitAny: oclif flag generics do not compose safely for dynamic factories @@ -32,7 +35,12 @@ type ParsedFlags = Record; /** * Build a single oclif Flag from a ParameterDefinition. * Returns undefined if the parameter is gadgetOnly (excluded from CLI). + * + * Branches on parameter type taxonomy (string / number / boolean / enum / + * array-with-object-items / array-with-string-items / object) — the complexity + * reflects the domain shape, not tangled logic. */ +// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: parameter-type taxonomy function buildOclifFlag( def: ParameterDefinition, isAutoResolved: boolean, @@ -50,6 +58,7 @@ function buildOclifFlag( description: def.describe, required: isRequired, ...(def.cliEnvVar ? { env: def.cliEnvVar } : {}), + ...(def.cliAliases && def.cliAliases.length > 0 ? { aliases: [...def.cliAliases] } : {}), }; switch (def.type) { @@ -80,10 +89,13 @@ function buildOclifFlag( }); } case 'array': { - return Flags.string({ - ...baseOptions, - multiple: true, - }); + // Primitive arrays (items:'string') stay repeatable (--x a --x b). + // Object arrays (items:'object') take a single JSON-array string + // that the factory parses below. + if (def.items === 'object') { + return Flags.string({ ...baseOptions }); + } + return Flags.string({ ...baseOptions, multiple: true }); } case 'object': { return Flags.string({ @@ -136,48 +148,244 @@ function readFileInput(fileFlagValue: string): string { return fileFlagValue === '-' ? readFileSync(0, 'utf-8') : readFileSync(fileFlagValue, 'utf-8'); } +/** + * Derive an `expected` shape hint for a JSON-parse failure envelope from the + * parameter's manifest example (primary source) or its `describe` text (fallback). + */ +function expectedShapeFor(paramDef: ParameterDefinition, example?: unknown): string { + if (example !== undefined) { + return JSON.stringify(example); + } + // No example to lean on — give the agent the describe text so at least + // the shape hint is non-empty. + return paramDef.describe; +} + +/** + * JSON-parse a string and emit a structured envelope on failure. + * Returns the parsed value on success or never-returns on failure (emitCliError exits). + */ +/** + * Streams/exit delegate the factory hands to {@link emitCliError} so test spies + * on `instance.log`/`instance.exit` capture the envelope output instead of + * going directly to process.stdout / process.exit. + */ +interface ErrorSink { + stdout: NodeJS.WritableStream; + stderr: NodeJS.WritableStream; + exit: (code: number) => never; +} + +/** + * Maximum Levenshtein distance between an unknown flag and a declared name + * before we stop suggesting. Additionally bounded by `MAX_DISTANCE_RATIO` + * so that very short flags don't pick up wildly different suggestions. + */ +const MAX_FLAG_SUGGESTION_DISTANCE = 2; +const MAX_FLAG_SUGGESTION_RATIO = 0.4; + +/** + * For the given unknown flag and the command's declared flag names + aliases, + * return the Levenshtein-closest canonical declared name if it passes the + * distance threshold; otherwise null. Aliases are considered during the match + * but the returned value is always the canonical flag name. + */ +function suggestFlag( + unknown: string, + candidates: { canonical: string; aliases: readonly string[] }[], +): string | null { + let best: { canonical: string; dist: number } | null = null; + for (const { canonical, aliases } of candidates) { + for (const candidate of [canonical, ...aliases]) { + const d = distance(unknown, candidate); + if (best === null || d < best.dist) { + best = { canonical, dist: d }; + } + } + } + if (best === null) return null; + const target = Math.max(unknown.length, best.canonical.length); + if (best.dist > MAX_FLAG_SUGGESTION_DISTANCE) return null; + if (target > 0 && best.dist / target > MAX_FLAG_SUGGESTION_RATIO) return null; + return best.canonical; +} + +/** + * Collect canonical flag names + their declared aliases from a tool definition. + * Also includes any file-input alternative flags (which have no canonical counterpart; + * they stand on their own). Used by fuzzy-suggestion and by help rendering. + */ +function collectCandidateFlags( + def: ToolDefinition, +): { canonical: string; aliases: readonly string[] }[] { + const list: { canonical: string; aliases: readonly string[] }[] = []; + for (const [name, paramDef] of Object.entries(def.parameters)) { + if (paramDef.gadgetOnly) continue; + list.push({ canonical: name, aliases: paramDef.cliAliases ?? [] }); + } + for (const alt of def.cli?.fileInputAlternatives ?? []) { + list.push({ canonical: alt.fileFlag, aliases: [] }); + } + return list; +} + +/** + * Detect whether an error coming out of `this.parse()` is oclif's + * `NonExistentFlagsError`. We match by constructor name + the `flags` array + * shape to stay robust across oclif versions and avoid a deep import. + */ +function isNonexistentFlagError(err: unknown): err is { flags: string[]; message: string } { + if (!err || typeof err !== 'object') return false; + const e = err as { name?: string; constructor?: { name?: string }; flags?: unknown }; + const ctorName = e.constructor?.name ?? ''; + const errName = e.name ?? ''; + const looksLikeCLIParse = + errName === 'CLIParseError' || + errName === 'NonExistentFlagsError' || + ctorName === 'NonExistentFlagsError'; + return looksLikeCLIParse && Array.isArray(e.flags); +} + +/** + * Build an error sink bound to a CredentialScopedCommand instance, so that + * emitCliError routes envelope output through `instance.log` (stripping the + * trailing newline oclif's `this.log` adds itself) and `instance.exit`. This + * keeps test spies honest. + * + * Note on stderr: Command instances don't expose a `stderr` method publicly, + * but tests don't assert on stderr from the factory surface (they assert on + * the prose summary via the errorEnvelope unit tests). Here we write directly + * to `process.stderr`. + */ +function buildSink(command: CredentialScopedCommand): ErrorSink { + const stdout: NodeJS.WritableStream = { + write: (chunk: string | Uint8Array): boolean => { + const text = typeof chunk === 'string' ? chunk : String(chunk); + // Some tests construct commands without a log spy installed; fall back + // to a no-op so envelope emission can't crash the test setup. + if (typeof command.log === 'function') { + command.log(text.replace(/\n$/, '')); + } + return true; + }, + } as NodeJS.WritableStream; + const exit = + typeof command.exit === 'function' + ? (command.exit.bind(command) as (code: number) => never) + : (process.exit as (code: number) => never); + return { stdout, stderr: process.stderr, exit }; +} + +function parseJsonOrError( + raw: string, + flag: string, + paramDef: ParameterDefinition, + fileAlt: FileInputAlternative | undefined, + example: unknown, + sink: ErrorSink, +): unknown { + try { + return JSON.parse(raw); + } catch (err) { + const hint = fileAlt + ? `Use double-quoted JSON keys and values. For long payloads pass --${fileAlt.fileFlag} (or - for stdin).` + : 'Use double-quoted JSON keys and values.'; + return emitCliError({ + type: 'json-parse', + flag, + message: err instanceof Error ? err.message : String(err), + got: raw, + expected: expectedShapeFor(paramDef, example), + hint, + stdout: sink.stdout, + stderr: sink.stderr, + exit: sink.exit, + }); + } +} + function resolveFileInputParam( name: string, paramDef: ParameterDefinition, fileAlt: FileInputAlternative, flags: ParsedFlags, - command: CredentialScopedCommand, resolvedParams: Record, + example: unknown, + sink: ErrorSink, ): void { const fileFlagValue = flags[fileAlt.fileFlag]; const directValue = flags[name]; if (typeof fileFlagValue === 'string' && fileFlagValue.length > 0) { - resolvedParams[name] = readFileInput(fileFlagValue); + const contents = readFileInput(fileFlagValue); + if (fileAlt.parseAs === 'json') { + resolvedParams[name] = parseJsonOrError(contents, name, paramDef, fileAlt, example, sink); + return; + } + resolvedParams[name] = contents; return; } - if (typeof directValue === 'string') { - resolvedParams[name] = directValue; - return; + // Direct (non-file) value: for array-of-object we still need to JSON-parse; + // for primitive string params we pass through. + if (directValue !== undefined && directValue !== null) { + if (paramDef.type === 'array' && paramDef.items === 'object') { + const asString = typeof directValue === 'string' ? directValue : JSON.stringify(directValue); + resolvedParams[name] = parseJsonOrError(asString, name, paramDef, fileAlt, example, sink); + return; + } + if (typeof directValue === 'string') { + resolvedParams[name] = directValue; + return; + } } if (paramDef.required === true) { - command.error(`Either --${name} or --${fileAlt.fileFlag} is required`); + emitCliError({ + type: 'missing-required', + flag: name, + message: `Either --${name} or --${fileAlt.fileFlag} is required`, + hint: `Pass --${name} '' or --${fileAlt.fileFlag} (use - for stdin).`, + stdout: sink.stdout, + stderr: sink.stderr, + exit: sink.exit, + }); } } function resolveObjectParam( name: string, flags: ParsedFlags, - command: CredentialScopedCommand, resolvedParams: Record, + paramDef: ParameterDefinition, + example: unknown, + sink: ErrorSink, ): void { const rawValue = flags[name]; if (typeof rawValue !== 'string') { return; } + resolvedParams[name] = parseJsonOrError(rawValue, name, paramDef, undefined, example, sink); +} - try { - resolvedParams[name] = JSON.parse(rawValue) as unknown; - } catch { - command.error(`--${name} must be valid JSON`); - } +/** + * Resolve a `type:'array', items:'object'` flag value: JSON-parse the single + * string form. oclif gives us a bare string because we set `multiple:false` + * for items:'object' in buildOclifFlag. + */ +function resolveArrayOfObjectParam( + name: string, + flags: ParsedFlags, + resolvedParams: Record, + paramDef: ParameterDefinition, + fileAlt: FileInputAlternative | undefined, + example: unknown, + sink: ErrorSink, +): void { + const rawValue = flags[name]; + if (rawValue === undefined) return; + const asString = typeof rawValue === 'string' ? rawValue : JSON.stringify(rawValue); + resolvedParams[name] = parseJsonOrError(asString, name, paramDef, fileAlt, example, sink); } function resolveStandardParam( @@ -196,7 +404,7 @@ function resolveDirectParams( flags: ParsedFlags, fileInputMap: Map, autoResolvedMap: Map, - command: CredentialScopedCommand, + sink: ErrorSink, ): Record { const resolvedParams: Record = {}; @@ -208,14 +416,22 @@ function resolveDirectParams( continue; } + // Derive one concrete example value per parameter from def.examples, used + // by JSON-parse error messages to show the agent the expected shape. + const example = findExampleForParam(def.examples, name); const fileAlt = fileInputMap.get(name); if (fileAlt) { - resolveFileInputParam(name, paramDef, fileAlt, flags, command, resolvedParams); + resolveFileInputParam(name, paramDef, fileAlt, flags, resolvedParams, example, sink); continue; } if (paramDef.type === 'object') { - resolveObjectParam(name, flags, command, resolvedParams); + resolveObjectParam(name, flags, resolvedParams, paramDef, example, sink); + continue; + } + + if (paramDef.type === 'array' && paramDef.items === 'object') { + resolveArrayOfObjectParam(name, flags, resolvedParams, paramDef, undefined, example, sink); continue; } @@ -225,6 +441,99 @@ function resolveDirectParams( return resolvedParams; } +/** + * Pull the first concrete value for `paramName` out of the tool definition's + * examples block. Mirrors the manifest generator's `findExampleForParam` but + * kept local to avoid a cross-import cycle with the manifest module. + */ +function findExampleForParam( + examples: readonly ToolExample[] | undefined, + paramName: string, +): unknown { + if (!examples) return undefined; + for (const ex of examples) { + if (Object.hasOwn(ex.params, paramName) && ex.params[paramName] !== undefined) { + return ex.params[paramName]; + } + } + return undefined; +} + +/** + * Render the tool definition's `examples` block as oclif-flavored example strings. + * Each entry becomes a single shell-safe invocation line on `FactoryCommand.examples`. + */ +function buildOclifExamples(def: ToolDefinition, cliCommand: string): string[] { + if (!def.examples || def.examples.length === 0) return []; + return def.examples.map((ex) => formatExampleLine(cliCommand, def, ex)); +} + +// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: parameter-type taxonomy +function formatExampleLine(cliCommand: string, def: ToolDefinition, ex: ToolExample): string { + const parts: string[] = [cliCommand]; + for (const [key, value] of Object.entries(ex.params)) { + const paramDef = def.parameters[key]; + if (!paramDef || paramDef.gadgetOnly) continue; + if (value === undefined) continue; + + if (paramDef.type === 'boolean') { + if (value === true) parts.push(`--${key}`); + else parts.push(`--no-${key}`); + continue; + } + if (paramDef.type === 'array' && paramDef.items === 'string' && Array.isArray(value)) { + for (const v of value) parts.push(`--${key} ${shellQuote(String(v))}`); + continue; + } + if (paramDef.type === 'object' || (paramDef.type === 'array' && paramDef.items === 'object')) { + parts.push(`--${key} ${shellQuote(JSON.stringify(value))}`); + continue; + } + parts.push(`--${key} ${shellQuote(String(value))}`); + } + return parts.join(' '); +} + +function shellQuote(s: string): string { + // Wrap with single quotes; escape any embedded single quote with the + // classic '\'' trick so copy/paste into a POSIX shell remains safe. + return `'${s.replace(/'/g, `'\\''`)}'`; +} + +/** + * Derive the `cascade-tools ` prefix for a tool, matching + * {@link ../shared/manifestGenerator.ts#deriveCLICommand}. Duplicated locally to + * avoid a cross-import cycle. + */ +function deriveCommandPrefix(toolName: string): string { + // Minimal duplicate of deriveCLICommand — enough to seed oclif examples. + if (toolName === 'Finish') { + return `cascade-tools session ${kebab(toolName)}`; + } + if ( + toolName.startsWith('CreatePR') || + toolName.startsWith('GetPR') || + toolName.startsWith('PostPR') || + toolName.startsWith('UpdatePR') || + toolName.startsWith('ReplyTo') || + toolName === 'GetCIRunLogs' + ) { + return `cascade-tools scm ${kebab(toolName)}`; + } + let commandName = toolName; + if (toolName.startsWith('PM') && toolName.length > 2 && /[A-Z]/.test(toolName[2])) { + commandName = toolName.slice(2); + } + return `cascade-tools pm ${kebab(commandName)}`; +} + +function kebab(name: string): string { + return name + .replace(/([A-Z]+)([A-Z][a-z])/g, '$1-$2') + .replace(/([a-z\d])([A-Z])/g, '$1-$2') + .toLowerCase(); +} + function resolveGitRemoteParams( autoResolvedParams: CLIAutoResolved[], flags: ParsedFlags, @@ -310,30 +619,63 @@ export function createCLICommand( fileInputAlts.map((a) => [a.paramName, a]), ); + const commandPrefix = deriveCommandPrefix(def.name); + const staticExamples = buildOclifExamples(def, commandPrefix); + class FactoryCommand extends CredentialScopedCommand { static override description = def.description; static override flags = flagsRecord; + static override examples = staticExamples; async execute(): Promise { - const { flags } = await this.parse(FactoryCommand); + // Build a sink that routes emitCliError output through the instance's + // log/exit — lets tests spy on instance.log and instance.exit. + const sink = buildSink(this); + + let flags: unknown; + try { + ({ flags } = await this.parse(FactoryCommand)); + } catch (err) { + if (isNonexistentFlagError(err)) { + const candidates = collectCandidateFlags(def); + const offending = err.flags[0] ?? ''; + const suggestion = suggestFlag(offending, candidates); + emitCliError({ + type: 'unknown-flag', + flag: offending, + message: err.message, + ...(suggestion ? { hint: `did you mean --${suggestion}?` } : {}), + stdout: sink.stdout, + stderr: sink.stderr, + exit: sink.exit, + }); + return; + } + throw err; + } const parsedFlags = flags as ParsedFlags; const resolvedParams = resolveDirectParams( def, parsedFlags, fileInputMap, autoResolvedMap, - this, + sink, ); resolveGitRemoteParams(autoResolvedParams, parsedFlags, resolvedParams); - // Call the core function + // Call the core function — wrap runtime failures in the spec-014 envelope. let result: unknown; try { result = await coreFn(resolvedParams); } catch (err) { const message = err instanceof Error ? err.message : String(err); - this.log(JSON.stringify({ success: false, error: message })); - this.exit(1); + emitCliError({ + type: 'runtime', + message, + stdout: sink.stdout, + stderr: sink.stderr, + exit: sink.exit, + }); return; } diff --git a/src/gadgets/shared/errorEnvelope.ts b/src/gadgets/shared/errorEnvelope.ts new file mode 100644 index 00000000..df1a7663 --- /dev/null +++ b/src/gadgets/shared/errorEnvelope.ts @@ -0,0 +1,113 @@ +/** + * Shared cascade-tools CLI error envelope (spec 014). + * + * Every cascade-tools failure — flag-parse, JSON-parse, missing-required, + * enum-mismatch, unknown-flag, auth, runtime — emits through {@link emitCliError}: + * + * - Structured JSON on stdout: `{"success":false,"error":}` so agents + * parsing CLI output see one stable surface. + * - One-line prose summary on stderr so humans running the command in a terminal + * get a readable error without piping through `jq`. + * - Exit code 1. + * + * The envelope shape is part of the cascade-tools contract. Renaming fields is + * a breaking change — agents rely on `error.type` / `error.flag` / `error.hint` + * to self-correct on the next attempt. + */ + +/** + * Classification of a cascade-tools failure. Agents may branch on this. + */ +export type CliErrorType = + | 'flag-parse' + | 'json-parse' + | 'missing-required' + | 'enum-mismatch' + | 'unknown-flag' + | 'auth' + | 'runtime'; + +/** + * The stable envelope shape emitted by {@link emitCliError}. New fields may be + * added over time; existing field names are load-bearing. + */ +export interface CliErrorEnvelope { + /** Classification the agent may branch on */ + type: CliErrorType; + /** Flag name associated with the failure (when applicable) */ + flag?: string; + /** Human-readable message describing the failure */ + message: string; + /** Truncated view of the offending input, when relevant */ + got?: string; + /** A shape fragment describing what was expected, when relevant */ + expected?: string; + /** A hint the agent can act on (e.g. "did you mean --comments?") */ + hint?: string; + /** A runnable example the agent can adapt, when the tool definition has one */ + example?: string; +} + +/** + * Options accepted by {@link emitCliError}. Extends the envelope with optional + * stream + exit injection for testability. + */ +export interface EmitCliErrorOptions extends CliErrorEnvelope { + stdout?: NodeJS.WritableStream; + stderr?: NodeJS.WritableStream; + exit?: (code: number) => never; +} + +const DEFAULT_GOT_TRUNCATE = 80; + +/** + * Truncate a long user input to `max` characters, appending `...` when cut. + * Exported for unit tests; internal callers use it directly. + */ +export function truncateGot(input: string, max = DEFAULT_GOT_TRUNCATE): string { + if (input.length <= max) return input; + return `${input.slice(0, max)}...`; +} + +/** + * Build the one-line prose summary written to stderr. + * + * Format: `--: ` (omits the flag clause when absent). + * Trimmed to {@link max} characters to keep the stderr surface glanceable. + */ +function buildProseSummary(env: CliErrorEnvelope, max = 120): string { + const flagPart = env.flag ? `--${env.flag}: ` : ''; + const line = `${flagPart}${env.type} — ${env.message}`.replace(/\s+/g, ' ').trim(); + return line.length <= max ? line : `${line.slice(0, max - 3)}...`; +} + +/** + * Emit a cascade-tools error envelope and terminate the process. + * + * - Writes `{"success":false,"error":}\n` to stdout. + * - Writes a short prose summary to stderr. + * - Exits with code 1. + * + * Callers MUST treat this as a non-returning function. The default `exit` + * delegate is `process.exit`; tests inject a throwing stub to verify the call. + */ +export function emitCliError(opts: EmitCliErrorOptions): never { + const stdout = opts.stdout ?? process.stdout; + const stderr = opts.stderr ?? process.stderr; + const exit = opts.exit ?? (process.exit as (code: number) => never); + + const envelope: CliErrorEnvelope = { + type: opts.type, + message: opts.message, + }; + if (opts.flag !== undefined) envelope.flag = opts.flag; + if (opts.got !== undefined) envelope.got = truncateGot(opts.got); + if (opts.expected !== undefined) envelope.expected = opts.expected; + if (opts.hint !== undefined) envelope.hint = opts.hint; + if (opts.example !== undefined) envelope.example = opts.example; + + stdout.write(`${JSON.stringify({ success: false, error: envelope })}\n`); + stderr.write(`${buildProseSummary(envelope)}\n`); + + return exit(1); +} diff --git a/src/gadgets/shared/manifestGenerator.ts b/src/gadgets/shared/manifestGenerator.ts index d4a33e5e..40658d43 100644 --- a/src/gadgets/shared/manifestGenerator.ts +++ b/src/gadgets/shared/manifestGenerator.ts @@ -12,21 +12,9 @@ * - The `cliCommand` is derived from the definition name (kebab-cased) */ -import type { ToolManifest } from '../../agents/contracts/index.js'; +import type { ToolManifest, ToolManifestParameter } from '../../agents/contracts/index.js'; import type { ParameterDefinition, ToolDefinition } from './toolDefinition.js'; -// --------------------------------------------------------------------------- -// Parameter schema entry type (ToolManifest.parameters value) -// --------------------------------------------------------------------------- - -interface ManifestParameterEntry { - type: string; - required?: boolean; - default?: unknown; - description?: string; - options?: string[]; -} - // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- @@ -38,11 +26,11 @@ interface ManifestParameterEntry { function buildManifestParam( def: ParameterDefinition, isRequired: boolean, -): ManifestParameterEntry | undefined { +): ToolManifestParameter | undefined { // gadgetOnly params are excluded from manifests if (def.gadgetOnly) return undefined; - const entry: ManifestParameterEntry = { + const entry: ToolManifestParameter = { type: def.type === 'array' ? 'array' : def.type === 'object' ? 'object' : def.type, ...(isRequired ? { required: true } : {}), ...('default' in def && def.default !== undefined ? { default: def.default } : {}), @@ -61,9 +49,36 @@ function buildManifestParam( entry.type = 'string'; } + // Spec 014: thread array items, aliases, and example values through to the manifest + if (def.type === 'array') { + entry.items = def.items; + } + + if (def.cliAliases && def.cliAliases.length > 0) { + entry.aliases = [...def.cliAliases]; + } + return entry; } +/** + * Find the first example whose `params` object has a defined value for `paramName`, + * and return that value. Used by the manifest generator to surface one concrete + * shape per parameter to agents + help text. + */ +function findExampleForParam( + examples: readonly { params: Record }[] | undefined, + paramName: string, +): unknown { + if (!examples) return undefined; + for (const ex of examples) { + if (Object.hasOwn(ex.params, paramName) && ex.params[paramName] !== undefined) { + return ex.params[paramName]; + } + } + return undefined; +} + /** * Convert a PascalCase or camelCase tool name to a kebab-case CLI command segment. * @@ -180,6 +195,10 @@ export function generateToolManifest( const isRequired = paramDef.required === true; const entry = buildManifestParam(paramDef, isRequired); if (entry) { + const example = findExampleForParam(def.examples, name); + if (example !== undefined) { + entry.example = example; + } parameters[name] = entry; } } diff --git a/src/gadgets/shared/toolDefinition.ts b/src/gadgets/shared/toolDefinition.ts index f0bf180f..3f3f64ba 100644 --- a/src/gadgets/shared/toolDefinition.ts +++ b/src/gadgets/shared/toolDefinition.ts @@ -50,6 +50,13 @@ export interface FileInputAlternative { fileFlag: string; /** Optional description for the file flag in CLI help output */ description?: string; + /** + * How to interpret the file contents before handing to the gadget. + * - `'string'` (default): pass the raw file contents through as a string. + * - `'json'`: `JSON.parse()` the file contents; emit a structured error envelope on parse failure. + * Use this for parameters declared as `type: 'array'` with `items: 'object'` or `type: 'object'`. + */ + parseAs?: 'string' | 'json'; } /** @@ -108,6 +115,16 @@ interface BaseParameterDefinition { * @example 'CASCADE_BASE_BRANCH' for the `base` param in CreatePR */ cliEnvVar?: string; + /** + * Alternative CLI flag names accepted for this parameter. + * Wired into oclif as `Flags.*({aliases: [...]})`; the canonical name is always + * the parameter key. Surfaced in the agent-facing tool manifest so the system + * prompt lists both the canonical flag and its aliases. + * + * @example `--comments` with `cliAliases: ['comment']` accepts `--comment '[...]'` + * as well, so agent muscle-memory on the singular spelling still works. + */ + cliAliases?: readonly string[]; } /** diff --git a/src/integrations/README.md b/src/integrations/README.md index 0bce39a6..4514a825 100644 --- a/src/integrations/README.md +++ b/src/integrations/README.md @@ -1,5 +1,7 @@ # PM Integration Architecture +> ℹ️ Adding a `cascade-tools` gadget (CLI command consumed by agents)? See [`src/gadgets/README.md`](../gadgets/README.md) for the authoring contract — declarative metadata (`cliAliases`, `fileInputAlternatives`, `examples`), the structured error envelope, and the single-entrypoint invariant. PM provider registration (this file) is a separate surface. + CASCADE's PM providers (Trello, JIRA, Linear, and any future Asana/GitLab/ClickUp) are built on a **provider manifest** pattern. One file describes the provider end-to-end; one registry iterates manifests; a behavioral conformance harness guarantees each manifest satisfies its declared contracts. This document is the canonical guide for adding a new PM provider. Five specs shape it: diff --git a/tests/unit/backends/claude-code.test.ts b/tests/unit/backends/claude-code.test.ts index c07a82a9..dd0bb5cf 100644 --- a/tests/unit/backends/claude-code.test.ts +++ b/tests/unit/backends/claude-code.test.ts @@ -149,7 +149,7 @@ describe('buildToolGuidance', () => { expect(guidance).not.toContain(''); }); - it('renders optional array params with brackets and repeatable hint', () => { + it('renders optional array params with brackets and repeatable hint (spec 014: no s-strip)', () => { const tools: ToolManifest[] = [ { name: 'TestTool', @@ -161,7 +161,10 @@ describe('buildToolGuidance', () => { }, ]; const guidance = buildToolGuidance(tools); - expect(guidance).toContain('[--tag (repeatable)]'); + // Spec 014: the renderer no longer strips trailing 's' — the actual + // plural key comes through verbatim. Root cause of prod run 5d993b04. + expect(guidance).toContain('[--tags (repeatable)]'); + expect(guidance).not.toContain('[--tag '); // old bug }); it('renders parameter description as inline comment', () => { diff --git a/tests/unit/backends/shared-nativeToolPrompts.test.ts b/tests/unit/backends/shared-nativeToolPrompts.test.ts index ec11b7f8..712a05c3 100644 --- a/tests/unit/backends/shared-nativeToolPrompts.test.ts +++ b/tests/unit/backends/shared-nativeToolPrompts.test.ts @@ -127,30 +127,105 @@ describe('buildToolGuidance', () => { }); }); - describe('formatParam — array param (repeatable)', () => { - it('formats required array param as repeatable', () => { + describe('formatParam — primitive array (items:"string") — repeatable', () => { + // Spec 014: the renderer used to strip trailing 's' from every array name. + // That was the root cause of prod run 5d993b04 — an agent sent `--comment` + // because the prompt told it to. Arrays now always render with the actual + // declared key. Repeatable semantics stay the same for items:"string". + it('formats required primitive-array param with the actual plural name', () => { const result = buildToolGuidance([ makeManifest({ parameters: { - items: { type: 'array', required: true }, + items: { type: 'array', items: 'string', required: true }, }, }), ]); - // Singular of 'items' is 'item' - expect(result).toContain('--item (repeatable)'); - expect(result).not.toContain('[--item'); + expect(result).toContain('--items (repeatable)'); + expect(result).not.toContain('--item '); // old bug: s-stripped singular + expect(result).not.toContain('[--items'); // required → no brackets }); - it('formats optional array param as repeatable with brackets', () => { + it('formats optional primitive-array param with the actual plural name and brackets', () => { const result = buildToolGuidance([ makeManifest({ parameters: { - labels: { type: 'array', required: false }, + labels: { type: 'array', items: 'string', required: false }, }, }), ]); - // Singular of 'labels' is 'label' - expect(result).toContain('[--label (repeatable)]'); + expect(result).toContain('[--labels (repeatable)]'); + expect(result).not.toContain('--label '); // old bug + }); + }); + + describe('formatParam — array of object (items:"object") — JSON blob (spec 014)', () => { + it('renders required array-of-object as a single JSON flag, not repeatable', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + comments: { + type: 'array', + items: 'object', + required: false, + description: 'Inline review comments', + }, + }, + }), + ]); + expect(result).toContain("[--comments '']"); + // The "repeatable string" lie from the old renderer is gone + expect(result).not.toContain('repeatable'); + expect(result).not.toContain('--comment '); + }); + + it('renders aliases next to the canonical flag name', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + comments: { + type: 'array', + items: 'object', + required: false, + aliases: ['comment'], + }, + }, + }), + ]); + expect(result).toContain('--comments|--comment'); + }); + + it('renders a one-line example line beneath the flag when the manifest provides one', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + comments: { + type: 'array', + items: 'object', + required: false, + example: [{ path: 'src/x.ts', line: 1, body: 'nit' }], + }, + }, + }), + ]); + // Example is surfaced on its own indented comment line + expect(result).toContain('example:'); + expect(result).toContain( + `--comments '${JSON.stringify([{ path: 'src/x.ts', line: 1, body: 'nit' }])}'`, + ); + }); + }); + + describe('formatParam — object param — JSON blob (spec 014)', () => { + it('renders object param as a single JSON flag', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + config: { type: 'object', required: true, description: 'Config JSON' }, + }, + }), + ]); + expect(result).toContain("--config ''"); + expect(result).not.toContain(''); // shouldn't leak the bare type }); }); diff --git a/tests/unit/cli/cli-command-factory.test.ts b/tests/unit/cli/cli-command-factory.test.ts index 830a98c0..9db02c7b 100644 --- a/tests/unit/cli/cli-command-factory.test.ts +++ b/tests/unit/cli/cli-command-factory.test.ts @@ -319,7 +319,18 @@ describe('cliCommandFactory — file-input resolution', () => { const Cmd = createCLICommand(def, coreFn); const cmd = new Cmd([], makeMockConfig() as never); - await expect(cmd.run()).rejects.toThrow('Either --text or --text-file is required'); + // Spec 014: missing-required routes through the structured envelope + // (type:'missing-required'), exits via `this.exit(1)` which throws + // 'EEXIT: 1'. The envelope includes the prose and a hint on stderr. + const logSpy = vi.spyOn(cmd, 'log'); + await expect(cmd.run()).rejects.toThrow(); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('missing-required'); + expect(output.error.flag).toBe('text'); }); it('handles files with special characters (quotes, backticks, $)', async () => { @@ -406,7 +417,7 @@ describe('cliCommandFactory — JSON output format', () => { expect(output).toEqual({ success: true, data: { id: 'result-1' } }); }); - it('outputs { success: false, error: message } on error', async () => { + it('outputs { success: false, error: { type: "runtime", message } } on error (spec 014 envelope)', async () => { const coreFn = vi.fn().mockRejectedValue(new Error('Something went wrong')); const def = makeToolDef({ parameters: { @@ -417,7 +428,6 @@ describe('cliCommandFactory — JSON output format', () => { const cmd = new Cmd(['--name', 'test'], makeMockConfig() as never); const logSpy = vi.spyOn(cmd, 'log'); - // Should not throw (error is caught internally) but may call this.exit(1) try { await cmd.run(); } catch { @@ -425,11 +435,16 @@ describe('cliCommandFactory — JSON output format', () => { } expect(logSpy).toHaveBeenCalledTimes(1); - const output = JSON.parse(logSpy.mock.calls[0][0] as string); - expect(output).toEqual({ success: false, error: 'Something went wrong' }); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; message: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('runtime'); + expect(output.error.message).toBe('Something went wrong'); }); - it('handles non-Error throws and outputs error string', async () => { + it('handles non-Error throws and carries the coerced string message (spec 014 envelope)', async () => { const coreFn = vi.fn().mockRejectedValue('string error'); const def = makeToolDef({ parameters: { @@ -446,9 +461,13 @@ describe('cliCommandFactory — JSON output format', () => { // this.exit(1) may throw } - const output = JSON.parse(logSpy.mock.calls[0][0] as string); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; message: string }; + }; expect(output.success).toBe(false); - expect(output.error).toBe('string error'); + expect(output.error.type).toBe('runtime'); + expect(output.error.message).toBe('string error'); }); it('includes the description from the ToolDefinition', () => { diff --git a/tests/unit/cli/file-input-flags.test.ts b/tests/unit/cli/file-input-flags.test.ts index 70d40870..d52acda3 100644 --- a/tests/unit/cli/file-input-flags.test.ts +++ b/tests/unit/cli/file-input-flags.test.ts @@ -202,9 +202,17 @@ describe('PostComment --text-file', () => { expect(postComment).toHaveBeenCalledWith('card-1', 'inline text'); }); - it('errors when neither --text nor --text-file is provided', async () => { + it('errors when neither --text nor --text-file is provided (spec 014 envelope)', async () => { const cmd = new PostComment(['--workItemId', 'card-1'], mockConfig as never); - await expect(cmd.run()).rejects.toThrow('Either --text or --text-file is required'); + const logSpy = vi.spyOn(cmd, 'log'); + await expect(cmd.run()).rejects.toThrow(); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('missing-required'); + expect(output.error.flag).toBe('text'); }); }); @@ -274,9 +282,17 @@ describe('CreatePR --body-file', () => { ); }); - it('errors when neither --body nor --body-file is provided', async () => { + it('errors when neither --body nor --body-file is provided (spec 014 envelope)', async () => { const cmd = new CreatePR(['--title', 'feat: x', '--head', 'feat/branch'], mockConfig as never); - await expect(cmd.run()).rejects.toThrow('Either --body or --body-file is required'); + const logSpy = vi.spyOn(cmd, 'log'); + await expect(cmd.run()).rejects.toThrow(); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('missing-required'); + expect(output.error.flag).toBe('body'); }); }); @@ -327,8 +343,16 @@ describe('PostPRComment --body-file', () => { expect(postPRComment).toHaveBeenCalledWith('owner', 'repo', 42, 'inline body'); }); - it('errors when neither --body nor --body-file is provided', async () => { + it('errors when neither --body nor --body-file is provided (spec 014 envelope)', async () => { const cmd = new PostPRComment(['--prNumber', '42'], mockConfig as never); - await expect(cmd.run()).rejects.toThrow('Either --body or --body-file is required'); + const logSpy = vi.spyOn(cmd, 'log'); + await expect(cmd.run()).rejects.toThrow(); + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('missing-required'); + expect(output.error.flag).toBe('body'); }); }); diff --git a/tests/unit/cli/scm/create-pr-review-sidecar.test.ts b/tests/unit/cli/scm/create-pr-review-sidecar.test.ts index e433017b..c4a3c32a 100644 --- a/tests/unit/cli/scm/create-pr-review-sidecar.test.ts +++ b/tests/unit/cli/scm/create-pr-review-sidecar.test.ts @@ -126,9 +126,16 @@ describe('CreatePRReviewCommand sidecar write', () => { } as never); await cmd.execute(); - expect(vi.mocked(cmd.log)).toHaveBeenCalledWith( - JSON.stringify({ success: false, error: 'GitHub API error' }), - ); + // Spec 014: runtime failures emit the structured envelope. + const logged = vi.mocked(cmd.log).mock.calls.map((c) => c[0] as string); + const jsonLine = logged.find((l) => l.startsWith('{')) ?? ''; + const output = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; message: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('runtime'); + expect(output.error.message).toBe('GitHub API error'); expect(vi.mocked(cmd.exit)).toHaveBeenCalledWith(1); expect(existsSync(sidecarPath)).toBe(false); }); diff --git a/tests/unit/cli/scm/create-pr-sidecar.test.ts b/tests/unit/cli/scm/create-pr-sidecar.test.ts index 84ee1494..b582a09d 100644 --- a/tests/unit/cli/scm/create-pr-sidecar.test.ts +++ b/tests/unit/cli/scm/create-pr-sidecar.test.ts @@ -145,9 +145,16 @@ describe('CreatePRCommand sidecar write', () => { } as never); await cmd.execute(); - expect(vi.mocked(cmd.log)).toHaveBeenCalledWith( - JSON.stringify({ success: false, error: 'GitHub API error' }), - ); + // Spec 014: runtime failures emit the structured envelope. + const logged = vi.mocked(cmd.log).mock.calls.map((c) => c[0] as string); + const jsonLine = logged.find((l) => l.startsWith('{')) ?? ''; + const output = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; message: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('runtime'); + expect(output.error.message).toBe('GitHub API error'); expect(vi.mocked(cmd.exit)).toHaveBeenCalledWith(1); expect(existsSync(sidecarPath)).toBe(false); }); diff --git a/tests/unit/gadgets/github/definitions.test.ts b/tests/unit/gadgets/github/definitions.test.ts index 33a89db1..e9ba5f00 100644 --- a/tests/unit/gadgets/github/definitions.test.ts +++ b/tests/unit/gadgets/github/definitions.test.ts @@ -268,3 +268,25 @@ describe('GitHub SCM gadget definitions', () => { }); }); }); + +// --------------------------------------------------------------------------- +// Spec 014 plan 2: createPRReviewDef declarative opt-in +// --------------------------------------------------------------------------- + +describe('createPRReviewDef — spec 014 opt-in', () => { + it('comments parameter declares cliAliases: ["comment"]', () => { + const comments = createPRReviewDef.parameters.comments as { + cliAliases?: readonly string[]; + }; + expect(comments.cliAliases).toEqual(['comment']); + }); + + it('cli.fileInputAlternatives includes a comments-file entry with parseAs:"json"', () => { + const alts = createPRReviewDef.cli?.fileInputAlternatives ?? []; + const commentsFile = alts.find((a) => a.paramName === 'comments'); + expect(commentsFile).toBeDefined(); + expect(commentsFile?.fileFlag).toBe('comments-file'); + expect(commentsFile?.parseAs).toBe('json'); + expect(commentsFile?.description).toBeTruthy(); + }); +}); diff --git a/tests/unit/gadgets/shared/errorEnvelope.test.ts b/tests/unit/gadgets/shared/errorEnvelope.test.ts new file mode 100644 index 00000000..138f19dc --- /dev/null +++ b/tests/unit/gadgets/shared/errorEnvelope.test.ts @@ -0,0 +1,191 @@ +/** + * Tests for the shared cascade-tools error envelope helper (spec 014). + * + * The envelope is the single channel every cascade-tools failure emits on: + * - Structured JSON on stdout: `{"success":false,"error":{...}}` + * - One-line prose summary on stderr (for humans running the CLI directly) + * - Exit code 1 + * + * Consumed by cliCommandFactory for flag-parse, JSON-parse, missing-required, + * enum-mismatch, unknown-flag, auth, and runtime failure paths. + */ + +import { Writable } from 'node:stream'; +import { describe, expect, it, vi } from 'vitest'; + +import { + type CliErrorEnvelope, + emitCliError, + truncateGot, +} from '../../../../src/gadgets/shared/errorEnvelope.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeWritable(): { stream: Writable; written: () => string } { + const chunks: string[] = []; + const stream = new Writable({ + write(chunk, _enc, cb) { + chunks.push(chunk.toString()); + cb(); + }, + }); + return { stream, written: () => chunks.join('') }; +} + +function parseFirstJsonLine(blob: string): { success: boolean; error: CliErrorEnvelope } { + const line = blob.split('\n').find((l) => l.trim().startsWith('{')) ?? ''; + return JSON.parse(line) as { success: boolean; error: CliErrorEnvelope }; +} + +const ESC = String.fromCharCode(0x1b); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('emitCliError', () => { + it('writes a stable JSON envelope to stdout (success:false + error fields)', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + expect(() => + emitCliError({ + type: 'json-parse', + flag: 'comments', + message: 'invalid JSON payload', + got: "[{'path':'a'}]", + expected: '[{"path":"","line":,"body":""}]', + hint: 'use --comments-file for long payloads', + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + const parsed = parseFirstJsonLine(stdout.written()); + expect(parsed.success).toBe(false); + expect(parsed.error.type).toBe('json-parse'); + expect(parsed.error.flag).toBe('comments'); + expect(parsed.error.message).toBe('invalid JSON payload'); + expect(parsed.error.got).toBe("[{'path':'a'}]"); + expect(parsed.error.expected).toBe('[{"path":"","line":,"body":""}]'); + expect(parsed.error.hint).toBe('use --comments-file for long payloads'); + }); + + it('mirrors a prose summary to stderr (≤120 chars, no ANSI, mentions flag + type)', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + expect(() => + emitCliError({ + type: 'json-parse', + flag: 'comments', + message: 'invalid JSON payload', + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + const err = stderr.written().trim(); + expect(err.length).toBeGreaterThan(0); + expect(err.length).toBeLessThanOrEqual(120); + // No ANSI escape sequences (ESC = 0x1b) + expect(err.indexOf(ESC)).toBe(-1); + // References the flag and the type + expect(err).toContain('--comments'); + expect(err).toContain('json-parse'); + }); + + it('truncates `got` to ~80 chars with an ellipsis when longer', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + const longInput = 'x'.repeat(500); + + expect(() => + emitCliError({ + type: 'json-parse', + flag: 'comments', + message: 'invalid JSON', + got: longInput, + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + const parsed = parseFirstJsonLine(stdout.written()); + expect(parsed.error.got?.length).toBeLessThanOrEqual(83); + expect(parsed.error.got?.endsWith('...')).toBe(true); + }); + + it('omits optional fields (got, expected, hint, example) from the envelope when not provided', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + expect(() => + emitCliError({ + type: 'missing-required', + flag: 'body', + message: '--body is required', + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + const parsed = parseFirstJsonLine(stdout.written()); + expect(parsed.error).not.toHaveProperty('got'); + expect(parsed.error).not.toHaveProperty('expected'); + expect(parsed.error).not.toHaveProperty('hint'); + expect(parsed.error).not.toHaveProperty('example'); + }); + + it('calls exit with code 1', () => { + const stdout = makeWritable(); + const stderr = makeWritable(); + const exit = vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }); + + expect(() => + emitCliError({ + type: 'runtime', + message: 'boom', + stdout: stdout.stream, + stderr: stderr.stream, + exit, + }), + ).toThrow('exit'); + + expect(exit).toHaveBeenCalledOnce(); + expect(exit).toHaveBeenCalledWith(1); + }); +}); + +describe('truncateGot', () => { + it('returns the input unchanged when short', () => { + expect(truncateGot('hello')).toBe('hello'); + }); + + it('truncates to max+ellipsis when long', () => { + const result = truncateGot('x'.repeat(200), 80); + expect(result.length).toBe(83); + expect(result.endsWith('...')).toBe(true); + }); +}); diff --git a/tests/unit/gadgets/shared/factories.test.ts b/tests/unit/gadgets/shared/factories.test.ts index 79ea8492..c1595181 100644 --- a/tests/unit/gadgets/shared/factories.test.ts +++ b/tests/unit/gadgets/shared/factories.test.ts @@ -561,25 +561,8 @@ describe('createCLICommand', () => { expect(capturedParams.text).toBe('Content from stdin'); }); - it('errors when file-input required param is missing', async () => { - const coreFn: CLICoreFn = async () => 'result'; - const CommandClass = createCLICommand(fileInputToolDef, coreFn); - const instance = new CommandClass([], {}); - - vi.spyOn(instance, 'parse').mockResolvedValue({ - flags: { workItemId: 'card123', text: undefined, 'text-file': undefined }, - args: {}, - argv: [], - raw: [], - } as unknown as Awaited>); - - const errorSpy = vi.spyOn(instance, 'error').mockImplementation((msg: string | Error) => { - throw new Error(typeof msg === 'string' ? msg : msg.message); - }); - - await expect(instance.execute()).rejects.toThrow('Either --text or --text-file is required'); - expect(errorSpy).toHaveBeenCalled(); - }); + // Legacy test removed — replaced by spec-014 envelope assertion in + // 'missing file-input required param routes through envelope (type:"missing-required")' below. it('outputs JSON result on success', async () => { const coreFn: CLICoreFn = async () => ({ id: '456', url: 'https://example.com' }); @@ -626,37 +609,215 @@ describe('createCLICommand', () => { expect(capturedParams.config).toEqual({ key: 'value', count: 5 }); }); - it('errors on invalid JSON for object type params', async () => { - const coreFn: CLICoreFn = async () => 'result'; - const CommandClass = createCLICommand(objectToolDef, coreFn); + // Legacy test removed — replaced by spec-014 envelope assertion in + // 'object param with malformed JSON routes through envelope (type:"json-parse")' below. + + it('skips gadgetOnly params in flag processing', async () => { + let capturedParams: Record = {}; + const coreFn: CLICoreFn = async (params) => { + capturedParams = params as Record; + return 'ok'; + }; + + const CommandClass = createCLICommand(simpleToolDef, coreFn); const instance = new CommandClass([], {}); vi.spyOn(instance, 'parse').mockResolvedValue({ - flags: { config: '{not-valid-json}' }, + flags: { name: 'Alice' }, args: {}, argv: [], raw: [], } as unknown as Awaited>); - vi.spyOn(instance, 'error').mockImplementation((msg: string | Error) => { - throw new Error(typeof msg === 'string' ? msg : msg.message); + vi.spyOn(instance, 'log').mockImplementation(() => {}); + + await instance.execute(); + + // 'comment' (gadgetOnly) should NOT be in capturedParams + expect(capturedParams).not.toHaveProperty('comment'); + expect(capturedParams.name).toBe('Alice'); + }); + + // Legacy test removed — replaced by spec-014 envelope assertion in + // 'coreFn runtime failure routes through the new envelope (type:"runtime")' below. + + it('passes array flags through correctly', async () => { + let capturedParams: Record = {}; + const coreFn: CLICoreFn = async (params) => { + capturedParams = params as Record; + return 'added'; + }; + + const CommandClass = createCLICommand(arrayToolDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { items: ['item1', 'item2', 'item3'] }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + vi.spyOn(instance, 'log').mockImplementation(() => {}); + + await instance.execute(); + + expect(capturedParams.items).toEqual(['item1', 'item2', 'item3']); + }); +}); + +// --------------------------------------------------------------------------- +// Spec 014: cliCommandFactory widened behavior (aliases, JSON-parse for +// array-of-object, help examples, structured error envelope). +// --------------------------------------------------------------------------- + +describe('createCLICommand — spec 014 additions', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + // Test def: single array-of-object param with an alias and a file alternative. + const reviewPRDef: ToolDefinition = { + name: 'TestReviewPR', + description: 'Submit a PR review (test fixture).', + parameters: { + body: { type: 'string', describe: 'Review body', required: true }, + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + cliAliases: ['comment'], + optional: true, + }, + }, + examples: [ + { + params: { + body: 'LGTM', + comments: [{ path: 'src/x.ts', line: 1, body: 'nit' }], + }, + comment: 'Request changes with inline', + }, + ], + cli: { + fileInputAlternatives: [ + { + paramName: 'comments', + fileFlag: 'comments-file', + parseAs: 'json', + description: 'Read --comments JSON from file (use - for stdin).', + }, + ], + }, + }; + + it('applies cliAliases onto the generated oclif flag', () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + + const commentsFlag = CommandClass.flags.comments as { aliases?: string[] }; + expect(commentsFlag.aliases).toEqual(['comment']); + }); + + it('wires def.examples onto FactoryCommand.examples for oclif --help', () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + + const examples = CommandClass.examples as string[] | undefined; + expect(examples).toBeDefined(); + expect(examples?.length ?? 0).toBeGreaterThan(0); + // The JSON shape from the example should appear — serialized double-quoted. + expect(examples?.[0]).toContain('"path":"src/x.ts"'); + }); + + it('array + items:"object" flag value parses as JSON (single string, not repeatable)', async () => { + let capturedParams: Record = {}; + const coreFn: CLICoreFn = async (params) => { + capturedParams = params as Record; + return 'ok'; + }; + + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { + body: 'LGTM', + comments: '[{"path":"a","line":1,"body":"b"}]', + }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + vi.spyOn(instance, 'log').mockImplementation(() => {}); + + await instance.execute(); + + expect(capturedParams.comments).toEqual([{ path: 'a', line: 1, body: 'b' }]); + }); + + it('array + items:"object" with malformed JSON emits json-parse envelope on stdout', async () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { + body: 'LGTM', + comments: "[{'path':'a','line':1,'body':'b'}]", // single-quoted keys — bug from prod run 5d993b04 + }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); }); - await expect(instance.execute()).rejects.toThrow('--config must be valid JSON'); + await expect(instance.execute()).rejects.toThrow('exit'); + + // The new envelope must surface on this.log (stdout) + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { + type: string; + flag?: string; + got?: string; + expected?: string; + hint?: string; + }; + }; + expect(parsed.success).toBe(false); + expect(parsed.error.type).toBe('json-parse'); + expect(parsed.error.flag).toBe('comments'); + expect(parsed.error.got).toContain("[{'path'"); + expect(parsed.error.expected).toContain('"path"'); + expect(parsed.error.hint).toContain('--comments-file'); }); - it('skips gadgetOnly params in flag processing', async () => { + it('file-input with parseAs:"json" JSON-parses the file contents', async () => { + mockReadFileSync.mockReturnValue('[{"path":"a","line":1,"body":"b"}]'); + let capturedParams: Record = {}; const coreFn: CLICoreFn = async (params) => { capturedParams = params as Record; return 'ok'; }; - const CommandClass = createCLICommand(simpleToolDef, coreFn); + const CommandClass = createCLICommand(reviewPRDef, coreFn); const instance = new CommandClass([], {}); vi.spyOn(instance, 'parse').mockResolvedValue({ - flags: { name: 'Alice' }, + flags: { + body: 'LGTM', + 'comments-file': '/tmp/comments.json', + comments: undefined, + }, args: {}, argv: [], raw: [], @@ -666,12 +827,45 @@ describe('createCLICommand', () => { await instance.execute(); - // 'comment' (gadgetOnly) should NOT be in capturedParams - expect(capturedParams).not.toHaveProperty('comment'); - expect(capturedParams.name).toBe('Alice'); + expect(capturedParams.comments).toEqual([{ path: 'a', line: 1, body: 'b' }]); + }); + + it('file-input with parseAs:"json" malformed contents emits json-parse envelope', async () => { + mockReadFileSync.mockReturnValue("[{'path':'a'}]"); + + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { + body: 'LGTM', + 'comments-file': '/tmp/comments.json', + comments: undefined, + }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(parsed.error.type).toBe('json-parse'); + expect(parsed.error.flag).toBe('comments'); }); - it('logs { success: false, error } and exits 1 when coreFn throws', async () => { + it('coreFn runtime failure routes through the new envelope (type:"runtime")', async () => { const failingFn: CLICoreFn = async () => { throw new Error('getaddrinfo EAI_AGAIN api.trello.com'); }; @@ -684,39 +878,190 @@ describe('createCLICommand', () => { raw: [], } as unknown as Awaited>); const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); - const exitSpy = vi.spyOn(instance, 'exit').mockImplementation(() => { + vi.spyOn(instance, 'exit').mockImplementation(() => { throw new Error('exit'); }); await expect(instance.execute()).rejects.toThrow('exit'); - expect(logSpy).toHaveBeenCalledWith( - JSON.stringify({ success: false, error: 'getaddrinfo EAI_AGAIN api.trello.com' }), - ); - expect(exitSpy).toHaveBeenCalledWith(1); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; message: string }; + }; + expect(parsed.success).toBe(false); + expect(parsed.error.type).toBe('runtime'); + expect(parsed.error.message).toBe('getaddrinfo EAI_AGAIN api.trello.com'); }); - it('passes array flags through correctly', async () => { - let capturedParams: Record = {}; - const coreFn: CLICoreFn = async (params) => { - capturedParams = params as Record; - return 'added'; + it('missing file-input required param routes through envelope (type:"missing-required")', async () => { + const coreFn: CLICoreFn = async () => 'result'; + const CommandClass = createCLICommand(fileInputToolDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { workItemId: 'card123', text: undefined, 'text-file': undefined }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; flag?: string }; }; + expect(parsed.error.type).toBe('missing-required'); + expect(parsed.error.flag).toBe('text'); + }); + it('primitive-array (items:"string") still uses multiple:true (regression guard)', () => { + // arrayToolDef has items:'string' → repeatable flag + const coreFn: CLICoreFn = async () => 'ok'; const CommandClass = createCLICommand(arrayToolDef, coreFn); + const itemsFlag = CommandClass.flags.items as { multiple?: boolean }; + expect(itemsFlag.multiple).toBe(true); + }); + + it('unknown flag close to a declared one suggests correction via Levenshtein', async () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + // Simulate oclif's NonExistentFlagsError shape (constructor name + flags array) + class NonExistentFlagsError extends Error { + public flags: string[]; + constructor(flags: string[]) { + super(`Nonexistent flag: ${flags.join(', ')}`); + this.name = 'CLIParseError'; + this.flags = flags; + } + } + + vi.spyOn(instance, 'parse').mockImplementation(async () => { + throw new NonExistentFlagsError(['comnent']); + }); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + error: { type: string; flag?: string; hint?: string }; + }; + expect(parsed.error.type).toBe('unknown-flag'); + expect(parsed.error.flag).toBe('comnent'); + expect(parsed.error.hint).toContain('did you mean'); + expect(parsed.error.hint).toContain('--comments'); + }); + + it('unknown flag far from any declared one emits envelope without suggestion', async () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + class NonExistentFlagsError extends Error { + public flags: string[]; + constructor(flags: string[]) { + super(`Nonexistent flag: ${flags.join(', ')}`); + this.name = 'CLIParseError'; + this.flags = flags; + } + } + + vi.spyOn(instance, 'parse').mockImplementation(async () => { + throw new NonExistentFlagsError(['zzzzzzzz']); + }); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + error: { type: string; hint?: string }; + }; + expect(parsed.error.type).toBe('unknown-flag'); + // No "did you mean" when Levenshtein distance exceeds threshold + expect(parsed.error.hint ?? '').not.toContain('did you mean'); + }); + + it('fuzzy suggestion considers declared aliases but always returns the canonical name', async () => { + const coreFn: CLICoreFn = async () => 'ok'; + const CommandClass = createCLICommand(reviewPRDef, coreFn); + const instance = new CommandClass([], {}); + + class NonExistentFlagsError extends Error { + public flags: string[]; + constructor(flags: string[]) { + super(`Nonexistent flag: ${flags.join(', ')}`); + this.name = 'CLIParseError'; + this.flags = flags; + } + } + + // 'coment' is closer to the alias 'comment' than to the canonical 'comments'. + // The suggestion must still surface the canonical spelling. + vi.spyOn(instance, 'parse').mockImplementation(async () => { + throw new NonExistentFlagsError(['coment']); + }); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { error: { hint?: string } }; + expect(parsed.error.hint).toContain('--comments'); + expect(parsed.error.hint).not.toContain('--comment?'); + }); + + it('object param with malformed JSON routes through envelope (type:"json-parse")', async () => { + // This replaces the old 'errors on invalid JSON for object type params' assertion + // which checked the legacy `--config must be valid JSON` prose error. + const coreFn: CLICoreFn = async () => 'result'; + const CommandClass = createCLICommand(objectToolDef, coreFn); const instance = new CommandClass([], {}); vi.spyOn(instance, 'parse').mockResolvedValue({ - flags: { items: ['item1', 'item2', 'item3'] }, + flags: { config: '{not-valid-json}' }, args: {}, argv: [], raw: [], } as unknown as Awaited>); - vi.spyOn(instance, 'log').mockImplementation(() => {}); + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); - await instance.execute(); + await expect(instance.execute()).rejects.toThrow('exit'); - expect(capturedParams.items).toEqual(['item1', 'item2', 'item3']); + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; flag?: string }; + }; + expect(parsed.error.type).toBe('json-parse'); + expect(parsed.error.flag).toBe('config'); }); }); @@ -933,3 +1278,132 @@ describe('round-trip consistency', () => { expect(manifest.name).toBe('TestTool'); }); }); + +// --------------------------------------------------------------------------- +// Spec 014: manifest threads items / cliAliases / one example through +// --------------------------------------------------------------------------- + +describe('generateToolManifest — widened fields (spec 014)', () => { + it('threads items from array-of-object parameter into manifest entry', () => { + const def: ToolDefinition = { + name: 'ReviewPR', + description: 'Review a PR.', + parameters: { + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + optional: true, + }, + }, + }; + + const manifest = generateToolManifest(def); + const commentsParam = manifest.parameters.comments as { type: string; items?: string }; + expect(commentsParam.items).toBe('object'); + }); + + it('threads items from primitive-array parameter into manifest entry (regression guard)', () => { + const def: ToolDefinition = { + name: 'AddLabels', + description: 'Add labels.', + parameters: { + labels: { + type: 'array', + items: 'string', + describe: 'Labels to add', + required: true, + }, + }, + }; + + const manifest = generateToolManifest(def); + const labelsParam = manifest.parameters.labels as { type: string; items?: string }; + expect(labelsParam.items).toBe('string'); + }); + + it('threads cliAliases into manifest as aliases', () => { + const def: ToolDefinition = { + name: 'ReviewPR', + description: 'Review a PR.', + parameters: { + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + cliAliases: ['comment'], + optional: true, + }, + }, + }; + + const manifest = generateToolManifest(def); + const commentsParam = manifest.parameters.comments as { aliases?: readonly string[] }; + expect(commentsParam.aliases).toEqual(['comment']); + }); + + it('attaches first matching example params value to the manifest entry as example', () => { + const def: ToolDefinition = { + name: 'ReviewPR', + description: 'Review a PR.', + parameters: { + body: { type: 'string', describe: 'Body', required: true }, + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + optional: true, + }, + }, + examples: [ + { + // First example has no comments → should be skipped when picking for the comments entry + params: { body: 'lgtm' }, + comment: 'Approve with summary only', + }, + { + params: { + body: 'needs work', + comments: [{ path: 'src/x.ts', line: 10, body: 'nit' }], + }, + comment: 'Request changes with inline', + }, + ], + }; + + const manifest = generateToolManifest(def); + const commentsParam = manifest.parameters.comments as { example?: unknown }; + expect(commentsParam.example).toEqual([{ path: 'src/x.ts', line: 10, body: 'nit' }]); + + const bodyParam = manifest.parameters.body as { example?: unknown }; + // First example has body='lgtm' → that's the first match + expect(bodyParam.example).toBe('lgtm'); + }); + + it('omits example field when no examples populate the param', () => { + const def: ToolDefinition = { + name: 'ReviewPR', + description: 'Review a PR.', + parameters: { + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + optional: true, + }, + }, + examples: [ + { + params: { + /* no comments here */ + }, + comment: 'no comments example', + }, + ], + }; + + const manifest = generateToolManifest(def); + const commentsParam = manifest.parameters.comments as { example?: unknown }; + expect(commentsParam).not.toHaveProperty('example'); + }); +});