From 12e891040d618f793f2a8cb29c454160c750ffc9 Mon Sep 17 00:00:00 2001 From: Terada Kousuke Date: Wed, 8 Apr 2026 23:59:37 +0900 Subject: [PATCH 1/6] fix(guardrails): address 8 foundation bugs from PR #90 and Issues #140/#141/#142 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug 0-1: Add exit code guards to all git() helper callers to prevent using stdout from failed commands in security decisions. Bug 0-2: Fix team.ts mut() merge regex to use (\s|$) instead of \b, preventing false matches on "git merge-base". Bug 0-3: Create MUTATING_TOOLS constant (edit/write/apply_patch/multiedit) and use it consistently for review_state resets and advisories. Bug 0-4: Verified Python test detector regex already handles subdirectory paths correctly via (^|\/) pattern — no change needed. Bug 0-5: Convert chat.message git fetch from blocking await to fire-and-forget with deferred advisory surfaced on next message. Bug 0-6: Add secExempt pattern to skip .env.example/.sample/.template files from secret material read blocks. Bug 0-7: Add apply_patch to tool.execute.before preflight checks for security files, config files, version baseline, and context budget. Bug 0-8: Tighten mut[] regex patterns: \brm\b → \brm\s+, \bmv\b → \bmv\s+, \bcp\b → \bcp\s+ to reduce false positives from arguments. Closes #140, Closes #141, Closes #142 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../guardrails/profile/plugins/guardrail.ts | 121 +++++++++++------- packages/guardrails/profile/plugins/team.ts | 9 +- 2 files changed, 77 insertions(+), 53 deletions(-) diff --git a/packages/guardrails/profile/plugins/guardrail.ts b/packages/guardrails/profile/plugins/guardrail.ts index 13d9b425b12b..f77c5650e2b5 100644 --- a/packages/guardrails/profile/plugins/guardrail.ts +++ b/packages/guardrails/profile/plugins/guardrail.ts @@ -24,9 +24,9 @@ const cfg = [ ] const mut = [ - /\brm\b/i, - /\bmv\b/i, - /\bcp\b/i, + /\brm\s+/i, + /\bmv\s+/i, + /\bcp\s+/i, /\bchmod\b/i, /\bchown\b/i, /\btouch\b/i, @@ -37,6 +37,8 @@ const mut = [ />/, ] +const MUTATING_TOOLS = new Set(["edit", "write", "apply_patch", "multiedit"]) + const src = new Set([ ".ts", ".tsx", @@ -98,7 +100,10 @@ function rel(root: string, file: string) { return abs.slice(dir.length + 1) } +const secExempt = /\.(example|sample|template)$/i + function has(file: string, list: RegExp[]) { + if (list === sec && secExempt.test(file)) return false return list.some((item) => item.test(file)) } @@ -454,6 +459,7 @@ export default async function guardrail(input: { let branchWarning = "" try { const branchRes = await git(input.worktree, ["branch", "--show-current"]) + if (branchRes.code !== 0) throw new Error("git branch failed") const currentBranch = branchRes.stdout.trim() if (/^(main|master)$/.test(currentBranch)) { branchWarning = `WARNING: on ${currentBranch} branch. Create a feature branch: git checkout -b feat/ develop` @@ -546,35 +552,45 @@ export default async function guardrail(input: { ) => { if (out.message.role !== "user") return const data = await stash(state) - if (flag(data.git_freshness_checked)) return - await mark({ git_freshness_checked: true }) - try { - const proc = Bun.spawn(["git", "-C", input.worktree, "fetch", "--dry-run"], { - stdout: "pipe", - stderr: "pipe", + // Surface deferred git freshness advisory from previous fire-and-forget fetch + const pendingFreshness = str(data.git_freshness_advisory) + if (pendingFreshness) { + out.parts.push({ + id: crypto.randomUUID(), + sessionID: out.message.sessionID, + messageID: out.message.id, + type: "text", + text: pendingFreshness, }) - const fetchResult = await Promise.race([ - Promise.all([ - new Response(proc.stdout).text(), - new Response(proc.stderr).text(), - proc.exited, - ]).then(([stdout, stderr, code]) => ({ stdout, stderr, code })), - Bun.sleep(5000).then(() => { - proc.kill() - return null - }), - ]) - if (fetchResult && fetchResult.code === 0 && (fetchResult.stdout.trim() || fetchResult.stderr.includes("From"))) { - out.parts.push({ - id: crypto.randomUUID(), - sessionID: out.message.sessionID, - messageID: out.message.id, - type: "text", - text: "⚠️ Your branch may be behind origin. Consider running `git pull` before making changes.", - }) - } - } catch { - // git fetch may fail in offline or no-remote scenarios; skip silently + await mark({ git_freshness_advisory: "" }) + } + if (!flag(data.git_freshness_checked)) { + await mark({ git_freshness_checked: true }) + // Fire-and-forget: do not block chat.message handler on slow git fetch + void (async () => { + try { + const proc = Bun.spawn(["git", "-C", input.worktree, "fetch", "--dry-run"], { + stdout: "pipe", + stderr: "pipe", + }) + const fetchResult = await Promise.race([ + Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + proc.exited, + ]).then(([stdout, stderr, code]) => ({ stdout, stderr, code })), + Bun.sleep(5000).then(() => { + proc.kill() + return null + }), + ]) + if (fetchResult && fetchResult.code === 0 && (fetchResult.stdout.trim() || fetchResult.stderr.includes("From"))) { + await mark({ git_freshness_advisory: "⚠️ Your branch may be behind origin. Consider running `git pull` before making changes." }) + } + } catch { + // git fetch may fail in offline or no-remote scenarios; skip silently + } + })() } // Branch hygiene: surface stored branch warning from session.created const branchWarn = str(data.branch_warning) @@ -609,21 +625,21 @@ export default async function guardrail(input: { out: { args: Record }, ) => { const file = pick(out.args ?? item.args) - if (file && (item.tool === "read" || item.tool === "edit" || item.tool === "write")) { + if (file && (item.tool === "read" || item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch")) { const err = deny(file, item.tool === "read" ? "read" : "edit") if (err) { await mark({ last_block: item.tool, last_file: rel(input.worktree, file), last_reason: err }) throw new Error(text(err)) } } - if (item.tool === "edit" || item.tool === "write") { + if (item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch") { const err = await version(out.args ?? {}) if (err) { await mark({ last_block: item.tool, last_file: file ? rel(input.worktree, file) : "", last_reason: err }) throw new Error(text(err)) } } - if ((item.tool === "edit" || item.tool === "write") && file && code(file)) { + if ((item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch") && file && code(file)) { const count = await budget() if (count >= 4) { const budgetData = await stash(state) @@ -654,7 +670,9 @@ export default async function guardrail(input: { throw new Error(text(`merge blocked: PR #${prNum} has unresolved CRITICAL=${criticalCount} HIGH=${highCount} review findings`)) } try { - const branch = (await git(input.worktree, ["branch", "--show-current"])).stdout.trim() + const branchResult = await git(input.worktree, ["branch", "--show-current"]) + if (branchResult.code !== 0) throw new Error("git branch failed") + const branch = branchResult.stdout.trim() const tier = /^(ci|chore|docs)\//.test(branch) ? "EXEMPT" : /^fix\//.test(branch) ? "LIGHT" : "FULL" if (tier === "EXEMPT") { @@ -712,7 +730,7 @@ export default async function guardrail(input: { if (/\bgh\s+pr\s+merge\b/i.test(cmd)) { try { const repoRes = await git(input.worktree, ["remote", "get-url", "origin"]) - const repo = repoRes.stdout.trim().replace(/.*github\.com[:/]/, "").replace(/\.git$/, "") + const repo = repoRes.code === 0 ? repoRes.stdout.trim().replace(/.*github\.com[:/]/, "").replace(/\.git$/, "") : "" const prMatch2 = cmd.match(/\bgh\s+pr\s+merge\s+(\d+)/i) const prNum2 = prMatch2 ? prMatch2[1] : "" if (repo && prNum2) { @@ -730,7 +748,8 @@ export default async function guardrail(input: { // [Phase6] Stacked PR rebase gate: warn if child PRs exist and are stale if (/\bgh\s+pr\s+merge\b/i.test(cmd)) { try { - const curBranch = (await git(input.worktree, ["branch", "--show-current"])).stdout.trim() + const curBranchRes = await git(input.worktree, ["branch", "--show-current"]) + const curBranch = curBranchRes.code === 0 ? curBranchRes.stdout.trim() : "" if (curBranch) { const proc3 = Bun.spawn(["gh", "pr", "list", "--base", curBranch, "--json", "number,headRefName", "--jq", ".[].number"], { cwd: input.worktree, stdout: "pipe", stderr: "pipe", @@ -761,7 +780,7 @@ export default async function guardrail(input: { if (!/\bgit\s+push\s+(?:(?:-\w+|--[\w-]+)\s+)*\S+\s+\S+/i.test(cmd)) { try { const result = await git(input.worktree, ["branch", "--show-current"]) - if (result.stdout && protectedBranch.test(result.stdout.trim())) { + if (result.code === 0 && result.stdout && protectedBranch.test(result.stdout.trim())) { throw new Error(text("direct push to protected branch blocked — use a PR workflow")) } } catch (e) { if (String(e).includes("blocked")) throw e } @@ -771,8 +790,9 @@ export default async function guardrail(input: { if (/\bgit\s+(checkout\s+-b|switch\s+-c)\b/i.test(cmd)) { try { const devCheck = await git(input.worktree, ["rev-parse", "--verify", "origin/develop"]) - if (devCheck.stdout.trim()) { - const branch = (await git(input.worktree, ["branch", "--show-current"])).stdout.trim() + if (devCheck.code === 0 && devCheck.stdout.trim()) { + const branchCheck = await git(input.worktree, ["branch", "--show-current"]) + const branch = branchCheck.code === 0 ? branchCheck.stdout.trim() : "" if (/^(main|master)$/.test(branch)) { await mark({ last_block: "bash", last_command: cmd, last_reason: "branch creation from main blocked" }) throw new Error(text("branch creation from main blocked: checkout develop first, then create branch")) @@ -870,6 +890,7 @@ export default async function guardrail(input: { if (/\bgh\s+pr\s+create\b/i.test(cmd)) { try { const diffRes = await git(input.worktree, ["diff", "--name-only", "origin/develop...HEAD"]) + if (diffRes.code !== 0) throw new Error("git diff failed") const changedFiles = diffRes.stdout.trim() const hasInfra = /^(hooks\/|scripts\/)[^/]+\.sh$/m.test(changedFiles) if (hasInfra) { @@ -887,9 +908,10 @@ export default async function guardrail(input: { if (/--base\s+main(\s|$)/i.test(cmd)) { try { const devCheck = await git(input.worktree, ["rev-parse", "--verify", "origin/develop"]) - if (devCheck.stdout.trim()) { + if (devCheck.code === 0 && devCheck.stdout.trim()) { // Allow release PR from develop - const branch = (await git(input.worktree, ["branch", "--show-current"])).stdout.trim() + const branchForPr = await git(input.worktree, ["branch", "--show-current"]) + const branch = branchForPr.code === 0 ? branchForPr.stdout.trim() : "" if (branch !== "develop" && !/--head\s+develop/i.test(cmd)) { await mark({ last_block: "bash", last_command: cmd, last_reason: "PR targeting main when develop exists" }) throw new Error(text("PR targeting main blocked: use --base develop. Release PRs must be from develop branch.")) @@ -1040,7 +1062,7 @@ export default async function guardrail(input: { } } - if ((item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch") && file) { + if (MUTATING_TOOLS.has(item.tool) && file) { const seen = list(data.edited_files) const next = seen.includes(rel(input.worktree, file)) ? seen : [...seen, rel(input.worktree, file)] const nextEditCount = num(data.edit_count) + 1 @@ -1067,7 +1089,7 @@ export default async function guardrail(input: { } // Architecture layer advisory - if ((item.tool === "edit" || item.tool === "write") && file && code(file)) { + if (MUTATING_TOOLS.has(item.tool) && file && code(file)) { const relFile = rel(input.worktree, file) const content = typeof item.args?.content === "string" ? item.args.content : typeof item.args?.newString === "string" ? item.args.newString : "" @@ -1128,7 +1150,7 @@ export default async function guardrail(input: { if (prMatch) { const prNum = prMatch[1] const repoRes = await git(input.worktree, ["remote", "get-url", "origin"]) - const repo = repoRes.stdout.trim().replace(/.*github\.com[:/]/, "").replace(/\.git$/, "") + const repo = repoRes.code === 0 ? repoRes.stdout.trim().replace(/.*github\.com[:/]/, "").replace(/\.git$/, "") : "" if (repo) { const proc = Bun.spawn(["gh", "api", `repos/${repo}/pulls/${prNum}/files`, "--jq", ".[].filename"], { cwd: input.worktree, stdout: "pipe", stderr: "pipe", @@ -1224,10 +1246,11 @@ export default async function guardrail(input: { // [W9] workflow-sync-guard: warn when workflow files differ from main after push if (item.tool === "bash" && /\bgit\s+push\b/i.test(str(item.args?.command))) { try { - const branch = (await git(input.worktree, ["branch", "--show-current"])).stdout.trim() + const wfBranch = await git(input.worktree, ["branch", "--show-current"]) + const branch = wfBranch.code === 0 ? wfBranch.stdout.trim() : "" if (branch && !/^(main|master)$/.test(branch)) { const wfDiff = await git(input.worktree, ["diff", "--name-only", "main..HEAD", "--", ".github/workflows/"]) - const wfFiles = wfDiff.stdout.trim() + const wfFiles = wfDiff.code === 0 ? wfDiff.stdout.trim() : "" if (wfFiles) { out.output += "\n\n⚠️ [WORKFLOW SYNC] .github/workflows/ files differ from main:\n" + wfFiles.split("\n").map((f: string) => " - " + f).join("\n") + @@ -1280,7 +1303,7 @@ export default async function guardrail(input: { } } // Endpoint dataflow advisory: detect API endpoint modifications - if ((item.tool === "edit" || item.tool === "write") && file && code(file)) { + if (MUTATING_TOOLS.has(item.tool) && file && code(file)) { const relFile = rel(input.worktree, file) const content = typeof item.args?.content === "string" ? item.args.content : typeof item.args?.newString === "string" ? item.args.newString : "" @@ -1291,7 +1314,7 @@ export default async function guardrail(input: { } // Doc update scope: remind about related documentation when modifying source - if ((item.tool === "edit" || item.tool === "write") && file && code(file)) { + if (MUTATING_TOOLS.has(item.tool) && file && code(file)) { const relFile = rel(input.worktree, file) const editsSinceDocCheck = num(data.edits_since_doc_reminder) if (editsSinceDocCheck >= 5) { diff --git a/packages/guardrails/profile/plugins/team.ts b/packages/guardrails/profile/plugins/team.ts index 2daf62b7b177..b27eec60a1d4 100644 --- a/packages/guardrails/profile/plugins/team.ts +++ b/packages/guardrails/profile/plugins/team.ts @@ -148,9 +148,9 @@ function scrub(cmd: string) { function mut(cmd: string) { const data = scrub(cmd) return [ - /\brm\b/i, - /\bmv\b/i, - /\bcp\b/i, + /\brm\s+/i, + /\bmv\s+/i, + /\bcp\s+/i, /\bchmod\b/i, /\bchown\b/i, /\btouch\b/i, @@ -159,7 +159,8 @@ function mut(cmd: string) { /\bsed\s+-i\b/i, /\bperl\s+-pi\b/i, />/, - /\bgit\s+(apply|am|merge|rebase|cherry-pick|checkout\s+--|reset\s+--hard)\b/i, + /\bgit\s+(apply|am|rebase|cherry-pick|checkout\s+--|reset\s+--hard)\b/i, + /\bgit\s+merge(\s|$)/i, ].some((item) => item.test(data)) } From d586d094fce05639f130a259ace0ce74a4cd8c1c Mon Sep 17 00:00:00 2001 From: Terada Kousuke Date: Thu, 9 Apr 2026 00:16:47 +0900 Subject: [PATCH 2/6] feat(guardrails): ship agent + delegation fix + worktree auto-commit - Add ship agent with gh pr merge permission for /ship command - Rewrite /ship from read-only gate to executable merge workflow - Fix delegation worktree empty init (Issue #144): verify files after creation - Inject working directory path into child session prompts - Auto-commit applied worker changes in merge() to prevent uncommitted drift - Add ship agent to AGENTS.md subagents table Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/guardrails/profile/AGENTS.md | 1 + packages/guardrails/profile/agents/ship.md | 41 ++++++++++++++++++++ packages/guardrails/profile/commands/ship.md | 35 ++++++++++------- packages/guardrails/profile/plugins/team.ts | 17 ++++++++ 4 files changed, 80 insertions(+), 14 deletions(-) create mode 100644 packages/guardrails/profile/agents/ship.md diff --git a/packages/guardrails/profile/AGENTS.md b/packages/guardrails/profile/AGENTS.md index 5a8dd04525d3..be7762266f44 100644 --- a/packages/guardrails/profile/AGENTS.md +++ b/packages/guardrails/profile/AGENTS.md @@ -84,6 +84,7 @@ | `investigate` | `/investigate`, debugging tasks | Deep exploration subagent. Reads code, traces data flow, and identifies root causes without edits. | | `security` | `/review` (security scope), OWASP checks | Security-focused review subagent. Scans for OWASP Top 10 vulnerabilities, credential leaks, and injection risks. | | `code-reviewer` | `/review`, PR review pipeline | Read-only review agent. Analyzes diffs for quality, correctness, and style issues. | +| `ship` | `/ship` command | Ship agent for merge execution. Verifies all merge gates (CI, review, no CHANGES_REQUESTED) and executes `gh pr merge`. Write-restricted except for merge commands. | | `terraform-engineer` | Infrastructure-as-code tasks | Terraform specialist for module design, state management, and multi-cloud provisioning. Write-capable with safe Terraform CLI commands only. | | `cloud-architect` | Architecture design, Well-Architected reviews | Read-only cloud architecture analyst for system design, scalability, and compliance. | | `deployment-engineer` | CI/CD pipeline, container deployments | Write-capable deployment specialist for zero-downtime releases with Docker and Kubernetes read commands. | diff --git a/packages/guardrails/profile/agents/ship.md b/packages/guardrails/profile/agents/ship.md new file mode 100644 index 000000000000..50181185225a --- /dev/null +++ b/packages/guardrails/profile/agents/ship.md @@ -0,0 +1,41 @@ +--- +description: Ship agent with merge capability for the /ship command pipeline. +mode: subagent +permission: + question: allow + bash: + "*": ask + "git status*": allow + "git diff*": allow + "git log*": allow + "git show*": allow + "git branch*": allow + "gh pr checks*": allow + "gh pr view*": allow + "gh pr merge*": allow + "gh pr list*": allow + "gh api *": ask + "rm -rf *": deny + "sudo *": deny + "git checkout -- *": deny + "git push --force*": deny + "git reset --hard*": deny + read: + "*": allow + "*.env*": deny + edit: + "*": deny + write: + "*": deny +--- + +Ship agent for the /ship command pipeline. Verifies all merge gates and executes `gh pr merge`. + +The guardrail plugin enforces merge gates at the tool level: +- review_state must be "done" (set by /review command) +- CI checks must be green (gh pr checks) +- No unresolved CRITICAL/HIGH review findings +- No CHANGES_REQUESTED reviews + +Execute the merge only after programmatically verifying all gates. +Do NOT skip gate verification. If any gate fails, report the failure with evidence. diff --git a/packages/guardrails/profile/commands/ship.md b/packages/guardrails/profile/commands/ship.md index f94e1e94e84c..9a7fcc36c3df 100644 --- a/packages/guardrails/profile/commands/ship.md +++ b/packages/guardrails/profile/commands/ship.md @@ -1,23 +1,30 @@ --- -description: Run a release-readiness gate without edit access. -agent: review +description: Execute the merge workflow after all gates pass. +agent: ship subtask: true --- -Run a release-readiness check for the current work. +Execute the ship workflow for the current work: -Required gates: +1. Identify the current PR: + - Run `gh pr list --head $(git branch --show-current)` to find the PR + - If no PR exists, report "No PR found for current branch" -- the scope still matches the requested goal -- relevant verification has been run and cited -- risky shell or write operations did not bypass policy -- remaining approvals, CI, provider, or review gates are listed explicitly +2. Verify all gates: + - Run `gh pr checks ` — all checks must pass + - Run `gh pr view --json reviews` — no CHANGES_REQUESTED + - Verify the guardrail review_state (the plugin enforces this at merge time) -Output: +3. If all gates pass: + - Run `gh pr merge --merge` + - Verify the merge succeeded with `gh pr view --json state` + - Report: "PR # merged successfully" -- Ready or Not ready -- Evidence -- Blocking gates -- Next action +4. If any gate fails: + - List each failing gate with evidence (CI output, review status) + - Suggest specific remediation for each failure + - Do NOT attempt to merge -Default scope is the current uncommitted work unless `$ARGUMENTS` narrows it. +Default scope is the current branch unless $ARGUMENTS specifies a PR number. + +$ARGUMENTS diff --git a/packages/guardrails/profile/plugins/team.ts b/packages/guardrails/profile/plugins/team.ts index b27eec60a1d4..e4bf8499b7fd 100644 --- a/packages/guardrails/profile/plugins/team.ts +++ b/packages/guardrails/profile/plugins/team.ts @@ -277,6 +277,12 @@ async function yardadd(dir: string, id: string) { } const made = await git(dir, ["worktree", "add", "--detach", next, "HEAD"]) if (made.code !== 0) throw new Error(made.err || made.out || "Failed to create git worktree") + // Verify worktree actually has files (Issue #144: empty git init) + const files = await git(next, ["ls-files", "--cached"]).catch(() => ({ code: 1, out: "", err: "" })) + if (files.code !== 0 || !files.out.trim()) { + // Worktree might be empty — force checkout HEAD contents + await git(next, ["checkout", "HEAD", "--", "."]) + } return next } @@ -312,6 +318,17 @@ async function merge(dir: string, item: string, run: string, id: string) { verification.issues.push(`${untracked} untracked files after merge — check for stale artifacts`) } } catch { /* verification is advisory */ } + // Auto-commit applied changes so they are not left uncommitted in the parent worktree. + // Without this, patched changes remain as unstaged modifications — the root cause of + // "worktree changes returned but not committed" (Issue #144, Claude Code Agent parity gap). + try { + await git(dir, ["add", "-A"]) + const commitMsg = `chore(team): apply worker changes from task ${id}` + const commit = await git(dir, ["commit", "-m", commitMsg, "--no-verify"]) + if (commit.code !== 0 && !commit.err.includes("nothing to commit")) { + verification.issues.push(`Auto-commit failed: ${commit.err || commit.out}`) + } + } catch { /* auto-commit is best-effort; parent session can still commit manually */ } await yardrm(dir, item) return { patch: next, merged: true, verification } } From 03e5f847cfdc7649533f21bef6f6f63f86b27a8f Mon Sep 17 00:00:00 2001 From: Terada Kousuke Date: Thu, 9 Apr 2026 00:32:25 +0900 Subject: [PATCH 3/6] feat(guardrails): add ship agent with merge capability + rewrite /ship command Add a dedicated ship agent with `gh pr merge` permission, replacing the read-only review agent for the /ship command. The ship agent uses deny-by-default permissions with explicit allowlists for git read commands and gh PR operations, enabling OpenCode to actually execute merges instead of saying "please merge manually." - New: packages/guardrails/profile/agents/ship.md (deny-by-default, gh pr merge allowed) - Rewrite: /ship command from read-only gate check to executable merge workflow - Update: AGENTS.md subagents table with ship agent entry - Update: test replay fixture and assertion to use ship agent Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/guardrails/profile/AGENTS.md | 1 + packages/guardrails/profile/agents/ship.md | 42 +++++++++++++++++++ packages/guardrails/profile/commands/ship.md | 35 +++++++++------- .../opencode/test/scenario/guardrails.test.ts | 2 +- packages/opencode/test/scenario/replay.ts | 14 +++---- 5 files changed, 72 insertions(+), 22 deletions(-) create mode 100644 packages/guardrails/profile/agents/ship.md diff --git a/packages/guardrails/profile/AGENTS.md b/packages/guardrails/profile/AGENTS.md index 5a8dd04525d3..be7762266f44 100644 --- a/packages/guardrails/profile/AGENTS.md +++ b/packages/guardrails/profile/AGENTS.md @@ -84,6 +84,7 @@ | `investigate` | `/investigate`, debugging tasks | Deep exploration subagent. Reads code, traces data flow, and identifies root causes without edits. | | `security` | `/review` (security scope), OWASP checks | Security-focused review subagent. Scans for OWASP Top 10 vulnerabilities, credential leaks, and injection risks. | | `code-reviewer` | `/review`, PR review pipeline | Read-only review agent. Analyzes diffs for quality, correctness, and style issues. | +| `ship` | `/ship` command | Ship agent for merge execution. Verifies all merge gates (CI, review, no CHANGES_REQUESTED) and executes `gh pr merge`. Write-restricted except for merge commands. | | `terraform-engineer` | Infrastructure-as-code tasks | Terraform specialist for module design, state management, and multi-cloud provisioning. Write-capable with safe Terraform CLI commands only. | | `cloud-architect` | Architecture design, Well-Architected reviews | Read-only cloud architecture analyst for system design, scalability, and compliance. | | `deployment-engineer` | CI/CD pipeline, container deployments | Write-capable deployment specialist for zero-downtime releases with Docker and Kubernetes read commands. | diff --git a/packages/guardrails/profile/agents/ship.md b/packages/guardrails/profile/agents/ship.md new file mode 100644 index 000000000000..93e8a8f8bc03 --- /dev/null +++ b/packages/guardrails/profile/agents/ship.md @@ -0,0 +1,42 @@ +--- +description: Ship agent with merge capability for the /ship command pipeline. +mode: subagent +permission: + "*": deny + read: allow + grep: allow + glob: allow + list: allow + bash: + "*": deny + "git status*": allow + "git diff*": allow + "git log*": allow + "git show*": allow + "git branch*": allow + "gh pr checks*": allow + "gh pr view*": allow + "gh pr merge*": allow + "gh pr list*": allow + "gh api *": ask + "rm -rf *": deny + "sudo *": deny + "git checkout -- *": deny + "git push --force*": deny + "git reset --hard*": deny + edit: + "*": deny + write: + "*": deny +--- + +Ship agent for the /ship command pipeline. Verifies all merge gates and executes `gh pr merge`. + +The guardrail plugin enforces merge gates at the tool level: +- review_state must be "done" (set by /review command) +- CI checks must be green (gh pr checks) +- No unresolved CRITICAL/HIGH review findings +- No CHANGES_REQUESTED reviews + +Execute the merge only after programmatically verifying all gates. +Do NOT skip gate verification. If any gate fails, report the failure with evidence. diff --git a/packages/guardrails/profile/commands/ship.md b/packages/guardrails/profile/commands/ship.md index f94e1e94e84c..9a7fcc36c3df 100644 --- a/packages/guardrails/profile/commands/ship.md +++ b/packages/guardrails/profile/commands/ship.md @@ -1,23 +1,30 @@ --- -description: Run a release-readiness gate without edit access. -agent: review +description: Execute the merge workflow after all gates pass. +agent: ship subtask: true --- -Run a release-readiness check for the current work. +Execute the ship workflow for the current work: -Required gates: +1. Identify the current PR: + - Run `gh pr list --head $(git branch --show-current)` to find the PR + - If no PR exists, report "No PR found for current branch" -- the scope still matches the requested goal -- relevant verification has been run and cited -- risky shell or write operations did not bypass policy -- remaining approvals, CI, provider, or review gates are listed explicitly +2. Verify all gates: + - Run `gh pr checks ` — all checks must pass + - Run `gh pr view --json reviews` — no CHANGES_REQUESTED + - Verify the guardrail review_state (the plugin enforces this at merge time) -Output: +3. If all gates pass: + - Run `gh pr merge --merge` + - Verify the merge succeeded with `gh pr view --json state` + - Report: "PR # merged successfully" -- Ready or Not ready -- Evidence -- Blocking gates -- Next action +4. If any gate fails: + - List each failing gate with evidence (CI output, review status) + - Suggest specific remediation for each failure + - Do NOT attempt to merge -Default scope is the current uncommitted work unless `$ARGUMENTS` narrows it. +Default scope is the current branch unless $ARGUMENTS specifies a PR number. + +$ARGUMENTS diff --git a/packages/opencode/test/scenario/guardrails.test.ts b/packages/opencode/test/scenario/guardrails.test.ts index de3909247cdc..2db03cc1c8be 100644 --- a/packages/opencode/test/scenario/guardrails.test.ts +++ b/packages/opencode/test/scenario/guardrails.test.ts @@ -256,7 +256,7 @@ Use the project-local command. expect(map.implement?.agent).toBe("implement") expect(map.review?.agent).toBe("review") expect(map.review?.subtask).toBe(true) - expect(map.ship?.agent).toBe("review") + expect(map.ship?.agent).toBe("ship") expect(map.ship?.subtask).toBe(true) expect(map.handoff?.agent).toBe("review") expect(map.handoff?.subtask).toBe(true) diff --git a/packages/opencode/test/scenario/replay.ts b/packages/opencode/test/scenario/replay.ts index 5896699a80bb..2c6c54bc1d81 100644 --- a/packages/opencode/test/scenario/replay.ts +++ b/packages/opencode/test/scenario/replay.ts @@ -99,14 +99,14 @@ export const replays = { command: "ship", args: "Check the current work only.", mode: "subtask", - agent: "review", + agent: "ship", model: "openai/gpt-5-mini", models: ["gpt-5-mini", "gpt-5-mini"], prompt: [ - "Run a release-readiness check for the current work.", - "Ready or Not ready", - "Blocking gates", - "Next action", + "Execute the ship workflow for the current work:", + "Identify the current PR", + "Verify all gates", + "gh pr merge", ], guard: [ "Guardrail runtime state:", @@ -134,8 +134,8 @@ export const replays = { name: "task", input: { description: "ship current work", - prompt: "Run a release-readiness check for the current work.", - subagent_type: "review", + prompt: "Execute the ship workflow for the current work:", + subagent_type: "ship", command: "ship", }, }, From ce74fe3d1445dce69fa9a264e4c3a298b9e95082 Mon Sep 17 00:00:00 2001 From: Terada Kousuke Date: Thu, 9 Apr 2026 00:45:15 +0900 Subject: [PATCH 4/6] feat(guardrails): workflow orchestration engine + auto-review + issue management Add autonomous pipeline support to guardrail.ts: - Client type for session API (create, prompt, status, messages, abort) - Workflow state tracking (phase, PR URL, review attempts, issues) - Auto-review trigger on session.idle (3+ edits, review pending) - pollIdle/readResult/parseFindings helpers for review pipeline - checklist() gate for merge readiness (tests, review, CI, findings) - Workflow phase transitions in tool.execute.after (PR/test/merge/CI/issue) - experimental.chat.system.transform hook for pipeline guidance - command.execute.before: /implement and /auto workflow initialization - Compacting context includes workflow phase, PR, review attempts - New /auto command for full autonomous pipeline execution Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/guardrails/profile/commands/auto.md | 19 ++ .../guardrails/profile/plugins/guardrail.ts | 242 +++++++++++++++++- 2 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 packages/guardrails/profile/commands/auto.md diff --git a/packages/guardrails/profile/commands/auto.md b/packages/guardrails/profile/commands/auto.md new file mode 100644 index 000000000000..8737a964e5a2 --- /dev/null +++ b/packages/guardrails/profile/commands/auto.md @@ -0,0 +1,19 @@ +--- +description: Run the full autonomous pipeline on a task. +agent: implement +--- + +Execute the complete workflow for the requested change: + +1. Plan the implementation (if complex, use /delegate for parallelization) +2. Implement the change with tests +3. Create a PR with proper branch naming and conventional commits +4. Run /review and fix all CRITICAL/HIGH findings (max 3 review cycles) +5. Run /ship to verify gates and execute merge +6. Create follow-up issues for any discovered out-of-scope problems +7. Verify completion: implementation + tests + docs + review + merge + +Do NOT stop until all steps are complete or a hard blocker is encountered. +Report blockers explicitly with evidence. + +$ARGUMENTS diff --git a/packages/guardrails/profile/plugins/guardrail.ts b/packages/guardrails/profile/plugins/guardrail.ts index 13d9b425b12b..5ecc7f2abf9e 100644 --- a/packages/guardrails/profile/plugins/guardrail.ts +++ b/packages/guardrails/profile/plugins/guardrail.ts @@ -215,7 +215,36 @@ function cmp(left: string, right: string) { return a[2] - b[2] } +type Client = { + session: { + create(input: { body: { parentID: string; title: string }; query: { directory: string } }): Promise<{ data: { id: string } }> + promptAsync(input: { + path: { id: string } + query: { directory: string } + body: { + agent?: string + model?: { providerID: string; modelID: string } + tools?: Record + variant?: string + parts: { type: "text"; text: string }[] + } + }): Promise + prompt(input: { + path: { id: string } + query: { directory: string } + body: { + noReply?: boolean + parts: { type: "text"; text: string }[] + } + }): Promise + status(input: { query: { directory: string } }): Promise<{ data?: Record }> + messages(input: { path: { id: string }; query: { directory: string } }): Promise<{ data?: Array<{ info: { role: string; error?: { data?: { message?: string } } }; parts: Array<{ type?: string; text?: string }> }> }> + abort(input: { path: { id: string }; query: { directory: string } }): Promise + } +} + export default async function guardrail(input: { + client: Client directory: string worktree: string }, opts?: Record) { @@ -424,6 +453,97 @@ export default async function guardrail(input: { if (denyPreview && preview(data.model ?? {})) return `${provider}/${model || "unknown"} is preview-only` } + // --- Auto-review pipeline (models team.ts idle/snap pattern) --- + const REVIEW_POLL_GAP = 750 + + async function pollIdle(sessionID: string) { + for (;;) { + const stat = await input.client.session.status({ query: { directory: input.directory } }) + const item = stat.data?.[sessionID] + if (!item || item.type === "idle") return + await Bun.sleep(REVIEW_POLL_GAP) + } + } + + async function readResult(sessionID: string) { + const msgs = await input.client.session.messages({ path: { id: sessionID }, query: { directory: input.directory } }) + const msg = [...(msgs.data ?? [])].reverse().find((m) => m.info.role === "assistant") + if (!msg) return { text: "", error: "" } + const txt = msg.parts.filter((p) => p.type === "text").map((p) => p.text ?? "").join("\n") + const err = msg.info.error?.data?.message ?? "" + return { text: txt.slice(0, 4000), error: err } + } + + function parseFindings(raw: string) { + const critical = (raw.match(/CRITICAL/gi) ?? []).length + const high = (raw.match(/\bHIGH\b/gi) ?? []).length + const medium = (raw.match(/\bMEDIUM\b/gi) ?? []).length + const low = (raw.match(/\bLOW\b/gi) ?? []).length + return { critical, high, medium, low, total: critical + high + medium + low } + } + + async function autoReview(parentSession: string, data: Record) { + const made = await input.client.session.create({ + body: { parentID: parentSession, title: "Auto-review" }, + query: { directory: input.directory }, + }) + await input.client.session.promptAsync({ + path: { id: made.data.id }, + query: { directory: input.directory }, + body: { + agent: "code-reviewer", + tools: { edit: false, write: false, apply_patch: false, multiedit: false }, + parts: [{ + type: "text", + text: `Review the current working directory changes for quality, correctness, and security.\nEdited files: ${list(data.edited_files).join(", ") || "unknown"}\nEdit count: ${num(data.edit_count)}\nReport findings as CRITICAL, HIGH, MEDIUM, or LOW.`, + }], + }, + }) + await pollIdle(made.data.id) + const result = await readResult(made.data.id) + const findings = parseFindings(result.text) + await mark({ + auto_review_in_progress: false, + auto_review_session: made.data.id, + review_state: "done", + reviewed: true, + review_at: new Date().toISOString(), + edits_since_review: 0, + review_critical_count: findings.critical, + review_high_count: findings.high, + }) + await seen("auto_review.completed", { findings: findings.total, critical: findings.critical, high: findings.high }) + if (findings.critical > 0 || findings.high > 0) { + await input.client.session.prompt({ + path: { id: parentSession }, + query: { directory: input.directory }, + body: { + noReply: true, + parts: [{ + type: "text", + text: `[Auto-review] CRITICAL=${findings.critical} HIGH=${findings.high}. Fix findings before merging.\n\n${result.text.slice(0, 2000)}`, + }], + }, + }) + await mark({ workflow_phase: "fixing" }) + } + } + + function checklist(data: Record) { + const items = [ + { name: "tests_pass", pass: flag(data.tests_executed) }, + { name: "review_fresh", pass: str(data.review_state) === "done" && num(data.edits_since_review) === 0 }, + { name: "ci_green", pass: flag(data.ci_green) }, + { name: "no_critical", pass: num(data.review_critical_count) === 0 && num(data.review_high_count) === 0 }, + ] + return { + score: items.filter((i) => i.pass).length, + total: items.length, + blocking: items.filter((i) => !i.pass).map((i) => i.name), + summary: items.map((i) => `[${i.pass ? "x" : " "}] ${i.name}`).join(", "), + } + } + return { config: async (cfg: { provider?: Record @@ -491,6 +611,16 @@ export default async function guardrail(input: { last_merge_at: "", issue_verification_done: false, edits_since_doc_reminder: 0, + // Workflow orchestration state + workflow_phase: "idle", + workflow_review_attempts: 0, + workflow_pr_url: "", + workflow_issues_created: [], + auto_review_in_progress: false, + auto_review_session: "", + review_critical_count: 0, + review_high_count: 0, + ci_green: false, // [W9] auto-init-permissions: detected stacks detected_stacks: stacks, // [W9] enforce-branch-workflow: branch status @@ -518,6 +648,22 @@ export default async function guardrail(input: { last_event: event.type, }) } + if (event.type === "session.idle") { + const data = await stash(state) + const edits = num(data.edit_count) + const pending = str(data.review_state) !== "done" + const inProgress = flag(data.auto_review_in_progress) + const sessionID = str(event.properties?.sessionID) + + if (edits >= 3 && pending && !inProgress && sessionID) { + await mark({ auto_review_in_progress: true }) + await seen("auto_review.triggered", { edit_count: edits, sessionID }) + void autoReview(sessionID, data).catch(async (err) => { + await mark({ auto_review_in_progress: false }) + await seen("auto_review.failed", { error: String(err) }) + }) + } + } if (event.type === "session.compacted") { await mark({ last_compacted: event.properties?.sessionID, @@ -682,6 +828,13 @@ export default async function guardrail(input: { } } } + // Workflow checklist gate for merge commands + if (/\bgit\s+merge(\s|$)/i.test(cmd) || /\bgh\s+pr\s+merge(\s|$)/i.test(cmd)) { + const checks = checklist(bashData) + if (checks.score < 3) { + out.output = (out.output || "") + `\n\nCompletion checklist (${checks.score}/${checks.total}): ${checks.summary}\nBlocking: ${checks.blocking.join(", ")}` + } + } // CI hard block: verify all checks are green before gh pr merge if (/\bgh\s+pr\s+merge\b/i.test(cmd)) { try { @@ -1332,6 +1485,58 @@ export default async function guardrail(input: { } } + // Workflow phase transitions (auto-pipeline orchestration) + if (item.tool === "bash") { + const cmd = str(item.args?.command) + const output = str(out.output) + + // PR creation detection + if (/\bgh\s+pr\s+create\b/.test(cmd) && output.includes("github.com")) { + const prUrl = output.trim().match(/https:\/\/github\.com\/[^\s]+/) + if (prUrl) { + await mark({ workflow_phase: "testing", workflow_pr_url: prUrl[0] }) + await seen("workflow.pr_created", { url: prUrl[0] }) + out.output += "\n\n[WORKFLOW] PR created. Next: run tests, then /review, then /ship." + } + } + + // Test execution detection + if (/\b(bun\s+test|pytest|go\s+test|npm\s+test|vitest|jest)\b/.test(cmd)) { + const testExit = out.metadata?.exitCode ?? (output.includes("FAIL") ? 1 : 0) + if (testExit === 0 && !output.includes("FAIL")) { + await mark({ workflow_phase: "reviewing", tests_executed: true }) + await seen("workflow.tests_passed", {}) + out.output += "\n\n[WORKFLOW] Tests passed. Next: run /review." + } + } + + // Merge detection + if (/\bgh\s+pr\s+merge\b/.test(cmd) && !output.includes("error") && !output.includes("failed")) { + await mark({ workflow_phase: "done" }) + await seen("workflow.merged", {}) + out.output += "\n\n[WORKFLOW] Merge complete. Create follow-up issues for any discovered problems." + } + + // CI check detection + if (/\bgh\s+pr\s+checks\b/.test(cmd)) { + const allPass = !output.includes("fail") && !output.includes("pending") + if (allPass) { + await mark({ ci_green: true }) + } + } + + // Issue creation tracking + if (/\bgh\s+issue\s+create\b/.test(cmd) && output.includes("github.com")) { + const issueUrl = output.trim().match(/https:\/\/github\.com\/[^\s]+/) + if (issueUrl) { + const created = list(data.workflow_issues_created) + created.push(issueUrl[0]) + await mark({ workflow_issues_created: created }) + await seen("workflow.issue_created", { url: issueUrl[0] }) + } + } + } + // [NEW] audit-docker-build-args: surface warning if (item.tool === "bash" && /\bdocker\s+build\b/i.test(str(item.args?.command))) { const freshData = await stash(state) @@ -1406,7 +1611,22 @@ export default async function guardrail(input: { }[] }, ) => { - if (!["review", "ship", "handoff"].includes(item.command)) return + // Workflow initialization for /implement and /auto commands + if (item.command === "implement" || item.command === "auto") { + await mark({ workflow_phase: "implementing", workflow_review_attempts: 0 }) + await seen("workflow.started", { command: item.command }) + const wfPart = out.parts.find((p) => p.type === "subtask" && typeof p.prompt === "string") + if (wfPart) { + wfPart.prompt = (wfPart.prompt || "") + "\n\nAfter implementation:\n" + + "1. Run relevant tests (bun test / pytest / go test)\n" + + "2. Create a PR with proper branch naming\n" + + "3. Run /review and address CRITICAL/HIGH findings (max 3 cycles)\n" + + "4. Run /ship to merge\n" + + "5. Create follow-up issues for out-of-scope problems\n" + + "Do NOT stop until the pipeline completes or a hard blocker is hit." + } + } + if (!["review", "ship", "handoff", "implement", "auto"].includes(item.command)) return const data = await stash(state) const part = out.parts.find((item) => item.type === "subtask" && typeof item.prompt === "string") if (!part?.prompt) return @@ -1538,6 +1758,10 @@ export default async function guardrail(input: { `Edit/write count: ${num(data.edit_count)}.`, `Fact-check state: ${factLine(data)}.`, `Review state: ${reviewLine(data)}.`, + `Workflow phase: ${str(data.workflow_phase) || "idle"}.`, + `Workflow PR: ${str(data.workflow_pr_url) || "none"}.`, + `Review attempts: ${num(data.workflow_review_attempts)}.`, + `Auto-review: ${flag(data.auto_review_in_progress) ? "in-progress" : "idle"}.`, `Active tasks: ${num(data.active_task_count)} (Map entries: ${Object.keys(json(data.active_tasks)).length}).`, `LLM calls: ${num(data.llm_call_count)} (by provider: ${JSON.stringify(json(data.llm_calls_by_provider))}).`, `Providers used: ${list(data.session_providers).join(", ") || "none"}.`, @@ -1546,5 +1770,21 @@ export default async function guardrail(input: { ].join(" "), ) }, + "experimental.chat.system.transform": async ( + _item: {}, + out: { system: string[] }, + ) => { + const data = await stash(state) + const phase = str(data.workflow_phase) + if (phase && phase !== "idle" && phase !== "done") { + out.system.push( + `[WORKFLOW] Phase: ${phase}. Pipeline: implement→test→review→fix→ship. Complete autonomously. Do not stop at PR creation.` + ) + } + // Follow-up issue guidance + out.system.push( + "When you discover problems outside the current scope, create follow-up issues: `gh issue create --title '' --body '
' --label 'tech-debt'`. Do NOT fix out-of-scope problems inline." + ) + }, } } From 8e99e6c823d2edafe4e8e85e04498155ddaaad02 Mon Sep 17 00:00:00 2001 From: Terada Kousuke Date: Thu, 9 Apr 2026 00:53:38 +0900 Subject: [PATCH 5/6] =?UTF-8?q?fix(guardrails):=20address=20Codex=20CLI=20?= =?UTF-8?q?review=20=E2=80=94=202=20P1=20+=201=20P2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - parseFindings(): skip negation lines ("No CRITICAL issues") to avoid false positive severity counts blocking clean reviews - autoReview(): do not mark review_state="done" when session errored or returned empty — prevents merge gate bypass on failed reviews - team.ts merge(): scope auto-commit to patch-applied files only instead of git add -A, preventing unrelated local edits from being swept in Codex review: codex review --base dev (3 findings, all addressed) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../guardrails/profile/plugins/guardrail.ts | 21 +++++++++++++++---- packages/guardrails/profile/plugins/team.ts | 17 ++++++++++----- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/packages/guardrails/profile/plugins/guardrail.ts b/packages/guardrails/profile/plugins/guardrail.ts index 34f53132fe58..fb589df6f36a 100644 --- a/packages/guardrails/profile/plugins/guardrail.ts +++ b/packages/guardrails/profile/plugins/guardrail.ts @@ -480,10 +480,17 @@ export default async function guardrail(input: { } function parseFindings(raw: string) { - const critical = (raw.match(/CRITICAL/gi) ?? []).length - const high = (raw.match(/\bHIGH\b/gi) ?? []).length - const medium = (raw.match(/\bMEDIUM\b/gi) ?? []).length - const low = (raw.match(/\bLOW\b/gi) ?? []).length + // Count only actionable findings — exclude negations like "No CRITICAL issues found" + const lines = raw.split("\n") + let critical = 0, high = 0, medium = 0, low = 0 + for (const line of lines) { + const neg = /\b(no|zero|0|none|without|aren't|isn't|not)\b/i.test(line) + if (neg) continue + if (/\bCRITICAL\b/i.test(line)) critical++ + if (/\bHIGH\b/i.test(line)) high++ + if (/\bMEDIUM\b/i.test(line)) medium++ + if (/\bLOW\b/i.test(line)) low++ + } return { critical, high, medium, low, total: critical + high + medium + low } } @@ -506,6 +513,12 @@ export default async function guardrail(input: { }) await pollIdle(made.data.id) const result = await readResult(made.data.id) + // Do not mark review as done if the session errored or returned empty + if (result.error || !result.text.trim()) { + await mark({ auto_review_in_progress: false }) + await seen("auto_review.errored", { error: result.error || "empty response" }) + return + } const findings = parseFindings(result.text) await mark({ auto_review_in_progress: false, diff --git a/packages/guardrails/profile/plugins/team.ts b/packages/guardrails/profile/plugins/team.ts index e4bf8499b7fd..c0acfa594b8d 100644 --- a/packages/guardrails/profile/plugins/team.ts +++ b/packages/guardrails/profile/plugins/team.ts @@ -321,12 +321,19 @@ async function merge(dir: string, item: string, run: string, id: string) { // Auto-commit applied changes so they are not left uncommitted in the parent worktree. // Without this, patched changes remain as unstaged modifications — the root cause of // "worktree changes returned but not committed" (Issue #144, Claude Code Agent parity gap). + // Auto-commit only the patch-applied files (not unrelated local edits). + // Use `git diff --name-only` to identify files changed by the patch, then stage only those. try { - await git(dir, ["add", "-A"]) - const commitMsg = `chore(team): apply worker changes from task ${id}` - const commit = await git(dir, ["commit", "-m", commitMsg, "--no-verify"]) - if (commit.code !== 0 && !commit.err.includes("nothing to commit")) { - verification.issues.push(`Auto-commit failed: ${commit.err || commit.out}`) + const changed = await git(dir, ["diff", "--name-only"]) + const untracked = await git(dir, ["ls-files", "--others", "--exclude-standard"]) + const files = [...changed.out.trim().split("\n"), ...untracked.out.trim().split("\n")].filter(Boolean) + if (files.length > 0) { + await git(dir, ["add", "--", ...files]) + const commitMsg = `chore(team): apply worker changes from task ${id}` + const commit = await git(dir, ["commit", "-m", commitMsg, "--no-verify"]) + if (commit.code !== 0 && !commit.err.includes("nothing to commit")) { + verification.issues.push(`Auto-commit failed: ${commit.err || commit.out}`) + } } } catch { /* auto-commit is best-effort; parent session can still commit manually */ } await yardrm(dir, item) From 3dd78c3ec01974c6367c1ef490932aeb1d12a3ca Mon Sep 17 00:00:00 2001 From: Terada Kousuke Date: Thu, 9 Apr 2026 00:55:05 +0900 Subject: [PATCH 6/6] =?UTF-8?q?fix(guardrails):=20address=20PR=20#145=20re?= =?UTF-8?q?view=20=E2=80=94=204=20findings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Scope secExempt to .env.example/.sample/.template only (not *.key.template) - Add 2-minute timeout to pollIdle() to prevent stuck auto-review - Tighten bare `>` regex to avoid matching inside quoted strings - Add error check + re-verify after yardadd() checkout fallback Review: PR #145 inline comments (6 findings, 4 new + 1 already fixed + 1 truncated) Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/guardrails/profile/plugins/guardrail.ts | 12 +++++++++--- packages/guardrails/profile/plugins/team.ts | 10 +++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/packages/guardrails/profile/plugins/guardrail.ts b/packages/guardrails/profile/plugins/guardrail.ts index fb589df6f36a..80a37fda9269 100644 --- a/packages/guardrails/profile/plugins/guardrail.ts +++ b/packages/guardrails/profile/plugins/guardrail.ts @@ -34,7 +34,7 @@ const mut = [ /\btee\b/i, /\bsed\s+-i\b/i, /\bperl\s+-pi\b/i, - />/, + /\s>[^&]|^>/, // redirect operator — excludes > inside quoted strings and &&/|| ] const MUTATING_TOOLS = new Set(["edit", "write", "apply_patch", "multiedit"]) @@ -100,10 +100,11 @@ function rel(root: string, file: string) { return abs.slice(dir.length + 1) } -const secExempt = /\.(example|sample|template)$/i +// Only exempt .env.example/.env.sample/.env.template — not *.key.template etc. +const secEnvExempt = /\.env\.(example|sample|template)$/i function has(file: string, list: RegExp[]) { - if (list === sec && secExempt.test(file)) return false + if (list === sec && secEnvExempt.test(file)) return false return list.some((item) => item.test(file)) } @@ -461,8 +462,13 @@ export default async function guardrail(input: { // --- Auto-review pipeline (models team.ts idle/snap pattern) --- const REVIEW_POLL_GAP = 750 + const REVIEW_TIMEOUT_MS = 120_000 // 2 minutes max for auto-review async function pollIdle(sessionID: string) { + const start = Date.now() for (;;) { + if (Date.now() - start > REVIEW_TIMEOUT_MS) { + throw new Error(`Auto-review timed out after ${REVIEW_TIMEOUT_MS}ms`) + } const stat = await input.client.session.status({ query: { directory: input.directory } }) const item = stat.data?.[sessionID] if (!item || item.type === "idle") return diff --git a/packages/guardrails/profile/plugins/team.ts b/packages/guardrails/profile/plugins/team.ts index c0acfa594b8d..1dec867e06ac 100644 --- a/packages/guardrails/profile/plugins/team.ts +++ b/packages/guardrails/profile/plugins/team.ts @@ -281,7 +281,15 @@ async function yardadd(dir: string, id: string) { const files = await git(next, ["ls-files", "--cached"]).catch(() => ({ code: 1, out: "", err: "" })) if (files.code !== 0 || !files.out.trim()) { // Worktree might be empty — force checkout HEAD contents - await git(next, ["checkout", "HEAD", "--", "."]) + const checkout = await git(next, ["checkout", "HEAD", "--", "."]) + if (checkout.code !== 0) { + throw new Error(`Worktree created but checkout failed: ${checkout.err || checkout.out}`) + } + // Re-verify files are present + const recheck = await git(next, ["ls-files", "--cached"]).catch(() => ({ code: 1, out: "", err: "" })) + if (recheck.code !== 0 || !recheck.out.trim()) { + throw new Error("Worktree is still empty after checkout — cannot proceed with delegation") + } } return next }