From f2dfecf8f62dbd14f90180850abd22e5040a8528 Mon Sep 17 00:00:00 2001 From: Terada Kousuke Date: Thu, 9 Apr 2026 01:03:03 +0900 Subject: [PATCH] =?UTF-8?q?fix(guardrails):=20address=20code-reviewer=20fi?= =?UTF-8?q?ndings=20=E2=80=94=203=20CRITICAL=20+=204=20HIGH?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CRITICAL-1: Guard workflow transitions with current phase check — prevents state pollution from manual operations in non-pipeline sessions CRITICAL-2: parseFindings() now matches structured patterns ([CRITICAL], **HIGH**) at line start, excludes injected context and negations CRITICAL-3: Remove --no-verify from team.ts auto-commit — pre-commit hooks must not be bypassed for security/quality gates HIGH-1: Use MUTATING_TOOLS.has() in tool.execute.before — multiedit now gated for secret files, config files, and version baselines HIGH-2: Increment workflow_review_attempts in autoReview(), cap at 3 — blocks infinite review loops with workflow_phase="blocked" HIGH-3: All phase transitions guarded by currentPhase check — PR create only from "implementing", test pass only from "testing", merge only from "shipping" HIGH-4: Ship agent denies gh pr merge --admin and --auto flags Also: system.transform issue guidance gated behind active pipeline, ci_green resets to false on non-passing checks Code-reviewer: 3 CRITICAL + 4 HIGH + 5 MEDIUM + 3 LOW (all CRITICAL/HIGH addressed) Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/guardrails/profile/agents/ship.md | 2 + .../guardrails/profile/plugins/guardrail.ts | 67 ++++++++++++------- packages/guardrails/profile/plugins/team.ts | 2 +- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/packages/guardrails/profile/agents/ship.md b/packages/guardrails/profile/agents/ship.md index 93e8a8f8bc03..6888fbb345d1 100644 --- a/packages/guardrails/profile/agents/ship.md +++ b/packages/guardrails/profile/agents/ship.md @@ -16,6 +16,8 @@ permission: "git branch*": allow "gh pr checks*": allow "gh pr view*": allow + "gh pr merge * --admin*": deny + "gh pr merge * --auto*": deny "gh pr merge*": allow "gh pr list*": allow "gh api *": ask diff --git a/packages/guardrails/profile/plugins/guardrail.ts b/packages/guardrails/profile/plugins/guardrail.ts index 80a37fda9269..2df56a87c122 100644 --- a/packages/guardrails/profile/plugins/guardrail.ts +++ b/packages/guardrails/profile/plugins/guardrail.ts @@ -486,16 +486,21 @@ export default async function guardrail(input: { } function parseFindings(raw: string) { - // Count only actionable findings — exclude negations like "No CRITICAL issues found" + // Match structured finding patterns: "[CRITICAL]", "**CRITICAL**", "CRITICAL:", "- CRITICAL" + // Exclude negations, injected context lines, and prose mentions const lines = raw.split("\n") let critical = 0, high = 0, medium = 0, low = 0 for (const line of lines) { - const neg = /\b(no|zero|0|none|without|aren't|isn't|not)\b/i.test(line) - if (neg) continue - if (/\bCRITICAL\b/i.test(line)) critical++ - if (/\bHIGH\b/i.test(line)) high++ - if (/\bMEDIUM\b/i.test(line)) medium++ - if (/\bLOW\b/i.test(line)) low++ + const trimmed = line.trim() + // Skip negation lines + if (/\b(no|zero|0|none|without|aren't|isn't|not)\b/i.test(trimmed)) continue + // Skip injected guardrail context lines (e.g., "CRITICAL=0 HIGH=0") + if (/CRITICAL=\d|HIGH=\d|checklist|Guardrail mode/i.test(trimmed)) continue + // Match structured patterns: leading marker or bracketed/bold severity + if (/^[\s\-*]*\[?CRITICAL\]?[\s:*]/i.test(trimmed) || /^\*\*CRITICAL\*\*/i.test(trimmed)) critical++ + if (/^[\s\-*]*\[?HIGH\]?[\s:*]/i.test(trimmed) || /^\*\*HIGH\*\*/i.test(trimmed)) high++ + if (/^[\s\-*]*\[?MEDIUM\]?[\s:*]/i.test(trimmed) || /^\*\*MEDIUM\*\*/i.test(trimmed)) medium++ + if (/^[\s\-*]*\[?LOW\]?[\s:*]/i.test(trimmed) || /^\*\*LOW\*\*/i.test(trimmed)) low++ } return { critical, high, medium, low, total: critical + high + medium + low } } @@ -526,11 +531,18 @@ export default async function guardrail(input: { return } const findings = parseFindings(result.text) + const attempts = num(data.workflow_review_attempts) + 1 + if (attempts >= 3) { + await mark({ auto_review_in_progress: false, workflow_phase: "blocked", workflow_review_attempts: attempts }) + await seen("auto_review.max_attempts", { attempts }) + return + } await mark({ auto_review_in_progress: false, auto_review_session: made.data.id, review_state: "done", reviewed: true, + workflow_review_attempts: attempts, review_at: new Date().toISOString(), edits_since_review: 0, review_critical_count: findings.critical, @@ -790,21 +802,21 @@ export default async function guardrail(input: { out: { args: Record }, ) => { const file = pick(out.args ?? item.args) - if (file && (item.tool === "read" || item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch")) { + if (file && (item.tool === "read" || MUTATING_TOOLS.has(item.tool))) { const err = deny(file, item.tool === "read" ? "read" : "edit") if (err) { await mark({ last_block: item.tool, last_file: rel(input.worktree, file), last_reason: err }) throw new Error(text(err)) } } - if (item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch") { + if (MUTATING_TOOLS.has(item.tool)) { const err = await version(out.args ?? {}) if (err) { await mark({ last_block: item.tool, last_file: file ? rel(input.worktree, file) : "", last_reason: err }) throw new Error(text(err)) } } - if ((item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch") && file && code(file)) { + if ((MUTATING_TOOLS.has(item.tool)) && file && code(file)) { const count = await budget() if (count >= 4) { const budgetData = await stash(state) @@ -1528,12 +1540,15 @@ export default async function guardrail(input: { } // Workflow phase transitions (auto-pipeline orchestration) + // Only transition when the pipeline is active — prevents state pollution from manual operations if (item.tool === "bash") { const cmd = str(item.args?.command) const output = str(out.output) + const currentPhase = str(bashData.workflow_phase) + const pipelineActive = currentPhase && currentPhase !== "idle" && currentPhase !== "done" && currentPhase !== "blocked" - // PR creation detection - if (/\bgh\s+pr\s+create\b/.test(cmd) && output.includes("github.com")) { + // PR creation detection — only when implementing + if (pipelineActive && currentPhase === "implementing" && /\bgh\s+pr\s+create\b/.test(cmd) && output.includes("github.com")) { const prUrl = output.trim().match(/https:\/\/github\.com\/[^\s]+/) if (prUrl) { await mark({ workflow_phase: "testing", workflow_pr_url: prUrl[0] }) @@ -1542,8 +1557,8 @@ export default async function guardrail(input: { } } - // Test execution detection - if (/\b(bun\s+test|pytest|go\s+test|npm\s+test|vitest|jest)\b/.test(cmd)) { + // Test execution detection — only when testing + if (pipelineActive && currentPhase === "testing" && /\b(bun\s+test|pytest|go\s+test|npm\s+test|vitest|jest)\b/.test(cmd)) { const testExit = out.metadata?.exitCode ?? (output.includes("FAIL") ? 1 : 0) if (testExit === 0 && !output.includes("FAIL")) { await mark({ workflow_phase: "reviewing", tests_executed: true }) @@ -1552,19 +1567,17 @@ export default async function guardrail(input: { } } - // Merge detection - if (/\bgh\s+pr\s+merge\b/.test(cmd) && !output.includes("error") && !output.includes("failed")) { + // Merge detection — only when shipping + if (pipelineActive && currentPhase === "shipping" && /\bgh\s+pr\s+merge\b/.test(cmd) && !output.includes("error") && !output.includes("failed")) { await mark({ workflow_phase: "done" }) await seen("workflow.merged", {}) out.output += "\n\n[WORKFLOW] Merge complete. Create follow-up issues for any discovered problems." } - // CI check detection - if (/\bgh\s+pr\s+checks\b/.test(cmd)) { - const allPass = !output.includes("fail") && !output.includes("pending") - if (allPass) { - await mark({ ci_green: true }) - } + // CI check detection — any active phase + if (pipelineActive && /\bgh\s+pr\s+checks\b/.test(cmd)) { + const allPass = !output.toLowerCase().includes("fail") && !output.toLowerCase().includes("pending") + await mark({ ci_green: allPass }) } // Issue creation tracking @@ -1823,10 +1836,12 @@ export default async function guardrail(input: { `[WORKFLOW] Phase: ${phase}. Pipeline: implement→test→review→fix→ship. Complete autonomously. Do not stop at PR creation.` ) } - // Follow-up issue guidance - out.system.push( - "When you discover problems outside the current scope, create follow-up issues: `gh issue create --title '' --body '
' --label 'tech-debt'`. Do NOT fix out-of-scope problems inline." - ) + // Follow-up issue guidance — only during active pipeline + if (phase && phase !== "idle" && phase !== "done") { + out.system.push( + "When you discover problems outside the current scope, create follow-up issues: `gh issue create --title '' --body '
' --label 'tech-debt'`. Do NOT fix out-of-scope problems inline." + ) + } }, } } diff --git a/packages/guardrails/profile/plugins/team.ts b/packages/guardrails/profile/plugins/team.ts index 1dec867e06ac..7f66f7dbdddb 100644 --- a/packages/guardrails/profile/plugins/team.ts +++ b/packages/guardrails/profile/plugins/team.ts @@ -338,7 +338,7 @@ async function merge(dir: string, item: string, run: string, id: string) { if (files.length > 0) { await git(dir, ["add", "--", ...files]) const commitMsg = `chore(team): apply worker changes from task ${id}` - const commit = await git(dir, ["commit", "-m", commitMsg, "--no-verify"]) + const commit = await git(dir, ["commit", "-m", commitMsg]) if (commit.code !== 0 && !commit.err.includes("nothing to commit")) { verification.issues.push(`Auto-commit failed: ${commit.err || commit.out}`) }