From f2dfecf8f62dbd14f90180850abd22e5040a8528 Mon Sep 17 00:00:00 2001
From: Terada Kousuke <company@cor-jp.com>
Date: Thu, 9 Apr 2026 01:03:03 +0900
Subject: [PATCH] =?UTF-8?q?fix(guardrails):=20address=20code-reviewer=20fi?=
 =?UTF-8?q?ndings=20=E2=80=94=203=20CRITICAL=20+=204=20HIGH?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CRITICAL-1: Guard workflow transitions with current phase check — prevents
  state pollution from manual operations in non-pipeline sessions
CRITICAL-2: parseFindings() now matches structured patterns ([CRITICAL],
  **HIGH**) at line start, excludes injected context and negations
CRITICAL-3: Remove --no-verify from team.ts auto-commit — pre-commit
  hooks must not be bypassed for security/quality gates

HIGH-1: Use MUTATING_TOOLS.has() in tool.execute.before — multiedit now
  gated for secret files, config files, and version baselines
HIGH-2: Increment workflow_review_attempts in autoReview(), cap at 3 —
  blocks infinite review loops with workflow_phase="blocked"
HIGH-3: All phase transitions guarded by currentPhase check — PR create
  only from "implementing", test pass only from "testing", merge only
  from "shipping"
HIGH-4: Ship agent denies gh pr merge --admin and --auto flags

Also: system.transform issue guidance gated behind active pipeline,
ci_green resets to false on non-passing checks

Code-reviewer: 3 CRITICAL + 4 HIGH + 5 MEDIUM + 3 LOW (all CRITICAL/HIGH addressed)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/guardrails/profile/agents/ship.md    |  2 +
 .../guardrails/profile/plugins/guardrail.ts   | 67 ++++++++++++-------
 packages/guardrails/profile/plugins/team.ts   |  2 +-
 3 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/packages/guardrails/profile/agents/ship.md b/packages/guardrails/profile/agents/ship.md
index 93e8a8f8bc03..6888fbb345d1 100644
--- a/packages/guardrails/profile/agents/ship.md
+++ b/packages/guardrails/profile/agents/ship.md
@@ -16,6 +16,8 @@ permission:
     "git branch*": allow
     "gh pr checks*": allow
     "gh pr view*": allow
+    "gh pr merge * --admin*": deny
+    "gh pr merge * --auto*": deny
     "gh pr merge*": allow
     "gh pr list*": allow
     "gh api *": ask
diff --git a/packages/guardrails/profile/plugins/guardrail.ts b/packages/guardrails/profile/plugins/guardrail.ts
index 80a37fda9269..2df56a87c122 100644
--- a/packages/guardrails/profile/plugins/guardrail.ts
+++ b/packages/guardrails/profile/plugins/guardrail.ts
@@ -486,16 +486,21 @@ export default async function guardrail(input: {
   }
 
   function parseFindings(raw: string) {
-    // Count only actionable findings — exclude negations like "No CRITICAL issues found"
+    // Match structured finding patterns: "[CRITICAL]", "**CRITICAL**", "CRITICAL:", "- CRITICAL"
+    // Exclude negations, injected context lines, and prose mentions
     const lines = raw.split("\n")
     let critical = 0, high = 0, medium = 0, low = 0
     for (const line of lines) {
-      const neg = /\b(no|zero|0|none|without|aren't|isn't|not)\b/i.test(line)
-      if (neg) continue
-      if (/\bCRITICAL\b/i.test(line)) critical++
-      if (/\bHIGH\b/i.test(line)) high++
-      if (/\bMEDIUM\b/i.test(line)) medium++
-      if (/\bLOW\b/i.test(line)) low++
+      const trimmed = line.trim()
+      // Skip negation lines
+      if (/\b(no|zero|0|none|without|aren't|isn't|not)\b/i.test(trimmed)) continue
+      // Skip injected guardrail context lines (e.g., "CRITICAL=0 HIGH=0")
+      if (/CRITICAL=\d|HIGH=\d|checklist|Guardrail mode/i.test(trimmed)) continue
+      // Match structured patterns: leading marker or bracketed/bold severity
+      if (/^[\s\-*]*\[?CRITICAL\]?[\s:*]/i.test(trimmed) || /^\*\*CRITICAL\*\*/i.test(trimmed)) critical++
+      if (/^[\s\-*]*\[?HIGH\]?[\s:*]/i.test(trimmed) || /^\*\*HIGH\*\*/i.test(trimmed)) high++
+      if (/^[\s\-*]*\[?MEDIUM\]?[\s:*]/i.test(trimmed) || /^\*\*MEDIUM\*\*/i.test(trimmed)) medium++
+      if (/^[\s\-*]*\[?LOW\]?[\s:*]/i.test(trimmed) || /^\*\*LOW\*\*/i.test(trimmed)) low++
     }
     return { critical, high, medium, low, total: critical + high + medium + low }
   }
@@ -526,11 +531,18 @@ export default async function guardrail(input: {
       return
     }
     const findings = parseFindings(result.text)
+    const attempts = num(data.workflow_review_attempts) + 1
+    if (attempts >= 3) {
+      await mark({ auto_review_in_progress: false, workflow_phase: "blocked", workflow_review_attempts: attempts })
+      await seen("auto_review.max_attempts", { attempts })
+      return
+    }
     await mark({
       auto_review_in_progress: false,
       auto_review_session: made.data.id,
       review_state: "done",
       reviewed: true,
+      workflow_review_attempts: attempts,
       review_at: new Date().toISOString(),
       edits_since_review: 0,
       review_critical_count: findings.critical,
@@ -790,21 +802,21 @@ export default async function guardrail(input: {
       out: { args: Record<string, unknown> },
     ) => {
       const file = pick(out.args ?? item.args)
-      if (file && (item.tool === "read" || item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch")) {
+      if (file && (item.tool === "read" || MUTATING_TOOLS.has(item.tool))) {
         const err = deny(file, item.tool === "read" ? "read" : "edit")
         if (err) {
           await mark({ last_block: item.tool, last_file: rel(input.worktree, file), last_reason: err })
           throw new Error(text(err))
         }
       }
-      if (item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch") {
+      if (MUTATING_TOOLS.has(item.tool)) {
         const err = await version(out.args ?? {})
         if (err) {
           await mark({ last_block: item.tool, last_file: file ? rel(input.worktree, file) : "", last_reason: err })
           throw new Error(text(err))
         }
       }
-      if ((item.tool === "edit" || item.tool === "write" || item.tool === "apply_patch") && file && code(file)) {
+      if ((MUTATING_TOOLS.has(item.tool)) && file && code(file)) {
         const count = await budget()
         if (count >= 4) {
           const budgetData = await stash(state)
@@ -1528,12 +1540,15 @@ export default async function guardrail(input: {
       }
 
       // Workflow phase transitions (auto-pipeline orchestration)
+      // Only transition when the pipeline is active — prevents state pollution from manual operations
       if (item.tool === "bash") {
         const cmd = str(item.args?.command)
         const output = str(out.output)
+        const currentPhase = str(bashData.workflow_phase)
+        const pipelineActive = currentPhase && currentPhase !== "idle" && currentPhase !== "done" && currentPhase !== "blocked"
 
-        // PR creation detection
-        if (/\bgh\s+pr\s+create\b/.test(cmd) && output.includes("github.com")) {
+        // PR creation detection — only when implementing
+        if (pipelineActive && currentPhase === "implementing" && /\bgh\s+pr\s+create\b/.test(cmd) && output.includes("github.com")) {
           const prUrl = output.trim().match(/https:\/\/github\.com\/[^\s]+/)
           if (prUrl) {
             await mark({ workflow_phase: "testing", workflow_pr_url: prUrl[0] })
@@ -1542,8 +1557,8 @@ export default async function guardrail(input: {
           }
         }
 
-        // Test execution detection
-        if (/\b(bun\s+test|pytest|go\s+test|npm\s+test|vitest|jest)\b/.test(cmd)) {
+        // Test execution detection — only when testing
+        if (pipelineActive && currentPhase === "testing" && /\b(bun\s+test|pytest|go\s+test|npm\s+test|vitest|jest)\b/.test(cmd)) {
           const testExit = out.metadata?.exitCode ?? (output.includes("FAIL") ? 1 : 0)
           if (testExit === 0 && !output.includes("FAIL")) {
             await mark({ workflow_phase: "reviewing", tests_executed: true })
@@ -1552,19 +1567,17 @@ export default async function guardrail(input: {
           }
         }
 
-        // Merge detection
-        if (/\bgh\s+pr\s+merge\b/.test(cmd) && !output.includes("error") && !output.includes("failed")) {
+        // Merge detection — only when shipping
+        if (pipelineActive && currentPhase === "shipping" && /\bgh\s+pr\s+merge\b/.test(cmd) && !output.includes("error") && !output.includes("failed")) {
           await mark({ workflow_phase: "done" })
           await seen("workflow.merged", {})
           out.output += "\n\n[WORKFLOW] Merge complete. Create follow-up issues for any discovered problems."
         }
 
-        // CI check detection
-        if (/\bgh\s+pr\s+checks\b/.test(cmd)) {
-          const allPass = !output.includes("fail") && !output.includes("pending")
-          if (allPass) {
-            await mark({ ci_green: true })
-          }
+        // CI check detection — any active phase
+        if (pipelineActive && /\bgh\s+pr\s+checks\b/.test(cmd)) {
+          const allPass = !output.toLowerCase().includes("fail") && !output.toLowerCase().includes("pending")
+          await mark({ ci_green: allPass })
         }
 
         // Issue creation tracking
@@ -1823,10 +1836,12 @@ export default async function guardrail(input: {
           `[WORKFLOW] Phase: ${phase}. Pipeline: implement→test→review→fix→ship. Complete autonomously. Do not stop at PR creation.`
         )
       }
-      // Follow-up issue guidance
-      out.system.push(
-        "When you discover problems outside the current scope, create follow-up issues: `gh issue create --title '<desc>' --body '<details>' --label 'tech-debt'`. Do NOT fix out-of-scope problems inline."
-      )
+      // Follow-up issue guidance — only during active pipeline
+      if (phase && phase !== "idle" && phase !== "done") {
+        out.system.push(
+          "When you discover problems outside the current scope, create follow-up issues: `gh issue create --title '<desc>' --body '<details>' --label 'tech-debt'`. Do NOT fix out-of-scope problems inline."
+        )
+      }
     },
   }
 }
diff --git a/packages/guardrails/profile/plugins/team.ts b/packages/guardrails/profile/plugins/team.ts
index 1dec867e06ac..7f66f7dbdddb 100644
--- a/packages/guardrails/profile/plugins/team.ts
+++ b/packages/guardrails/profile/plugins/team.ts
@@ -338,7 +338,7 @@ async function merge(dir: string, item: string, run: string, id: string) {
     if (files.length > 0) {
       await git(dir, ["add", "--", ...files])
       const commitMsg = `chore(team): apply worker changes from task ${id}`
-      const commit = await git(dir, ["commit", "-m", commitMsg, "--no-verify"])
+      const commit = await git(dir, ["commit", "-m", commitMsg])
       if (commit.code !== 0 && !commit.err.includes("nothing to commit")) {
         verification.issues.push(`Auto-commit failed: ${commit.err || commit.out}`)
       }