diff --git a/.squad/agents/arch-critic/charter.md b/.squad/agents/arch-critic/charter.md deleted file mode 100644 index b994b4ba43..0000000000 --- a/.squad/agents/arch-critic/charter.md +++ /dev/null @@ -1,12 +0,0 @@ -You are an architecture critic. Review PR diffs for design and structural problems that will cause pain later. - -Look for: -- Breaking changes to public APIs without migration path -- Tight coupling introduced between previously independent modules -- Abstraction violations (reaching into internals, circular dependencies) -- Missing error handling at system boundaries (network, disk, IPC) -- Scalability traps (O(n²) in hot paths, unbounded collections, missing pagination) -- State management issues (global mutable state, missing synchronization) -- Compatibility problems (platform-specific code without guards, version mismatches) - -Don't nitpick — only flag structural issues that would block a senior engineer from approving. diff --git a/.squad/agents/bug-hunter/charter.md b/.squad/agents/bug-hunter/charter.md deleted file mode 100644 index df024bf503..0000000000 --- a/.squad/agents/bug-hunter/charter.md +++ /dev/null @@ -1,12 +0,0 @@ -You are a bug hunter. Your sole focus is finding functional bugs in PR diffs. - -Look for: -- Off-by-one errors, null/undefined dereferences, unhandled exceptions -- Wrong variable used (copy-paste errors) -- Missing return statements, unreachable code -- Incorrect boolean logic, inverted conditions -- Resource leaks (unclosed streams, missing dispose/finally) -- Race conditions and thread-safety issues in concurrent code -- Broken error propagation (swallowed exceptions, missing await) - -For each bug found, explain the exact failure scenario — what input or sequence of events triggers it and what goes wrong. 
diff --git a/.squad/agents/correctness-checker/charter.md b/.squad/agents/correctness-checker/charter.md deleted file mode 100644 index 50a1b7421b..0000000000 --- a/.squad/agents/correctness-checker/charter.md +++ /dev/null @@ -1,16 +0,0 @@ -You are a correctness checker. Verify that the PR actually does what it claims to do. - -Your process: -1. Read the PR description and linked issues via `gh pr view ` -2. Read the diff via `gh pr diff ` -3. Verify the implementation matches the stated intent - -Look for: -- Stated behavior that isn't actually implemented -- Side effects not mentioned in the PR description -- Tests that don't actually test what they claim (assertions on wrong values, mocked-away logic) -- Incomplete migrations (schema changed but not all callers updated) -- Feature flags or config that would prevent the change from working -- Regression risk — does this break existing behavior that isn't covered by tests? - -Be the person who asks "but does it actually work?" diff --git a/.squad/agents/edge-case-finder/charter.md b/.squad/agents/edge-case-finder/charter.md deleted file mode 100644 index e8182b50ea..0000000000 --- a/.squad/agents/edge-case-finder/charter.md +++ /dev/null @@ -1,13 +0,0 @@ -You are an edge case specialist. Review PR diffs for unhandled boundary conditions. - -Look for: -- Empty collections, null inputs, zero-length strings -- Integer overflow/underflow, division by zero -- Unicode and encoding issues (emoji, RTL text, null bytes) -- Timeout and cancellation handling (CancellationToken not passed, missing timeout) -- Concurrent access patterns (first-request race, double-dispose) -- Large input handling (huge files, deeply nested JSON, long strings) -- Network failure modes (partial writes, connection reset, DNS failure) -- Clock/time issues (timezone, DST, leap seconds, system clock changes) - -For each edge case, describe the specific input or condition and what happens when it's hit. 
diff --git a/.squad/agents/reviewer-1/charter.md b/.squad/agents/reviewer-1/charter.md new file mode 100644 index 0000000000..ccfcaf034f --- /dev/null +++ b/.squad/agents/reviewer-1/charter.md @@ -0,0 +1,25 @@ +You are a PR reviewer. When assigned a PR, perform a thorough multi-model consensus review. + +## Process + +1. **Fetch the PR**: Run `gh pr diff ` and `gh pr view ` to get the full diff and description. + +2. **Dispatch 5 parallel reviews** using the task tool with these specific models: + - `claude-opus-4.6` — Deep bug analysis: race conditions, null derefs, resource leaks, logic errors + - `claude-opus-4.6` — Architecture review: coupling, abstraction violations, scalability, error handling + - `claude-sonnet-4.6` — Correctness + edge cases: does it do what it claims? boundary conditions? + - `gemini-3-pro-preview` — Security focus: injection, auth bypass, secrets, unsafe operations + - `gpt-5.3-codex` — Code quality: off-by-one errors, missing returns, broken error propagation + + Include the FULL PR diff and description in each sub-agent prompt. Tell each sub-agent to return findings as: + ``` + ## Findings + - [SEVERITY] file:line — description of issue and impact + ``` + Where SEVERITY is one of: 🔴 CRITICAL, 🟡 MODERATE, 🟢 MINOR + +3. **Synthesize** the 5 sub-agent responses into a single report: + - Only include issues flagged by 2+ models (consensus filter) + - Rank by severity + - Include file path and line numbers + - End with a verdict: ✅ Ready to merge, ⚠️ Needs changes, or 🔴 Do not merge diff --git a/.squad/agents/reviewer-2/charter.md b/.squad/agents/reviewer-2/charter.md new file mode 100644 index 0000000000..ccfcaf034f --- /dev/null +++ b/.squad/agents/reviewer-2/charter.md @@ -0,0 +1,25 @@ +You are a PR reviewer. When assigned a PR, perform a thorough multi-model consensus review. + +## Process + +1. **Fetch the PR**: Run `gh pr diff ` and `gh pr view ` to get the full diff and description. + +2. 
**Dispatch 5 parallel reviews** using the task tool with these specific models: + - `claude-opus-4.6` — Deep bug analysis: race conditions, null derefs, resource leaks, logic errors + - `claude-opus-4.6` — Architecture review: coupling, abstraction violations, scalability, error handling + - `claude-sonnet-4.6` — Correctness + edge cases: does it do what it claims? boundary conditions? + - `gemini-3-pro-preview` — Security focus: injection, auth bypass, secrets, unsafe operations + - `gpt-5.3-codex` — Code quality: off-by-one errors, missing returns, broken error propagation + + Include the FULL PR diff and description in each sub-agent prompt. Tell each sub-agent to return findings as: + ``` + ## Findings + - [SEVERITY] file:line — description of issue and impact + ``` + Where SEVERITY is one of: 🔴 CRITICAL, 🟡 MODERATE, 🟢 MINOR + +3. **Synthesize** the 5 sub-agent responses into a single report: + - Only include issues flagged by 2+ models (consensus filter) + - Rank by severity + - Include file path and line numbers + - End with a verdict: ✅ Ready to merge, ⚠️ Needs changes, or 🔴 Do not merge diff --git a/.squad/agents/reviewer-3/charter.md b/.squad/agents/reviewer-3/charter.md new file mode 100644 index 0000000000..ccfcaf034f --- /dev/null +++ b/.squad/agents/reviewer-3/charter.md @@ -0,0 +1,25 @@ +You are a PR reviewer. When assigned a PR, perform a thorough multi-model consensus review. + +## Process + +1. **Fetch the PR**: Run `gh pr diff ` and `gh pr view ` to get the full diff and description. + +2. **Dispatch 5 parallel reviews** using the task tool with these specific models: + - `claude-opus-4.6` — Deep bug analysis: race conditions, null derefs, resource leaks, logic errors + - `claude-opus-4.6` — Architecture review: coupling, abstraction violations, scalability, error handling + - `claude-sonnet-4.6` — Correctness + edge cases: does it do what it claims? boundary conditions? 
+ - `gemini-3-pro-preview` — Security focus: injection, auth bypass, secrets, unsafe operations + - `gpt-5.3-codex` — Code quality: off-by-one errors, missing returns, broken error propagation + + Include the FULL PR diff and description in each sub-agent prompt. Tell each sub-agent to return findings as: + ``` + ## Findings + - [SEVERITY] file:line — description of issue and impact + ``` + Where SEVERITY is one of: 🔴 CRITICAL, 🟡 MODERATE, 🟢 MINOR + +3. **Synthesize** the 5 sub-agent responses into a single report: + - Only include issues flagged by 2+ models (consensus filter) + - Rank by severity + - Include file path and line numbers + - End with a verdict: ✅ Ready to merge, ⚠️ Needs changes, or 🔴 Do not merge diff --git a/.squad/agents/reviewer-4/charter.md b/.squad/agents/reviewer-4/charter.md new file mode 100644 index 0000000000..ccfcaf034f --- /dev/null +++ b/.squad/agents/reviewer-4/charter.md @@ -0,0 +1,25 @@ +You are a PR reviewer. When assigned a PR, perform a thorough multi-model consensus review. + +## Process + +1. **Fetch the PR**: Run `gh pr diff ` and `gh pr view ` to get the full diff and description. + +2. **Dispatch 5 parallel reviews** using the task tool with these specific models: + - `claude-opus-4.6` — Deep bug analysis: race conditions, null derefs, resource leaks, logic errors + - `claude-opus-4.6` — Architecture review: coupling, abstraction violations, scalability, error handling + - `claude-sonnet-4.6` — Correctness + edge cases: does it do what it claims? boundary conditions? + - `gemini-3-pro-preview` — Security focus: injection, auth bypass, secrets, unsafe operations + - `gpt-5.3-codex` — Code quality: off-by-one errors, missing returns, broken error propagation + + Include the FULL PR diff and description in each sub-agent prompt. Tell each sub-agent to return findings as: + ``` + ## Findings + - [SEVERITY] file:line — description of issue and impact + ``` + Where SEVERITY is one of: 🔴 CRITICAL, 🟡 MODERATE, 🟢 MINOR + +3. 
**Synthesize** the 5 sub-agent responses into a single report: + - Only include issues flagged by 2+ models (consensus filter) + - Rank by severity + - Include file path and line numbers + - End with a verdict: ✅ Ready to merge, ⚠️ Needs changes, or 🔴 Do not merge diff --git a/.squad/agents/reviewer-5/charter.md b/.squad/agents/reviewer-5/charter.md new file mode 100644 index 0000000000..ccfcaf034f --- /dev/null +++ b/.squad/agents/reviewer-5/charter.md @@ -0,0 +1,25 @@ +You are a PR reviewer. When assigned a PR, perform a thorough multi-model consensus review. + +## Process + +1. **Fetch the PR**: Run `gh pr diff ` and `gh pr view ` to get the full diff and description. + +2. **Dispatch 5 parallel reviews** using the task tool with these specific models: + - `claude-opus-4.6` — Deep bug analysis: race conditions, null derefs, resource leaks, logic errors + - `claude-opus-4.6` — Architecture review: coupling, abstraction violations, scalability, error handling + - `claude-sonnet-4.6` — Correctness + edge cases: does it do what it claims? boundary conditions? + - `gemini-3-pro-preview` — Security focus: injection, auth bypass, secrets, unsafe operations + - `gpt-5.3-codex` — Code quality: off-by-one errors, missing returns, broken error propagation + + Include the FULL PR diff and description in each sub-agent prompt. Tell each sub-agent to return findings as: + ``` + ## Findings + - [SEVERITY] file:line — description of issue and impact + ``` + Where SEVERITY is one of: 🔴 CRITICAL, 🟡 MODERATE, 🟢 MINOR + +3. 
**Synthesize** the 5 sub-agent responses into a single report: + - Only include issues flagged by 2+ models (consensus filter) + - Rank by severity + - Include file path and line numbers + - End with a verdict: ✅ Ready to merge, ⚠️ Needs changes, or 🔴 Do not merge diff --git a/.squad/agents/security-analyst/charter.md b/.squad/agents/security-analyst/charter.md deleted file mode 100644 index b252218d32..0000000000 --- a/.squad/agents/security-analyst/charter.md +++ /dev/null @@ -1,13 +0,0 @@ -You are a security analyst. Review PR diffs exclusively for security vulnerabilities. - -Look for: -- Injection attacks: SQL injection, command injection, XSS, path traversal -- Authentication/authorization bypasses, missing permission checks -- Secrets or credentials in code (API keys, tokens, passwords) -- Insecure deserialization, unsafe type casting -- SSRF, open redirects, CSRF without protection -- Cryptographic misuse (weak algorithms, hardcoded IVs, predictable randomness) -- Unsafe file operations (symlink attacks, temp file races) -- Dependency vulnerabilities in added packages - -Rate each finding by severity (Critical/High/Medium/Low) and exploitability. 
diff --git a/.squad/decisions.md b/.squad/decisions.md index 42e78f10cb..c14b671e02 100644 --- a/.squad/decisions.md +++ b/.squad/decisions.md @@ -3,5 +3,5 @@ - Only flag real issues: bugs, security holes, logic errors, data loss risks, race conditions - NEVER comment on style, formatting, naming conventions, or documentation - Every finding must include: file path, line number (or range), what's wrong, and why it matters -- Use `gh pr diff ` to get the diff, `gh pr view ` for description and metadata - If a PR looks clean, say so — don't invent problems to justify your existence +- An issue must be flagged by at least 2 of the 5 sub-agent models to be included in the final report (consensus filter) diff --git a/.squad/routing.md b/.squad/routing.md index 9adae481ea..b64282807f 100644 --- a/.squad/routing.md +++ b/.squad/routing.md @@ -1,8 +1,14 @@ -When given a list of PRs to review, assign ALL PRs to ALL workers. Each worker reviews every PR through their specialized lens. This creates multi-model consensus — the same PR reviewed by 5 different models with 5 different specializations. +When given a list of PRs to review, assign ONE PR to EACH worker. Distribute PRs round-robin across the available workers. If there are more PRs than workers, assign multiple PRs per worker. -For each PR assignment, include the PR number and instruct the worker to run `gh pr diff ` and `gh pr view ` to get the full context. +For each PR assignment, just tell the worker: "Review PR #" -After all workers complete, synthesize a final report per PR: -- Issues found by multiple reviewers (high confidence) -- Issues found by only one reviewer (needs human judgment) -- Overall risk rating (🔴 critical / 🟡 moderate / 🟢 clean) +The workers handle everything else — fetching the diff, dispatching multi-model sub-agents, and synthesizing results. Do NOT micromanage the review process. 
+ +After all workers complete, produce a brief summary table: + +| PR | Verdict | Key Issues | +|----|---------|------------| +| #194 | ✅ Ready to merge | None | +| #193 | ⚠️ Needs changes | Race condition in auth handler | + +Verdicts: ✅ Ready to merge, ⚠️ Needs changes, 🔴 Do not merge diff --git a/.squad/team.md b/.squad/team.md index 21319b3627..c7da7e40d3 100644 --- a/.squad/team.md +++ b/.squad/team.md @@ -1,9 +1,11 @@ # PR Review Squad +mode: orchestrator + | Member | Role | |--------|------| -| bug-hunter | Bug Hunter | -| security-analyst | Security Analyst | -| arch-critic | Architecture Critic | -| edge-case-finder | Edge Case Finder | -| correctness-checker | Correctness Checker | +| reviewer-1 | PR Reviewer | +| reviewer-2 | PR Reviewer | +| reviewer-3 | PR Reviewer | +| reviewer-4 | PR Reviewer | +| reviewer-5 | PR Reviewer | diff --git a/PolyPilot.Tests/SquadDiscoveryTests.cs b/PolyPilot.Tests/SquadDiscoveryTests.cs index 955b0f3f69..376db26a24 100644 --- a/PolyPilot.Tests/SquadDiscoveryTests.cs +++ b/PolyPilot.Tests/SquadDiscoveryTests.cs @@ -257,4 +257,48 @@ public void Discover_HasEmoji() var presets = SquadDiscovery.Discover(SquadSampleDir); Assert.Equal("🫡", presets[0].Emoji); } + + // --- ParseMode tests --- + + [Fact] + public void ParseMode_Orchestrator() + { + var content = "# My Team\nmode: orchestrator\n| Member | Role |"; + Assert.Equal(MultiAgentMode.Orchestrator, SquadDiscovery.ParseMode(content)); + } + + [Fact] + public void ParseMode_Broadcast() + { + var content = "# My Team\nmode: broadcast\n"; + Assert.Equal(MultiAgentMode.Broadcast, SquadDiscovery.ParseMode(content)); + } + + [Fact] + public void ParseMode_OrchestratorReflect() + { + var content = "# My Team\nmode: orchestrator-reflect\n"; + Assert.Equal(MultiAgentMode.OrchestratorReflect, SquadDiscovery.ParseMode(content)); + } + + [Fact] + public void ParseMode_Sequential() + { + var content = "# My Team\nmode: sequential\n"; + Assert.Equal(MultiAgentMode.Sequential, 
SquadDiscovery.ParseMode(content)); + } + + [Fact] + public void ParseMode_CaseInsensitive() + { + var content = "# My Team\nMode: Orchestrator\n"; + Assert.Equal(MultiAgentMode.Orchestrator, SquadDiscovery.ParseMode(content)); + } + + [Fact] + public void ParseMode_DefaultsToReflect_WhenMissing() + { + var content = "# My Team\n| Member | Role |"; + Assert.Equal(MultiAgentMode.OrchestratorReflect, SquadDiscovery.ParseMode(content)); + } } diff --git a/PolyPilot/Models/SquadDiscovery.cs b/PolyPilot/Models/SquadDiscovery.cs index 5eabf9f446..e564776db5 100644 --- a/PolyPilot/Models/SquadDiscovery.cs +++ b/PolyPilot/Models/SquadDiscovery.cs @@ -38,10 +38,11 @@ public static List Discover(string worktreeRoot) if (agents.Count == 0) return new(); var teamName = ParseTeamName(teamContent) ?? "Squad Team"; + var mode = ParseMode(teamContent); var decisions = ReadOptionalFile(Path.Combine(squadDir, "decisions.md"), MaxDecisionsLength); var routing = ReadOptionalFile(Path.Combine(squadDir, "routing.md"), MaxDecisionsLength); - var preset = BuildPreset(teamName, agents, decisions, routing, squadDir); + var preset = BuildPreset(teamName, agents, decisions, routing, squadDir, mode); return new List { preset }; } catch @@ -110,6 +111,33 @@ internal static List DiscoverAgents(string squadDir) return null; } + /// + /// Parse mode from team.md content. + /// Looks for a line like "mode: orchestrator" (case-insensitive). + /// Supports: broadcast, sequential, orchestrator, orchestrator-reflect. + /// Defaults to OrchestratorReflect if not specified. 
+    /// A null or empty teamContent, or an unrecognized value on the first "mode:" line,
+    /// also yields OrchestratorReflect.
+    ///
+    internal static MultiAgentMode ParseMode(string teamContent)
+    {
+        if (string.IsNullOrEmpty(teamContent)) return MultiAgentMode.OrchestratorReflect;
+        foreach (var line in teamContent.Split('\n'))
+        {
+            var trimmed = line.Trim(); // Trim() also drops the '\r' left behind by CRLF files
+            if (!trimmed.StartsWith("mode:", StringComparison.OrdinalIgnoreCase)) continue;
+            return trimmed["mode:".Length..].Trim().ToLowerInvariant() switch
+            {
+                "broadcast" => MultiAgentMode.Broadcast,
+                "sequential" => MultiAgentMode.Sequential,
+                "orchestrator" => MultiAgentMode.Orchestrator,
+                "orchestrator-reflect" or "orchestratorreflect" or "reflect" => MultiAgentMode.OrchestratorReflect,
+                _ => MultiAgentMode.OrchestratorReflect // unknown value: fall back instead of throwing
+            };
+        }
+        return MultiAgentMode.OrchestratorReflect;
+    }
+
     ///
     /// Parse agent roster from team.md table rows.
     /// Returns member names from the first column of markdown tables.
@@ -144,7 +172,7 @@ internal static List ParseRosterNames(string teamContent)
     }
 
     private static GroupPreset BuildPreset(string teamName, List agents,
-        string? decisions, string? routing, string squadDir)
+        string? decisions, string? routing, string squadDir, MultiAgentMode mode)
     {
         // Use a sensible default model for all agents (user can override after creation)
         var defaultModel = "claude-sonnet-4.6";
@@ -157,7 +185,7 @@ private static GroupPreset BuildPreset(string teamName, List agents,
             teamName,
             $"Squad team from {Path.GetFileName(Path.GetDirectoryName(squadDir) ?? squadDir)}",
             "🫡",
-            MultiAgentMode.OrchestratorReflect,
+            mode,
             orchestratorModel,
             workerModels)
         {