diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 2b2be6e847..a5e292cb2b 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -56,6 +56,17 @@ For Android, always run `adb reverse tcp:9223 tcp:9223` after deploy. ## Architecture +**See `docs/multi-agent-orchestration.md` for the multi-agent architecture spec** (orchestration modes, reflection loop, sentinel protocol, invariants, Squad integration). Test scenarios in `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`. Read these before modifying orchestration, reconciliation, or TCS completion logic. + +### Squad Integration +PolyPilot discovers [bradygaster/squad](https://github.com/bradygaster/squad) team definitions from `.squad/` (or legacy `.ai-team/`) directories in the worktree root. Each agent's `charter.md` becomes a worker system prompt, `team.md` defines the roster, `decisions.md` provides shared context injected into all worker prompts, and `routing.md` is injected into the orchestrator's planning prompt. Repo-level teams appear in a **"📂 From Repo"** section in the preset picker, above built-in presets. + +**Squad write-back:** When saving a multi-agent group as a preset, PolyPilot writes the team definition back to `.squad/` format in the worktree root via `SquadWriter`. This creates `team.md`, `agents/{name}/charter.md`, and optional `decisions.md`/`routing.md`. The preset is also saved to `presets.json` as a personal backup. This enables round-tripping: discover → modify → save back → share via repo. + +**Preset priority (three-tier merge):** Built-in presets < User presets (`~/.polypilot/presets.json`) < Repo teams (`.squad/`). Repo teams shadow presets with the same name. The preset picker shows three sections: "📂 From Repo", "⚙️ Built-in", and "👀 My Presets". + +**Group deletion:** Deleting a multi-agent team closes and removes all its sessions (they're meaningless without the team). 
Deleting a regular group moves sessions to the default group. + This is a .NET MAUI Blazor Hybrid app targeting Mac Catalyst, Android, and iOS. It manages multiple GitHub Copilot CLI sessions through a native GUI. ### Three-Layer Stack diff --git a/PolyPilot.Tests/BridgeMessageTests.cs b/PolyPilot.Tests/BridgeMessageTests.cs index a0d567e518..d3a773dfe4 100644 --- a/PolyPilot.Tests/BridgeMessageTests.cs +++ b/PolyPilot.Tests/BridgeMessageTests.cs @@ -457,4 +457,74 @@ public void AttentionNeededPayload_AllReasons_RoundTrip(AttentionReason reason) Assert.Equal(reason, restored!.Reason); } + + [Fact] + public void MultiAgentBroadcastPayload_RoundTrips() + { + var payload = new MultiAgentBroadcastPayload + { + GroupId = "group-123", + Message = "Build the feature" + }; + var msg = BridgeMessage.Create(BridgeMessageTypes.MultiAgentBroadcast, payload); + var json = msg.Serialize(); + var restored = BridgeMessage.Deserialize(json)!.GetPayload(); + + Assert.NotNull(restored); + Assert.Equal("group-123", restored!.GroupId); + Assert.Equal("Build the feature", restored.Message); + } + + [Fact] + public void MultiAgentCreateGroupPayload_RoundTrips() + { + var payload = new MultiAgentCreateGroupPayload + { + Name = "Dev Team", + Mode = "Orchestrator", + OrchestratorPrompt = "Coordinate the workers", + SessionNames = new List { "session-1", "session-2" } + }; + var msg = BridgeMessage.Create(BridgeMessageTypes.MultiAgentCreateGroup, payload); + var json = msg.Serialize(); + var restored = BridgeMessage.Deserialize(json)!.GetPayload(); + + Assert.NotNull(restored); + Assert.Equal("Dev Team", restored!.Name); + Assert.Equal("Orchestrator", restored.Mode); + Assert.Equal("Coordinate the workers", restored.OrchestratorPrompt); + Assert.Equal(2, restored.SessionNames!.Count); + Assert.Contains("session-1", restored.SessionNames); + } + + [Fact] + public void MultiAgentProgressPayload_RoundTrips() + { + var payload = new MultiAgentProgressPayload + { + GroupId = "group-1", + 
TotalSessions = 3, + CompletedSessions = 1, + ProcessingSessions = 2, + CompletedSessionNames = new List { "worker-1" } + }; + var msg = BridgeMessage.Create(BridgeMessageTypes.MultiAgentProgress, payload); + var json = msg.Serialize(); + var restored = BridgeMessage.Deserialize(json)!.GetPayload(); + + Assert.NotNull(restored); + Assert.Equal("group-1", restored!.GroupId); + Assert.Equal(3, restored.TotalSessions); + Assert.Equal(1, restored.CompletedSessions); + Assert.Equal(2, restored.ProcessingSessions); + Assert.Single(restored.CompletedSessionNames); + } + + [Fact] + public void MultiAgentMessageTypes_AreCorrectStrings() + { + Assert.Equal("multi_agent_broadcast", BridgeMessageTypes.MultiAgentBroadcast); + Assert.Equal("multi_agent_create_group", BridgeMessageTypes.MultiAgentCreateGroup); + Assert.Equal("multi_agent_progress", BridgeMessageTypes.MultiAgentProgress); + } } diff --git a/PolyPilot.Tests/MultiAgentGapTests.cs b/PolyPilot.Tests/MultiAgentGapTests.cs new file mode 100644 index 0000000000..31b0391da8 --- /dev/null +++ b/PolyPilot.Tests/MultiAgentGapTests.cs @@ -0,0 +1,183 @@ +using PolyPilot.Models; +using PolyPilot.Services; + +namespace PolyPilot.Tests; + +/// +/// Gap-coverage tests for multi-agent parsing, model capabilities, and reflection summaries. 
+/// +public class MultiAgentGapTests +{ + // --- ParseTaskAssignments --- + + [Fact] + public void ParseTaskAssignments_EmptyInput_ReturnsEmpty() + { + var result = CopilotService.ParseTaskAssignments("", new List { "a", "b" }); + Assert.Empty(result); + } + + [Fact] + public void ParseTaskAssignments_SingleWorker_ExtractsTask() + { + var response = "@worker:alpha\nDo the thing.\n@end"; + var result = CopilotService.ParseTaskAssignments(response, new List { "alpha" }); + + Assert.Single(result); + Assert.Equal("alpha", result[0].WorkerName); + Assert.Contains("Do the thing", result[0].Task); + } + + [Fact] + public void ParseTaskAssignments_MultipleWorkers_ExtractsAll() + { + var response = @"@worker:w1 +Task one. +@end +@worker:w2 +Task two. +@end +@worker:w3 +Task three. +@end"; + var workers = new List { "w1", "w2", "w3" }; + var result = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Equal(3, result.Count); + Assert.Equal("w1", result[0].WorkerName); + Assert.Equal("w2", result[1].WorkerName); + Assert.Equal("w3", result[2].WorkerName); + } + + [Fact] + public void ParseTaskAssignments_FuzzyMatch_FindsClosestWorker() + { + // "coder" is a substring of "coder-session" β†’ fuzzy match + var response = "@worker:coder\nWrite the code.\n@end"; + var result = CopilotService.ParseTaskAssignments(response, new List { "coder-session", "reviewer-session" }); + + Assert.Single(result); + Assert.Equal("coder-session", result[0].WorkerName); + } + + [Fact] + public void ParseTaskAssignments_UnknownWorker_IsIgnored() + { + var response = "@worker:ghost\nDo something.\n@end"; + var result = CopilotService.ParseTaskAssignments(response, new List { "alpha", "beta" }); + + Assert.Empty(result); + } + + [Fact] + public void ParseTaskAssignments_DuplicateWorker_TakesLast() + { + var response = @"@worker:alpha +First task. +@end +@worker:alpha +Second task. 
+@end"; + var result = CopilotService.ParseTaskAssignments(response, new List { "alpha" }); + + // The regex matches both blocks; both are added (last one wins in practice) + Assert.Equal(2, result.Count); + Assert.Contains("Second task", result[^1].Task); + } + + // --- ModelCapabilities --- + + [Theory] + [InlineData(null)] + [InlineData("")] + public void GetCapabilities_NullOrEmpty_ReturnsNone(string? slug) + { + var caps = ModelCapabilities.GetCapabilities(slug!); + Assert.Equal(ModelCapability.None, caps); + } + + [Fact] + public void GetCapabilities_KnownModel_ReturnsFlags() + { + var caps = ModelCapabilities.GetCapabilities("gpt-5"); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + Assert.True(caps.HasFlag(ModelCapability.ToolUse)); + } + + [Fact] + public void GetRoleWarnings_UnknownModel_ReturnsWarning() + { + var warnings = ModelCapabilities.GetRoleWarnings("totally-unknown-model", MultiAgentRole.Worker); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Contains("Unknown model", StringComparison.OrdinalIgnoreCase)); + } + + [Fact] + public void GetRoleWarnings_WeakOrchestrator_ReturnsWarning() + { + // claude-haiku-4.5 is CostEfficient + Fast but not ReasoningExpert + var warnings = ModelCapabilities.GetRoleWarnings("claude-haiku-4.5", MultiAgentRole.Orchestrator); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Contains("reasoning", StringComparison.OrdinalIgnoreCase)); + } + + // --- BuildCompletionSummary --- + + [Fact] + public void BuildCompletionSummary_GoalMet_ShowsCheckmark() + { + var cycle = ReflectionCycle.Create("Ship the feature", maxIterations: 5); + cycle.Advance("Done!\n[[REFLECTION_COMPLETE]]"); + + var summary = cycle.BuildCompletionSummary(); + + Assert.Contains("βœ…", summary); + Assert.Contains("Goal met", summary); + } + + [Fact] + public void BuildCompletionSummary_Stalled_ShowsWarning() + { + var cycle = 
ReflectionCycle.Create("Improve quality", maxIterations: 10); + // Feed identical responses to trigger stall detection + cycle.Advance("Working on the task with specific details about implementation"); + cycle.Advance("Working on the task with specific details about implementation"); + cycle.Advance("Working on the task with specific details about implementation"); + + var summary = cycle.BuildCompletionSummary(); + + // IsStalled takes priority over IsCancelled in the ternary chain + Assert.Contains("⚠️", summary); + Assert.Contains("Stalled", summary); + Assert.DoesNotContain("⏹️", summary); + } + + [Fact] + public void BuildCompletionSummary_Cancelled_ShowsStop() + { + var cycle = ReflectionCycle.Create("Long task", maxIterations: 10); + cycle.Advance("First attempt with unique content here..."); + cycle.IsCancelled = true; + cycle.IsActive = false; + + var summary = cycle.BuildCompletionSummary(); + + Assert.Contains("⏹️", summary); + Assert.Contains("Cancelled", summary); + } + + [Fact] + public void BuildCompletionSummary_MaxIterations_ShowsClock() + { + var cycle = ReflectionCycle.Create("Goal", maxIterations: 2); + cycle.Advance("Trying with approach alpha..."); + cycle.Advance("Still trying with approach beta and new ideas..."); + + var summary = cycle.BuildCompletionSummary(); + + Assert.Contains("⏱️", summary); + Assert.Contains("Max iterations", summary); + Assert.Contains("2/2", summary); + } +} diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs new file mode 100644 index 0000000000..e61eeeafd8 --- /dev/null +++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs @@ -0,0 +1,1077 @@ +using System.Text.Json; +using Microsoft.Extensions.DependencyInjection; +using PolyPilot.Models; +using PolyPilot.Services; + +namespace PolyPilot.Tests; + +/// +/// Regression tests covering bugs found during PR #104 multi-agent development. 
+/// Each test documents a specific bug that was found and fixed, to prevent recurrence. +/// +/// Key bugs covered: +/// 1. TCS ordering: TrySetResult called before IsProcessing=false broke reflection loops +/// 2. Reconciliation scattering: multi-agent sessions moved to repo groups on restart +/// 3. Organization.json corruption: missing fields, wrong enums, partial data +/// 4. Preset creation: Role/PreferredModel not set, breaking reconciliation heuristic +/// 5. Mode enum gaps: OrchestratorReflect missing from dropdowns and serialization +/// 6. Reflection loop error handling: unhandled exceptions kill the async task silently +/// +public class MultiAgentRegressionTests +{ + private readonly StubChatDatabase _chatDb = new(); + private readonly StubServerManager _serverManager = new(); + private readonly StubWsBridgeClient _bridgeClient = new(); + private readonly StubDemoService _demoService = new(); + private readonly IServiceProvider _serviceProvider; + + public MultiAgentRegressionTests() + { + var services = new ServiceCollection(); + _serviceProvider = services.BuildServiceProvider(); + } + + private static RepoManager CreateRepoManagerWithState(List repos, List worktrees) + { + var rm = new RepoManager(); + var stateField = typeof(RepoManager).GetField("_state", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + var loadedField = typeof(RepoManager).GetField("_loaded", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + stateField.SetValue(rm, new RepositoryState { Repositories = repos, Worktrees = worktrees }); + loadedField.SetValue(rm, true); + return rm; + } + + private CopilotService CreateService(RepoManager? repoManager = null) => + new CopilotService(_chatDb, _serverManager, _bridgeClient, repoManager ?? new RepoManager(), _serviceProvider, _demoService); + + /// + /// Inject session names into the alias cache so ReconcileOrganization doesn't prune them. 
+ /// + private static void RegisterKnownSessions(CopilotService svc, params string[] sessionNames) + { + var field = typeof(CopilotService).GetField("_aliasCache", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + var cache = (Dictionary?)field.GetValue(svc) ?? new(); + foreach (var name in sessionNames) + cache[name] = name; + field.SetValue(svc, cache); + } + + #region Bug #1: Organization JSON Corruption Resilience + + /// + /// Bug: PowerShell ConvertTo-Json reformatted organization.json, dropping multi-agent + /// groups on app re-save. Deserialization must handle missing/extra fields gracefully. + /// + [Fact] + public void OrgJson_MissingOptionalFields_DeserializesGracefully() + { + // Simulate organization.json with only required fields + var json = """ + { + "Groups": [ + {"Id": "_default", "Name": "Sessions", "SortOrder": 0}, + {"Id": "ma-1", "Name": "Team", "IsMultiAgent": true} + ], + "Sessions": [ + {"SessionName": "worker-1", "GroupId": "ma-1"} + ] + } + """; + + var state = JsonSerializer.Deserialize(json)!; + + Assert.Equal(2, state.Groups.Count); + var maGroup = state.Groups.First(g => g.Id == "ma-1"); + Assert.True(maGroup.IsMultiAgent); + Assert.Null(maGroup.WorktreeId); + Assert.Null(maGroup.ReflectionState); + Assert.Equal(MultiAgentMode.Broadcast, maGroup.OrchestratorMode); // default + Assert.Single(state.Sessions); + Assert.Equal("ma-1", state.Sessions[0].GroupId); + } + + [Fact] + public void OrgJson_ExtraUnknownFields_DeserializesGracefully() + { + var json = """ + { + "Groups": [ + {"Id": "_default", "Name": "Sessions", "SortOrder": 0, "FutureField": true, "AnotherNew": "value"} + ], + "Sessions": [], + "FutureTopLevel": 42 + } + """; + + // Should not throw β€” unknown properties are ignored by default + var state = JsonSerializer.Deserialize(json)!; + Assert.Single(state.Groups); + } + + [Fact] + public void OrgJson_ReflectionState_ComplexRoundTrip() + { + var cycle = ReflectionCycle.Create("Fix 
all bugs", 10); + cycle.CurrentIteration = 3; + cycle.LastEvaluation = "Needs more work on error handling"; + cycle.EvaluatorSessionName = "eval-session"; + cycle.RecordEvaluation(1, 0.4, "Initial attempt", "claude-opus-4.6"); + cycle.RecordEvaluation(2, 0.6, "Better but incomplete", "claude-opus-4.6"); + cycle.RecordEvaluation(3, 0.75, "Good progress", "claude-opus-4.6"); + + var state = new OrganizationState(); + state.Groups.Add(new SessionGroup + { + Id = "reflect-team", + Name = "Bug Fix Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + ReflectionState = cycle, + WorktreeId = "wt-1", + RepoId = "repo-1" + }); + + var json = JsonSerializer.Serialize(state, new JsonSerializerOptions { WriteIndented = true }); + var restored = JsonSerializer.Deserialize(json)!; + + var group = restored.Groups.First(g => g.Id == "reflect-team"); + Assert.NotNull(group.ReflectionState); + Assert.Equal("Fix all bugs", group.ReflectionState!.Goal); + Assert.Equal(3, group.ReflectionState.CurrentIteration); + Assert.Equal(10, group.ReflectionState.MaxIterations); + Assert.True(group.ReflectionState.IsActive); + Assert.Equal("Needs more work on error handling", group.ReflectionState.LastEvaluation); + Assert.Equal("eval-session", group.ReflectionState.EvaluatorSessionName); + Assert.Equal(3, group.ReflectionState.EvaluationHistory.Count); + Assert.Equal(0.75, group.ReflectionState.EvaluationHistory[2].Score); + } + + [Fact] + public void OrgJson_AllModes_RoundTrip() + { + foreach (var mode in Enum.GetValues()) + { + var group = new SessionGroup + { + Id = $"test-{mode}", + Name = $"Test {mode}", + IsMultiAgent = true, + OrchestratorMode = mode + }; + + var json = JsonSerializer.Serialize(group); + var restored = JsonSerializer.Deserialize(json)!; + + Assert.Equal(mode, restored.OrchestratorMode); + } + } + + [Fact] + public void OrgJson_AllRoles_RoundTrip() + { + foreach (var role in Enum.GetValues()) + { + var meta = new SessionMeta + { + 
SessionName = $"test-{role}", + Role = role + }; + + var json = JsonSerializer.Serialize(meta); + var restored = JsonSerializer.Deserialize(json)!; + + Assert.Equal(role, restored.Role); + } + } + + #endregion + + #region Bug #2: Reconciliation Scattering Multi-Agent Sessions + + /// + /// Bug: ReconcileOrganization auto-moved sessions from _default to repo groups + /// based on WorktreeId, even for orphaned multi-agent sessions. This scattered + /// team members across repo groups after group deletion or restart. + /// + [Fact] + public void Reconcile_SessionInMultiAgentGroup_NeverAutoMoved() + { + var repos = new List + { + new() { Id = "repo-1", Name = "Repo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + svc.GetOrCreateRepoGroup("repo-1", "Repo"); + + var maGroup = svc.CreateMultiAgentGroup("Team", + mode: MultiAgentMode.OrchestratorReflect, + worktreeId: "wt-1", repoId: "repo-1"); + + // Add sessions with worktree IDs (which would normally trigger auto-move) + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-orch", + GroupId = maGroup.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-w1", + GroupId = maGroup.Id, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "team-orch", "team-w1"); + + // Run reconciliation multiple times (simulates multiple restarts) + for (int i = 0; i < 5; i++) + svc.ReconcileOrganization(); + + // Sessions must remain in multi-agent group + Assert.All(svc.Organization.Sessions.Where(s => s.SessionName.StartsWith("team-")), + m => Assert.Equal(maGroup.Id, m.GroupId)); + } + + /// + /// Bug: After deleting a multi-agent group, orphaned 
sessions in _default + /// with WorktreeId were auto-moved to repo group by reconciliation. + /// The wasMultiAgent heuristic (Orchestrator role or PreferredModel set) + /// must prevent this. + /// + [Fact] + public void Reconcile_OrphanedMultiAgentWorker_WithPreferredModel_NotMovedToRepoGroup() + { + var repos = new List + { + new() { Id = "repo-1", Name = "Repo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + svc.GetOrCreateRepoGroup("repo-1", "Repo"); + + // Session with PreferredModel = was a multi-agent worker + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "orphan-worker", + GroupId = SessionGroup.DefaultId, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "orphan-worker"); + svc.ReconcileOrganization(); + + Assert.Equal(SessionGroup.DefaultId, + svc.Organization.Sessions.First(s => s.SessionName == "orphan-worker").GroupId); + } + + [Fact] + public void Reconcile_OrphanedOrchestrator_NotMovedToRepoGroup() + { + var repos = new List + { + new() { Id = "repo-1", Name = "Repo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + svc.GetOrCreateRepoGroup("repo-1", "Repo"); + + // Session with Orchestrator role = was a multi-agent orchestrator + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "orphan-orch", + GroupId = SessionGroup.DefaultId, + Role = MultiAgentRole.Orchestrator, + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "orphan-orch"); + svc.ReconcileOrganization(); + + Assert.Equal(SessionGroup.DefaultId, + svc.Organization.Sessions.First(s => s.SessionName == 
"orphan-orch").GroupId); + } + + [Fact] + public void Reconcile_RegularWorker_NoPreferredModel_CanBeAutoMoved() + { + // Verify we didn't break regular session grouping + var meta = new SessionMeta + { + SessionName = "regular", + GroupId = SessionGroup.DefaultId, + Role = MultiAgentRole.Worker, + PreferredModel = null, + WorktreeId = "wt-1" + }; + + // wasMultiAgent check + bool wasMultiAgent = meta.Role == MultiAgentRole.Orchestrator || meta.PreferredModel != null; + Assert.False(wasMultiAgent); + } + + #endregion + + #region Bug #3: Preset Creation Must Set Role/PreferredModel Markers + + /// + /// Bug: Sessions created via CreateGroupFromPresetAsync didn't always have + /// Role and PreferredModel set. Without these markers, reconciliation can't + /// distinguish multi-agent sessions from regular ones. + /// + /// + /// Simulates what CreateGroupFromPresetAsync does: creates a group, then sets + /// Role and PreferredModel on sessions. Verifies the metadata survives a round-trip. + /// + [Fact] + public void PresetGroup_OrchestratorRole_SurvivesRoundTrip() + { + var groupId = Guid.NewGuid().ToString(); + var org = new OrganizationState(); + org.Groups.Add(new SessionGroup { Id = groupId, Name = "Test Preset", IsMultiAgent = true, OrchestratorMode = MultiAgentMode.OrchestratorReflect }); + org.Sessions.Add(new SessionMeta { SessionName = "orch-1", GroupId = groupId, Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6" }); + org.Sessions.Add(new SessionMeta { SessionName = "worker-1", GroupId = groupId, Role = MultiAgentRole.Worker, PreferredModel = "gpt-5.1-codex" }); + + var json = JsonSerializer.Serialize(org); + var restored = JsonSerializer.Deserialize(json)!; + + var orchMeta = restored.Sessions.First(s => s.Role == MultiAgentRole.Orchestrator); + Assert.Equal("claude-opus-4.6", orchMeta.PreferredModel); + Assert.Equal(groupId, orchMeta.GroupId); + } + + [Fact] + public void PresetGroup_AllWorkers_HavePreferredModel() + { + var groupId = 
Guid.NewGuid().ToString(); + var org = new OrganizationState(); + org.Groups.Add(new SessionGroup { Id = groupId, Name = "Test Preset", IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Broadcast }); + org.Sessions.Add(new SessionMeta { SessionName = "orch-1", GroupId = groupId, Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6" }); + org.Sessions.Add(new SessionMeta { SessionName = "worker-1", GroupId = groupId, Role = MultiAgentRole.Worker, PreferredModel = "gpt-5.1-codex" }); + org.Sessions.Add(new SessionMeta { SessionName = "worker-2", GroupId = groupId, Role = MultiAgentRole.Worker, PreferredModel = "gpt-4.1" }); + + var json = JsonSerializer.Serialize(org); + var restored = JsonSerializer.Deserialize(json)!; + + var workers = restored.Sessions.Where(s => s.GroupId == groupId && s.Role != MultiAgentRole.Orchestrator).ToList(); + Assert.Equal(2, workers.Count); + Assert.All(workers, w => Assert.NotNull(w.PreferredModel)); + } + + [Fact] + public void CreateMultiAgentGroup_ManualSessions_PreservesExistingMetadata() + { + var svc = CreateService(); + + // Pre-create sessions with specific metadata + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "session-a", + PreferredModel = "gpt-5.1-codex" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "session-b", + PreferredModel = "claude-sonnet-4.5" + }); + + var group = svc.CreateMultiAgentGroup("Team", + sessionNames: new List { "session-a", "session-b" }); + + var a = svc.Organization.Sessions.First(s => s.SessionName == "session-a"); + var b = svc.Organization.Sessions.First(s => s.SessionName == "session-b"); + + // Sessions should be in the group + Assert.Equal(group.Id, a.GroupId); + Assert.Equal(group.Id, b.GroupId); + // PreferredModel should be preserved + Assert.Equal("gpt-5.1-codex", a.PreferredModel); + Assert.Equal("claude-sonnet-4.5", b.PreferredModel); + } + + #endregion + + #region Bug #4: Mode Enum Completeness + + /// + /// Bug: 
Dashboard mode dropdowns were missing OrchestratorReflect entirely. + /// Ensure all enum values are present and serializable. + /// + [Fact] + public void MultiAgentMode_HasAllExpectedValues() + { + var values = Enum.GetValues(); + Assert.Contains(MultiAgentMode.Broadcast, values); + Assert.Contains(MultiAgentMode.Sequential, values); + Assert.Contains(MultiAgentMode.Orchestrator, values); + Assert.Contains(MultiAgentMode.OrchestratorReflect, values); + Assert.Equal(4, values.Length); + } + + [Fact] + public void MultiAgentMode_StringSerialization_AllValues() + { + // Important: modes serialize as strings (JsonStringEnumConverter), not ints + foreach (var mode in Enum.GetValues()) + { + var json = JsonSerializer.Serialize(mode); + var restored = JsonSerializer.Deserialize(json); + Assert.Equal(mode, restored); + // Verify it's a string, not a number + Assert.StartsWith("\"", json); + } + } + + [Fact] + public void MultiAgentRole_HasAllExpectedValues() + { + var values = Enum.GetValues(); + Assert.Contains(MultiAgentRole.Worker, values); + Assert.Contains(MultiAgentRole.Orchestrator, values); + Assert.Equal(2, values.Length); + } + + #endregion + + #region Bug #5: Reflection Loop Error Resilience + + /// + /// Bug: No try-catch around the reflection while loop body meant any exception + /// (e.g., from SendPromptAndWaitAsync) silently killed the entire async task. 
+ /// + [Fact] + public void ReflectionCycle_ErrorRetry_DecrementsThenStalls() + { + // Simulates the error handling logic in SendViaOrchestratorReflectAsync catch block + var state = ReflectionCycle.Create("test", 10); + state.IsActive = true; + state.CurrentIteration = 3; + + // Simulate error: decrement iteration, increment stalls + state.CurrentIteration--; // retry same iteration + state.ConsecutiveStalls++; + Assert.Equal(2, state.CurrentIteration); + Assert.Equal(1, state.ConsecutiveStalls); + + // Second error + state.CurrentIteration--; + state.ConsecutiveStalls++; + Assert.Equal(1, state.CurrentIteration); + Assert.Equal(2, state.ConsecutiveStalls); + + // Third error β€” should trigger stall + state.ConsecutiveStalls++; + Assert.True(state.ConsecutiveStalls >= 3); + state.IsStalled = true; + Assert.True(state.IsStalled); + } + + [Fact] + public void ReflectionCycle_LoopConditions_AllChecked() + { + var state = ReflectionCycle.Create("test", 5); + + // Active + not paused + under max β†’ should continue + Assert.True(state.IsActive && !state.IsPaused && state.CurrentIteration < state.MaxIterations); + + // Paused β†’ should stop + state.IsPaused = true; + Assert.False(state.IsActive && !state.IsPaused && state.CurrentIteration < state.MaxIterations); + state.IsPaused = false; + + // At max iterations β†’ should stop + state.CurrentIteration = 5; + Assert.False(state.IsActive && !state.IsPaused && state.CurrentIteration < state.MaxIterations); + state.CurrentIteration = 0; + + // Not active β†’ should stop + state.IsActive = false; + Assert.False(state.IsActive && !state.IsPaused && state.CurrentIteration < state.MaxIterations); + } + + [Fact] + public void ReflectionCycle_CompletionSentinels_Detected() + { + // [[GROUP_REFLECT_COMPLETE]] sentinel + var response1 = "Analysis complete. 
[[GROUP_REFLECT_COMPLETE]] All tasks finished."; + Assert.Contains("[[GROUP_REFLECT_COMPLETE]]", response1, StringComparison.OrdinalIgnoreCase); + + // [[NEEDS_ITERATION]] sentinel β†’ score 0.4 + var response2 = "Progress made but [[NEEDS_ITERATION]] more work needed."; + var score = response2.Contains("[[NEEDS_ITERATION]]", StringComparison.OrdinalIgnoreCase) ? 0.4 : 0.7; + Assert.Equal(0.4, score); + + // No sentinel β†’ score 0.7 + var response3 = "Good progress on all fronts."; + score = response3.Contains("[[NEEDS_ITERATION]]", StringComparison.OrdinalIgnoreCase) ? 0.4 : 0.7; + Assert.Equal(0.7, score); + } + + #endregion + + #region Bug #6: TCS Ordering Invariant + + /// + /// Bug: TrySetResult was called BEFORE IsProcessing=false in CompleteResponse. + /// When the TCS continuation runs synchronously (reflection loop), the next + /// SendPromptAsync sees IsProcessing=true and throws. + /// + /// This test verifies the invariant at the model level: IsProcessing must be + /// the first thing cleared so any synchronous continuation sees clean state. + /// + [Fact] + public void IsProcessing_MustBeFalse_BeforeTCSCompletion() + { + // Simulate what CompleteResponse does: state transitions must be ordered + var isProcessing = true; + var tcs = new TaskCompletionSource(); + string? observedFromContinuation = null; + + // Add a synchronous continuation that checks IsProcessing + tcs.Task.ContinueWith(t => + { + observedFromContinuation = isProcessing ? 
"BUG: still processing" : "OK: not processing"; + }, TaskContinuationOptions.ExecuteSynchronously); + + // Correct order: clear IsProcessing FIRST, then complete TCS + isProcessing = false; + tcs.TrySetResult("response"); + + // Give continuation a chance to run + tcs.Task.Wait(TimeSpan.FromSeconds(1)); + + Assert.Equal("OK: not processing", observedFromContinuation); + } + + [Fact] + public void IsProcessing_BugReproduction_WrongOrder() + { + // Demonstrate that wrong order causes the bug + var isProcessing = true; + var tcs = new TaskCompletionSource(); + string? observedFromContinuation = null; + + tcs.Task.ContinueWith(t => + { + observedFromContinuation = isProcessing ? "BUG: still processing" : "OK: not processing"; + }, TaskContinuationOptions.ExecuteSynchronously); + + // WRONG order (the old bug): complete TCS while IsProcessing is still true + tcs.TrySetResult("response"); + isProcessing = false; + + tcs.Task.Wait(TimeSpan.FromSeconds(1)); + + // This would have been the bug β€” continuation sees stale state + Assert.Equal("BUG: still processing", observedFromContinuation); + } + + [Fact] + public void IsProcessing_ErrorPath_MustAlsoClearFirst() + { + // Same invariant for the error path (SessionErrorEvent handler) + var isProcessing = true; + var tcs = new TaskCompletionSource(); + bool? 
sawProcessing = null; + + tcs.Task.ContinueWith(t => + { + sawProcessing = isProcessing; + }, TaskContinuationOptions.ExecuteSynchronously); + + // Correct error path: clear IsProcessing, then set exception + isProcessing = false; + tcs.TrySetException(new Exception("test error")); + + try { tcs.Task.Wait(TimeSpan.FromSeconds(1)); } catch { } + + Assert.False(sawProcessing); + } + + #endregion + + #region Bug #7: Full Lifecycle - Delete and Recreate + + [Fact] + public void Lifecycle_DeleteGroup_ThenCreateNewGroup_NoContamination() + { + var svc = CreateService(); + + // Create first team + var group1 = svc.CreateMultiAgentGroup("Team Alpha", + mode: MultiAgentMode.OrchestratorReflect); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "alpha-orch", + GroupId = group1.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6" + }); + + // Delete it + svc.DeleteGroup(group1.Id); + + // Create second team + var group2 = svc.CreateMultiAgentGroup("Team Beta", + mode: MultiAgentMode.Orchestrator); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "beta-orch", + GroupId = group2.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "gpt-5" + }); + + // Verify no cross-contamination + Assert.NotEqual(group1.Id, group2.Id); + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "alpha-orch"); // removed with group + var beta = svc.Organization.Sessions.First(s => s.SessionName == "beta-orch"); + Assert.Equal(group2.Id, beta.GroupId); // in new group + } + + [Fact] + public void Lifecycle_CreateTeam_SerializeDeserialize_DeleteTeam_Serialize() + { + var svc = CreateService(); + var group = svc.CreateMultiAgentGroup("QRC", + mode: MultiAgentMode.OrchestratorReflect, + worktreeId: "wt-1", repoId: "repo-1"); + + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "qrc-orch", GroupId = group.Id, + Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6", WorktreeId = 
"wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "qrc-w1", GroupId = group.Id, + PreferredModel = "gpt-4.1", WorktreeId = "wt-1" + }); + + // Serialize (app save) + var json1 = JsonSerializer.Serialize(svc.Organization, new JsonSerializerOptions { WriteIndented = true }); + + // Deserialize (app reload) + var restored = JsonSerializer.Deserialize(json1)!; + Assert.Contains(restored.Groups, g => g.Id == group.Id && g.IsMultiAgent); + Assert.Equal(2, restored.Sessions.Count(s => s.GroupId == group.Id)); + + // Delete the group + restored.Groups.RemoveAll(g => g.Id == group.Id); + foreach (var s in restored.Sessions.Where(s => s.GroupId == group.Id)) + s.GroupId = SessionGroup.DefaultId; + + // Serialize again + var json2 = JsonSerializer.Serialize(restored, new JsonSerializerOptions { WriteIndented = true }); + var final = JsonSerializer.Deserialize(json2)!; + + // Group should be gone, sessions in default with preserved metadata + Assert.DoesNotContain(final.Groups, g => g.Id == group.Id); + var orch = final.Sessions.First(s => s.SessionName == "qrc-orch"); + Assert.Equal(SessionGroup.DefaultId, orch.GroupId); + Assert.Equal(MultiAgentRole.Orchestrator, orch.Role); + Assert.Equal("claude-opus-4.6", orch.PreferredModel); + } + + #endregion + + #region Scenario: Full App Restart Simulation + + /// + /// Simulates what happens when the app restarts: + /// 1. Organization loaded from disk + /// 2. ReconcileOrganization runs with no active sessions + /// 3. Sessions restored + /// 4. ReconcileOrganization runs again + /// + /// Multi-agent groups must survive this entire sequence. 
+ /// + [Fact] + public void Scenario_AppRestart_MultiAgentGroupSurvives() + { + // Phase 1: Create state that would exist on disk + var orgState = new OrganizationState(); + orgState.Groups.Add(new SessionGroup + { + Id = "ma-team", + Name = "Reflect Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + WorktreeId = "wt-1", + RepoId = "repo-1", + SortOrder = 2 + }); + orgState.Sessions.Add(new SessionMeta + { + SessionName = "team-orch", GroupId = "ma-team", + Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + orgState.Sessions.Add(new SessionMeta + { + SessionName = "team-w1", GroupId = "ma-team", + PreferredModel = "gpt-5.1-codex", WorktreeId = "wt-1" + }); + orgState.Sessions.Add(new SessionMeta + { + SessionName = "team-w2", GroupId = "ma-team", + PreferredModel = "gpt-4.1", WorktreeId = "wt-1" + }); + orgState.Sessions.Add(new SessionMeta + { + SessionName = "regular-session", GroupId = SessionGroup.DefaultId + }); + + // Serialize to simulate disk + var json = JsonSerializer.Serialize(orgState, new JsonSerializerOptions { WriteIndented = true }); + + // Phase 2: Deserialize (LoadOrganization) + var restored = JsonSerializer.Deserialize(json)!; + + // Verify the multi-agent group survived deserialization + var maGroup = restored.Groups.FirstOrDefault(g => g.Id == "ma-team"); + Assert.NotNull(maGroup); + Assert.True(maGroup!.IsMultiAgent); + Assert.Equal(MultiAgentMode.OrchestratorReflect, maGroup.OrchestratorMode); + + // Phase 3: Simulate ReconcileOrganization with sessions from aliases + var repos = new List + { + new() { Id = "repo-1", Name = "Repo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + + // Load the state by manipulating the groups/sessions directly (simulates 
LoadOrganization) + foreach (var g in restored.Groups) + { + if (!svc.Organization.Groups.Any(og => og.Id == g.Id)) + svc.Organization.Groups.Add(g); + } + foreach (var s in restored.Sessions) + svc.Organization.Sessions.Add(s); + + // Register sessions as known (simulates alias file) + RegisterKnownSessions(svc, "team-orch", "team-w1", "team-w2", "regular-session"); + + // First reconciliation (called inside LoadOrganization, no active sessions yet) + svc.ReconcileOrganization(); + + // Verify multi-agent sessions survived + Assert.All( + svc.Organization.Sessions.Where(s => s.SessionName.StartsWith("team-")), + m => Assert.Equal("ma-team", m.GroupId)); + + // Second reconciliation (after sessions restored) + svc.ReconcileOrganization(); + + // Still intact + Assert.All( + svc.Organization.Sessions.Where(s => s.SessionName.StartsWith("team-")), + m => Assert.Equal("ma-team", m.GroupId)); + + // Multi-agent group still exists + Assert.Contains(svc.Organization.Groups, g => g.Id == "ma-team" && g.IsMultiAgent); + } + + /// + /// Verify that reconciliation handles a mix of multi-agent and regular sessions + /// without moving any multi-agent session to a repo group. 
+ /// + [Fact] + public void Scenario_MixedSessions_ReconcileDoesNotScatter() + { + var repos = new List + { + new() { Id = "repo-1", Name = "PolyPilot", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" }, + new() { Id = "wt-2", RepoId = "repo-1", Branch = "feature", Path = "/tmp/wt-2" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + var repoGroup = svc.GetOrCreateRepoGroup("repo-1", "PolyPilot"); + + // Multi-agent group for wt-1 + var maGroup = svc.CreateMultiAgentGroup("Team", worktreeId: "wt-1", repoId: "repo-1"); + + // Multi-agent sessions + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "ma-orch", GroupId = maGroup.Id, + Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6", WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "ma-w1", GroupId = maGroup.Id, + PreferredModel = "gpt-5.1-codex", WorktreeId = "wt-1" + }); + + // Regular session on same worktree in repo group + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "regular-1", GroupId = repoGroup.Id, WorktreeId = "wt-1" + }); + + // Regular session in default + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "regular-default", GroupId = SessionGroup.DefaultId + }); + + RegisterKnownSessions(svc, "ma-orch", "ma-w1", "regular-1", "regular-default"); + svc.ReconcileOrganization(); + + // Multi-agent sessions: still in multi-agent group + Assert.Equal(maGroup.Id, svc.Organization.Sessions.First(s => s.SessionName == "ma-orch").GroupId); + Assert.Equal(maGroup.Id, svc.Organization.Sessions.First(s => s.SessionName == "ma-w1").GroupId); + + // Regular sessions: unchanged + Assert.Equal(repoGroup.Id, svc.Organization.Sessions.First(s => s.SessionName == "regular-1").GroupId); + Assert.Equal(SessionGroup.DefaultId, svc.Organization.Sessions.First(s 
=> s.SessionName == "regular-default").GroupId); + } + + #endregion + + #region Scenario: wasMultiAgent Heuristic Correctness + + [Theory] + [InlineData(MultiAgentRole.Orchestrator, null, true)] // Orchestrator role → multi-agent + [InlineData(MultiAgentRole.Worker, "gpt-5.1-codex", true)] // Worker with PreferredModel → multi-agent + [InlineData(MultiAgentRole.Worker, null, false)] // Plain worker → not multi-agent + public void WasMultiAgent_Heuristic_CorrectForAllCombinations( + MultiAgentRole role, string? preferredModel, bool expectedWasMultiAgent) + { + var meta = new SessionMeta + { + SessionName = "test", + Role = role, + PreferredModel = preferredModel + }; + + bool wasMultiAgent = meta.Role == MultiAgentRole.Orchestrator || meta.PreferredModel != null; + Assert.Equal(expectedWasMultiAgent, wasMultiAgent); + } + + #endregion + + #region Scenario: Stall Detection Alignment + + /// + /// Both single-agent and multi-agent stall detection must use + /// 2-consecutive-stalls tolerance (not break on first). 
+ /// + [Fact] + public void StallDetection_ConsecutiveToleranceIs2() + { + var cycle = ReflectionCycle.Create("test"); + cycle.IsActive = true; + + // 1st stall — warning only + cycle.Advance("same response"); + cycle.Advance("same response"); + Assert.Equal(1, cycle.ConsecutiveStalls); + Assert.False(cycle.IsStalled); + + // 2nd stall — stops + cycle.Advance("same response"); + Assert.Equal(2, cycle.ConsecutiveStalls); + Assert.True(cycle.IsStalled); + } + + [Fact] + public void StallDetection_ResetOnDifferentContent() + { + var cycle = ReflectionCycle.Create("test"); + cycle.IsActive = true; + + cycle.Advance("response A"); + cycle.Advance("response A"); // 1st stall + Assert.Equal(1, cycle.ConsecutiveStalls); + + cycle.Advance("completely different response B"); // resets + Assert.Equal(0, cycle.ConsecutiveStalls); + Assert.False(cycle.IsStalled); + } + + #endregion + + #region Feature: Per-Worker System Prompts (Agent Personas) + + /// + /// SystemPrompt on SessionMeta must survive JSON round-trip (serialization to org.json). + /// + [Fact] + public void SystemPrompt_SurvivesJsonRoundTrip() + { + var org = new OrganizationState(); + var groupId = Guid.NewGuid().ToString(); + org.Groups.Add(new SessionGroup { Id = groupId, Name = "Persona Team", IsMultiAgent = true }); + org.Sessions.Add(new SessionMeta + { + SessionName = "worker-security", + GroupId = groupId, + Role = MultiAgentRole.Worker, + PreferredModel = "gpt-5.1-codex", + SystemPrompt = "You are a security auditor. Focus on vulnerabilities." + }); + org.Sessions.Add(new SessionMeta + { + SessionName = "worker-perf", + GroupId = groupId, + Role = MultiAgentRole.Worker, + PreferredModel = "claude-sonnet-4.5", + SystemPrompt = "You are a performance optimizer. Focus on latency and memory." 
+ }); + org.Sessions.Add(new SessionMeta + { + SessionName = "worker-plain", + GroupId = groupId, + Role = MultiAgentRole.Worker, + PreferredModel = "gpt-4.1" + // No SystemPrompt — should remain null + }); + + var json = JsonSerializer.Serialize(org); + var restored = JsonSerializer.Deserialize(json)!; + + var security = restored.Sessions.First(s => s.SessionName == "worker-security"); + var perf = restored.Sessions.First(s => s.SessionName == "worker-perf"); + var plain = restored.Sessions.First(s => s.SessionName == "worker-plain"); + + Assert.Equal("You are a security auditor. Focus on vulnerabilities.", security.SystemPrompt); + Assert.Equal("You are a performance optimizer. Focus on latency and memory.", perf.SystemPrompt); + Assert.Null(plain.SystemPrompt); + } + + /// + /// Null SystemPrompt in old org.json files must not cause deserialization failure. + /// + [Fact] + public void SystemPrompt_NullInOldJson_DeserializesCleanly() + { + // Simulate an org.json from before SystemPrompt was added + var json = """{"Groups":[],"Sessions":[{"SessionName":"old-session","GroupId":"_default","Role":0,"PreferredModel":null}]}"""; + var org = JsonSerializer.Deserialize(json)!; + + Assert.Single(org.Sessions); + Assert.Null(org.Sessions[0].SystemPrompt); + } + + /// + /// SetSessionSystemPrompt persists through Organization model. + /// + [Fact] + public void SetSessionSystemPrompt_PersistsOnMeta() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "w1" }); + + svc.SetSessionSystemPrompt("w1", "You are a code reviewer."); + + var meta = svc.Organization.Sessions.First(s => s.SessionName == "w1"); + Assert.Equal("You are a code reviewer.", meta.SystemPrompt); + } + + /// + /// SetSessionSystemPrompt with whitespace/null clears the prompt. 
+ /// + [Fact] + public void SetSessionSystemPrompt_WhitespaceClears() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "w1", SystemPrompt = "old" }); + + svc.SetSessionSystemPrompt("w1", " "); + Assert.Null(svc.Organization.Sessions.First(s => s.SessionName == "w1").SystemPrompt); + + svc.Organization.Sessions.First(s => s.SessionName == "w1").SystemPrompt = "restored"; + svc.SetSessionSystemPrompt("w1", null); + Assert.Null(svc.Organization.Sessions.First(s => s.SessionName == "w1").SystemPrompt); + } + + /// + /// BuildOrchestratorPlanningPrompt includes worker system prompts when present. + /// + [Fact] + public void OrchestratorPlanningPrompt_IncludesWorkerPersonas() + { + var svc = CreateService(); + // Pre-create session metadata entries + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "orch" }); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "sec-worker" }); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "perf-worker" }); + + var group = svc.CreateMultiAgentGroup("Persona", + sessionNames: new List { "orch", "sec-worker", "perf-worker" }); + + svc.SetSessionRole("orch", MultiAgentRole.Orchestrator); + svc.SetSessionSystemPrompt("sec-worker", "You are a security auditor."); + svc.SetSessionSystemPrompt("perf-worker", "You are a performance optimizer."); + + // Use reflection to call private BuildOrchestratorPlanningPrompt + var method = typeof(CopilotService).GetMethod("BuildOrchestratorPlanningPrompt", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance); + Assert.NotNull(method); + + var workers = new List { "sec-worker", "perf-worker" }; + var result = (string)method!.Invoke(svc, new object?[] { "Review this code", workers, null, null })!; + + Assert.Contains("security auditor", result); + Assert.Contains("performance optimizer", result); + Assert.Contains("specialization", result); + } + + /// + /// Built-in presets with 
WorkerSystemPrompts have the right number of prompts. + /// + [Fact] + public void BuiltInPresets_WorkerSystemPrompts_MatchWorkerCount() + { + foreach (var preset in GroupPreset.BuiltIn) + { + if (preset.WorkerSystemPrompts == null) continue; + Assert.True(preset.WorkerSystemPrompts.Length <= preset.WorkerModels.Length, + $"Preset '{preset.Name}' has {preset.WorkerSystemPrompts.Length} system prompts but only {preset.WorkerModels.Length} workers"); + } + } + + /// + /// Code Review Team preset has distinct personas for each worker. + /// + [Fact] + public void CodeReviewTeam_Preset_HasDistinctPersonas() + { + var preset = GroupPreset.BuiltIn.First(p => p.Name == "Code Review Team"); + Assert.NotNull(preset.WorkerSystemPrompts); + Assert.Equal(2, preset.WorkerSystemPrompts!.Length); + Assert.All(preset.WorkerSystemPrompts, p => Assert.False(string.IsNullOrWhiteSpace(p))); + // Each persona should be unique + Assert.NotEqual(preset.WorkerSystemPrompts[0], preset.WorkerSystemPrompts[1]); + } + + #endregion +} diff --git a/PolyPilot.Tests/PolyPilot.Tests.csproj b/PolyPilot.Tests/PolyPilot.Tests.csproj index 1d533fbc14..fcd746863b 100644 --- a/PolyPilot.Tests/PolyPilot.Tests.csproj +++ b/PolyPilot.Tests/PolyPilot.Tests.csproj @@ -54,6 +54,9 @@ + + + diff --git a/PolyPilot.Tests/ScenarioReferenceTests.cs b/PolyPilot.Tests/ScenarioReferenceTests.cs index 6fff147301..c07c70725b 100644 --- a/PolyPilot.Tests/ScenarioReferenceTests.cs +++ b/PolyPilot.Tests/ScenarioReferenceTests.cs @@ -189,4 +189,94 @@ public void AllScenarios_HaveUniqueIds() Assert.Equal(ids.Count, ids.Distinct().Count()); } + + [Fact] + public void MultiAgentScenarios_HaveUniqueIds() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + .ToList(); + + Assert.Equal(ids.Count, ids.Distinct().Count()); + } + + 
[Fact] + public void MultiAgentScenarios_IncludeSquadIntegration() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + .ToHashSet(); + + Assert.Contains("squad-discovery-creates-preset", ids); + Assert.Contains("squad-charter-becomes-system-prompt", ids); + Assert.Contains("squad-decisions-shared-context", ids); + Assert.Contains("squad-legacy-ai-team-compat", ids); + } + + [Fact] + public void MultiAgentScenarios_IncludeGroupDeletion() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + .ToHashSet(); + + Assert.Contains("delete-group-no-contamination", ids); + Assert.Contains("delete-multi-agent-group-closes-sessions", ids); + } + + [Fact] + public void MultiAgentScenarios_IncludeSquadWriteBack() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + .ToHashSet(); + + Assert.Contains("save-preset-creates-squad-dir", ids); + Assert.Contains("round-trip-squad-write-read", ids); + Assert.Contains("squad-write-sanitizes-names", ids); + } + + [Fact] + public void MultiAgentScenarios_AllHaveRequiredFields() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var scenarios = doc.RootElement.GetProperty("scenarios").EnumerateArray().ToList(); + + Assert.NotEmpty(scenarios); + foreach (var s in scenarios) + { + Assert.True(s.TryGetProperty("id", out _), "Scenario missing 'id'"); + 
Assert.True(s.TryGetProperty("name", out _), "Scenario missing 'name'"); + Assert.True(s.TryGetProperty("steps", out var steps), "Scenario missing 'steps'"); + Assert.NotEqual(0, steps.GetArrayLength()); + } + } + + [Fact] + public void MultiAgentScenarios_IncludeReflectLoopScenarios() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + .ToHashSet(); + + Assert.Contains("reflect-loop-completes-goal-met", ids); + Assert.Contains("reflect-loop-max-iterations", ids); + Assert.Contains("stall-detection-triggers", ids); + } } diff --git a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json new file mode 100644 index 0000000000..a1944a40b6 --- /dev/null +++ b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json @@ -0,0 +1,414 @@ +{ + "description": "Multi-agent orchestration scenarios for PolyPilot. Tests cover the OrchestratorReflect loop, stall detection, reconciliation stability, group lifecycle, and Squad integration. Each scenario can be executed against a running app using MauiDevFlow CDP commands. See docs/multi-agent-orchestration.md for the architecture spec.", + "prerequisites": { + "build": "cd PolyPilot && .\\relaunch.ps1", + "waitForAgent": "maui-devflow MAUI status", + "initialMode": "Persistent", + "notes": "App must be in Persistent or Demo mode. Multi-agent features require at least one worktree configured. Squad scenarios require a worktree with a .squad/ directory." 
+ }, + "scenarios": [ + { + "id": "reflect-loop-completes-goal-met", + "name": "OrchestratorReflect loop runs to goal completion", + "description": "Verifies the full plan-dispatch-collect-evaluate loop runs and exits when the evaluator signals [[GROUP_REFLECT_COMPLETE]] or scores >= 0.9.", + "invariants": [ + "ReflectionState.GoalMet == true on exit", + "ReflectionState.IsActive == false on exit", + "ReflectionState.CurrentIteration >= 1", + "All workers received prompts containing the original user request" + ], + "steps": [ + { "action": "navigate", "route": "/multi-agent" }, + { "action": "createGroup", "mode": "OrchestratorReflect", "workers": 2, "maxIterations": 3 }, + { "action": "sendPrompt", "text": "Analyze the project structure and suggest improvements" }, + { "action": "waitForPhase", "phase": "Planning", "timeout": 30 }, + { "action": "waitForPhase", "phase": "Dispatching", "timeout": 60 }, + { "action": "waitForPhase", "phase": "WaitingForWorkers", "timeout": 120 }, + { "action": "waitForPhase", "phase": "Synthesizing", "timeout": 60 }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 600 }, + { "action": "assertReflectionState", "field": "IsActive", "expected": false }, + { "action": "assertReflectionState", "field": "CurrentIteration", "operator": ">=", "value": 1 } + ] + }, + { + "id": "reflect-loop-max-iterations", + "name": "OrchestratorReflect stops at MaxIterations", + "description": "Verifies the loop exits when MaxIterations is reached without the goal being met.", + "invariants": [ + "ReflectionState.CurrentIteration == MaxIterations on exit", + "ReflectionState.GoalMet == false", + "ReflectionState.IsActive == false" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "workers": 1, "maxIterations": 2 }, + { "action": "sendPrompt", "text": "Write a perfect novel (intentionally impossible in 2 iterations)" }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 600 }, + { "action": 
"assertReflectionState", "field": "CurrentIteration", "expected": 2 }, + { "action": "assertReflectionState", "field": "GoalMet", "expected": false } + ] + }, + { + "id": "stall-detection-triggers", + "name": "Stall detection fires after 2 consecutive similar responses", + "description": "Verifies that if the orchestrator synthesis is >90% similar (Jaccard) for 2 consecutive iterations, the loop stops with IsStalled=true.", + "invariants": [ + "ReflectionState.IsStalled == true on exit", + "ReflectionState.ConsecutiveStalls >= 2", + "ReflectionState.LastSimilarity > 0.9" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "workers": 1, "maxIterations": 10 }, + { "action": "sendPrompt", "text": "Repeat the same analysis over and over" }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 600 }, + { "action": "assertReflectionState", "field": "IsStalled", "expected": true } + ] + }, + { + "id": "group-survives-restart", + "name": "Multi-agent group persists across app restart", + "description": "Verifies that after creating a multi-agent group, killing the app, and relaunching, the group still exists with all sessions correctly assigned.", + "invariants": [ + "Group.IsMultiAgent == true after restart", + "All sessions retain their GroupId", + "Orchestrator session retains Role == Orchestrator", + "Worker sessions retain PreferredModel values", + "No sessions scattered to repo groups" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "name": "Restart Test", "workers": 2 }, + { "action": "captureGroupState", "capture": "beforeRestart" }, + { "action": "restartApp" }, + { "action": "waitForAgent", "timeout": 120 }, + { "action": "captureGroupState", "capture": "afterRestart" }, + { "action": "assertEqual", "left": "beforeRestart.groupCount", "right": "afterRestart.groupCount" }, + { "action": "assertEqual", "left": "beforeRestart.sessionNames", "right": "afterRestart.sessionNames" }, + { "action": 
"assertOrgJson", "check": "noSessionsInDefaultWithMultiAgentMarkers" } + ] + }, + { + "id": "reconciliation-protects-multi-agent", + "name": "Reconciliation does not scatter multi-agent sessions", + "description": "Verifies that ReconcileOrganization() does not move sessions with Role=Orchestrator or PreferredModel!=null out of their multi-agent group into repo groups.", + "invariants": [ + "Sessions with IsMultiAgent group membership are never auto-moved", + "Orphaned sessions with Role==Orchestrator or PreferredModel!=null stay in _default", + "Regular sessions (no markers) ARE auto-moved to repo groups normally" + ], + "steps": [ + { "action": "createGroup", "mode": "Orchestrator", "workers": 2 }, + { "action": "readOrgJson", "capture": "orgBefore" }, + { "action": "restartApp" }, + { "action": "readOrgJson", "capture": "orgAfter" }, + { "action": "assertGroupMembership", "unchanged": true } + ] + }, + { + "id": "delete-group-no-contamination", + "name": "Deleted group sessions don't contaminate new groups", + "description": "Verifies that deleting a multi-agent group and creating a new one produces a clean group with no leftover sessions from the old one.", + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "name": "Group A", "workers": 2 }, + { "action": "captureGroupState", "capture": "groupA" }, + { "action": "deleteGroup", "name": "Group A" }, + { "action": "createGroup", "mode": "OrchestratorReflect", "name": "Group B", "workers": 2 }, + { "action": "captureGroupState", "capture": "groupB" }, + { "action": "assertNoOverlap", "left": "groupA.sessionNames", "right": "groupB.sessionNames" } + ] + }, + { + "id": "delete-multi-agent-group-closes-sessions", + "name": "Deleting multi-agent group removes sessions entirely", + "description": "Verifies that deleting a multi-agent group closes all its sessions and removes them from the organization, rather than orphaning them in the default Sessions group.", + "invariants": [ + "No sessions with 
the deleted group's ID remain in organization.json", + "No orphaned orchestrator/worker sessions appear in the default Sessions group", + "Non-multi-agent group deletion still moves sessions to default (different behavior)" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "name": "Temp Team", "workers": 2 }, + { "action": "captureGroupState", "capture": "beforeDelete" }, + { "action": "deleteGroup", "name": "Temp Team" }, + { "action": "readOrgJson", "capture": "orgAfter" }, + { "action": "assertNoSessionsWithGroupId", "groupId": "beforeDelete.groupId" }, + { "action": "assertNoSessionsInDefault", "nameContains": "Temp Team", "note": "Sessions should be gone, not orphaned" } + ] + }, + { + "id": "broadcast-mode-all-receive", + "name": "Broadcast mode sends to all sessions", + "description": "Verifies that in Broadcast mode, the same prompt is sent to all sessions simultaneously.", + "steps": [ + { "action": "createGroup", "mode": "Broadcast", "workers": 3 }, + { "action": "sendPrompt", "text": "Hello from broadcast" }, + { "action": "waitForAllSessions", "state": "idle", "timeout": 120 }, + { "action": "assertAllSessionsReceived", "text": "Hello from broadcast" } + ] + }, + { + "id": "orchestrator-single-pass", + "name": "Orchestrator mode runs one pass without iteration", + "description": "Verifies that Orchestrator (non-reflect) mode plans, dispatches, collects, and synthesizes exactly once.", + "steps": [ + { "action": "createGroup", "mode": "Orchestrator", "workers": 2 }, + { "action": "sendPrompt", "text": "Review this code" }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 300 }, + { "action": "assertOrchestratorSynthesized" }, + { "action": "assertNoReflectionLoop", "note": "Should not have iterated" } + ] + }, + { + "id": "tcs-ordering-reflection-continues", + "name": "Reflection loop continues past iteration 1 (TCS ordering invariant)", + "description": "Regression test for the bug where IsProcessing was set to 
false AFTER TrySetResult, causing the next SendPromptAsync to throw. The loop must reach at least iteration 2.", + "invariants": [ + "IsProcessing = false BEFORE TrySetResult in CompleteResponse", + "ReflectionState.CurrentIteration >= 2 (proves loop continued)" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "workers": 1, "maxIterations": 3 }, + { "action": "sendPrompt", "text": "Iterate on this multiple times" }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 600 }, + { "action": "assertReflectionState", "field": "CurrentIteration", "operator": ">=", "value": 2 } + ] + }, + { + "id": "preset-creates-correct-markers", + "name": "Group preset sets Role and PreferredModel on all sessions", + "description": "Verifies that CreateGroupFromPresetAsync correctly sets Role=Orchestrator on the orchestrator session and PreferredModel on all sessions.", + "invariants": [ + "Orchestrator session has Role == Orchestrator", + "Orchestrator session has PreferredModel == preset.OrchestratorModel", + "Worker sessions have PreferredModel == preset.WorkerModels[i]", + "All sessions have GroupId matching the new group" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Quick Reflection Cycle" }, + { "action": "readOrgJson", "capture": "org" }, + { "action": "assertSessionMeta", "role": "Orchestrator", "hasPreferredModel": true }, + { "action": "assertAllWorkers", "havePreferredModel": true } + ] + }, + { + "id": "squad-discovery-creates-preset", + "name": "Squad directory discovered as repo-level preset", + "description": "Verifies that when a worktree contains a .squad/ directory with team.md and agent charters, PolyPilot discovers it and presents it as a selectable preset in the multi-agent group creation flow.", + "invariants": [ + "Squad preset appears in 'From Repo' section of preset picker", + "Preset has IsRepoLevel == true", + "Preset worker count matches number of non-scribe agents in .squad/agents/" + ], + 
"steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/reviewer && echo '# Team\n| Member | Role |\n|--------|------|\n| reviewer | Code Reviewer |' > .squad/team.md && echo 'You are a code reviewer.' > .squad/agents/reviewer/charter.md" }, + { "action": "navigate", "route": "/multi-agent" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertPresetVisible", "section": "From Repo", "name": "Squad Team" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-charter-becomes-system-prompt", + "name": "Squad agent charter.md becomes worker system prompt", + "description": "Verifies that when creating a group from a Squad-discovered preset, each agent's charter.md content is set as the worker's SessionMeta.SystemPrompt.", + "invariants": [ + "Worker SystemPrompt contains charter.md content", + "Orchestrator planning prompt includes worker specializations" + ], + "steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/security .squad/agents/perf && echo '# Team\n| Member | Role |\n|--------|------|\n| security | Security Auditor |\n| perf | Performance Analyst |' > .squad/team.md && echo 'You are a security auditor. Focus on OWASP Top 10.' > .squad/agents/security/charter.md && echo 'You are a performance analyst. Focus on latency and throughput.' 
> .squad/agents/perf/charter.md" }, + { "action": "createGroupFromPreset", "preset": "Squad Team", "source": "repo" }, + { "action": "readOrgJson", "capture": "org" }, + { "action": "assertSessionMeta", "sessionNameContains": "security", "systemPromptContains": "OWASP Top 10" }, + { "action": "assertSessionMeta", "sessionNameContains": "perf", "systemPromptContains": "latency and throughput" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-decisions-shared-context", + "name": "Squad decisions.md injected as shared context", + "description": "Verifies that .squad/decisions.md content is prepended to all worker prompts as shared team knowledge.", + "invariants": [ + "All workers receive decisions.md content in their prompt context", + "Decisions content appears before the worker's assigned task" + ], + "steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/worker1 && echo '# Team\n| Member | Role |\n|--------|------|\n| worker1 | Developer |' > .squad/team.md && echo 'Always use TypeScript. Never use any.' > .squad/decisions.md && echo 'You are a developer.' > .squad/agents/worker1/charter.md" }, + { "action": "createGroupFromPreset", "preset": "Squad Team", "source": "repo" }, + { "action": "sendPrompt", "text": "Write a hello world" }, + { "action": "waitForAllSessions", "state": "idle", "timeout": 120 }, + { "action": "assertWorkerPromptContains", "text": "Always use TypeScript" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-legacy-ai-team-compat", + "name": "Legacy .ai-team/ directory also discovered", + "description": "Verifies backward compatibility: .ai-team/ is discovered if .squad/ doesn't exist (Squad v0.4.x compat).", + "steps": [ + { "action": "shell", "command": "mkdir -p .ai-team/agents/dev && echo '# Team\n| Member | Role |\n|--------|------|\n| dev | Developer |' > .ai-team/team.md && echo 'You are a developer.' 
> .ai-team/agents/dev/charter.md" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertPresetVisible", "section": "From Repo", "name": "Squad Team" }, + { "action": "shell", "command": "rm -rf .ai-team" } + ] + }, + { + "id": "squad-preset-priority-over-builtin", + "name": "Squad preset shadows built-in with same name", + "description": "Verifies that if a Squad team has the same name as a built-in preset, the Squad version takes priority within that worktree.", + "steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/reviewer && echo '# Team\nCode Review Team\n| Member | Role |\n|--------|------|\n| reviewer | Reviewer |' > .squad/team.md && echo 'Custom repo reviewer.' > .squad/agents/reviewer/charter.md" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertPresetInSection", "name": "Code Review Team", "section": "From Repo", "note": "Repo version should shadow built-in" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-missing-files-graceful", + "name": "Missing Squad files handled gracefully", + "description": "Verifies that partial .squad/ directories (missing team.md, missing charter.md) are handled without errors.", + "steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/orphan" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertNoPresetInSection", "section": "From Repo", "note": "No team.md = no preset" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "worker-system-prompt-in-orchestrator-plan", + "name": "Worker system prompts visible in orchestrator planning", + "description": "Verifies that BuildOrchestratorPlanningPrompt includes each worker's SystemPrompt description so the orchestrator can route tasks based on expertise.", + "invariants": [ + "Orchestrator planning prompt lists worker names with their specializations", + "Workers with no SystemPrompt are listed as generic 
workers" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "sendPrompt", "text": "Review the authentication module" }, + { "action": "waitForPhase", "phase": "Planning", "timeout": 30 }, + { "action": "assertOrchestratorReceivedWorkerDescriptions" } + ] + }, + { + "id": "save-preset-creates-squad-dir", + "name": "Saving preset writes .squad/ directory", + "description": "Verifies that SaveGroupAsPreset creates a .squad/ directory with team.md, agent charters, and optional decisions.md/routing.md in the worktree root.", + "invariants": [ + ".squad/team.md is created with team name and roster table", + ".squad/agents/{name}/charter.md is created for each worker", + "Agent names are sanitized (lowercase, hyphens)", + "Preset is also saved to presets.json as backup" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "saveGroupAsPreset", "name": "My Code Review" }, + { "action": "assertFileExists", "path": ".squad/team.md" }, + { "action": "assertFileContains", "path": ".squad/team.md", "text": "My Code Review" }, + { "action": "assertDirectoryExists", "path": ".squad/agents" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "round-trip-squad-write-read", + "name": "Round-trip: write then discover Squad team", + "description": "Verifies that a team saved via SquadWriter can be discovered back via SquadDiscovery with matching data.", + "invariants": [ + "Written team.md can be parsed back by SquadDiscovery", + "Written charter.md content matches original system prompts", + "decisions.md and routing.md survive the round-trip" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "saveGroupAsPreset", "name": "Round Trip Test" }, + { "action": "assertFileExists", "path": ".squad/team.md" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertPresetInSection", "section": 
"From Repo", "name": "Round Trip Test" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-write-sanitizes-names", + "name": "Squad writer sanitizes agent names", + "description": "Verifies that SquadWriter strips team-name prefixes and sanitizes agent directory names (lowercase, hyphens instead of special chars).", + "invariants": [ + "Team-name prefix stripped from session names", + "Directory names are lowercase with hyphens", + "No spaces or special characters in directory names" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "saveGroupAsPreset", "name": "Test Team" }, + { "action": "assertNoDirectoryContains", "path": ".squad/agents", "pattern": " " }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "sequential-mode-processes-in-order", + "name": "Sequential mode sends to sessions one-by-one", + "description": "Verifies that Sequential mode dispatches prompts to sessions one at a time in order, not in parallel.", + "invariants": [ + "Sessions receive prompts sequentially, not simultaneously", + "Each session processes before the next receives its prompt" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Multi-Perspective Analysis" }, + { "action": "setMode", "mode": "Sequential" }, + { "action": "sendPrompt", "text": "Explain dependency injection" }, + { "action": "waitForAllResponses", "timeout": 120 }, + { "action": "assertAllSessionsResponded" } + ] + }, + { + "id": "pause-resume-reflection-cycle", + "name": "Pause and resume an active reflection loop", + "description": "Verifies that pausing a running OrchestratorReflect loop halts iteration dispatch without losing state, and resuming continues from where it left off.", + "invariants": [ + "IsPaused flag prevents new iterations from dispatching", + "Resuming continues from the current iteration count", + "Reflection state (CurrentIteration, goal) is preserved" + ], + "steps": [ 
+ { "action": "createGroupFromPreset", "preset": "Quick Reflection Cycle" }, + { "action": "sendPrompt", "text": "Implement a binary search function" }, + { "action": "waitForPhase", "phase": "Reflecting", "timeout": 30 }, + { "action": "pauseReflection" }, + { "action": "assertReflectionPaused" }, + { "action": "resumeReflection" }, + { "action": "waitForCompletion", "timeout": 120 } + ] + }, + { + "id": "dedicated-evaluator-session", + "name": "OrchestratorReflect with separate evaluator", + "description": "Verifies that when an EvaluatorSessionName is set, the evaluator independently scores each iteration instead of the orchestrator self-evaluating.", + "invariants": [ + "Evaluator session receives synthesis output for scoring", + "Evaluator PASS/FAIL determines iteration continuation", + "Orchestrator and evaluator are different sessions" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "setEvaluator", "sessionName": "worker-1" }, + { "action": "sendPrompt", "text": "Review error handling in the auth module" }, + { "action": "waitForCompletion", "timeout": 120 }, + { "action": "assertEvaluatorWasUsed" } + ] + }, + { + "id": "routing-context-in-orchestrator-plan", + "name": "Routing context from routing.md injected into orchestrator", + "description": "Verifies that a Squad-discovered preset's routing.md content appears in the orchestrator's planning prompt.", + "invariants": [ + "RoutingContext from routing.md is prepended to orchestrator planning", + "Orchestrator uses routing hints to assign tasks to appropriate workers" + ], + "steps": [ + { "action": "createSquadDir", "agents": ["security-reviewer", "code-optimizer"], "routing": "Route security tasks to security-reviewer" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "createGroupFromPreset", "preset": "PolyPilot Review Squad" }, + { "action": "sendPrompt", "text": "Review this code for security and performance" }, + { 
"action": "waitForPhase", "phase": "Planning", "timeout": 30 }, + { "action": "assertOrchestratorReceivedRoutingContext" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + } + ] +} diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 66ebf8b06e..4350d7cf8e 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -117,6 +117,206 @@ public void OrganizationCommandPayload_Serializes() Assert.Equal("pin", deserialized!.Command); Assert.Equal("test-session", deserialized.SessionName); } + + [Fact] + public void SessionGroup_MultiAgent_DefaultsToFalse() + { + var group = new SessionGroup { Name = "Test" }; + Assert.False(group.IsMultiAgent); + Assert.Equal(MultiAgentMode.Broadcast, group.OrchestratorMode); + Assert.Null(group.OrchestratorPrompt); + } + + [Fact] + public void SessionGroup_MultiAgent_Serializes() + { + var group = new SessionGroup + { + Name = "Multi-Agent Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator, + OrchestratorPrompt = "You are the lead coordinator." 
+ }; + + var json = JsonSerializer.Serialize(group); + var deserialized = JsonSerializer.Deserialize<SessionGroup>(json); + + Assert.NotNull(deserialized); + Assert.True(deserialized!.IsMultiAgent); + Assert.Equal(MultiAgentMode.Orchestrator, deserialized.OrchestratorMode); + Assert.Equal("You are the lead coordinator.", deserialized.OrchestratorPrompt); + } + + [Fact] + public void SessionMeta_Role_DefaultsToWorker() + { + var meta = new SessionMeta { SessionName = "test" }; + Assert.Equal(MultiAgentRole.Worker, meta.Role); + } + + [Fact] + public void SessionMeta_Role_SerializesAsString() + { + var meta = new SessionMeta + { + SessionName = "leader", + Role = MultiAgentRole.Orchestrator + }; + var json = JsonSerializer.Serialize(meta); + Assert.Contains("\"Orchestrator\"", json); + + var deserialized = JsonSerializer.Deserialize<SessionMeta>(json); + Assert.NotNull(deserialized); + Assert.Equal(MultiAgentRole.Orchestrator, deserialized!.Role); + } + + [Fact] + public void MultiAgentMode_AllValues() + { + Assert.Equal(4, Enum.GetValues<MultiAgentMode>().Length); + Assert.True(Enum.IsDefined(MultiAgentMode.Broadcast)); + Assert.True(Enum.IsDefined(MultiAgentMode.Sequential)); + Assert.True(Enum.IsDefined(MultiAgentMode.Orchestrator)); + Assert.True(Enum.IsDefined(MultiAgentMode.OrchestratorReflect)); + } + + [Fact] + public void MultiAgentMode_SerializesAsString() + { + var group = new SessionGroup + { + Name = "test", + OrchestratorMode = MultiAgentMode.Sequential + }; + var json = JsonSerializer.Serialize(group); + Assert.Contains("\"Sequential\"", json); + } + + [Fact] + public void OrganizationState_MultiAgentGroup_RoundTrips() + { + var state = new OrganizationState(); + var maGroup = new SessionGroup + { + Id = "ma-group-1", + Name = "Dev Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator, + OrchestratorPrompt = "Coordinate the workers", + SortOrder = 1 + }; + state.Groups.Add(maGroup); + state.Sessions.Add(new SessionMeta + { + SessionName = "orchestrator-session", 
+ GroupId = "ma-group-1", + Role = MultiAgentRole.Orchestrator + }); + state.Sessions.Add(new SessionMeta + { + SessionName = "worker-1", + GroupId = "ma-group-1", + Role = MultiAgentRole.Worker + }); + + var json = JsonSerializer.Serialize(state); + var deserialized = JsonSerializer.Deserialize<OrganizationState>(json); + + Assert.NotNull(deserialized); + var group = deserialized!.Groups.Find(g => g.Id == "ma-group-1"); + Assert.NotNull(group); + Assert.True(group!.IsMultiAgent); + Assert.Equal(MultiAgentMode.Orchestrator, group.OrchestratorMode); + Assert.Equal("Coordinate the workers", group.OrchestratorPrompt); + + var orchSession = deserialized.Sessions.Find(s => s.SessionName == "orchestrator-session"); + Assert.NotNull(orchSession); + Assert.Equal(MultiAgentRole.Orchestrator, orchSession!.Role); + + var workerSession = deserialized.Sessions.Find(s => s.SessionName == "worker-1"); + Assert.NotNull(workerSession); + Assert.Equal(MultiAgentRole.Worker, workerSession!.Role); + } + + [Fact] + public void LegacyState_WithoutMultiAgent_DeserializesGracefully() + { + // Simulates loading organization.json from before multi-agent was added + var json = """ + { + "Groups": [ + {"Id": "_default", "Name": "Sessions", "SortOrder": 0} + ], + "Sessions": [ + {"SessionName": "old-session", "GroupId": "_default", "IsPinned": false} + ], + "SortMode": "LastActive" + } + """; + var state = JsonSerializer.Deserialize<OrganizationState>(json); + Assert.NotNull(state); + Assert.False(state!.Groups[0].IsMultiAgent); + Assert.Equal(MultiAgentMode.Broadcast, state.Groups[0].OrchestratorMode); + Assert.Null(state.Groups[0].OrchestratorPrompt); + Assert.Equal(MultiAgentRole.Worker, state.Sessions[0].Role); + } + + [Fact] + public void OrchestratorInvariant_PromotingNewOrchestrator_DemotesPrevious() + { + var state = new OrganizationState(); + var group = new SessionGroup + { + Id = "ma-group-1", + Name = "Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator + }; + state.Groups.Add(group); + + var 
session1 = new SessionMeta { SessionName = "s1", GroupId = "ma-group-1", Role = MultiAgentRole.Orchestrator }; + var session2 = new SessionMeta { SessionName = "s2", GroupId = "ma-group-1", Role = MultiAgentRole.Worker }; + var session3 = new SessionMeta { SessionName = "s3", GroupId = "ma-group-1", Role = MultiAgentRole.Worker }; + state.Sessions.Add(session1); + state.Sessions.Add(session2); + state.Sessions.Add(session3); + + // Simulate the demotion logic from SetSessionRole + foreach (var other in state.Sessions.Where(m => m.GroupId == "ma-group-1" && m.SessionName != "s2" && m.Role == MultiAgentRole.Orchestrator)) + { + other.Role = MultiAgentRole.Worker; + } + session2.Role = MultiAgentRole.Orchestrator; + + Assert.Equal(MultiAgentRole.Worker, session1.Role); + Assert.Equal(MultiAgentRole.Orchestrator, session2.Role); + Assert.Equal(MultiAgentRole.Worker, session3.Role); + Assert.Single(state.Sessions, s => s.GroupId == "ma-group-1" && s.Role == MultiAgentRole.Orchestrator); + } + + [Fact] + public void MultiAgentSetRolePayload_Serializes() + { + var payload = new MultiAgentSetRolePayload + { + SessionName = "worker-1", + Role = "Orchestrator" + }; + var json = JsonSerializer.Serialize(payload, BridgeJson.Options); + Assert.Contains("worker-1", json); + Assert.Contains("Orchestrator", json); + + var deserialized = JsonSerializer.Deserialize<MultiAgentSetRolePayload>(json, BridgeJson.Options); + Assert.NotNull(deserialized); + Assert.Equal("worker-1", deserialized!.SessionName); + Assert.Equal("Orchestrator", deserialized.Role); + } + + [Fact] + public void MultiAgentSetRole_BridgeMessageType_Exists() + { + Assert.Equal("multi_agent_set_role", BridgeMessageTypes.MultiAgentSetRole); + } } /// @@ -465,4 +665,2166 @@ public void Reconcile_MultipleSessionsDifferentRepos_AllGetReassigned() Assert.Equal(groupA.Id, metaA.GroupId); Assert.Equal(groupB.Id, metaB.GroupId); } + + [Fact] + public void ParseTaskAssignments_ExtractsWorkerTasks() + { + var response = @"Here's my plan: + 
+@worker:session-a +Implement the login form with email and password fields. +@end + +@worker:session-b +Create the API endpoint for user authentication. +@end + +That covers the full task."; + + var workers = new List<string> { "session-a", "session-b" }; + var assignments = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Equal(2, assignments.Count); + Assert.Equal("session-a", assignments[0].WorkerName); + Assert.Contains("login form", assignments[0].Task); + Assert.Equal("session-b", assignments[1].WorkerName); + Assert.Contains("API endpoint", assignments[1].Task); + } + + [Fact] + public void ParseTaskAssignments_FuzzyMatchesWorkerNames() + { + var response = @"@worker:session +Do the work. +@end"; + + var workers = new List<string> { "session-alpha", "session-beta" }; + var assignments = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Single(assignments); + Assert.Equal("session-alpha", assignments[0].WorkerName); + } + + [Fact] + public void ParseTaskAssignments_ReturnsEmpty_WhenNoMarkers() + { + var response = "I'll handle this myself. No need to delegate to workers."; + var workers = new List<string> { "session-a", "session-b" }; + var assignments = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Empty(assignments); + } + + [Fact] + public void ParseTaskAssignments_IgnoresUnknownWorkers() + { + var response = @"@worker:unknown-worker +Do something. 
+@end"; + + var workers = new List<string> { "session-a", "session-b" }; + var assignments = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Empty(assignments); + } + + [Fact] + public void ConvertToMultiAgent_SetsIsMultiAgentTrue() + { + var svc = CreateService(); + svc.CreateGroup("TestGroup"); + var group = svc.Organization.Groups.First(g => g.Name == "TestGroup"); + Assert.False(group.IsMultiAgent); + + svc.ConvertToMultiAgent(group.Id); + + Assert.True(group.IsMultiAgent); + Assert.Equal(MultiAgentMode.Broadcast, group.OrchestratorMode); + } +} + +public class PerAgentModelAssignmentTests +{ + private readonly StubChatDatabase _chatDb = new(); + private readonly StubServerManager _serverManager = new(); + private readonly StubWsBridgeClient _bridgeClient = new(); + private readonly StubDemoService _demoService = new(); + private readonly IServiceProvider _serviceProvider; + + public PerAgentModelAssignmentTests() + { + var services = new ServiceCollection(); + _serviceProvider = services.BuildServiceProvider(); + } + + private CopilotService CreateService() => + new CopilotService(_chatDb, _serverManager, _bridgeClient, new RepoManager(), _serviceProvider, _demoService); + + [Fact] + public void SessionMeta_PreferredModel_DefaultsToNull() + { + var meta = new SessionMeta { SessionName = "test" }; + Assert.Null(meta.PreferredModel); + } + + [Fact] + public void SetSessionPreferredModel_StoresModel() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1" }); + + svc.SetSessionPreferredModel("worker1", "gpt-4.1"); + + var meta = svc.Organization.Sessions.First(m => m.SessionName == "worker1"); + Assert.Equal("gpt-4.1", meta.PreferredModel); + } + + [Fact] + public void SetSessionPreferredModel_Null_ClearsOverride() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1", PreferredModel = "gpt-4.1" }); + + svc.SetSessionPreferredModel("worker1", null); 
+ + var meta = svc.Organization.Sessions.First(m => m.SessionName == "worker1"); + Assert.Null(meta.PreferredModel); + } + + [Fact] + public void GetEffectiveModel_ReturnsPreferredModel_WhenSet() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1", PreferredModel = "claude-opus-4.6" }); + + var model = svc.GetEffectiveModel("worker1"); + Assert.Equal("claude-opus-4.6", model); + } + + [Fact] + public void GetEffectiveModel_ReturnsDefaultModel_WhenNoPreference() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1" }); + + var model = svc.GetEffectiveModel("worker1"); + Assert.Equal(svc.DefaultModel, model); + } + + [Fact] + public void SessionGroup_DefaultWorkerModel_DefaultsToNull() + { + var group = new SessionGroup { Name = "Test" }; + Assert.Null(group.DefaultWorkerModel); + Assert.Null(group.DefaultOrchestratorModel); + } + + [Fact] + public void PreferredModel_SurvivesSerialization() + { + var state = new OrganizationState(); + state.Sessions.Add(new SessionMeta { SessionName = "worker1", PreferredModel = "gemini-3-pro" }); + + var json = JsonSerializer.Serialize(state); + var restored = JsonSerializer.Deserialize<OrganizationState>(json)!; + + Assert.Equal("gemini-3-pro", restored.Sessions[0].PreferredModel); + } + + [Fact] + public void SessionGroup_ModelDefaults_SurviveSerialization() + { + var state = new OrganizationState(); + state.Groups.Add(new SessionGroup + { + Name = "Test", + IsMultiAgent = true, + DefaultWorkerModel = "gpt-4.1", + DefaultOrchestratorModel = "claude-opus-4.6" + }); + + var json = JsonSerializer.Serialize(state); + var restored = JsonSerializer.Deserialize<OrganizationState>(json)!; + + var group = restored.Groups.First(g => g.Name == "Test"); + Assert.Equal("gpt-4.1", group.DefaultWorkerModel); + Assert.Equal("claude-opus-4.6", group.DefaultOrchestratorModel); + } + + [Fact] + public void Legacy_Deserialization_GracefullyHandlesNoPreferredModel() + { + // 
Simulate legacy JSON without PreferredModel + var json = """{"SessionName":"old-session","GroupId":"_default","IsPinned":false,"ManualOrder":0,"Role":"Worker"}"""; + var meta = JsonSerializer.Deserialize<SessionMeta>(json)!; + Assert.Null(meta.PreferredModel); + Assert.Equal("old-session", meta.SessionName); + } +} + +public class GroupReflectionStateTests +{ + [Fact] + public void Create_InitializesCorrectly() + { + var state = ReflectionCycle.Create("Build a REST API", 10); + + Assert.Equal("Build a REST API", state.Goal); + Assert.Equal(10, state.MaxIterations); + Assert.Equal(0, state.CurrentIteration); + Assert.True(state.IsActive); + Assert.False(state.GoalMet); + Assert.False(state.IsStalled); + Assert.False(state.IsPaused); + Assert.NotNull(state.StartedAt); + } + + [Fact] + public void CheckStall_ReturnsFalse_ForUniqueResponses() + { + var state = ReflectionCycle.Create("test"); + + Assert.False(state.CheckStall("response 1")); + Assert.False(state.CheckStall("response 2")); + Assert.False(state.CheckStall("response 3")); + } + + [Fact] + public void CheckStall_DetectsRepeatedResponses() + { + var state = ReflectionCycle.Create("test"); + state.IsActive = true; + + // Iteration 1 + state.Advance("same response"); + + // Iteration 2 (first stall) + state.Advance("same response"); + Assert.False(state.IsStalled); + Assert.Equal(1, state.ConsecutiveStalls); + + // Iteration 3 (second stall) + state.Advance("same response"); + Assert.True(state.IsStalled); + Assert.Equal(2, state.ConsecutiveStalls); + } + + [Fact] + public void CheckStall_ResetsOnProgress() + { + var state = ReflectionCycle.Create("test"); + state.IsActive = true; + + state.Advance("response A"); + state.Advance("response A"); // 1st stall + state.Advance("response B"); // different — resets + + Assert.False(state.IsStalled); + Assert.Equal(0, state.ConsecutiveStalls); + } + + [Fact] + public void CompletionSummary_GoalMet() + { + var state = ReflectionCycle.Create("test"); + state.CurrentIteration = 3; 
+ state.GoalMet = true; + + Assert.Contains("✅", state.BuildCompletionSummary()); + Assert.Contains("3", state.BuildCompletionSummary()); + } + + [Fact] + public void CompletionSummary_Stalled() + { + var state = ReflectionCycle.Create("test"); + state.CurrentIteration = 4; + state.IsStalled = true; + + Assert.Contains("⚠️", state.BuildCompletionSummary()); + } + + [Fact] + public void CompletionSummary_MaxReached() + { + var state = ReflectionCycle.Create("test", 5); + state.CurrentIteration = 5; + + Assert.Contains("⏱️", state.BuildCompletionSummary()); + Assert.Contains("5", state.BuildCompletionSummary()); + } + + [Fact] + public void OrchestratorReflect_ModeEnumValue_Exists() + { + var mode = MultiAgentMode.OrchestratorReflect; + Assert.Equal("OrchestratorReflect", mode.ToString()); + } + + [Fact] + public void OrchestratorReflect_SurvivesSerialization() + { + var group = new SessionGroup + { + Name = "Test", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + ReflectionState = ReflectionCycle.Create("Build it", 10) + }; + + var json = JsonSerializer.Serialize(group); + var restored = JsonSerializer.Deserialize<SessionGroup>(json)!; + + Assert.Equal(MultiAgentMode.OrchestratorReflect, restored.OrchestratorMode); + Assert.NotNull(restored.ReflectionState); + Assert.Equal("Build it", restored.ReflectionState!.Goal); + Assert.Equal(10, restored.ReflectionState.MaxIterations); + Assert.True(restored.ReflectionState.IsActive); + } + + [Fact] + public void ExtractIterationEvaluation_ParsesNeedsIterationMarker() + { + var response = "The synthesis looks good but [[NEEDS_ITERATION]] Missing error handling in the API layer. 
@worker:alice\nAdd error handling.\n@end"; + + // Use reflection to test internal method + var method = typeof(CopilotService).GetMethod("ExtractIterationEvaluation", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Static); + Assert.NotNull(method); + + var result = (string)method!.Invoke(null, new object[] { response })!; + Assert.Contains("Missing error handling", result); + Assert.DoesNotContain("@worker", result); + } + + [Fact] + public void ExtractIterationEvaluation_FallsBackToLastLines() + { + var response = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nThe final evaluation."; + + var method = typeof(CopilotService).GetMethod("ExtractIterationEvaluation", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Static); + var result = (string)method!.Invoke(null, new object[] { response })!; + + Assert.Contains("The final evaluation", result); + } +} + +public class ModelCapabilitiesTests +{ + [Fact] + public void GetCapabilities_KnownModel_ReturnsFlags() + { + var caps = ModelCapabilities.GetCapabilities("claude-opus-4.6"); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + } + + [Fact] + public void GetCapabilities_UnknownModel_ReturnsNone() + { + var caps = ModelCapabilities.GetCapabilities("totally-unknown-model"); + Assert.Equal(ModelCapability.None, caps); + } + + [Fact] + public void GetCapabilities_FuzzyMatch_Works() + { + // NOTE(review): the name passed below is "gpt-4.1" verbatim, so this only exercises fuzzy matching if the registry has no exact "gpt-4.1" key (confirm); otherwise query a suffixed variant to cover the fuzzy path + var caps = ModelCapabilities.GetCapabilities("gpt-4.1"); + Assert.True(caps.HasFlag(ModelCapability.Fast)); + Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); + } + + [Fact] + public void GetRoleWarnings_CheapOrchestratorModel_WarnsAboutReasoning() + { + var warnings = ModelCapabilities.GetRoleWarnings("gpt-4.1", MultiAgentRole.Orchestrator); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Contains("reasoning", 
StringComparison.OrdinalIgnoreCase)); + } + + [Fact] + public void GetRoleWarnings_StrongOrchestratorModel_NoWarnings() + { + var warnings = ModelCapabilities.GetRoleWarnings("claude-opus-4.6", MultiAgentRole.Orchestrator); + Assert.Empty(warnings); + } + + [Fact] + public void GetRoleWarnings_WorkerWithToolUse_NoWarnings() + { + var warnings = ModelCapabilities.GetRoleWarnings("gpt-4.1", MultiAgentRole.Worker); + Assert.Empty(warnings); + } + + [Fact] + public void GetStrengths_ReturnsDescription() + { + var strengths = ModelCapabilities.GetStrengths("claude-opus-4.6"); + Assert.NotEqual("Unknown model", strengths); + Assert.Contains("reasoning", strengths, StringComparison.OrdinalIgnoreCase); + } +} + +public class GroupPresetTests +{ + [Fact] + public void BuiltInPresets_AllHaveRequiredFields() + { + foreach (var preset in GroupPreset.BuiltIn) + { + Assert.False(string.IsNullOrEmpty(preset.Name)); + Assert.False(string.IsNullOrEmpty(preset.Description)); + Assert.False(string.IsNullOrEmpty(preset.OrchestratorModel)); + Assert.NotEmpty(preset.WorkerModels); + Assert.True(preset.WorkerModels.All(m => !string.IsNullOrEmpty(m))); + } + } + + [Fact] + public void BuiltInPresets_ContainExpectedCount() + { + Assert.True(GroupPreset.BuiltIn.Length >= 3, "Should have at least 3 built-in presets"); + } + + [Fact] + public void BuiltInPresets_IncludeOrchestratorReflect() + { + Assert.Contains(GroupPreset.BuiltIn, p => p.Mode == MultiAgentMode.OrchestratorReflect); + } +} + +public class GroupModelAnalyzerTests +{ + [Fact] + public void Analyze_OrchestratorModeWithoutOrchestrator_ReturnsError() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Orchestrator }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("w1", "gpt-4.1", MultiAgentRole.Worker), + ("w2", "gpt-4.1", MultiAgentRole.Worker), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.Contains(diags, d => d.Level == 
"error" && d.Message.Contains("Orchestrator role")); + } + + [Fact] + public void Analyze_WeakOrchestratorModel_ReturnsWarning() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Orchestrator }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("orch", "gpt-4.1", MultiAgentRole.Orchestrator), + ("w1", "gpt-5", MultiAgentRole.Worker), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.Contains(diags, d => d.Level == "warning" && d.Message.Contains("reasoning")); + } + + [Fact] + public void Analyze_StrongOrchestrator_NoErrors() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Orchestrator }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("orch", "claude-opus-4.6", MultiAgentRole.Orchestrator), + ("w1", "gpt-4.1", MultiAgentRole.Worker), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.DoesNotContain(diags, d => d.Level == "error"); + } + + [Fact] + public void Analyze_AllSameModelBroadcast_SuggestsDiversity() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Broadcast }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("w1", "gpt-4.1", MultiAgentRole.Worker), + ("w2", "gpt-4.1", MultiAgentRole.Worker), + ("w3", "gpt-4.1", MultiAgentRole.Worker), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.Contains(diags, d => d.Level == "info" && d.Message.Contains("diverse")); + } + + [Fact] + public void Analyze_OrchestratorReflectWithoutWorkers_ReturnsError() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.OrchestratorReflect }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("orch", "claude-opus-4.6", MultiAgentRole.Orchestrator), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); + 
Assert.Contains(diags, d => d.Level == "error" && d.Message.Contains("worker")); + } +} + +public class UserPresetsTests +{ + [Fact] + public void GetAll_IncludesBuiltInPresets() + { + // Use a temp dir that won't have presets.json + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + var all = UserPresets.GetAll(tempDir); + Assert.Equal(GroupPreset.BuiltIn.Length, all.Length); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + } + } + + [Fact] + public void SaveAndLoad_RoundTrips() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + var preset = new GroupPreset("My Team", "Custom desc", "🎯", + MultiAgentMode.Orchestrator, "claude-opus-4.6", new[] { "gpt-4.1" }) + { IsUserDefined = true }; + + UserPresets.Save(tempDir, new List { preset }); + var loaded = UserPresets.Load(tempDir); + + Assert.Single(loaded); + Assert.Equal("My Team", loaded[0].Name); + Assert.True(loaded[0].IsUserDefined); + Assert.Equal("claude-opus-4.6", loaded[0].OrchestratorModel); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + } + } + + [Fact] + public void GetAll_CombinesBuiltInAndUser() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + var userPreset = new GroupPreset("Custom", "Mine", "⭐", + MultiAgentMode.Broadcast, "gpt-5", new[] { "gpt-4.1" }) + { IsUserDefined = true }; + + UserPresets.Save(tempDir, new List { userPreset }); + var all = UserPresets.GetAll(tempDir); + + Assert.Equal(GroupPreset.BuiltIn.Length + 1, all.Length); + Assert.Contains(all, p => p.Name == "Custom" && p.IsUserDefined); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + } + } + + [Fact] + public void SaveGroupAsPreset_CreatesFromMembers() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + var group = new SessionGroup { Name = "Test", IsMultiAgent = 
true, OrchestratorMode = MultiAgentMode.Orchestrator }; + var members = new List + { + new() { SessionName = "orch", Role = MultiAgentRole.Orchestrator }, + new() { SessionName = "w1", Role = MultiAgentRole.Worker }, + }; + + var preset = UserPresets.SaveGroupAsPreset(tempDir, "Test Preset", "desc", "πŸ”₯", + group, members, name => name == "orch" ? "claude-opus-4.6" : "gpt-4.1"); + + Assert.NotNull(preset); + Assert.Equal("claude-opus-4.6", preset!.OrchestratorModel); + Assert.Single(preset.WorkerModels); + Assert.Equal("gpt-4.1", preset.WorkerModels[0]); + Assert.True(preset.IsUserDefined); + + // Verify persisted + var loaded = UserPresets.Load(tempDir); + Assert.Single(loaded); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + } + } + + [Fact] + public void SaveGroupAsPreset_WithWorktreeRoot_WritesSquadDir() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + var worktreeRoot = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + Directory.CreateDirectory(worktreeRoot); + var group = new SessionGroup + { + Name = "SquadTeam", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect + }; + var members = new List + { + new() { SessionName = "orch", Role = MultiAgentRole.Orchestrator }, + new() { SessionName = "w1", Role = MultiAgentRole.Worker, SystemPrompt = "You are a coder." }, + }; + + var preset = UserPresets.SaveGroupAsPreset(tempDir, "SquadTeam", "desc", "πŸš€", + group, members, name => name == "orch" ? 
"claude-opus-4.6" : "gpt-5", + worktreeRoot: worktreeRoot); + + Assert.NotNull(preset); + Assert.True(Directory.Exists(Path.Combine(worktreeRoot, ".squad"))); + Assert.True(preset!.IsRepoLevel); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + if (Directory.Exists(worktreeRoot)) Directory.Delete(worktreeRoot, true); + } + } +} + +public class EvaluationTrackingTests +{ + [Fact] + public void RecordEvaluation_FirstEntry_ReturnsStable() + { + var state = ReflectionCycle.Create("test goal"); + var trend = state.RecordEvaluation(1, 0.6, "Needs work", "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend); + Assert.Single(state.EvaluationHistory); + } + + [Fact] + public void RecordEvaluation_ImprovingScores_ReturnsImproving() + { + var state = ReflectionCycle.Create("test goal"); + state.RecordEvaluation(1, 0.4, "Poor", "gpt-4.1"); + var trend = state.RecordEvaluation(2, 0.7, "Better", "gpt-4.1"); + Assert.Equal(QualityTrend.Improving, trend); + } + + [Fact] + public void RecordEvaluation_DegradingScores_ReturnsDegrading() + { + var state = ReflectionCycle.Create("test goal"); + state.RecordEvaluation(1, 0.8, "Good", "gpt-4.1"); + var trend = state.RecordEvaluation(2, 0.5, "Got worse", "gpt-4.1"); + Assert.Equal(QualityTrend.Degrading, trend); + } + + [Fact] + public void RecordEvaluation_SimilarScores_ReturnsStable() + { + var state = ReflectionCycle.Create("test goal"); + state.RecordEvaluation(1, 0.6, "Ok", "gpt-4.1"); + var trend = state.RecordEvaluation(2, 0.65, "Similar", "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend); + } + + [Fact] + public void EvaluatorSession_CanBeConfigured() + { + var state = ReflectionCycle.Create("goal", 5, null, "eval-session"); + Assert.Equal("eval-session", state.EvaluatorSessionName); + } + + [Fact] + public void PendingAdjustments_InitiallyEmpty() + { + var state = ReflectionCycle.Create("goal"); + Assert.Empty(state.PendingAdjustments); + } + + [Fact] + public void 
EvaluationHistory_TracksMultipleIterations() + { + var state = ReflectionCycle.Create("goal"); + state.RecordEvaluation(1, 0.3, "Bad", "claude-haiku-4.5"); + state.RecordEvaluation(2, 0.5, "Improving", "claude-haiku-4.5"); + state.RecordEvaluation(3, 0.8, "Good", "claude-haiku-4.5"); + + Assert.Equal(3, state.EvaluationHistory.Count); + Assert.Equal(0.3, state.EvaluationHistory[0].Score); + Assert.Equal(0.8, state.EvaluationHistory[2].Score); + Assert.All(state.EvaluationHistory, e => Assert.Equal("claude-haiku-4.5", e.EvaluatorModel)); + } +} + +public class ModelNameInferenceTests +{ + [Fact] + public void InferFromName_OpusVariant_HasReasoningExpert() + { + var caps = ModelCapabilities.InferFromName("claude-opus-5.0"); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + } + + [Fact] + public void InferFromName_SonnetVariant_HasCodeExpert() + { + var caps = ModelCapabilities.InferFromName("claude-sonnet-5.0"); + Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + Assert.True(caps.HasFlag(ModelCapability.Fast)); + } + + [Fact] + public void InferFromName_HaikuVariant_HasFastAndCheap() + { + var caps = ModelCapabilities.InferFromName("claude-haiku-5.0"); + Assert.True(caps.HasFlag(ModelCapability.Fast)); + Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); + } + + [Fact] + public void InferFromName_CodexVariant_HasCodeExpert() + { + var caps = ModelCapabilities.InferFromName("gpt-6-codex"); + Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + } + + [Fact] + public void InferFromName_MiniVariant_HasFastAndCheap() + { + var caps = ModelCapabilities.InferFromName("gpt-6-mini"); + Assert.True(caps.HasFlag(ModelCapability.Fast)); + Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); + } + + [Fact] + public void InferFromName_MaxVariant_HasReasoningExpert() + { + var caps = ModelCapabilities.InferFromName("gpt-6-codex-max"); + 
Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + } + + [Fact] + public void InferFromName_GeminiVariant_HasVision() + { + var caps = ModelCapabilities.InferFromName("gemini-4-ultra"); + Assert.True(caps.HasFlag(ModelCapability.Vision)); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + } + + [Fact] + public void InferFromName_UnknownModel_ReturnsNone() + { + var caps = ModelCapabilities.InferFromName("totally-unknown-model"); + Assert.Equal(ModelCapability.None, caps); + } + + [Fact] + public void GetCapabilities_NewOpusVersion_InfersFromName() + { + // Not in registry, but should be inferred + var caps = ModelCapabilities.GetCapabilities("claude-opus-99.0"); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + } +} + +public class ParseEvaluationScoreTests +{ + [Fact] + public void ParseScore_ValidFormat_ExtractsCorrectly() + { + var response = "SCORE: 0.75\nRATIONALE: Good progress but missing edge cases.\n[[NEEDS_ITERATION]]"; + var (score, rationale) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(0.75, score); + Assert.Contains("Good progress", rationale); + } + + [Fact] + public void ParseScore_HighScore_ExtractsCorrectly() + { + var response = "SCORE: 0.95\nRATIONALE: Excellent output, fully addresses the goal.\n[[GROUP_REFLECT_COMPLETE]]"; + var (score, rationale) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(0.95, score); + Assert.Contains("Excellent", rationale); + } + + [Fact] + public void ParseScore_NoScoreMarker_ReturnsDefault() + { + var response = "The output looks good but could improve."; + var (score, _) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(0.5, score); // default + } + + [Fact] + public void ParseScore_ClampAboveOne_Returns1() + { + var response = "SCORE: 1.5\nRATIONALE: Overshot."; + var (score, _) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(1.0, score); + } + + [Fact] + public void ParseScore_NegativeScore_ReturnsZero() + { + 
var response = "SCORE: -0.5\nRATIONALE: Terrible."; + var (score, _) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(0.0, score); + } +} + +/// +/// End-to-end scenario tests demonstrating complete multi-agent user flows. +/// These serve as executable documentation of the feature's user experience. +/// +public class MultiAgentScenarioTests +{ + /// + /// Scenario: User creates a "Code Review Team" from a built-in preset. + /// + /// User flow: + /// 1. Click πŸš€ Preset in sidebar toolbar + /// 2. Preset picker appears showing 4 built-in templates + /// 3. Select "Code Review Team" (πŸ”) + /// 4. System creates: Orchestrator (claude-opus-4.6) + 2 Workers (gpt-5.1-codex, claude-sonnet-4.5) + /// 5. Sidebar shows group with mode selector set to "🎯 Orchestrator" + /// 6. Each session shows its model assignment and role badge + /// + [Fact] + public void Scenario_CreateGroupFromPreset() + { + // Step 1-2: User sees built-in presets + var presets = GroupPreset.BuiltIn; + Assert.Equal(4, presets.Length); + + // Step 3: User picks "Code Review Team" + var codeReview = presets.First(p => p.Name == "Code Review Team"); + Assert.Equal("πŸ”", codeReview.Emoji); + Assert.Equal(MultiAgentMode.Orchestrator, codeReview.Mode); + Assert.Equal("claude-opus-4.6", codeReview.OrchestratorModel); + Assert.Equal(2, codeReview.WorkerModels.Length); + + // Step 4: System creates the group - verify the preset structure + // (CopilotService.CreateGroupFromPresetAsync does the actual creation at runtime) + Assert.Equal("gpt-5.1-codex", codeReview.WorkerModels[0]); + Assert.Equal("claude-sonnet-4.5", codeReview.WorkerModels[1]); + + // Step 5-6: Each member has appropriate capabilities + var orchCaps = ModelCapabilities.GetCapabilities(codeReview.OrchestratorModel); + Assert.True(orchCaps.HasFlag(ModelCapability.ReasoningExpert)); + + var warnings = ModelCapabilities.GetRoleWarnings(codeReview.OrchestratorModel, MultiAgentRole.Orchestrator); + Assert.Empty(warnings); // 
opus is a great orchestrator, no warnings + + foreach (var workerModel in codeReview.WorkerModels) + { + var wCaps = ModelCapabilities.GetCapabilities(workerModel); + Assert.True(wCaps.HasFlag(ModelCapability.CodeExpert)); // both are code-capable + } + } + + /// + /// Scenario: User assigns a weak model to the Orchestrator role and sees warnings. + /// + /// User flow: + /// 1. Long-press/right-click a session in a multi-agent group β†’ context menu + /// 2. See "🎯 Set as Orchestrator" button β†’ click it + /// 3. Under "🧠 Model", pick "gpt-4.1" from dropdown + /// 4. Warning appears: "⚠️ This model may lack strong reasoning for orchestration" + /// 5. Warning appears: "πŸ’° Cost-efficient models may produce shallow plans" + /// 6. User also sees diagnostics in the group header: + /// "⚠️ Orchestrator 'session1' uses gpt-4.1 which lacks strong reasoning" + /// + [Fact] + public void Scenario_WeakOrchestratorWarnings() + { + // Step 3-5: User picks gpt-4.1 for orchestrator role + var warnings = ModelCapabilities.GetRoleWarnings("gpt-4.1", MultiAgentRole.Orchestrator); + Assert.Equal(2, warnings.Count); + Assert.Contains(warnings, w => w.Contains("reasoning")); + Assert.Contains(warnings, w => w.Contains("Cost-efficient")); + + // Step 6: Group diagnostics also flag the issue + var group = new SessionGroup + { + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator + }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("session1", "gpt-4.1", MultiAgentRole.Orchestrator), + ("session2", "gpt-5", MultiAgentRole.Worker), + }; + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.Contains(diags, d => d.Level == "warning" && d.Message.Contains("gpt-4.1")); + + // Compare: strong orchestrator shows no role warnings + var strongWarnings = ModelCapabilities.GetRoleWarnings("claude-opus-4.6", MultiAgentRole.Orchestrator); + Assert.Empty(strongWarnings); + } + + /// + /// Scenario: Full OrchestratorReflect iteration 
cycle with evaluation scoring. + /// + /// User flow: + /// 1. User selects "πŸ”„ Orchestrator + Reflect" from mode dropdown + /// 2. Types goal in the multi-agent input bar and clicks πŸ“‘ + /// 3. Sidebar shows: πŸ”„ 1/5 with goal text + /// 4. After iteration 1, evaluator scores 0.4 β†’ sidebar shows "πŸ“Š 0.4 (gpt-4.1)" + /// 5. AutoAdjust detects no issues yet β†’ no banner + /// 6. After iteration 2, evaluator scores 0.7 β†’ trend = Improving + /// 7. After iteration 3, evaluator scores 0.65 β†’ trend = Stable (slight drop) + /// 8. After iteration 4, evaluator scores 0.92 β†’ goal met, loop stops + /// 9. Sidebar shows: "βœ… Goal met after 4 iteration(s)" + /// + [Fact] + public void Scenario_FullReflectCycleWithScoring() + { + // Step 1-2: User starts OrchestratorReflect + var state = ReflectionCycle.Create("Implement a REST API with CRUD endpoints", maxIterations: 5); + Assert.True(state.IsActive); + Assert.Equal(0, state.CurrentIteration); + Assert.NotNull(state.StartedAt); + + // Step 3-4: Iteration 1 β€” low quality initial attempt + state.CurrentIteration = 1; + var trend1 = state.RecordEvaluation(1, 0.4, "Missing error handling and input validation. Only GET endpoint implemented.", "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend1); // only one data point + Assert.Single(state.EvaluationHistory); + + // Sidebar would show: πŸ”„ 1/5 πŸ“Š 0.4 (gpt-4.1) + var lastEval = state.EvaluationHistory.Last(); + Assert.Equal("0.4", lastEval.Score.ToString("F1", System.Globalization.CultureInfo.InvariantCulture)); // invariant culture: a comma-decimal locale would otherwise format this as "0,4" + Assert.Equal("gpt-4.1", lastEval.EvaluatorModel); + + // Step 6: Iteration 2 β€” significant improvement + state.CurrentIteration = 2; + var trend2 = state.RecordEvaluation(2, 0.7, "All CRUD endpoints present. Error handling added but tests incomplete.", "gpt-4.1"); + Assert.Equal(QualityTrend.Improving, trend2); + + // Step 7: Iteration 3 β€” slight regression + state.CurrentIteration = 3; + var trend3 = state.RecordEvaluation(3, 0.65, "Tests added but some CRUD operations regressed. 
PUT endpoint missing validation.", "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend3); // within 0.1 threshold + + // Step 8: Iteration 4 β€” goal met + state.CurrentIteration = 4; + var trend4 = state.RecordEvaluation(4, 0.92, "All endpoints complete with validation, error handling, and comprehensive tests.", "gpt-4.1"); + Assert.Equal(QualityTrend.Improving, trend4); + + // Score >= 0.9 would trigger goal completion + state.GoalMet = true; + state.IsActive = false; + state.CompletedAt = DateTime.Now; + + // Step 9: Final summary + var summary = state.BuildCompletionSummary(); + Assert.Contains("Goal met", summary); + Assert.Equal(4, state.EvaluationHistory.Count); + + // Verify the quality trajectory is tracked + var scores = state.EvaluationHistory.Select(e => e.Score).ToList(); + Assert.Equal(new[] { 0.4, 0.7, 0.65, 0.92 }, scores); + } + + /// + /// Scenario: AutoAdjust detects quality degradation and surfaces a banner. + /// + /// User flow: + /// 1. Reflect loop running with 3 workers + /// 2. Iteration 2 scores 0.7, iteration 3 scores 0.45 (sharp drop) + /// 3. AutoAdjust detects degradation in evaluation history + /// 4. Sidebar shows amber banner: "πŸ“‰ Quality degraded significantly vs. previous iteration" + /// 5. Worker "fast-coder" using gpt-4.1 produced only 50 chars on iteration 3 + /// 6. Banner also shows: "πŸ“ˆ Worker 'fast-coder' produced a brief response. Consider upgrading..." + /// 7. 
User can see these suggestions and decide to change the worker's model + /// + [Fact] + public void Scenario_AutoAdjustDetectsIssuesAndSurfacesBanner() + { + var state = ReflectionCycle.Create("Build a microservice"); + state.CurrentIteration = 3; + + // Steps 2-3: Record scores showing degradation + state.RecordEvaluation(1, 0.5, "Initial attempt", "gpt-4.1"); + state.RecordEvaluation(2, 0.7, "Good progress", "gpt-4.1"); + state.RecordEvaluation(3, 0.45, "Quality dropped", "gpt-4.1"); + + // The last two evals show a significant drop (0.7 β†’ 0.45 = -0.25 > 0.15 threshold) + var lastTwo = state.EvaluationHistory.TakeLast(2).ToList(); + var degradation = lastTwo[0].Score - lastTwo[1].Score; + Assert.True(degradation > 0.15); // threshold for "significant" degradation + + // Step 4-6: AutoAdjust would populate PendingAdjustments + // Simulating what AutoAdjustFromFeedback does: + state.PendingAdjustments.Clear(); + state.PendingAdjustments.Add("πŸ“‰ Quality degraded significantly vs. previous iteration. Review worker models or task clarity."); + state.PendingAdjustments.Add("πŸ“ˆ Worker 'fast-coder' produced a brief response. Consider upgrading from a cost-efficient model to improve quality."); + + // Verify the banner would display + Assert.Equal(2, state.PendingAdjustments.Count); + Assert.Contains(state.PendingAdjustments, a => a.Contains("πŸ“‰")); + Assert.Contains(state.PendingAdjustments, a => a.Contains("fast-coder")); + + // Step 7: User changes the model β€” verify gpt-4.1 is flagged as cost-efficient + var caps = ModelCapabilities.GetCapabilities("gpt-4.1"); + Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); + Assert.False(caps.HasFlag(ModelCapability.ReasoningExpert)); + } + + /// + /// Scenario: User saves their tuned multi-agent group as a reusable preset. + /// + /// User flow: + /// 1. User has a working Orchestrator group: opus orchestrator, 2 workers + /// 2. They've tweaked models over several iterations and are happy + /// 3. 
Click "πŸ’Ύ Save as Preset" button in sidebar + /// 4. System saves to ~/.polypilot/presets.json + /// 5. Next time user clicks πŸš€ Preset, their custom preset appears with πŸ‘€ badge + /// 6. User-defined presets appear after built-in ones + /// + [Fact] + public void Scenario_SaveAndReuseCustomPreset() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + // Step 1: User has a working group + var group = new SessionGroup + { + Name = "My API Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect + }; + var members = new List + { + new() { SessionName = "planner", Role = MultiAgentRole.Orchestrator }, + new() { SessionName = "coder", Role = MultiAgentRole.Worker }, + new() { SessionName = "reviewer", Role = MultiAgentRole.Worker }, + }; + + // Step 3-4: Save as preset + var preset = UserPresets.SaveGroupAsPreset( + tempDir, "My API Team", "OrchestratorReflect with reviewer", "πŸ—οΈ", + group, members, + name => name switch + { + "planner" => "claude-opus-4.6", + "coder" => "gpt-5.1-codex", + "reviewer" => "claude-sonnet-4.5", + _ => "gpt-4.1" + }); + + Assert.NotNull(preset); + Assert.True(preset!.IsUserDefined); + Assert.Equal("claude-opus-4.6", preset.OrchestratorModel); + Assert.Equal(2, preset.WorkerModels.Length); + Assert.Equal(MultiAgentMode.OrchestratorReflect, preset.Mode); + + // Step 5-6: Next time, preset picker shows built-in + user presets + var allPresets = UserPresets.GetAll(tempDir); + Assert.Equal(GroupPreset.BuiltIn.Length + 1, allPresets.Length); + + // User-defined presets come after built-in ones + var userPresets = allPresets.Where(p => p.IsUserDefined).ToArray(); + Assert.Single(userPresets); + Assert.Equal("My API Team", userPresets[0].Name); + + // The preset correctly captures the model assignments + Assert.Contains("gpt-5.1-codex", preset.WorkerModels); + Assert.Contains("claude-sonnet-4.5", preset.WorkerModels); + } + finally + { + if (Directory.Exists(tempDir)) 
Directory.Delete(tempDir, true); + } + } + + /// + /// Scenario: Dedicated evaluator session provides independent scoring. + /// + /// User flow: + /// 1. User creates a "Quick Reflection Cycle" from presets (OrchestratorReflect) + /// 2. Group has: opus orchestrator + 3 cheap workers + /// 3. User adds a 4th session, sets role to Worker, assigns gpt-4.1 + /// 4. In code, EvaluatorSession is set to this 4th session + /// 5. Orchestrator synthesizes, then evaluator independently scores + /// 6. Evaluator responds with structured format: "SCORE: 0.75\nRATIONALE: ..." + /// 7. System parses score, records it, shows in sidebar + /// + [Fact] + public void Scenario_DedicatedEvaluatorScoring() + { + // Step 1-4: Group with evaluator + var state = ReflectionCycle.Create("Refactor auth module", maxIterations: 5, evaluatorSession: "eval-agent"); + Assert.Equal("eval-agent", state.EvaluatorSessionName); + + // Step 6-7: Evaluator responds with structured format + var evalResponse = """ + ## Evaluation + + SCORE: 0.75 + RATIONALE: The auth module refactoring covers JWT validation and middleware setup, but session management is incomplete and there are no integration tests. The code structure is clean but error handling paths need work. + + [[NEEDS_ITERATION]] + - Add session persistence layer + - Add integration tests for login/logout flow + - Improve error handling in token refresh + """; + + var (score, rationale) = CopilotService.ParseEvaluationScore(evalResponse); + Assert.Equal(0.75, score); + Assert.Contains("session management is incomplete", rationale); + + // Record it + var trend = state.RecordEvaluation(1, score, rationale, "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend); + + // Sidebar shows: πŸ“Š 0.8 (gpt-4.1) (presumably the one-decimal display rounding of 0.75, TODO confirm; the stored score asserted below stays unrounded) + Assert.Equal(0.75, state.EvaluationHistory.Last().Score); + + // Next iteration: evaluator says done + var evalResponse2 = """ + SCORE: 0.93 + RATIONALE: All requirements met. 
Session persistence added, integration tests pass, error handling is comprehensive. + + [[GROUP_REFLECT_COMPLETE]] + """; + + var (score2, _) = CopilotService.ParseEvaluationScore(evalResponse2); + Assert.Equal(0.93, score2); + Assert.True(score2 >= 0.9); // triggers completion + Assert.Contains("[[GROUP_REFLECT_COMPLETE]]", evalResponse2); + + state.RecordEvaluation(2, score2, "All requirements met.", "gpt-4.1"); + state.GoalMet = true; + Assert.Contains("Goal met", state.BuildCompletionSummary()); + } + + /// + /// Scenario: Stall detection stops a reflect loop that's going in circles. + /// + /// User flow: + /// 1. Reflect loop is running, iteration 3 + /// 2. Workers keep producing similar output to iterations 1-2 + /// 3. String-based stall detector triggers after 2 consecutive matches + /// 4. Sidebar shows: "⚠️ Stalled after 3 iteration(s)" + /// 5. AutoAdjust banner: "⚠️ Output repetition detected..." + /// + [Fact] + public void Scenario_StallDetectionStopsLoop() + { + var state = ReflectionCycle.Create("Optimize database queries"); + + // Iterations 1-2: different responses β€” no stall + state.CurrentIteration = 1; + Assert.False(state.CheckStall("First attempt: added indexes on user_id column")); + + state.CurrentIteration = 2; + Assert.False(state.CheckStall("Second attempt: refactored joins to use CTEs")); + + // Iteration 3: exact repeat of iteration 2 β€” CheckStall detects string match immediately + state.CurrentIteration = 3; + Assert.True(state.CheckStall("Second attempt: refactored joins to use CTEs")); + state.IsStalled = true; // In the real loop, Advance() sets this + + Assert.Contains("Stalled", state.BuildCompletionSummary()); + } + + /// + /// Scenario: Model name inference handles a brand-new model release gracefully. + /// + /// User flow: + /// 1. A new model "claude-opus-5.0" is released + /// 2. Copilot server makes it available in AvailableModels + /// 3. User assigns it to an orchestrator via the model picker + /// 4. 
ModelCapabilities doesn't have it in the registry + /// 5. InferFromName detects "opus" β†’ ReasoningExpert + CodeExpert + ToolUse + /// 6. No "weak model" warning appears for orchestrator role + /// 7. User also assigns "gpt-6-codex-mini" to a worker + /// 8. InferFromName detects "codex" + "mini" β†’ CodeExpert + Fast + CostEfficient + /// + [Fact] + public void Scenario_NewModelReleasesHandledGracefully() + { + // Step 3-6: New opus model, not in registry + var opusCaps = ModelCapabilities.GetCapabilities("claude-opus-5.0"); + Assert.True(opusCaps.HasFlag(ModelCapability.ReasoningExpert)); + Assert.True(opusCaps.HasFlag(ModelCapability.CodeExpert)); + + var orchWarnings = ModelCapabilities.GetRoleWarnings("claude-opus-5.0", MultiAgentRole.Orchestrator); + // Should not warn about reasoning since inference detects it + Assert.DoesNotContain(orchWarnings, w => w.Contains("reasoning")); + + // Step 7-8: New codex-mini model + var codexMiniCaps = ModelCapabilities.GetCapabilities("gpt-6-codex-mini"); + Assert.True(codexMiniCaps.HasFlag(ModelCapability.CodeExpert)); + Assert.True(codexMiniCaps.HasFlag(ModelCapability.Fast)); + Assert.True(codexMiniCaps.HasFlag(ModelCapability.CostEfficient)); + + // Worker role should work fine with this model + var workerWarnings = ModelCapabilities.GetRoleWarnings("gpt-6-codex-mini", MultiAgentRole.Worker); + Assert.Empty(workerWarnings); // codex has CodeExpert, no warning + + // Strengths description works via inference for unknown models + var strengths = ModelCapabilities.GetStrengths("claude-opus-5.0"); + Assert.StartsWith("Inferred:", strengths); + Assert.Contains("reasoning", strengths); + Assert.Contains("code", strengths); + } + + /// + /// Scenario: Full diagnostics flow for a misconfigured group. + /// + /// User flow: + /// 1. User creates Orchestrator group but forgets to assign an orchestrator role + /// 2. All 3 sessions are Workers using the same cheap model + /// 3. 
Diagnostics panel shows: + /// β›” "Orchestrator mode requires at least one session with the Orchestrator role." + /// πŸ’‘ "All workers use the same model. For diverse perspectives, assign different models." + /// 4. User fixes: assigns one session as Orchestrator with opus + /// 5. Diagnostics update to clear the error, but show: + /// πŸ’° "Worker 'deep-thinker' uses premium model gpt-5.1. Consider a faster/cheaper model." + /// + [Fact] + public void Scenario_DiagnosticsGuideMisconfiguration() + { + // Step 1-3: Misconfigured group + var group = new SessionGroup + { + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator + }; + var badMembers = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("agent1", "gpt-4.1", MultiAgentRole.Worker), + ("agent2", "gpt-4.1", MultiAgentRole.Worker), + ("agent3", "gpt-4.1", MultiAgentRole.Worker), + }; + + var diags1 = GroupModelAnalyzer.Analyze(group, badMembers); + Assert.Contains(diags1, d => d.Level == "error" && d.Message.Contains("Orchestrator role")); + + // In broadcast mode, same-model workers get a diversity hint + group.OrchestratorMode = MultiAgentMode.Broadcast; + var diags1b = GroupModelAnalyzer.Analyze(group, badMembers); + Assert.Contains(diags1b, d => d.Level == "info" && d.Message.Contains("diverse")); + + // Step 4-5: User fixes by adding orchestrator with strong model, worker with premium + group.OrchestratorMode = MultiAgentMode.Orchestrator; + var fixedMembers = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("planner", "claude-opus-4.6", MultiAgentRole.Orchestrator), + ("fast-worker", "gpt-4.1", MultiAgentRole.Worker), + ("deep-thinker", "gpt-5.1", MultiAgentRole.Worker), + }; + + var diags2 = GroupModelAnalyzer.Analyze(group, fixedMembers); + Assert.DoesNotContain(diags2, d => d.Level == "error"); // no more errors + Assert.Contains(diags2, d => d.Level == "info" && d.Message.Contains("deep-thinker") && d.Message.Contains("premium")); + } +} + +/// +/// 
Tests for the aligned stall handling between single-agent and multi-agent paths. +/// Both now use 2-consecutive-stalls tolerance via ConsecutiveStalls counter. +/// +public class StallHandlingAlignmentTests +{ + [Fact] + public void SingleAgent_Advance_ToleratesFirstStall() + { + var cycle = ReflectionCycle.Create("Test goal", maxIterations: 10); + + // First iteration β€” unique response + Assert.True(cycle.Advance("First unique response about the topic")); + + // Second iteration β€” repeat triggers CheckStall but Advance tolerates it + Assert.True(cycle.Advance("First unique response about the topic")); + Assert.Equal(1, cycle.ConsecutiveStalls); + Assert.True(cycle.ShouldWarnOnStall); // warning but not stopped + Assert.False(cycle.IsStalled); + + // Third iteration β€” still repeating, now stalled + Assert.False(cycle.Advance("First unique response about the topic")); + Assert.True(cycle.IsStalled); + Assert.Equal(2, cycle.ConsecutiveStalls); + } + + [Fact] + public void SingleAgent_Advance_ResetsStallCountOnNewContent() + { + var cycle = ReflectionCycle.Create("Test goal", maxIterations: 10); + + cycle.Advance("Response A with some content"); + cycle.Advance("Response A with some content"); // first stall + Assert.Equal(1, cycle.ConsecutiveStalls); + + cycle.Advance("Response B completely different content"); // new content resets + Assert.Equal(0, cycle.ConsecutiveStalls); + Assert.False(cycle.IsStalled); + } + + [Fact] + public void MultiAgent_StallHandling_MatchesSingleAgent() + { + // Verify the multi-agent path uses same 2-consecutive tolerance + // by testing the ReflectionCycle state directly (service layer applies same logic) + var state = ReflectionCycle.Create("Multi-agent goal", maxIterations: 10); + + // Simulate what SendViaOrchestratorReflectAsync does: + // First stall: warn but continue + state.CurrentIteration = 1; + var isStall1 = state.CheckStall("Synthesis of worker outputs about authentication"); + Assert.False(isStall1); + + 
state.CurrentIteration = 2; + var isStall2 = state.CheckStall("Synthesis of worker outputs about authentication"); // repeat + Assert.True(isStall2); + + // Multi-agent path now increments ConsecutiveStalls (aligned with Advance) + state.ConsecutiveStalls++; + Assert.Equal(1, state.ConsecutiveStalls); + Assert.False(state.ConsecutiveStalls >= 2); // NOT stopped yet β€” this is the fix + + state.CurrentIteration = 3; + var isStall3 = state.CheckStall("Synthesis of worker outputs about authentication"); // still repeating + Assert.True(isStall3); + state.ConsecutiveStalls++; + Assert.True(state.ConsecutiveStalls >= 2); // NOW stopped + state.IsStalled = true; + Assert.Contains("Stalled", state.BuildCompletionSummary()); + } + + [Fact] + public void CheckStall_JaccardSimilarity_CatchesRephrasing() + { + var cycle = ReflectionCycle.Create("Test goal"); + + // First response + Assert.False(cycle.CheckStall("The authentication module needs JWT token validation and session management")); + + // Very similar rephrasing (should trigger Jaccard > 0.9) + Assert.True(cycle.CheckStall("The authentication module needs JWT token validation and session management support")); + } + + [Fact] + public void CheckStall_DifferentContent_NoFalsePositive() + { + var cycle = ReflectionCycle.Create("Test goal"); + + Assert.False(cycle.CheckStall("First I will implement the database layer with PostgreSQL")); + Assert.False(cycle.CheckStall("Next the API routes need Express middleware for auth")); + Assert.False(cycle.CheckStall("Finally the frontend React components for the dashboard")); + } +} + +public class WorktreeTeamAssociationTests +{ + private readonly StubChatDatabase _chatDb = new(); + private readonly StubServerManager _serverManager = new(); + private readonly StubWsBridgeClient _bridgeClient = new(); + private readonly StubDemoService _demoService = new(); + private readonly IServiceProvider _serviceProvider; + + public WorktreeTeamAssociationTests() + { + var services = new 
ServiceCollection(); + _serviceProvider = services.BuildServiceProvider(); + } + + private static RepoManager CreateRepoManagerWithState(List repos, List worktrees) + { + var rm = new RepoManager(); + var stateField = typeof(RepoManager).GetField("_state", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + var loadedField = typeof(RepoManager).GetField("_loaded", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + stateField.SetValue(rm, new RepositoryState { Repositories = repos, Worktrees = worktrees }); + loadedField.SetValue(rm, true); + return rm; + } + + private CopilotService CreateService(RepoManager? repoManager = null) => + new CopilotService(_chatDb, _serverManager, _bridgeClient, repoManager ?? new RepoManager(), _serviceProvider, _demoService); + + [Fact] + public void SessionGroup_WorktreeId_DefaultsToNull() + { + var group = new SessionGroup(); + Assert.Null(group.WorktreeId); + } + + [Fact] + public void CreateMultiAgentGroup_WithWorktreeId_SetsGroupFields() + { + var svc = CreateService(); + var group = svc.CreateMultiAgentGroup("Test Team", + worktreeId: "wt-123", + repoId: "repo-abc"); + + Assert.Equal("wt-123", group.WorktreeId); + Assert.Equal("repo-abc", group.RepoId); + Assert.True(group.IsMultiAgent); + } + + [Fact] + public void CreateMultiAgentGroup_WithWorktree_SetsSessionMetaWorktreeId() + { + var svc = CreateService(); + // Pre-create sessions + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1" }); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker2" }); + + var group = svc.CreateMultiAgentGroup("Test Team", + sessionNames: new List { "worker1", "worker2" }, + worktreeId: "wt-456", + repoId: "repo-xyz"); + + var w1 = svc.Organization.Sessions.First(s => s.SessionName == "worker1"); + var w2 = svc.Organization.Sessions.First(s => s.SessionName == "worker2"); + + Assert.Equal("wt-456", w1.WorktreeId); + 
Assert.Equal("wt-456", w2.WorktreeId); + Assert.Equal(group.Id, w1.GroupId); + Assert.Equal(group.Id, w2.GroupId); + } + + [Fact] + public void CreateMultiAgentGroup_WithoutWorktree_DoesNotSetWorktreeId() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1" }); + + var group = svc.CreateMultiAgentGroup("Test Team", + sessionNames: new List { "worker1" }); + + Assert.Null(group.WorktreeId); + Assert.Null(group.RepoId); + var w1 = svc.Organization.Sessions.First(s => s.SessionName == "worker1"); + Assert.Null(w1.WorktreeId); + } + + [Fact] + public void SessionGroup_WorktreeId_RoundTripsViaJson() + { + var state = new OrganizationState(); + state.Groups.Add(new SessionGroup + { + Id = "g1", + Name = "Team", + IsMultiAgent = true, + WorktreeId = "wt-789", + RepoId = "repo-test" + }); + + var json = JsonSerializer.Serialize(state); + var restored = JsonSerializer.Deserialize(json)!; + + var group = restored.Groups.First(g => g.Id == "g1"); + Assert.Equal("wt-789", group.WorktreeId); + Assert.Equal("repo-test", group.RepoId); + } + + [Fact] + public async Task CreateGroupFromPresetAsync_WithWorktree_SetsGroupAndSessionWorktreeIds() + { + var svc = CreateService(); + var preset = new GroupPreset( + Name: "Test Preset", + Emoji: "πŸ§ͺ", + Description: "Test", + OrchestratorModel: "claude-opus-4.6", + WorkerModels: new[] { "gpt-5.1-codex", "claude-sonnet-4.5" }, + Mode: MultiAgentMode.Broadcast + ); + + // CreateSessionAsync will throw since StubServerManager doesn't implement it, + // but the group itself should be created with worktree info + var group = await svc.CreateGroupFromPresetAsync(preset, + workingDirectory: @"C:\repos\test", + worktreeId: "wt-preset", + repoId: "repo-preset"); + + Assert.NotNull(group); + Assert.Equal("wt-preset", group!.WorktreeId); + Assert.Equal("repo-preset", group.RepoId); + } + + [Fact] + public async Task CreateGroupFromPresetAsync_PreservesOrchestratorReflectMode() + { + var svc = 
CreateService(); + var preset = new GroupPreset( + Name: "Reflect Test", + Emoji: "πŸ”„", + Description: "Test reflect mode", + OrchestratorModel: "claude-opus-4.6", + WorkerModels: new[] { "gpt-4.1" }, + Mode: MultiAgentMode.OrchestratorReflect + ); + + var group = await svc.CreateGroupFromPresetAsync(preset); + + Assert.NotNull(group); + Assert.Equal(MultiAgentMode.OrchestratorReflect, group!.OrchestratorMode); + } + + [Fact] + public void OrchestratorReflectMode_RoundTripsViaJson() + { + var state = new OrganizationState(); + state.Groups.Add(new SessionGroup + { + Id = "g-reflect", + Name = "Reflect Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect + }); + + var json = JsonSerializer.Serialize(state); + var restored = JsonSerializer.Deserialize(json)!; + + var group = restored.Groups.First(g => g.Id == "g-reflect"); + Assert.Equal(MultiAgentMode.OrchestratorReflect, group.OrchestratorMode); + } + + [Fact] + public void GroupHeader_ShowsWorktreeBadge_WhenWorktreeIdSet() + { + // Verify the data model supports worktree display in group headers + var group = new SessionGroup + { + Name = "Code Review Team", + IsMultiAgent = true, + WorktreeId = "wt-feature", + RepoId = "PureWeen-PolyPilot" + }; + + Assert.NotNull(group.WorktreeId); + Assert.NotNull(group.RepoId); + Assert.True(group.IsMultiAgent); + } + + [Fact] + public void ShortenPath_TwoOrFewerSegments_ReturnsOriginal() + { + Assert.Equal("test", ShortenPathHelper("test")); + Assert.Equal(@"C:\test", ShortenPathHelper(@"C:\test")); + } + + [Fact] + public void ShortenPath_LongPath_ShowsLastTwoSegments() + { + // Use platform-native path to avoid separator mismatch + var path = System.IO.Path.Combine("C:", "Users", "shneuvil", ".polypilot", "worktrees", "my-repo"); + var result = ShortenPathHelper(path); + var sep = System.IO.Path.DirectorySeparatorChar; + Assert.Equal($"…{sep}worktrees{sep}my-repo", result); + } + + [Fact] + public void ShortenPath_EmptyOrNull_ReturnsEmpty() 
+ { + Assert.Equal("", ShortenPathHelper("")); + } + + private static string ShortenPathHelper(string path) + { + if (string.IsNullOrEmpty(path)) return ""; + var sep = System.IO.Path.DirectorySeparatorChar; + var parts = path.TrimEnd(sep).Split(sep); + return parts.Length <= 2 ? path : "…" + sep + string.Join(sep, parts[^2..]); + } +} + +/// +/// Tests for session grouping stability: ensures multi-agent sessions are not +/// scattered during reconciliation, deleted-group orphaning, or JSON round-trips. +/// Guards against the recurring bug where multi-agent group sessions get moved +/// to repo groups after app restart. +/// +public class GroupingStabilityTests +{ + private readonly StubChatDatabase _chatDb = new(); + private readonly StubServerManager _serverManager = new(); + private readonly StubWsBridgeClient _bridgeClient = new(); + private readonly StubDemoService _demoService = new(); + private readonly IServiceProvider _serviceProvider; + + public GroupingStabilityTests() + { + var services = new ServiceCollection(); + _serviceProvider = services.BuildServiceProvider(); + } + + private static RepoManager CreateRepoManagerWithState(List repos, List worktrees) + { + var rm = new RepoManager(); + var stateField = typeof(RepoManager).GetField("_state", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + var loadedField = typeof(RepoManager).GetField("_loaded", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + stateField.SetValue(rm, new RepositoryState { Repositories = repos, Worktrees = worktrees }); + loadedField.SetValue(rm, true); + return rm; + } + + private CopilotService CreateService(RepoManager? repoManager = null) => + new CopilotService(_chatDb, _serverManager, _bridgeClient, repoManager ?? new RepoManager(), _serviceProvider, _demoService); + + /// + /// Inject session names into the alias cache so ReconcileOrganization doesn't prune them. 
+ /// In test environment there are no active sessions or alias/active-sessions files. + /// + private static void RegisterKnownSessions(CopilotService svc, params string[] sessionNames) + { + var field = typeof(CopilotService).GetField("_aliasCache", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + var cache = (Dictionary?)field.GetValue(svc) ?? new(); + foreach (var name in sessionNames) + cache[name] = name; + field.SetValue(svc, cache); + } + + // --- Multi-agent group JSON round-trip tests --- + + [Fact] + public void MultiAgentGroup_FullState_SurvivesJsonRoundTrip() + { + var state = new OrganizationState(); + var maGroup = new SessionGroup + { + Id = "ma-team-1", + Name = "Reflection Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + OrchestratorPrompt = "You are a code review orchestrator", + DefaultWorkerModel = "gpt-5.1-codex", + DefaultOrchestratorModel = "claude-opus-4.6", + WorktreeId = "wt-abc", + RepoId = "repo-xyz", + SortOrder = 2 + }; + state.Groups.Add(maGroup); + state.Sessions.Add(new SessionMeta + { + SessionName = "team-orchestrator", + GroupId = "ma-team-1", + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-abc" + }); + state.Sessions.Add(new SessionMeta + { + SessionName = "team-worker-1", + GroupId = "ma-team-1", + Role = MultiAgentRole.Worker, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-abc" + }); + + var json = JsonSerializer.Serialize(state, new JsonSerializerOptions { WriteIndented = true }); + var restored = JsonSerializer.Deserialize(json)!; + + // Verify the multi-agent group survived + var group = restored.Groups.FirstOrDefault(g => g.Id == "ma-team-1"); + Assert.NotNull(group); + Assert.True(group!.IsMultiAgent); + Assert.Equal(MultiAgentMode.OrchestratorReflect, group.OrchestratorMode); + Assert.Equal("You are a code review orchestrator", group.OrchestratorPrompt); + Assert.Equal("gpt-5.1-codex", 
group.DefaultWorkerModel); + Assert.Equal("claude-opus-4.6", group.DefaultOrchestratorModel); + Assert.Equal("wt-abc", group.WorktreeId); + Assert.Equal("repo-xyz", group.RepoId); + + // Verify sessions survived + var orch = restored.Sessions.FirstOrDefault(s => s.SessionName == "team-orchestrator"); + var worker = restored.Sessions.FirstOrDefault(s => s.SessionName == "team-worker-1"); + Assert.NotNull(orch); + Assert.NotNull(worker); + Assert.Equal("ma-team-1", orch!.GroupId); + Assert.Equal("ma-team-1", worker!.GroupId); + Assert.Equal(MultiAgentRole.Orchestrator, orch.Role); + Assert.Equal("claude-opus-4.6", orch.PreferredModel); + } + + [Fact] + public void MultipleGroups_IncludingMultiAgent_AllSurviveRoundTrip() + { + var state = new OrganizationState(); + state.Groups.Add(new SessionGroup + { + Id = "repo-group", + Name = "PolyPilot", + RepoId = "PureWeen-PolyPilot" + }); + state.Groups.Add(new SessionGroup + { + Id = "ma-team", + Name = "Review Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator, + WorktreeId = "wt-1", + RepoId = "PureWeen-PolyPilot" + }); + state.Sessions.Add(new SessionMeta { SessionName = "regular", GroupId = "repo-group" }); + state.Sessions.Add(new SessionMeta { SessionName = "team-orch", GroupId = "ma-team", Role = MultiAgentRole.Orchestrator }); + state.Sessions.Add(new SessionMeta { SessionName = "team-w1", GroupId = "ma-team" }); + + var json = JsonSerializer.Serialize(state); + var restored = JsonSerializer.Deserialize(json)!; + + // All 3 groups should exist (default + repo + multi-agent) + Assert.Equal(3, restored.Groups.Count); + Assert.Contains(restored.Groups, g => g.Id == "ma-team" && g.IsMultiAgent); + Assert.Contains(restored.Groups, g => g.Id == "repo-group" && !g.IsMultiAgent); + Assert.Equal(3, restored.Sessions.Count); + } + + // --- DeleteGroup tests --- + + [Fact] + public void DeleteGroup_MultiAgent_RemovesSessions() + { + var svc = CreateService(); + + // Create a multi-agent group with 
sessions + var group = svc.CreateMultiAgentGroup("Test Team"); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "orch", + GroupId = group.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "worker-1", + GroupId = group.Id, + Role = MultiAgentRole.Worker, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-1" + }); + + svc.DeleteGroup(group.Id); + + // Multi-agent sessions should be removed, not orphaned + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "orch"); + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "worker-1"); + // Group should be removed + Assert.DoesNotContain(svc.Organization.Groups, g => g.Id == group.Id); + } + + [Fact] + public void DeleteGroup_MultiAgent_RemovesSessionMetadata() + { + var svc = CreateService(); + var group = svc.CreateMultiAgentGroup("Team"); + + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "orch", + GroupId = group.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + + svc.DeleteGroup(group.Id); + + // Multi-agent sessions should be removed entirely, not orphaned + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "orch"); + } + + // --- Reconciliation protection tests --- + + [Fact] + public void Reconcile_SessionsInMultiAgentGroup_NotMovedToRepoGroup() + { + var repos = new List + { + new() { Id = "repo-1", Name = "MyRepo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + + // Create repo group and multi-agent group sharing the same repo + var repoGroup = svc.GetOrCreateRepoGroup("repo-1", "MyRepo"); + var maGroup = 
svc.CreateMultiAgentGroup("Review Team", worktreeId: "wt-1", repoId: "repo-1"); + + // Add sessions to multi-agent group + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-orch", + GroupId = maGroup.Id, + Role = MultiAgentRole.Orchestrator, + WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-w1", + GroupId = maGroup.Id, + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "team-orch", "team-w1"); + + // Run reconciliation β€” sessions should stay in multi-agent group + svc.ReconcileOrganization(); + + var orch = svc.Organization.Sessions.First(s => s.SessionName == "team-orch"); + var worker = svc.Organization.Sessions.First(s => s.SessionName == "team-w1"); + Assert.Equal(maGroup.Id, orch.GroupId); + Assert.Equal(maGroup.Id, worker.GroupId); + // Should NOT have been moved to the repo group + Assert.NotEqual(repoGroup.Id, orch.GroupId); + Assert.NotEqual(repoGroup.Id, worker.GroupId); + } + + [Fact] + public void Reconcile_OrphanedFromDeletedGroup_GoesToDefault() + { + var svc = CreateService(); + + // Simulate sessions pointing to a non-existent group (as if the group was deleted) + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "orphan-orch", + GroupId = "deleted-group-id", + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "orphan-worker", + GroupId = "deleted-group-id", + Role = MultiAgentRole.Worker, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "orphan-orch", "orphan-worker"); + + svc.ReconcileOrganization(); + + var orch = svc.Organization.Sessions.First(s => s.SessionName == "orphan-orch"); + var worker = svc.Organization.Sessions.First(s => s.SessionName == "orphan-worker"); + Assert.Equal(SessionGroup.DefaultId, orch.GroupId); + Assert.Equal(SessionGroup.DefaultId, worker.GroupId); + } + + 
[Fact] + public void Reconcile_OrphanedMultiAgentSessions_NotAutoMovedToRepoGroup() + { + // This is the key bug test: after a multi-agent group disappears, + // orphaned sessions with WorktreeIds should NOT be auto-moved to the repo group. + var repos = new List + { + new() { Id = "repo-1", Name = "MyRepo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + svc.GetOrCreateRepoGroup("repo-1", "MyRepo"); + + // Simulate orphaned multi-agent sessions already in _default with WorktreeId set + // (as if a previous reconciliation moved them from a deleted group) + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-orch", + GroupId = SessionGroup.DefaultId, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-worker", + GroupId = SessionGroup.DefaultId, + Role = MultiAgentRole.Worker, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "team-orch", "team-worker"); + + svc.ReconcileOrganization(); + + var orch = svc.Organization.Sessions.First(s => s.SessionName == "team-orch"); + var worker = svc.Organization.Sessions.First(s => s.SessionName == "team-worker"); + + // Orchestrator should NOT be moved (has Orchestrator role) + Assert.Equal(SessionGroup.DefaultId, orch.GroupId); + // Worker with PreferredModel should NOT be moved (was multi-agent member) + Assert.Equal(SessionGroup.DefaultId, worker.GroupId); + } + + [Fact] + public void Reconcile_RegularSession_WithWorktree_InDefault_SurvivesPrune() + { + // Verifies that regular sessions with worktrees aren't pruned during reconciliation. + // Note: auto-move from _default to repo group only happens for active sessions (in _sessions). 
+ // This tests that the session metadata is preserved for when the session becomes active. + var repos = new List + { + new() { Id = "repo-1", Name = "MyRepo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + svc.GetOrCreateRepoGroup("repo-1", "MyRepo"); + + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "regular-session", + GroupId = SessionGroup.DefaultId, + WorktreeId = "wt-1", + PreferredModel = null, + Role = MultiAgentRole.Worker + }); + + RegisterKnownSessions(svc, "regular-session"); + svc.ReconcileOrganization(); + + // Session should still exist (not pruned) + var meta = svc.Organization.Sessions.FirstOrDefault(s => s.SessionName == "regular-session"); + Assert.NotNull(meta); + Assert.Equal("wt-1", meta!.WorktreeId); + } + + [Fact] + public void WasMultiAgent_DetectsOrchestratorRole() + { + // Verifies the wasMultiAgent heuristic used in reconciliation + var orch = new SessionMeta { Role = MultiAgentRole.Orchestrator }; + var workerWithModel = new SessionMeta { Role = MultiAgentRole.Worker, PreferredModel = "gpt-5.1-codex" }; + var regularWorker = new SessionMeta { Role = MultiAgentRole.Worker, PreferredModel = null }; + + // Orchestrator role β†’ was multi-agent + Assert.True(orch.Role == MultiAgentRole.Orchestrator || orch.PreferredModel != null); + // Worker with PreferredModel β†’ was multi-agent + Assert.True(workerWithModel.Role == MultiAgentRole.Orchestrator || workerWithModel.PreferredModel != null); + // Regular worker (no PreferredModel) β†’ not multi-agent + Assert.False(regularWorker.Role == MultiAgentRole.Orchestrator || regularWorker.PreferredModel != null); + } + + // --- Full lifecycle simulation tests --- + + [Fact] + public void FullLifecycle_CreateTeam_Serialize_Deserialize_SessionsIntact() + { + var svc = 
CreateService(); + var group = svc.CreateMultiAgentGroup("QRC", + worktreeId: "wt-feature", + repoId: "repo-poly"); + + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "QRC-orchestrator", + GroupId = group.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-feature" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "QRC-worker-1", + GroupId = group.Id, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-feature" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "QRC-worker-2", + GroupId = group.Id, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-feature" + }); + + // Serialize (simulate app save) + var json = JsonSerializer.Serialize(svc.Organization, new JsonSerializerOptions { WriteIndented = true }); + + // Deserialize (simulate app reload) + var restored = JsonSerializer.Deserialize(json)!; + + // All sessions should still point to the multi-agent group + Assert.Contains(restored.Groups, g => g.Id == group.Id && g.IsMultiAgent); + foreach (var session in restored.Sessions.Where(s => s.SessionName.StartsWith("QRC-"))) + { + Assert.Equal(group.Id, session.GroupId); + } + } + + [Fact] + public void FullLifecycle_DeleteTeam_ThenReconcile_SessionsStayInDefault() + { + // Simulates: create team β†’ delete team β†’ reconcile β†’ sessions stay visible in default + var repos = new List + { + new() { Id = "repo-1", Name = "MyRepo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + svc.GetOrCreateRepoGroup("repo-1", "MyRepo"); + + var group = svc.CreateMultiAgentGroup("Team", + worktreeId: "wt-1", repoId: "repo-1"); + + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-orch", + GroupId = group.Id, + Role = 
MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-w1", + GroupId = group.Id, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-1" + }); + + // Delete the team + svc.DeleteGroup(group.Id); + + // Multi-agent sessions should be removed entirely + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "team-orch"); + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "team-w1"); + + // Group should be gone + Assert.DoesNotContain(svc.Organization.Groups, g => g.Id == group.Id); + } + + [Fact] + public void DeleteGroup_NonMultiAgent_MovesSessionsToDefault() + { + var svc = CreateService(); + var group = svc.GetOrCreateRepoGroup("repo-1", "MyRepo"); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "s1", + GroupId = group.Id, + WorktreeId = "wt-1" + }); + + svc.DeleteGroup(group.Id); + + // Non-multi-agent: sessions move to default + var s = svc.Organization.Sessions.First(s => s.SessionName == "s1"); + Assert.Equal(SessionGroup.DefaultId, s.GroupId); + } + + [Fact] + public void Reconcile_DeletedGroupId_NotInGroupsList_SessionsOrphaned() + { + // Simulates loading organization.json where a group is missing + // but sessions still reference it + var svc = CreateService(); + + // Manually add sessions referencing a group that doesn't exist + var phantomGroupId = "phantom-group-" + Guid.NewGuid(); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "ghost-1", + GroupId = phantomGroupId, + WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "ghost-2", + GroupId = phantomGroupId, + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "ghost-1", "ghost-2"); + + svc.ReconcileOrganization(); + + // Both sessions should be in default now + Assert.All(svc.Organization.Sessions.Where(s => s.SessionName.StartsWith("ghost-")), + m => 
Assert.Equal(SessionGroup.DefaultId, m.GroupId)); + } + + [Fact] + public void Reconcile_MultiAgentGroupExists_SessionsUntouched() + { + // When the multi-agent group exists, reconciliation should not alter its sessions at all + var svc = CreateService(); + var group = svc.CreateMultiAgentGroup("Stable Team"); + + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "stable-orch", + GroupId = group.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "stable-w1", + GroupId = group.Id, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "stable-orch", "stable-w1"); + + var orchGroupBefore = svc.Organization.Sessions.First(s => s.SessionName == "stable-orch").GroupId; + var workerGroupBefore = svc.Organization.Sessions.First(s => s.SessionName == "stable-w1").GroupId; + + svc.ReconcileOrganization(); + + Assert.Equal(orchGroupBefore, svc.Organization.Sessions.First(s => s.SessionName == "stable-orch").GroupId); + Assert.Equal(workerGroupBefore, svc.Organization.Sessions.First(s => s.SessionName == "stable-w1").GroupId); + } + + [Fact] + public void OrganizationState_WithReflectionState_SurvivesRoundTrip() + { + var state = new OrganizationState(); + state.Groups.Add(new SessionGroup + { + Id = "reflect-team", + Name = "Reflect", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + ReflectionState = ReflectionCycle.Create("Fix all bugs", 10) + }); + + var json = JsonSerializer.Serialize(state, new JsonSerializerOptions { WriteIndented = true }); + var restored = JsonSerializer.Deserialize(json)!; + + var group = restored.Groups.First(g => g.Id == "reflect-team"); + Assert.NotNull(group.ReflectionState); + Assert.Equal("Fix all bugs", group.ReflectionState!.Goal); + Assert.Equal(10, group.ReflectionState.MaxIterations); + 
Assert.True(group.ReflectionState.IsActive); + } } diff --git a/PolyPilot.Tests/SquadDiscoveryTests.cs b/PolyPilot.Tests/SquadDiscoveryTests.cs new file mode 100644 index 0000000000..955b0f3f69 --- /dev/null +++ b/PolyPilot.Tests/SquadDiscoveryTests.cs @@ -0,0 +1,260 @@ +using PolyPilot.Models; + +namespace PolyPilot.Tests; + +public class SquadDiscoveryTests +{ + private static string TestDataDir => Path.Combine( + AppContext.BaseDirectory, "..", "..", "..", "TestData"); + + private static string SquadSampleDir => Path.Combine(TestDataDir, "squad-sample"); + private static string LegacyAiTeamDir => Path.Combine(TestDataDir, "legacy-ai-team"); + + // --- FindSquadDirectory --- + + [Fact] + public void FindSquadDirectory_PrefersDotSquad() + { + var result = SquadDiscovery.FindSquadDirectory(SquadSampleDir); + Assert.NotNull(result); + Assert.EndsWith(".squad", result); + } + + [Fact] + public void FindSquadDirectory_FallsBackToAiTeam() + { + var result = SquadDiscovery.FindSquadDirectory(LegacyAiTeamDir); + Assert.NotNull(result); + Assert.EndsWith(".ai-team", result); + } + + [Fact] + public void FindSquadDirectory_ReturnsNull_WhenNeitherExists() + { + var result = SquadDiscovery.FindSquadDirectory(Path.GetTempPath()); + Assert.Null(result); + } + + // --- ParseTeamName --- + + [Fact] + public void ParseTeamName_ExtractsH1Heading() + { + var content = "# The Review Squad\n\nSome description\n"; + Assert.Equal("The Review Squad", SquadDiscovery.ParseTeamName(content)); + } + + [Fact] + public void ParseTeamName_ReturnsNull_WhenNoHeading() + { + var content = "Just a table\n| Member | Role |\n"; + Assert.Null(SquadDiscovery.ParseTeamName(content)); + } + + // --- ParseRosterNames --- + + [Fact] + public void ParseRosterNames_ExtractsAgentNames() + { + var content = "# Team\n| Member | Role |\n|--------|------|\n| security-reviewer | Auditor |\n| perf-analyst | Analyst |"; + var names = SquadDiscovery.ParseRosterNames(content); + Assert.Contains("security-reviewer", 
names); + Assert.Contains("perf-analyst", names); + Assert.DoesNotContain("Member", names); + Assert.DoesNotContain("---", names); + } + + // --- DiscoverAgents --- + + [Fact] + public void DiscoverAgents_SkipsScribe() + { + var squadDir = Path.Combine(SquadSampleDir, ".squad"); + var agents = SquadDiscovery.DiscoverAgents(squadDir); + Assert.DoesNotContain(agents, a => a.Name.Equals("scribe", StringComparison.OrdinalIgnoreCase)); + } + + [Fact] + public void DiscoverAgents_FindsRealAgents() + { + var squadDir = Path.Combine(SquadSampleDir, ".squad"); + var agents = SquadDiscovery.DiscoverAgents(squadDir); + Assert.Equal(2, agents.Count); // security-reviewer + perf-analyst (not scribe) + Assert.Contains(agents, a => a.Name == "security-reviewer"); + Assert.Contains(agents, a => a.Name == "perf-analyst"); + } + + [Fact] + public void DiscoverAgents_ReadsCharterContent() + { + var squadDir = Path.Combine(SquadSampleDir, ".squad"); + var agents = SquadDiscovery.DiscoverAgents(squadDir); + var security = agents.First(a => a.Name == "security-reviewer"); + Assert.NotNull(security.Charter); + Assert.Contains("OWASP Top 10", security.Charter); + } + + // --- Discover (full integration) --- + + [Fact] + public void Discover_ReturnsPreset_FromSquadDir() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + Assert.Single(presets); + var preset = presets[0]; + Assert.Equal("The Review Squad", preset.Name); + Assert.True(preset.IsRepoLevel); + Assert.Equal(MultiAgentMode.OrchestratorReflect, preset.Mode); + Assert.Equal(2, preset.WorkerModels.Length); + } + + [Fact] + public void Discover_SetsSystemPrompts_FromCharters() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + var preset = presets[0]; + Assert.NotNull(preset.WorkerSystemPrompts); + Assert.Equal(2, preset.WorkerSystemPrompts.Length); + + // At least one should contain OWASP (security-reviewer's charter) + Assert.True(preset.WorkerSystemPrompts.Any(p => p != null && p.Contains("OWASP")), + 
"Expected a worker system prompt containing 'OWASP'"); + // At least one should contain latency (perf-analyst's charter) + Assert.True(preset.WorkerSystemPrompts.Any(p => p != null && p.Contains("Latency")), + "Expected a worker system prompt containing 'Latency'"); + } + + [Fact] + public void Discover_ReadsDecisions_AsSharedContext() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + var preset = presets[0]; + Assert.NotNull(preset.SharedContext); + Assert.Contains("structured logging", preset.SharedContext); + Assert.Contains("async/await", preset.SharedContext); + } + + [Fact] + public void Discover_ReadsRouting_AsRoutingContext() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + var preset = presets[0]; + Assert.NotNull(preset.RoutingContext); + Assert.Contains("security-reviewer", preset.RoutingContext); + } + + [Fact] + public void Discover_LegacyAiTeam_Works() + { + var presets = SquadDiscovery.Discover(LegacyAiTeamDir); + Assert.Single(presets); + var preset = presets[0]; + Assert.Equal("Legacy Team", preset.Name); + Assert.True(preset.IsRepoLevel); + Assert.Single(preset.WorkerModels); + } + + [Fact] + public void Discover_ReturnsEmpty_WhenNoSquadDir() + { + var presets = SquadDiscovery.Discover(Path.GetTempPath()); + Assert.Empty(presets); + } + + [Fact] + public void Discover_ReturnsEmpty_WhenNoTeamMd() + { + // Create temp dir with .squad/ but no team.md + var tempDir = Path.Combine(Path.GetTempPath(), $"squad-test-{Guid.NewGuid():N}"); + try + { + Directory.CreateDirectory(Path.Combine(tempDir, ".squad", "agents", "test")); + File.WriteAllText(Path.Combine(tempDir, ".squad", "agents", "test", "charter.md"), "test charter"); + + var presets = SquadDiscovery.Discover(tempDir); + Assert.Empty(presets); + } + finally + { + Directory.Delete(tempDir, true); + } + } + + [Fact] + public void Discover_TruncatesLongCharters() + { + var tempDir = Path.Combine(Path.GetTempPath(), $"squad-test-{Guid.NewGuid():N}"); + try + { + 
Directory.CreateDirectory(Path.Combine(tempDir, ".squad", "agents", "verbose")); + File.WriteAllText(Path.Combine(tempDir, ".squad", "team.md"), "# Long Charter Test\n| Member | Role |\n|---|---|\n| verbose | Talker |"); + File.WriteAllText(Path.Combine(tempDir, ".squad", "agents", "verbose", "charter.md"), + new string('x', 5000)); // Over 4000 char limit + + var presets = SquadDiscovery.Discover(tempDir); + Assert.Single(presets); + Assert.True(presets[0].WorkerSystemPrompts![0]!.Length <= 4000); + } + finally + { + Directory.Delete(tempDir, true); + } + } + + // --- Three-tier merge --- + + [Fact] + public void GetAll_WithRepoPath_IncludesSquadPresets() + { + var all = UserPresets.GetAll(Path.GetTempPath(), SquadSampleDir); + Assert.Contains(all, p => p.Name == "The Review Squad" && p.IsRepoLevel); + // Built-in should also be present + Assert.Contains(all, p => p.Name == "Code Review Team"); + } + + [Fact] + public void GetAll_WithoutRepoPath_NoSquadPresets() + { + var all = UserPresets.GetAll(Path.GetTempPath()); + Assert.DoesNotContain(all, p => p.IsRepoLevel); + } + + [Fact] + public void GetAll_RepoOverrides_BuiltInByName() + { + // Create a temp Squad dir with a preset named "Code Review Team" + var tempDir = Path.Combine(Path.GetTempPath(), $"squad-test-{Guid.NewGuid():N}"); + try + { + Directory.CreateDirectory(Path.Combine(tempDir, ".squad", "agents", "reviewer")); + File.WriteAllText(Path.Combine(tempDir, ".squad", "team.md"), + "# Code Review Team\n| Member | Role |\n|---|---|\n| reviewer | Reviewer |"); + File.WriteAllText(Path.Combine(tempDir, ".squad", "agents", "reviewer", "charter.md"), + "Custom repo reviewer."); + + var all = UserPresets.GetAll(Path.GetTempPath(), tempDir); + var crt = all.Single(p => p.Name == "Code Review Team"); + Assert.True(crt.IsRepoLevel, "Repo version should shadow built-in"); + } + finally + { + Directory.Delete(tempDir, true); + } + } + + [Fact] + public void Discover_SetsSourcePath() + { + var presets = 
SquadDiscovery.Discover(SquadSampleDir); + Assert.Single(presets); + Assert.NotNull(presets[0].SourcePath); + Assert.True(presets[0].SourcePath!.EndsWith(".squad")); + } + + [Fact] + public void Discover_HasEmoji() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + Assert.Equal("🫑", presets[0].Emoji); + } +} diff --git a/PolyPilot.Tests/SquadWriterTests.cs b/PolyPilot.Tests/SquadWriterTests.cs new file mode 100644 index 0000000000..00e1c087d5 --- /dev/null +++ b/PolyPilot.Tests/SquadWriterTests.cs @@ -0,0 +1,283 @@ +using PolyPilot.Models; + +namespace PolyPilot.Tests; + +public class SquadWriterTests : IDisposable +{ + private readonly string _tempDir; + + public SquadWriterTests() + { + _tempDir = Path.Combine(Path.GetTempPath(), "squad-writer-" + Guid.NewGuid().ToString("N")[..8]); + Directory.CreateDirectory(_tempDir); + } + + public void Dispose() + { + if (Directory.Exists(_tempDir)) + Directory.Delete(_tempDir, recursive: true); + } + + [Fact] + public void WritePreset_CreatesSquadDirectory() + { + var preset = MakePreset("My Team"); + var workers = new List<(string Name, string? SystemPrompt)> + { + ("reviewer", "You are a code reviewer. Focus on correctness."), + ("analyst", "You are a performance analyst.") + }; + + var squadDir = SquadWriter.WritePreset(_tempDir, preset, workers); + + Assert.True(Directory.Exists(squadDir)); + Assert.True(File.Exists(Path.Combine(squadDir, "team.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "agents", "reviewer", "charter.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "agents", "analyst", "charter.md"))); + } + + [Fact] + public void WritePreset_TeamMdHasCorrectFormat() + { + var preset = MakePreset("Review Squad"); + var workers = new List<(string Name, string? 
SystemPrompt)> + { + ("security", "You are a security auditor."), + ("perf", null) + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var content = File.ReadAllText(Path.Combine(_tempDir, ".squad", "team.md")); + Assert.Contains("# Review Squad", content); + Assert.Contains("| security |", content); + Assert.Contains("| perf |", content); + Assert.Contains("| Member | Role |", content); + } + + [Fact] + public void WritePreset_CharterContainsSystemPrompt() + { + var preset = MakePreset("Team"); + var workers = new List<(string Name, string? SystemPrompt)> + { + ("dev", "You are a full-stack developer. Write clean code.") + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var charter = File.ReadAllText(Path.Combine(_tempDir, ".squad", "agents", "dev", "charter.md")); + Assert.Equal("You are a full-stack developer. Write clean code.", charter); + } + + [Fact] + public void WritePreset_NullPromptGetsDefaultCharter() + { + var preset = MakePreset("Team"); + var workers = new List<(string Name, string? SystemPrompt)> + { + ("helper", null) + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var charter = File.ReadAllText(Path.Combine(_tempDir, ".squad", "agents", "helper", "charter.md")); + Assert.Contains("helper", charter); + } + + [Fact] + public void WritePreset_WritesDecisionsMd() + { + var preset = MakePreset("Team") with { SharedContext = "Always use async/await." }; + var workers = new List<(string Name, string? SystemPrompt)> { ("w1", null) }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var decisions = File.ReadAllText(Path.Combine(_tempDir, ".squad", "decisions.md")); + Assert.Equal("Always use async/await.", decisions); + } + + [Fact] + public void WritePreset_WritesRoutingMd() + { + var preset = MakePreset("Team") with { RoutingContext = "| *.cs | dev | C# code |" }; + var workers = new List<(string Name, string? 
SystemPrompt)> { ("dev", null) }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var routing = File.ReadAllText(Path.Combine(_tempDir, ".squad", "routing.md")); + Assert.Equal("| *.cs | dev | C# code |", routing); + } + + [Fact] + public void WritePreset_NoSharedContext_NoDecisionsFile() + { + var preset = MakePreset("Team"); + var workers = new List<(string Name, string? SystemPrompt)> { ("w1", null) }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + Assert.False(File.Exists(Path.Combine(_tempDir, ".squad", "decisions.md"))); + } + + [Fact] + public void RoundTrip_WriteAndReadBack() + { + var preset = MakePreset("Round Trip Team") with + { + SharedContext = "Use TypeScript only.", + RoutingContext = "| *.ts | dev | TypeScript |" + }; + var workers = new List<(string Name, string? SystemPrompt)> + { + ("security", "You are a security auditor. Focus on OWASP."), + ("dev", "You are a developer. Write clean code.") + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + // Read back via SquadDiscovery + var discovered = SquadDiscovery.Discover(_tempDir); + Assert.Single(discovered); + var result = discovered[0]; + Assert.Equal("Round Trip Team", result.Name); + Assert.True(result.IsRepoLevel); + Assert.Equal(2, result.WorkerModels.Length); + // Order may vary by directory enumeration β€” check both prompts are present + var allPrompts = string.Join(" | ", result.WorkerSystemPrompts!); + Assert.Contains("OWASP", allPrompts); + Assert.Contains("clean code", allPrompts); + Assert.Contains("TypeScript", result.SharedContext); + Assert.Contains("TypeScript", result.RoutingContext); + } + + [Fact] + public void RoundTrip_PreservesTeamName() + { + var preset = MakePreset("Special Characters & Stuff"); + var workers = new List<(string Name, string? 
SystemPrompt)> { ("w1", "Test prompt.") }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var discovered = SquadDiscovery.Discover(_tempDir); + Assert.Single(discovered); + Assert.Equal("Special Characters & Stuff", discovered[0].Name); + } + + [Fact] + public void SanitizeAgentName_StripsTeamPrefix() + { + var name = SquadWriter.SanitizeAgentName("Code Review Team-worker-1", "Code Review Team"); + Assert.Equal("worker-1", name); + } + + [Fact] + public void SanitizeAgentName_LowercasesResult() + { + var name = SquadWriter.SanitizeAgentName("MyTeam-SecurityAuditor", "MyTeam"); + Assert.Equal("securityauditor", name); + } + + [Fact] + public void SanitizeAgentName_NoPrefix_ReturnsLowerName() + { + var name = SquadWriter.SanitizeAgentName("standalone-agent", "Different Team"); + Assert.Equal("standalone-agent", name); + } + + [Fact] + public void DeriveRole_ExtractsFromPrompt() + { + var preset = MakePreset("Team"); + var workers = new List<(string Name, string? SystemPrompt)> + { + ("sec", "You are a security auditor. Focus on OWASP Top 10.") + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var content = File.ReadAllText(Path.Combine(_tempDir, ".squad", "team.md")); + Assert.Contains("| sec | security auditor |", content); + } + + [Fact] + public void WritePreset_OverwritesExisting() + { + var preset1 = MakePreset("Team"); + var workers1 = new List<(string Name, string? SystemPrompt)> { ("old-agent", "Old charter.") }; + SquadWriter.WritePreset(_tempDir, preset1, workers1); + + var preset2 = MakePreset("Team v2"); + var workers2 = new List<(string Name, string? 
SystemPrompt)> { ("new-agent", "New charter.") }; + SquadWriter.WritePreset(_tempDir, preset2, workers2); + + var content = File.ReadAllText(Path.Combine(_tempDir, ".squad", "team.md")); + Assert.Contains("# Team v2", content); + Assert.Contains("| new-agent |", content); + // Old agent dir may still exist (we don't delete, just overwrite) + Assert.True(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "new-agent"))); + } + + [Fact] + public void WriteFromGroup_CreatesSquadFromSessionData() + { + var group = new SessionGroup + { + Name = "Live Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + SharedContext = "Be concise.", + }; + var members = new List + { + new() { SessionName = "Live Team-orchestrator", Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6" }, + new() { SessionName = "Live Team-worker-1", Role = MultiAgentRole.Worker, PreferredModel = "gpt-5", SystemPrompt = "You are a code reviewer." }, + new() { SessionName = "Live Team-worker-2", Role = MultiAgentRole.Worker, PreferredModel = "claude-sonnet-4.5", SystemPrompt = "You are a test writer." }, + }; + string GetModel(string name) => members.First(m => m.SessionName == name).PreferredModel ?? 
"default"; + + var squadDir = SquadWriter.WriteFromGroup(_tempDir, "Live Team", group, members, GetModel); + + Assert.True(File.Exists(Path.Combine(squadDir, "team.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "decisions.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "agents", "worker-1", "charter.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "agents", "worker-2", "charter.md"))); + + // Verify round-trip + var discovered = SquadDiscovery.Discover(_tempDir); + Assert.Single(discovered); + Assert.Equal("Live Team", discovered[0].Name); + Assert.Equal(2, discovered[0].WorkerModels.Length); + } + + [Fact] + public void WritePreset_OverwriteCleansStaleAgents() + { + var preset = MakePreset("Team"); + var threeWorkers = new List<(string Name, string? SystemPrompt)> + { + ("alpha", "Alpha agent."), + ("beta", "Beta agent."), + ("gamma", "Gamma agent.") + }; + + SquadWriter.WritePreset(_tempDir, preset, threeWorkers); + Assert.True(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "gamma"))); + + // Overwrite with only 2 workers β€” gamma dir should be gone + var twoWorkers = new List<(string Name, string? 
SystemPrompt)> + { + ("alpha", "Alpha v2."), + ("beta", "Beta v2.") + }; + SquadWriter.WritePreset(_tempDir, preset, twoWorkers); + + Assert.True(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "alpha"))); + Assert.True(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "beta"))); + Assert.False(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "gamma"))); + } + + private static GroupPreset MakePreset(string name) => new( + name, "Test", "πŸ§ͺ", MultiAgentMode.OrchestratorReflect, + "claude-opus-4.6", new[] { "gpt-5", "claude-sonnet-4.5" }); +} diff --git a/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/agents/dev/charter.md b/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/agents/dev/charter.md new file mode 100644 index 0000000000..9753f3475a --- /dev/null +++ b/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/agents/dev/charter.md @@ -0,0 +1 @@ +You are a full-stack developer. diff --git a/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/team.md b/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/team.md new file mode 100644 index 0000000000..5111c37631 --- /dev/null +++ b/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/team.md @@ -0,0 +1,4 @@ +# Legacy Team +| Member | Role | +|--------|------| +| dev | Developer | diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/agents/perf-analyst/charter.md b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/perf-analyst/charter.md new file mode 100644 index 0000000000..d4420ee536 --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/perf-analyst/charter.md @@ -0,0 +1,9 @@ +You are a performance analyst focused on runtime efficiency. + +Focus on: +- Latency bottlenecks and hot paths +- Memory allocation patterns and GC pressure +- Database query optimization +- Caching opportunities + +Provide concrete metrics and benchmarks where possible. 
diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/agents/scribe/charter.md b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/scribe/charter.md new file mode 100644 index 0000000000..61d89ce050 --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/scribe/charter.md @@ -0,0 +1 @@ +You are a scribe. Log all decisions and session activity. diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/agents/security-reviewer/charter.md b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/security-reviewer/charter.md new file mode 100644 index 0000000000..0ffd2c207d --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/security-reviewer/charter.md @@ -0,0 +1,9 @@ +You are a security auditor specializing in application security. + +Focus on: +- OWASP Top 10 vulnerabilities +- Authentication and authorization flaws +- Input validation and injection prevention +- Secrets management + +Rate each finding as Critical, High, Medium, or Low severity. diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/decisions.md b/PolyPilot.Tests/TestData/squad-sample/.squad/decisions.md new file mode 100644 index 0000000000..ea4cba81ca --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/decisions.md @@ -0,0 +1,5 @@ +# Team Decisions + +- Always use structured logging with ILogger +- Prefer async/await over blocking calls +- All public APIs must have XML doc comments diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/routing.md b/PolyPilot.Tests/TestData/squad-sample/.squad/routing.md new file mode 100644 index 0000000000..8cba938281 --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/routing.md @@ -0,0 +1,6 @@ +# Work Routing + +| Pattern | Owner | Reason | +|---------|-------|--------| +| `src/auth/**` | security-reviewer | Authentication code | +| `*.perf.cs` | perf-analyst | Performance-related files | diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/team.md 
b/PolyPilot.Tests/TestData/squad-sample/.squad/team.md new file mode 100644 index 0000000000..1e09a03187 --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/team.md @@ -0,0 +1,7 @@ +# The Review Squad + +| Member | Role | Expertise | +|--------|------|-----------| +| security-reviewer | Security Auditor | OWASP, CVE analysis, auth flaws | +| perf-analyst | Performance Analyst | Latency, throughput, memory profiling | +| scribe | Scribe | Session logging and decision capture | diff --git a/PolyPilot/Components/Layout/SessionListItem.razor b/PolyPilot/Components/Layout/SessionListItem.razor index 23021be64d..fa6527eaff 100644 --- a/PolyPilot/Components/Layout/SessionListItem.razor +++ b/PolyPilot/Components/Layout/SessionListItem.razor @@ -47,6 +47,10 @@ else { @Session.Name + @if (Meta?.Role == MultiAgentRole.Orchestrator) + { + 🎯 + } @if (Session.UnreadCount > 0) { @Session.UnreadCount @@ -55,7 +59,11 @@
- @Session.MessageCount msgs@(Session.Model != "resumed" ? $" β€’ {Session.Model}" : "") + @Session.MessageCount msgs@(Session.Model != "resumed" ? $" β€’ {CopilotService.GetEffectiveModel(Session.Name)}" : "") + @if (Meta?.PreferredModel != null && Meta.PreferredModel != Session.Model) + { + ⚑ + } @if (Session.IsProcessing) { β€’ @GetShortProcessingStatus() @@ -119,6 +127,52 @@ }
} + @{ + var currentGroup = Groups?.FirstOrDefault(g => g.Id == Meta?.GroupId); + } + @if (currentGroup is { IsMultiAgent: true }) + { + + @if (Meta?.Role == MultiAgentRole.Orchestrator) + { + + } + else + { + + } + + 🧠 Model +
+ + @{ + var effectiveModel = CopilotService.GetEffectiveModel(Session.Name); + var warnings = ModelCapabilities.GetRoleWarnings(effectiveModel, Meta?.Role ?? MultiAgentRole.Worker); + } + @if (warnings.Count > 0) + { + @foreach (var w in warnings) + { +
@w
+ } + } + else + { +
@ModelCapabilities.GetStrengths(effectiveModel)
+ } +
+ } + + @foreach (var wt in RepoManager.Worktrees) + { + var w = wt; + var repo = RepoManager.Repositories.FirstOrDefault(r => r.Id == w.RepoId); + + } + @if (!RepoManager.Worktrees.Any()) + { +
No worktrees available. Add a repository first.
+ } + + } + else if (pendingMultiAgentWorktree != null) + { + @* Step 2: Pick a preset or enter a custom name *@ +
+
+ 🌿 @pendingMultiAgentWorktree.Branch + +
+ @{ + var allPresets = UserPresets.GetAll(CopilotService.BaseDir, pendingMultiAgentWorktree?.Path); + var repoPresets = allPresets.Where(p => p.IsRepoLevel).ToArray(); + var builtInPresets = allPresets.Where(p => !p.IsRepoLevel && !p.IsUserDefined).ToArray(); + var userPresets = allPresets.Where(p => p.IsUserDefined).ToArray(); + } + @if (repoPresets.Any()) + { +
📂 From Repo
+ @foreach (var preset in repoPresets) + { + var p = preset; + + } + } + @if (builtInPresets.Any()) + { + @if (repoPresets.Any()) + { +
⚙️ Built-in
+ } + @foreach (var preset in builtInPresets) + { + var p = preset; + + } + } + @if (userPresets.Any()) + { +
👤 My Presets
+ @foreach (var preset in userPresets) + { + var p = preset; + + } + } +
or create empty team:
+ +
+ } else {
+ - +
} @@ -280,6 +377,18 @@ else { βŒ₯ } + @if (group.IsMultiAgent) + { + πŸ€– + } + @if (!string.IsNullOrEmpty(group.WorktreeId)) + { + var groupWt = RepoManager.Worktrees.FirstOrDefault(w => w.Id == group.WorktreeId); + if (groupWt != null) + { + 🌿 @groupWt.Branch + } + } @group.Name @groupSessions.Count @if (group.IsCollapsed && groupSessions.Any(s => s.IsProcessing)) @@ -317,14 +426,27 @@ else βž• New Session
+ @if (group.IsMultiAgent) + { + + } } else { + @if (!group.IsMultiAgent) + { + +
+ } } @@ -333,6 +455,73 @@ else } } + + @if (group.IsMultiAgent && !group.IsCollapsed) + { + var maGroupId = group.Id; + + } } @if (!group.IsCollapsed || !showGroupHeaders) @@ -441,6 +630,7 @@ else private string? resumeError = null; private string currentPage = "/"; private bool isAddingGroup = false; + private bool isAddingMultiAgentGroup = false; private string? openMenuSession = null; private string? openGroupMenuId = null; private CreateSessionForm? createSessionFormRef; @@ -501,6 +691,7 @@ else CopilotService.OnStateChanged += RefreshSessions; CopilotService.OnSessionComplete += HandleSessionComplete; CopilotService.OnUsageInfoChanged += HandleUsageInfoChanged; + CopilotService.OnOrchestratorPhaseChanged += HandleOrchestratorPhaseChanged; RepoManager.OnStateChanged += OnRepoStateChanged; RepoManager.Load(); RefreshSessions(); @@ -550,6 +741,15 @@ else }); } + private void HandleOrchestratorPhaseChanged(string groupId, OrchestratorPhase phase, string? detail) + { + if (phase == OrchestratorPhase.Complete) + _groupPhases.Remove(groupId); + else + _groupPhases[groupId] = (phase, detail); + InvokeAsync(StateHasChanged); + } + protected override async Task OnAfterRenderAsync(bool firstRender) { if (firstRender) @@ -615,6 +815,30 @@ else if (showPersistedSessions) LoadPersistedSessions(); } + private async Task HandleCreateGroup(GroupPreset preset) + { + if (isCreating) return; + isCreating = true; + createError = null; + try + { + var group = await CopilotService.CreateGroupFromPresetAsync(preset); + if (group != null) + { + CopilotService.ToggleGroupCollapsed(group.Id); + CopilotService.SaveUiState(currentPage); + } + } + catch (Exception ex) + { + createError = ex.Message; + } + finally + { + isCreating = false; + } + } + private async Task HandleCreateSession((string Name, string Model, string Directory, string? WorktreeId, string? 
InitialPrompt) args) { if (isCreating) return; @@ -825,22 +1049,169 @@ else private void StartAddGroup() { isAddingGroup = true; + isAddingMultiAgentGroup = false; + } + + private void StartAddMultiAgentGroup() + { + isAddingGroup = false; + isAddingMultiAgentGroup = true; + pendingMultiAgentWorktree = null; + } + + private void CancelMultiAgentCreation() + { + isAddingMultiAgentGroup = false; + pendingMultiAgentWorktree = null; + } + + private async Task SelectWorktreeForGroup(WorktreeInfo wt) + { + // Worktree selected β€” advance to step 2 (presets + custom name) + pendingMultiAgentWorktree = wt; + isAddingMultiAgentGroup = false; + isAddingGroup = false; + StateHasChanged(); + } + + private WorktreeInfo? pendingMultiAgentWorktree; + + private async Task CreateFromPresetWithWorktree(GroupPreset preset, WorktreeInfo wt) + { + pendingMultiAgentWorktree = null; + StateHasChanged(); + try + { + await CopilotService.CreateGroupFromPresetAsync(preset, + workingDirectory: wt.Path, + worktreeId: wt.Id, + repoId: wt.RepoId); + } + catch (Exception ex) + { + Console.WriteLine($"Failed to create from preset: {ex.Message}"); + } + } + + private static string ShortenPath(string path) + { + if (string.IsNullOrEmpty(path)) return ""; + // Show last 2 segments + var sep = Path.DirectorySeparatorChar; + var parts = path.TrimEnd(sep).Split(sep); + return parts.Length <= 2 ? 
path : "…" + sep + string.Join(sep, parts[^2..]); + } + + private void PromptSaveAsPreset(string groupId) + { + var group = CopilotService.Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group == null) return; + + // Save with the group's current name and a generated description + var members = CopilotService.GetMultiAgentGroupMembers(groupId); + var desc = $"{group.OrchestratorMode} group with {members.Count} agent(s)"; + var preset = CopilotService.SaveGroupAsPreset(groupId, group.Name, desc, "⭐"); + if (preset != null) + { + var orchestratorName = CopilotService.GetOrchestratorSession(groupId); + if (orchestratorName != null) + { + var session = CopilotService.GetSession(orchestratorName); + if (session != null) + session.History.Add(ChatMessage.SystemMessage($"πŸ’Ύ Group saved as preset: \"{preset.Name}\" (🎯 {preset.OrchestratorModel} + πŸ‘· {string.Join(", ", preset.WorkerModels)})")); + } + } + StateHasChanged(); + } + + private void OnSidebarMultiAgentModeChanged(string groupId, ChangeEventArgs e) + { + if (e.Value is string val && Enum.TryParse(val, out var mode)) + { + CopilotService.SetMultiAgentMode(groupId, mode); + } + } + + private static string GetInputPlaceholder(MultiAgentMode mode) => mode switch + { + MultiAgentMode.Broadcast => "Send to all agents...", + MultiAgentMode.Sequential => "Send to agents in sequence...", + MultiAgentMode.Orchestrator => "Describe task for orchestrator...", + MultiAgentMode.OrchestratorReflect => "Describe goal for reflection cycle...", + _ => "Enter prompt..." 
+ }; + + private Dictionary _groupMaxIterations = new(); + + private int GetGroupMaxIterations(string groupId) + { + if (_groupMaxIterations.TryGetValue(groupId, out var val)) return val; + return 5; + } + + private void SetGroupMaxIterations(string groupId, ChangeEventArgs e) + { + if (int.TryParse(e.Value?.ToString(), out var val) && val >= 1) + _groupMaxIterations[groupId] = val; + } + + private async Task SendToSidebarMultiAgentGroup(string groupId) + { + var inputId = $"sidebar-ma-input-{groupId}"; + var prompt = await JS.InvokeAsync("getElementValue", inputId); + if (string.IsNullOrWhiteSpace(prompt)) return; + + await JS.InvokeVoidAsync("clearElementValue", inputId); + + // Auto-start reflection cycle when in Reflect mode + var group = CopilotService.Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group?.OrchestratorMode == MultiAgentMode.OrchestratorReflect) + { + var maxIter = GetGroupMaxIterations(groupId); + CopilotService.StartGroupReflection(groupId, prompt.Trim(), maxIter); + } + + try + { + _ = CopilotService.SendToMultiAgentGroupAsync(groupId, prompt.Trim()).ContinueWith(t => + { + if (t.IsFaulted) + InvokeAsync(() => Console.WriteLine($"Error sending to multi-agent group: {t.Exception?.InnerException?.Message}")); + }); + } + catch (Exception ex) + { + Console.WriteLine($"Error sending to multi-agent group: {ex.Message}"); + } } private async Task CommitNewGroup() { var name = await JS.InvokeAsync("getElementValue", "newGroupInput"); + var wt = pendingMultiAgentWorktree; isAddingGroup = false; + isAddingMultiAgentGroup = false; + pendingMultiAgentWorktree = null; if (!string.IsNullOrWhiteSpace(name)) { - CopilotService.CreateGroup(name.Trim()); + if (wt != null) + { + // Multi-agent group with worktree + CopilotService.CreateMultiAgentGroup(name.Trim(), + worktreeId: wt.Id, + repoId: wt.RepoId); + } + else + { + CopilotService.CreateGroup(name.Trim()); + } } } private async Task HandleNewGroupKeyDown(KeyboardEventArgs e) { if (e.Key 
== "Enter") await CommitNewGroup(); - else if (e.Key == "Escape") isAddingGroup = false; + else if (e.Key == "Escape") { isAddingGroup = false; isAddingMultiAgentGroup = false; pendingMultiAgentWorktree = null; } } private void ToggleSessionMenu(string sessionName) @@ -1376,6 +1747,7 @@ Important conventions: CopilotService.OnStateChanged -= RefreshSessions; CopilotService.OnSessionComplete -= HandleSessionComplete; CopilotService.OnUsageInfoChanged -= HandleUsageInfoChanged; + CopilotService.OnOrchestratorPhaseChanged -= HandleOrchestratorPhaseChanged; RepoManager.OnStateChanged -= OnRepoStateChanged; } } diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index 473a6aea4d..8b1a233668 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -167,6 +167,7 @@ /* Sort/group toolbar */ .sidebar-toolbar { display: flex; + flex-wrap: wrap; align-items: center; gap: 0.1rem; padding: 0.25rem 0.5rem; @@ -202,10 +203,11 @@ gap: 0.1rem; margin-left: auto; flex-shrink: 0; + overflow: hidden; } .new-group-input { - margin-left: auto; + flex-basis: 100%; font-size: var(--type-caption1); color: var(--text-primary); background: var(--control-border); @@ -249,6 +251,18 @@ .group-count::before { content: "("; } .group-count::after { content: ")"; } +.group-worktree-badge { + font-size: var(--type-caption2); + color: var(--text-dim); + background: var(--bg-tertiary); + padding: 0 0.25rem; + border-radius: 3px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 100px; +} + .group-busy-dot { width: 6px; height: 6px; @@ -362,6 +376,151 @@ margin-right: 0.15rem; } +/* Sidebar multi-agent controls */ +.sidebar-ma-controls { + display: flex; + flex-direction: column; + gap: 0.3rem; + padding: 0.3rem 0.75rem 0.4rem; + background: rgba(59, 130, 246, 0.05); + border-bottom: 1px solid var(--control-border); +} + +/* 
Orchestrator phase indicator */ +.phase-indicator { + font-size: 0.75rem; + color: var(--accent-primary); + animation: phase-pulse 2s ease-in-out infinite; +} + +@keyframes phase-pulse { + 0%, 100% { opacity: 0.7; } + 50% { opacity: 1; } +} + +.group-reflect-status { + display: flex; + align-items: center; + gap: 0.4rem; + padding: 0.25rem 0; + font-size: var(--type-footnote); + color: var(--text-dim); +} + +.reflect-iter { + font-weight: 600; + color: var(--accent-primary); + white-space: nowrap; +} + +.reflect-goal { + flex: 1; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.reflect-stop-btn, .reflect-pause-btn { + all: unset; + cursor: pointer; + padding: 0.15rem 0.3rem; + border-radius: 4px; + font-size: 0.7rem; +} +.reflect-stop-btn:hover, .reflect-pause-btn:hover { + background: var(--control-bg); +} + +.sidebar-ma-mode-select { + width: 100%; + padding: 0.3rem 0.4rem; + border: 1px solid var(--control-border); + border-radius: 5px; + background: var(--control-bg); + color: var(--text-primary); + font-size: var(--type-footnote); + -webkit-appearance: none; + appearance: none; + background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='10' viewBox='0 0 24 24' fill='none' stroke='%23a0b4cc' stroke-width='2.5' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'/%3E%3C/svg%3E"); + background-repeat: no-repeat; + background-position: right 0.35rem center; +} + +.sidebar-ma-mode-select option { + background: var(--bg-primary); + color: var(--text-primary); +} + +.reflect-iterations-bar { + display: flex; + align-items: center; + gap: 0.3rem; + padding: 0.15rem 0; +} + +.reflect-iter-label { + font-size: var(--type-caption1); + color: var(--text-dim); + white-space: nowrap; +} + +.reflect-iter-input { + width: 50px; + padding: 0.15rem 0.3rem; + border: 1px solid var(--control-border); + border-radius: 4px; + background: var(--control-bg); + color: 
var(--text-primary); + font-size: var(--type-footnote); + text-align: center; +} + +.sidebar-ma-input-bar { + display: flex; + gap: 0.25rem; + align-items: flex-end; +} + +.sidebar-ma-input { + flex: 1; + min-width: 0; + padding: 0.25rem 0.4rem; + border: 1px solid var(--control-border); + border-radius: 5px; + background: var(--control-bg); + color: var(--text-primary); + font-size: var(--type-caption1); + font-family: inherit; + resize: vertical; + max-height: 80px; + box-sizing: border-box; +} + +.sidebar-ma-input::placeholder { + color: var(--text-dim); +} + +.sidebar-ma-input:focus { + outline: none; + border-color: var(--accent-primary); +} + +.sidebar-ma-send-btn { + all: unset; + padding: 0.25rem 0.4rem; + border-radius: 5px; + background: var(--accent-primary); + color: #fff; + cursor: pointer; + font-size: var(--type-caption1); + line-height: 1; + flex-shrink: 0; +} + +.sidebar-ma-send-btn:hover { + filter: brightness(1.15); +} + .group-worktree-btn { all: unset; font-size: var(--type-callout); @@ -1288,3 +1447,176 @@ } .bug-report-submit:hover { opacity: 0.85; } .bug-report-submit:disabled { opacity: 0.5; cursor: not-allowed; } + +/* === Preset & Worktree Picker === */ +.worktree-picker { + display: flex; + flex-direction: column; + flex-basis: 100%; + gap: 0.1rem; + padding: 0.3rem; + background: var(--bg-tertiary); + border: 1px solid var(--control-border); + border-radius: 6px; + margin-top: 0.2rem; +} + +.worktree-picker-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 0.15rem 0.3rem; + font-size: var(--type-footnote); + color: var(--text-dim); + font-weight: 600; +} + +.worktree-cancel-btn { + all: unset; + cursor: pointer; + font-size: 0.7rem; + color: var(--text-dim); + padding: 0.1rem 0.3rem; + border-radius: 3px; +} +.worktree-cancel-btn:hover { background: var(--control-bg); color: var(--text-primary); } + +.worktree-item { + all: unset; + display: flex; + flex-direction: column; + gap: 0.05rem; + 
padding: 0.25rem 0.4rem; + border-radius: 4px; + cursor: pointer; + font-size: var(--type-footnote); +} +.worktree-item:hover { background: var(--control-bg); } + +.worktree-branch { + color: var(--text-primary); + font-weight: 500; +} + +.worktree-path { + font-size: var(--type-caption1); + color: var(--text-dim); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.worktree-empty { + padding: 0.4rem; + font-size: var(--type-footnote); + color: var(--text-dim); + text-align: center; +} + +.preset-item { + all: unset; + display: flex; + align-items: center; + gap: 0.4rem; + padding: 0.25rem 0.4rem; + border-radius: 4px; + cursor: pointer; + font-size: var(--type-footnote); +} +.preset-item:hover { background: var(--control-bg); } + +.preset-emoji { font-size: 0.9rem; flex-shrink: 0; } + +.preset-name { + color: var(--text-primary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.preset-section-header { + font-size: var(--type-caption2); + color: var(--text-muted); + padding: 0.15rem 0.4rem; + font-weight: 600; + letter-spacing: 0.02em; +} + +.preset-repo { + border-left: 2px solid var(--accent-blue, #58a6ff); + padding-left: 0.3rem; +} + +.preset-badge { + font-size: 0.65rem; + flex-shrink: 0; + opacity: 0.7; +} + +.preset-divider { + font-size: var(--type-caption2); + color: var(--text-muted); + text-align: center; + padding: 0.2rem 0; + border-top: 1px solid var(--control-border); + margin-top: 0.1rem; +} + +.preset-models { + font-size: var(--type-caption1); + color: var(--text-dim); + opacity: 0.8; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +/* === Group Diagnostics === */ +.group-diagnostics { + display: flex; + flex-direction: column; + gap: 0.15rem; + padding: 0.2rem 0; +} + +.group-diagnostics span { + font-size: var(--type-caption1); + line-height: 1.3; +} + +.diag-error { color: var(--accent-primary); } +.diag-warning { color: #fbbf24; } +.diag-info { color: 
var(--text-dim); } + +.save-preset-btn { + all: unset; + font-size: var(--type-caption1); + color: var(--text-dim); + cursor: pointer; + padding: 0.2rem 0; + opacity: 0.7; +} +.save-preset-btn:hover { opacity: 1; color: var(--accent-primary); } + +.reflect-score { + font-size: var(--type-caption1); + color: #4ade80; + white-space: nowrap; +} + +.adjust-banner { + display: flex; + flex-direction: column; + gap: 0.1rem; + padding: 0.2rem 0.4rem; + margin: 0.1rem 0; + background: rgba(251, 191, 36, 0.1); + border-left: 2px solid #fbbf24; + border-radius: 0.2rem; + font-size: var(--type-caption1); + color: var(--text-secondary); +} + +.adjust-banner span { + line-height: 1.3; +} diff --git a/PolyPilot/Components/ModelSelector.razor b/PolyPilot/Components/ModelSelector.razor index 431277f392..8b2a83f0f6 100644 --- a/PolyPilot/Components/ModelSelector.razor +++ b/PolyPilot/Components/ModelSelector.razor @@ -11,7 +11,7 @@ @if (isOpen) { -
+
@foreach (var model in Models) { var info = GetDisplayInfo(model); diff --git a/PolyPilot/Components/ModelSelector.razor.css b/PolyPilot/Components/ModelSelector.razor.css index 12804df67b..650cbf9654 100644 --- a/PolyPilot/Components/ModelSelector.razor.css +++ b/PolyPilot/Components/ModelSelector.razor.css @@ -107,8 +107,21 @@ font-size: var(--type-callout); } + .model-selector-dropdown { + position: fixed; + top: auto; + bottom: auto; + left: 50%; + transform: translateX(-50%); + width: 90vw; + max-width: 280px; + max-height: 50vh; + z-index: 2000; + } + .model-option { - padding: 0.35rem 0.5rem; + padding: 0.5rem 0.6rem; font-size: var(--type-callout); + min-height: 44px; } } diff --git a/PolyPilot/Components/Pages/Dashboard.razor b/PolyPilot/Components/Pages/Dashboard.razor index 87d442958e..7e4a08df92 100644 --- a/PolyPilot/Components/Pages/Dashboard.razor +++ b/PolyPilot/Components/Pages/Dashboard.razor @@ -65,6 +65,46 @@ } else if (expandedSession != null) { + var expandedMeta = CopilotService.Organization.Sessions.FirstOrDefault(m => m.SessionName == expandedSession); + var expandedGroup = expandedMeta != null ? CopilotService.Organization.Groups.FirstOrDefault(g => g.Id == expandedMeta.GroupId && g.IsMultiAgent) : null; + @if (expandedGroup != null) + { + var grpProgress = CopilotService.GetMultiAgentProgress(expandedGroup.Id); + var expandedGroupId = expandedGroup.Id; +
+ πŸ€– @expandedGroup.Name + + @if (expandedGroup.OrchestratorMode == MultiAgentMode.OrchestratorReflect) + { + + + + + } + @if (grpProgress.Processing > 0) + { + @grpProgress.Completed/@grpProgress.Total done + } + @if (_groupPhases.TryGetValue(expandedGroupId, out var expPhase)) + { + @PhaseLabel(expPhase.Phase, expPhase.Detail) + } + + +
+ } @* Keep-alive: render all active sessions, JS owns 'active' class for instant switching *@ @foreach (var session in sessions) { @@ -134,9 +174,17 @@ @onblur="CommitDashGroup" @onkeydown="HandleDashGroupKeyDown" /> } + else if (isAddingMultiAgentGroup) + { + + } else { + } +
+ }
@foreach (var session in groupSessions) { @@ -253,6 +349,7 @@ private string? cardMenuSession; private string? cardRenamingSession; private bool isAddingDashGroup; + private bool isAddingMultiAgentGroup; private string? _focusedInputId; private string? _lastActiveSession; private int _cursorStart; @@ -261,6 +358,7 @@ private string? initError; private bool _initializationComplete = false; private readonly Dictionary _fiestaStreamingMessages = new(StringComparer.Ordinal); + private Dictionary _groupPhases = new(); protected override async Task OnInitializedAsync() { @@ -286,6 +384,7 @@ CopilotService.OnError += HandleError; CopilotService.OnTurnStart += HandleTurnStart; CopilotService.OnTurnEnd += HandleTurnEnd; + CopilotService.OnOrchestratorPhaseChanged += HandleOrchestratorPhaseChanged; FiestaService.OnStateChanged += HandleFiestaStateChanged; FiestaService.OnHostTaskUpdate += HandleFiestaTaskUpdate; @@ -983,6 +1082,24 @@ ScheduleRender(); } + private void HandleOrchestratorPhaseChanged(string groupId, OrchestratorPhase phase, string? detail) + { + if (phase == OrchestratorPhase.Complete) + _groupPhases.Remove(groupId); + else + _groupPhases[groupId] = (phase, detail); + ScheduleRender(); + } + + private static string PhaseLabel(OrchestratorPhase phase, string? detail) => phase switch + { + OrchestratorPhase.Planning => "🎯 Planning...", + OrchestratorPhase.Dispatching => "πŸ“‘ Dispatching..." + (detail != null ? 
$" {detail}" : ""), + OrchestratorPhase.WaitingForWorkers => "⏳ Waiting for workers...", + OrchestratorPhase.Synthesizing => "πŸ”„ Synthesizing...", + _ => "" + }; + private void DismissError(string sessionName) { errorBySession.Remove(sessionName); @@ -2068,6 +2185,119 @@ else if (e.Key == "Escape") isAddingDashGroup = false; } + private async Task CommitMultiAgentGroup() + { + var name = await JS.InvokeAsync("getElementValue", "dashNewMultiAgentGroupInput"); + isAddingMultiAgentGroup = false; + if (!string.IsNullOrWhiteSpace(name)) + { + CopilotService.CreateMultiAgentGroup(name.Trim()); + } + } + + private async Task HandleMultiAgentGroupKeyDown(KeyboardEventArgs e) + { + if (e.Key == "Enter") await CommitMultiAgentGroup(); + else if (e.Key == "Escape") isAddingMultiAgentGroup = false; + } + + private void OnMultiAgentModeChanged(string groupId, ChangeEventArgs e) + { + if (e.Value is string val && Enum.TryParse(val, out var mode)) + { + CopilotService.SetMultiAgentMode(groupId, mode); + } + } + + private static string GetInputPlaceholder(MultiAgentMode mode) => mode switch + { + MultiAgentMode.Broadcast => "Send to all agents...", + MultiAgentMode.Sequential => "Send to agents in sequence...", + MultiAgentMode.Orchestrator => "Describe task for orchestrator...", + MultiAgentMode.OrchestratorReflect => "Describe goal for reflection cycle...", + _ => "Enter prompt..." 
+ }; + + private void AutoStartReflectionIfNeeded(string groupId, string prompt) + { + var group = CopilotService.Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group?.OrchestratorMode == MultiAgentMode.OrchestratorReflect) + { + var maxIter = GetMaxIterations(groupId); + CopilotService.StartGroupReflection(groupId, prompt, maxIter); + } + } + + private Dictionary _groupMaxIterations = new(); + + private int GetMaxIterations(string groupId) + { + if (_groupMaxIterations.TryGetValue(groupId, out var val)) return val; + return 5; + } + + private void SetMaxIterations(string groupId, ChangeEventArgs e) + { + if (int.TryParse(e.Value?.ToString(), out var val) && val >= 1) + _groupMaxIterations[groupId] = val; + } + + private async Task SendToMultiAgentGroup(string groupId) + { + var inputId = $"ma-input-{groupId}"; + var prompt = await JS.InvokeAsync("getElementValue", inputId); + if (string.IsNullOrWhiteSpace(prompt)) return; + + await JS.InvokeVoidAsync("clearElementValue", inputId); + AutoStartReflectionIfNeeded(groupId, prompt.Trim()); + + try + { + _ = CopilotService.SendToMultiAgentGroupAsync(groupId, prompt.Trim()).ContinueWith(t => + { + if (t.IsFaulted) + { + InvokeAsync(() => + { + Console.WriteLine($"Error sending to multi-agent group: {t.Exception?.InnerException?.Message}"); + }); + } + }); + } + catch (Exception ex) + { + Console.WriteLine($"Error sending to multi-agent group: {ex.Message}"); + } + } + + private async Task SendToExpandedMultiAgentGroup(string groupId) + { + var inputId = $"ma-input-expanded-{groupId}"; + var prompt = await JS.InvokeAsync("getElementValue", inputId); + if (string.IsNullOrWhiteSpace(prompt)) return; + + await JS.InvokeVoidAsync("clearElementValue", inputId); + AutoStartReflectionIfNeeded(groupId, prompt.Trim()); + + try + { + _ = CopilotService.SendToMultiAgentGroupAsync(groupId, prompt.Trim()).ContinueWith(t => + { + if (t.IsFaulted) + { + InvokeAsync(() => + { + Console.WriteLine($"Error sending to 
multi-agent group: {t.Exception?.InnerException?.Message}"); + }); + } + }); + } + catch (Exception ex) + { + Console.WriteLine($"Error sending to multi-agent group: {ex.Message}"); + } + } + private async Task SaveDraftsAndCursor() { var json = await JS.InvokeAsync("eval", @" @@ -2513,6 +2743,7 @@ CopilotService.OnError -= HandleError; CopilotService.OnTurnStart -= HandleTurnStart; CopilotService.OnTurnEnd -= HandleTurnEnd; + CopilotService.OnOrchestratorPhaseChanged -= HandleOrchestratorPhaseChanged; FiestaService.OnStateChanged -= HandleFiestaStateChanged; FiestaService.OnHostTaskUpdate -= HandleFiestaTaskUpdate; _renderTimer?.Dispose(); diff --git a/PolyPilot/Components/Pages/Dashboard.razor.css b/PolyPilot/Components/Pages/Dashboard.razor.css index 0304364e3f..e91f424fdf 100644 --- a/PolyPilot/Components/Pages/Dashboard.razor.css +++ b/PolyPilot/Components/Pages/Dashboard.razor.css @@ -391,6 +391,118 @@ color: var(--text-muted); } +/* Multi-agent group styles */ +.group-divider.multi-agent-group { + border-bottom-color: rgba(99, 102, 241, 0.3); + background: linear-gradient(90deg, rgba(99, 102, 241, 0.05) 0%, transparent 100%); +} +.group-divider.multi-agent-group:hover { + border-bottom-color: rgba(99, 102, 241, 0.5); +} +.group-divider-badge { + font-size: var(--type-callout); + flex-shrink: 0; +} +.group-divider-mode { + flex-shrink: 0; +} +.ma-mode-select { + background: var(--control-bg); + color: var(--text-primary); + border: 1px solid var(--border-color); + border-radius: 4px; + font-size: var(--type-footnote); + padding: 0.2rem 0.4rem; + cursor: pointer; + min-width: 120px; +} + +.reflect-iter-inline { + display: flex; + align-items: center; + gap: 0.25rem; + font-size: var(--type-caption1); + color: var(--text-dim); + white-space: nowrap; +} + +.reflect-iter-input { + width: 50px; + padding: 0.15rem 0.3rem; + border: 1px solid var(--border-color); + border-radius: 4px; + background: var(--control-bg); + color: var(--text-primary); + font-size: 
var(--type-footnote); + text-align: center; +} + +.group-divider-progress { + font-size: var(--type-caption1); + color: rgba(99, 102, 241, 0.8); + font-weight: 500; +} + +/* Orchestrator phase indicator */ +.phase-indicator { + font-size: 0.75rem; + color: var(--accent-primary); + white-space: nowrap; + animation: phase-pulse 2s ease-in-out infinite; +} + +@keyframes phase-pulse { + 0%, 100% { opacity: 0.7; } + 50% { opacity: 1; } +} +.multi-agent-input-bar { + display: flex; + align-items: flex-end; + gap: 0.5rem; + padding: 0.5rem 0.25rem; + margin-bottom: 0.5rem; +} +.ma-broadcast-input { + flex: 1; + background: var(--control-bg); + color: var(--text-primary); + border: 1px solid var(--border-color); + border-radius: 8px; + padding: 0.5rem 0.75rem; + font-size: var(--type-body); + font-family: inherit; + resize: none; + min-height: 2.2rem; + line-height: 1.4; +} +.ma-broadcast-input:focus { + outline: none; + border-color: rgba(99, 102, 241, 0.6); + box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.15); +} +.ma-broadcast-input::placeholder { + color: var(--text-muted); +} +.ma-send-btn { + background: rgba(99, 102, 241, 0.15); + color: rgba(99, 102, 241, 0.9); + border: 1px solid rgba(99, 102, 241, 0.3); + border-radius: 8px; + padding: 0.45rem 0.85rem; + font-size: var(--type-callout); + font-weight: 500; + cursor: pointer; + white-space: nowrap; + transition: all 0.15s ease; +} +.ma-send-btn:hover { + background: rgba(99, 102, 241, 0.25); + border-color: rgba(99, 102, 241, 0.5); +} +.multi-agent-btn { + color: rgba(99, 102, 241, 0.9) !important; +} + /* Pinned card styles */ .session-card.pinned { border-color: rgba(251,191,36,0.25); @@ -1275,3 +1387,49 @@ height: 100%; width: 100%; } + +/* Multi-agent expanded toolbar */ +.ma-expanded-toolbar { + display: flex; + align-items: center; + gap: 0.5rem; + padding: 0.35rem 0.75rem; + height: 40px; + position: sticky; + top: 0; + z-index: 10; + background: rgba(99, 102, 241, 0.08); + border-bottom: 1px solid rgba(99, 102, 
241, 0.2); + flex-shrink: 0; +} +.ma-expanded-toolbar-label { + font-size: var(--type-callout); + font-weight: 600; + color: rgba(99, 102, 241, 0.9); + white-space: nowrap; +} +.ma-expanded-toolbar-progress { + font-size: var(--type-caption1); + color: rgba(99, 102, 241, 0.8); + font-weight: 500; + white-space: nowrap; +} +.ma-expanded-toolbar-input { + flex: 1; + background: var(--control-bg); + color: var(--text-primary); + border: 1px solid var(--border-color); + border-radius: 6px; + padding: 0.25rem 0.5rem; + font-size: var(--type-callout); + font-family: inherit; + min-width: 0; +} +.ma-expanded-toolbar-input:focus { + outline: none; + border-color: rgba(99, 102, 241, 0.6); + box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.15); +} +.ma-expanded-toolbar-input::placeholder { + color: var(--text-muted); +} diff --git a/PolyPilot/Components/SessionCard.razor.css b/PolyPilot/Components/SessionCard.razor.css index f7f3a16833..657b232bbb 100644 --- a/PolyPilot/Components/SessionCard.razor.css +++ b/PolyPilot/Components/SessionCard.razor.css @@ -473,6 +473,19 @@ background: var(--hover-bg); } +.card-role-badge { + font-size: var(--type-caption1, 0.7rem); + flex-shrink: 0; + opacity: 0.85; +} + +.card-role-badge.orchestrator { + color: rgba(99, 102, 241, 0.9); +} + +.card-role-badge.worker { + color: var(--text-dim); +} .unread-badge { display: inline-flex; diff --git a/PolyPilot/Models/BridgeMessages.cs b/PolyPilot/Models/BridgeMessages.cs index 54359f32d0..4d5d732afc 100644 --- a/PolyPilot/Models/BridgeMessages.cs +++ b/PolyPilot/Models/BridgeMessages.cs @@ -81,6 +81,9 @@ public static class BridgeMessageTypes public const string AbortSession = "abort_session"; public const string OrganizationCommand = "organization_command"; public const string ListDirectories = "list_directories"; + public const string MultiAgentBroadcast = "multi_agent_broadcast"; + public const string MultiAgentCreateGroup = "multi_agent_create_group"; + public const string MultiAgentSetRole = 
"multi_agent_set_role"; public const string ChangeModel = "change_model"; public const string RenameSession = "rename_session"; @@ -97,6 +100,7 @@ public static class BridgeMessageTypes // Server β†’ Client (response) public const string DirectoriesList = "directories_list"; + public const string MultiAgentProgress = "multi_agent_progress"; // Client β†’ Server (image fetch) public const string FetchImage = "fetch_image"; @@ -331,6 +335,37 @@ public class AttentionNeededPayload public string Summary { get; set; } = ""; } +// --- Multi-agent orchestration payloads --- + +public class MultiAgentBroadcastPayload +{ + public string GroupId { get; set; } = ""; + public string Message { get; set; } = ""; +} + +public class MultiAgentCreateGroupPayload +{ + public string Name { get; set; } = ""; + public string Mode { get; set; } = "Broadcast"; + public string? OrchestratorPrompt { get; set; } + public List? SessionNames { get; set; } +} + +public class MultiAgentProgressPayload +{ + public string GroupId { get; set; } = ""; + public int TotalSessions { get; set; } + public int CompletedSessions { get; set; } + public int ProcessingSessions { get; set; } + public List CompletedSessionNames { get; set; } = new(); +} + +public class MultiAgentSetRolePayload +{ + public string SessionName { get; set; } = ""; + public string Role { get; set; } = "Worker"; +} + // --- Fiesta payloads --- public class FiestaAssignPayload diff --git a/PolyPilot/Models/ModelCapabilities.cs b/PolyPilot/Models/ModelCapabilities.cs new file mode 100644 index 0000000000..98fcdaf294 --- /dev/null +++ b/PolyPilot/Models/ModelCapabilities.cs @@ -0,0 +1,364 @@ +namespace PolyPilot.Models; + +/// +/// Lightweight model capability flags for multi-agent role assignment warnings. +/// No external API calls β€” purely static metadata based on known model families. 
+/// +[Flags] +public enum ModelCapability +{ + None = 0, + CodeExpert = 1 << 0, + ReasoningExpert = 1 << 1, + Fast = 1 << 2, + CostEfficient = 1 << 3, + ToolUse = 1 << 4, + Vision = 1 << 5, + LargeContext = 1 << 6, +} + +/// +/// Static registry of model capabilities for UX warnings during agent assignment. +/// +public static class ModelCapabilities +{ + private static readonly Dictionary _registry = new(StringComparer.OrdinalIgnoreCase) + { + // Anthropic + ["claude-opus-4.6"] = (ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.LargeContext, "Best reasoning, complex orchestration"), + ["claude-opus-4.5"] = (ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.LargeContext, "Deep reasoning, creative coding"), + ["claude-sonnet-4.5"] = (ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast, "Fast coding, good balance"), + ["claude-sonnet-4"] = (ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast, "Fast coding, good balance"), + ["claude-haiku-4.5"] = (ModelCapability.Fast | ModelCapability.CostEfficient | ModelCapability.ToolUse, "Quick tasks, cost-efficient"), + + // OpenAI + ["gpt-5"] = (ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.LargeContext, "Strong reasoning and coding"), + ["gpt-5.1"] = (ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.LargeContext, "Strong reasoning and coding"), + ["gpt-5.1-codex"] = (ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast, "Optimized for code generation"), + ["gpt-5.1-codex-mini"] = (ModelCapability.CodeExpert | ModelCapability.Fast | ModelCapability.CostEfficient, "Fast code, cost-efficient"), + ["gpt-4.1"] = (ModelCapability.Fast | ModelCapability.CostEfficient | ModelCapability.ToolUse, "Fast and cheap, good for evaluation"), + 
["gpt-5-mini"] = (ModelCapability.Fast | ModelCapability.CostEfficient, "Quick tasks, budget-friendly"), + + // Google + ["gemini-3-pro"] = (ModelCapability.ReasoningExpert | ModelCapability.LargeContext | ModelCapability.Vision, "Strong reasoning, large context, multimodal"), + ["gemini-3-pro-preview"] = (ModelCapability.ReasoningExpert | ModelCapability.LargeContext | ModelCapability.Vision, "Strong reasoning, large context, multimodal"), + }; + + /// Get capabilities for a model. Returns None for unknown models. + public static ModelCapability GetCapabilities(string modelSlug) + { + if (string.IsNullOrEmpty(modelSlug)) return ModelCapability.None; + if (_registry.TryGetValue(modelSlug, out var entry)) return entry.Caps; + + // Fuzzy match by prefix + foreach (var (key, val) in _registry) + if (modelSlug.StartsWith(key, StringComparison.OrdinalIgnoreCase) || + key.StartsWith(modelSlug, StringComparison.OrdinalIgnoreCase)) + return val.Caps; + + // Name-pattern inference for new/unknown models + return InferFromName(modelSlug); + } + + /// + /// Infer capabilities from model name patterns for unknown models. + /// Handles new model releases gracefully without registry updates. 
+ /// + internal static ModelCapability InferFromName(string slug) + { + var lower = slug.ToLowerInvariant(); + var caps = ModelCapability.None; + + // Family inference + if (lower.Contains("opus")) caps |= ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse; + else if (lower.Contains("sonnet")) caps |= ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast; + else if (lower.Contains("haiku")) caps |= ModelCapability.Fast | ModelCapability.CostEfficient; + else if (lower.Contains("gemini")) caps |= ModelCapability.ReasoningExpert | ModelCapability.LargeContext | ModelCapability.Vision; + + // Variant inference + if (lower.Contains("codex")) caps |= ModelCapability.CodeExpert; + if (lower.Contains("mini")) caps |= ModelCapability.Fast | ModelCapability.CostEfficient; + if (lower.Contains("max")) caps |= ModelCapability.ReasoningExpert; + + return caps; + } + + /// Get a short description of model strengths. + public static string GetStrengths(string modelSlug) + { + if (_registry.TryGetValue(modelSlug, out var entry)) return entry.Strengths; + + foreach (var (key, val) in _registry) + if (modelSlug.StartsWith(key, StringComparison.OrdinalIgnoreCase) || + key.StartsWith(modelSlug, StringComparison.OrdinalIgnoreCase)) + return val.Strengths; + + // Generate description from inferred capabilities + var inferred = InferFromName(modelSlug); + if (inferred != ModelCapability.None) + { + var parts = new List(); + if (inferred.HasFlag(ModelCapability.ReasoningExpert)) parts.Add("reasoning"); + if (inferred.HasFlag(ModelCapability.CodeExpert)) parts.Add("code"); + if (inferred.HasFlag(ModelCapability.Fast)) parts.Add("fast"); + if (inferred.HasFlag(ModelCapability.CostEfficient)) parts.Add("cost-efficient"); + if (inferred.HasFlag(ModelCapability.Vision)) parts.Add("multimodal"); + if (inferred.HasFlag(ModelCapability.LargeContext)) parts.Add("large context"); + return $"Inferred: {string.Join(", ", parts)}"; + } + + 
return "Unknown model"; + } + + /// + /// Get warnings when assigning a model to a multi-agent role. + /// Returns empty list if no issues detected. + /// + public static List GetRoleWarnings(string modelSlug, MultiAgentRole role) + { + var warnings = new List(); + var caps = GetCapabilities(modelSlug); + + if (caps == ModelCapability.None) + { + warnings.Add($"Unknown model '{modelSlug}' β€” capabilities not verified"); + return warnings; + } + + if (role == MultiAgentRole.Orchestrator) + { + if (!caps.HasFlag(ModelCapability.ReasoningExpert)) + warnings.Add("⚠️ This model may lack strong reasoning for orchestration. Consider claude-opus or gpt-5."); + if (caps.HasFlag(ModelCapability.CostEfficient) && !caps.HasFlag(ModelCapability.ReasoningExpert)) + warnings.Add("πŸ’° Cost-efficient models may produce shallow plans. Best for workers, not orchestrators."); + } + + if (role == MultiAgentRole.Worker) + { + if (!caps.HasFlag(ModelCapability.ToolUse) && !caps.HasFlag(ModelCapability.CodeExpert)) + warnings.Add("⚠️ This model may not support tool use well. Worker tasks may require tool interaction."); + } + + return warnings; + } +} + +/// +/// Pre-configured multi-agent group templates for quick setup. +/// +public record GroupPreset(string Name, string Description, string Emoji, MultiAgentMode Mode, + string OrchestratorModel, string[] WorkerModels) +{ + /// Whether this is a user-created preset (vs built-in). + public bool IsUserDefined { get; init; } + + /// Whether this preset was loaded from a repo-level team definition (.squad/). + public bool IsRepoLevel { get; init; } + + /// Path to the source directory (e.g., ".squad/") for repo-level presets. + public string? SourcePath { get; init; } + + /// + /// Per-worker system prompts, indexed to match WorkerModels. + /// Null or shorter array = remaining workers get generic prompt. + /// + public string?[]? 
WorkerSystemPrompts { get; init; } + + /// + /// Shared context from decisions.md or similar, prepended to all worker prompts. + /// + public string? SharedContext { get; init; } + + /// + /// Routing rules from routing.md, injected into orchestrator planning prompt. + /// + public string? RoutingContext { get; init; } + + public static readonly GroupPreset[] BuiltIn = new[] + { + new GroupPreset( + "Code Review Team", "Opus orchestrates, specialized reviewers execute", + "πŸ”", MultiAgentMode.Orchestrator, + "claude-opus-4.6", new[] { "gpt-5.1-codex", "claude-sonnet-4.5" }) + { + WorkerSystemPrompts = new[] + { + "You are a code correctness reviewer. Focus on logic errors, edge cases, off-by-one bugs, null safety, and incorrect assumptions. Flag anything that could cause runtime failures or data corruption.", + "You are a security and architecture reviewer. Focus on vulnerabilities (injection, auth flaws, data exposure), architectural anti-patterns, and maintainability issues. Suggest concrete fixes." + } + }, + + new GroupPreset( + "Multi-Perspective Analysis", "Different models analyze the same problem", + "πŸ”¬", MultiAgentMode.Broadcast, + "claude-opus-4.6", new[] { "gpt-5", "gemini-3-pro", "claude-sonnet-4.5" }), + + new GroupPreset( + "Quick Reflection Cycle", "Fast workers + smart evaluator for iterative refinement", + "πŸ”„", MultiAgentMode.OrchestratorReflect, + "claude-opus-4.6", new[] { "gpt-4.1", "gpt-4.1", "gpt-5.1-codex-mini" }) + { + WorkerSystemPrompts = new[] + { + "You are an implementation specialist. Write clean, correct code. Focus on getting the logic right and handling edge cases.", + "You are a testing and validation specialist. Review solutions for correctness, write test cases, and identify gaps in coverage.", + "You are a documentation and UX specialist. Ensure code is well-documented, APIs are intuitive, and error messages are helpful." 
+ } + }, + + new GroupPreset( + "Deep Research", "Strong reasoning models collaborate on complex problems", + "🧠", MultiAgentMode.Orchestrator, + "claude-opus-4.6", new[] { "gpt-5.1", "gemini-3-pro" }) + { + WorkerSystemPrompts = new[] + { + "You are a deep reasoning analyst. Break down complex problems methodically. Provide thorough analysis with evidence and citations where possible.", + "You are a creative problem solver. Explore unconventional approaches, challenge assumptions, and propose alternative solutions that others might miss." + } + }, + }; +} + +/// +/// Manages user-defined presets: save/load from ~/.polypilot/presets.json. +/// +public static class UserPresets +{ + private const string FileName = "presets.json"; + + public static List Load(string baseDir) + { + try + { + var path = Path.Combine(baseDir, FileName); + if (!File.Exists(path)) return new List(); + var json = File.ReadAllText(path); + return System.Text.Json.JsonSerializer.Deserialize>(json) ?? new(); + } + catch { return new List(); } + } + + public static void Save(string baseDir, List presets) + { + try + { + Directory.CreateDirectory(baseDir); + var json = System.Text.Json.JsonSerializer.Serialize(presets, + new System.Text.Json.JsonSerializerOptions { WriteIndented = true }); + File.WriteAllText(Path.Combine(baseDir, FileName), json); + } + catch { /* best-effort persistence */ } + } + + /// Get all presets: built-in + user-defined + repo-level (Squad). Repo overrides by name. + public static GroupPreset[] GetAll(string baseDir, string? repoWorkingDirectory = null) + { + var merged = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var p in GroupPreset.BuiltIn) merged[p.Name] = p; + foreach (var p in Load(baseDir)) merged[p.Name] = p; + if (repoWorkingDirectory != null) + { + foreach (var p in SquadDiscovery.Discover(repoWorkingDirectory)) + merged[p.Name] = p; + } + return merged.Values.ToArray(); + } + + /// Save the current multi-agent group as a reusable preset. 
+ public static GroupPreset? SaveGroupAsPreset(string baseDir, string name, string description, + string emoji, SessionGroup group, List members, Func getEffectiveModel, + string? worktreeRoot = null) + { + var orchestrator = members.FirstOrDefault(m => m.Role == MultiAgentRole.Orchestrator); + var workers = members.Where(m => m.Role != MultiAgentRole.Orchestrator).ToList(); + + if (orchestrator == null && workers.Count == 0) return null; + + var preset = new GroupPreset( + name, description, emoji, group.OrchestratorMode, + orchestrator != null ? getEffectiveModel(orchestrator.SessionName) : "claude-opus-4.6", + workers.Select(w => getEffectiveModel(w.SessionName)).ToArray()) + { + IsUserDefined = true, + WorkerSystemPrompts = workers.Select(w => w.SystemPrompt).ToArray(), + SharedContext = group.SharedContext, + RoutingContext = group.RoutingContext, + }; + + // Write as .squad/ directory if worktree is available + if (!string.IsNullOrEmpty(worktreeRoot) && Directory.Exists(worktreeRoot)) + { + try + { + SquadWriter.WriteFromGroup(worktreeRoot, name, group, members, getEffectiveModel); + preset = preset with { IsRepoLevel = true, SourcePath = Path.Combine(worktreeRoot, ".squad") }; + } + catch { /* Fall through to JSON save */ } + } + + // Always save to presets.json too (personal backup) + var existing = Load(baseDir); + existing.RemoveAll(p => p.Name == name); + existing.Add(preset); + Save(baseDir, existing); + return preset; + } +} + +/// +/// Detects conflicts and issues within a multi-agent group's model configuration. +/// +public static class GroupModelAnalyzer +{ + public record GroupDiagnostic(string Level, string Message); // Level: "error", "warning", "info" + + /// + /// Analyze a multi-agent group for model conflicts and capability gaps. 
+ /// + public static List Analyze(SessionGroup group, List<(string Name, string Model, MultiAgentRole Role)> members) + { + var diags = new List(); + if (members.Count == 0) return diags; + + var orchestrators = members.Where(m => m.Role == MultiAgentRole.Orchestrator).ToList(); + var workers = members.Where(m => m.Role == MultiAgentRole.Worker).ToList(); + + // Check: orchestrator mode without orchestrator + if ((group.OrchestratorMode == MultiAgentMode.Orchestrator || group.OrchestratorMode == MultiAgentMode.OrchestratorReflect) + && orchestrators.Count == 0) + { + diags.Add(new("error", "β›” Orchestrator mode requires at least one session with the Orchestrator role.")); + } + + // Check: orchestrator using weak model + foreach (var orch in orchestrators) + { + var caps = ModelCapabilities.GetCapabilities(orch.Model); + if (!caps.HasFlag(ModelCapability.ReasoningExpert)) + diags.Add(new("warning", $"⚠️ Orchestrator '{orch.Name}' uses {orch.Model} which lacks strong reasoning. Consider claude-opus or gpt-5.")); + } + + // Check: all workers same model in broadcast (less diverse perspectives) + if (group.OrchestratorMode == MultiAgentMode.Broadcast && workers.Count > 1) + { + var uniqueModels = workers.Select(w => w.Model).Distinct().Count(); + if (uniqueModels == 1) + diags.Add(new("info", "πŸ’‘ All workers use the same model. For diverse perspectives, assign different models.")); + } + + // Check: expensive models as workers when cheaper ones suffice + foreach (var w in workers) + { + var caps = ModelCapabilities.GetCapabilities(w.Model); + if (caps.HasFlag(ModelCapability.ReasoningExpert) && !caps.HasFlag(ModelCapability.Fast)) + diags.Add(new("info", $"πŸ’° Worker '{w.Name}' uses premium model {w.Model}. 
Consider a faster/cheaper model for worker tasks.")); + } + + // Check: OrchestratorReflect without enough workers + if (group.OrchestratorMode == MultiAgentMode.OrchestratorReflect && workers.Count == 0) + diags.Add(new("error", "β›” OrchestratorReflect needs at least one worker to iterate on.")); + + return diags; + } +} diff --git a/PolyPilot/Models/ReflectionCycle.cs b/PolyPilot/Models/ReflectionCycle.cs index 1c09f3da14..5d0c648c38 100644 --- a/PolyPilot/Models/ReflectionCycle.cs +++ b/PolyPilot/Models/ReflectionCycle.cs @@ -54,10 +54,25 @@ public partial class ReflectionCycle ///
public bool IsStalled { get; set; } + /// + /// Whether the cycle was manually cancelled by the user via StopGroupReflection. + /// + public bool IsCancelled { get; set; } + /// /// Number of consecutive stalls detected. Exposed for diagnostics and warning UI. + /// Not serialized β€” private stall state (_recentResponses, _lastResponse) is not recoverable + /// from JSON, so persisting this counter would create inconsistent state after restart. /// - public int ConsecutiveStalls { get; private set; } + [System.Text.Json.Serialization.JsonIgnore] + public int ConsecutiveStalls { get; internal set; } + + /// + /// Number of consecutive errors in the reflection loop. Separate from ConsecutiveStalls + /// because stalls and errors have different thresholds and recovery strategies. + /// + [System.Text.Json.Serialization.JsonIgnore] + public int ConsecutiveErrors { get; internal set; } /// /// Optional instructions on how to evaluate whether the goal has been met. @@ -68,12 +83,14 @@ public partial class ReflectionCycle /// /// True only on the advance where the first stall is detected. /// + [System.Text.Json.Serialization.JsonIgnore] public bool ShouldWarnOnStall { get; private set; } /// /// The Jaccard similarity score from the last stall check (0.0–1.0). /// Exposed so the UI can show "91% similar to previous response". /// + [System.Text.Json.Serialization.JsonIgnore] public double LastSimilarity { get; private set; } /// @@ -92,7 +109,7 @@ public partial class ReflectionCycle public bool IsPaused { get; set; } /// - /// Name of the hidden evaluator session used for independent goal evaluation. + /// Optional: session name of a dedicated evaluator (different from orchestrator/worker). /// public string? EvaluatorSessionName { get; set; } @@ -101,8 +118,24 @@ public partial class ReflectionCycle /// public string? EvaluatorFeedback { get; set; } + /// + /// The orchestrator's evaluation from the last iteration (for multi-agent). + /// + public string? 
LastEvaluation { get; set; } + + /// + /// Per-iteration evaluation results for trend tracking. + /// + public List EvaluationHistory { get; set; } = new(); + + /// + /// Auto-adjustment suggestions surfaced to the user. + /// + [System.Text.Json.Serialization.JsonIgnore] + public List PendingAdjustments { get; } = new(); + // Stall detection state (not serialized) - private readonly List _recentHashes = new(); + private readonly List _recentResponses = new(); private string _lastResponse = ""; /// @@ -111,9 +144,10 @@ public partial class ReflectionCycle /// public void ResetStallDetection() { - _recentHashes.Clear(); + _recentResponses.Clear(); _lastResponse = ""; ConsecutiveStalls = 0; + ConsecutiveErrors = 0; ShouldWarnOnStall = false; } @@ -237,7 +271,7 @@ public bool IsGoalMet(string response) /// /// Checks if the response indicates a stall (repetitive or near-identical to previous). - /// Uses exact hash matching over a sliding window and Jaccard token similarity. + /// Uses exact string matching over a sliding window and Jaccard token similarity. /// public bool CheckStall(string response) { @@ -246,16 +280,15 @@ public bool CheckStall(string response) bool isStall = false; LastSimilarity = 0.0; - // Exact repetition check over last 5 responses - int currentHash = response.GetHashCode(); - if (_recentHashes.Contains(currentHash)) + // Exact repetition check over last 5 responses (full string equality, no hash collisions) + if (_recentResponses.Contains(response)) { isStall = true; LastSimilarity = 1.0; } - _recentHashes.Add(currentHash); - if (_recentHashes.Count > 5) _recentHashes.RemoveAt(0); + _recentResponses.Add(response); + if (_recentResponses.Count > 5) _recentResponses.RemoveAt(0); // Jaccard similarity with immediate predecessor if (!isStall && !string.IsNullOrEmpty(_lastResponse)) @@ -386,8 +419,8 @@ public bool AdvanceWithEvaluation(string response, bool evaluatorPassed, string? 
/// public string BuildCompletionSummary() { - var emoji = GoalMet ? "βœ…" : IsStalled ? "⚠️" : "⏱️"; - var reasonText = GoalMet ? "Goal met" : IsStalled ? $"Stalled ({LastSimilarity:P0} similarity)" : $"Max iterations reached ({MaxIterations})"; + var emoji = GoalMet ? "βœ…" : IsStalled ? "⚠️" : IsCancelled ? "⏹️" : "⏱️"; + var reasonText = GoalMet ? "Goal met" : IsStalled ? $"Stalled ({LastSimilarity:P0} similarity)" : IsCancelled ? "Cancelled by user" : $"Max iterations reached ({MaxIterations})"; var durationText = ""; if (StartedAt.HasValue && CompletedAt.HasValue) { @@ -404,7 +437,7 @@ public string BuildCompletionSummary() /// /// Creates a new reflection cycle with the given goal and iteration limit. /// - public static ReflectionCycle Create(string goal, int maxIterations = 5, string? evaluationPrompt = null) + public static ReflectionCycle Create(string goal, int maxIterations = 5, string? evaluationPrompt = null, string? evaluatorSession = null) { return new ReflectionCycle { @@ -415,6 +448,43 @@ public static ReflectionCycle Create(string goal, int maxIterations = 5, string? CurrentIteration = 0, GoalMet = false, StartedAt = DateTime.Now, + EvaluatorSessionName = evaluatorSession }; } + + /// + /// Record an evaluation result and return the quality trend. + /// + public QualityTrend RecordEvaluation(int iteration, double score, string rationale, string evaluatorModel) + { + EvaluationHistory.Add(new EvaluationResult + { + Iteration = iteration, + Score = score, + Rationale = rationale, + EvaluatorModel = evaluatorModel, + Timestamp = DateTime.Now + }); + + if (EvaluationHistory.Count < 2) return QualityTrend.Stable; + + var recent = EvaluationHistory.TakeLast(3).Select(e => e.Score).ToList(); + if (recent.Count >= 2 && recent.Last() > recent[^2] + 0.1) return QualityTrend.Improving; + if (recent.Count >= 2 && recent.Last() < recent[^2] - 0.1) return QualityTrend.Degrading; + return QualityTrend.Stable; + } +} + +/// Quality trend across iterations. 
+public enum QualityTrend { Improving, Stable, Degrading } + +/// Structured evaluation result from one reflect iteration. +public class EvaluationResult +{ + public int Iteration { get; set; } + /// Quality score 0.0-1.0. + public double Score { get; set; } + public string Rationale { get; set; } = ""; + public string EvaluatorModel { get; set; } = ""; + public DateTime Timestamp { get; set; } } diff --git a/PolyPilot/Models/SessionOrganization.cs b/PolyPilot/Models/SessionOrganization.cs index b6a3451e04..9041a32236 100644 --- a/PolyPilot/Models/SessionOrganization.cs +++ b/PolyPilot/Models/SessionOrganization.cs @@ -13,6 +13,40 @@ public class SessionGroup public bool IsCollapsed { get; set; } /// If set, this group auto-tracks a repository managed by RepoManager. public string? RepoId { get; set; } + + /// When true, this group operates as a multi-agent orchestration group. + public bool IsMultiAgent { get; set; } + + /// The orchestration mode for multi-agent groups. + public MultiAgentMode OrchestratorMode { get; set; } = MultiAgentMode.Broadcast; + + /// Optional system prompt appended to all sessions in this multi-agent group. + public string? OrchestratorPrompt { get; set; } + + /// Default model for new worker sessions added to this group. Null = use app default. + public string? DefaultWorkerModel { get; set; } + + /// Default model for the orchestrator role. Null = use app default. + public string? DefaultOrchestratorModel { get; set; } + + /// + /// Shared worktree for the entire multi-agent group. All sessions use this worktree's path as CWD. + /// Future: per-agent worktrees would move this to SessionMeta and add merge orchestration. + /// + public string? WorktreeId { get; set; } + + /// Active reflection state for OrchestratorReflect mode. Null when not in a reflect loop. + public ReflectionCycle? ReflectionState { get; set; } + + /// + /// Shared context from Squad decisions.md or similar, prepended to all worker prompts. + /// + public string? 
SharedContext { get; set; } + + /// + /// Routing context from Squad routing.md, injected into orchestrator planning prompt. + /// + public string? RoutingContext { get; set; } } public class SessionMeta @@ -23,6 +57,23 @@ public class SessionMeta public int ManualOrder { get; set; } /// Worktree ID if this session was created from a worktree. public string? WorktreeId { get; set; } + + /// Role of this session within a multi-agent group. + public MultiAgentRole Role { get; set; } = MultiAgentRole.Worker; + + /// + /// Preferred model for this session in multi-agent context. + /// Null = use whatever model the session was created with (no override). + /// When set, the model is switched before dispatch via EnsureSessionModelAsync. + /// + public string? PreferredModel { get; set; } + + /// + /// System prompt / charter that defines this worker's specialization. + /// Prepended to every task dispatched to this worker. Null = generic worker prompt. + /// Example: "You are a security auditor. Focus on vulnerabilities, input validation, and auth flaws." + /// + public string? SystemPrompt { get; set; } } [JsonConverter(typeof(JsonStringEnumConverter))] @@ -34,6 +85,30 @@ public enum SessionSortMode Manual } +/// How prompts are distributed in a multi-agent group. +[JsonConverter(typeof(JsonStringEnumConverter))] +public enum MultiAgentMode +{ + /// Send the same prompt to all sessions simultaneously. + Broadcast, + /// Send the prompt to sessions one at a time in order. + Sequential, + /// An orchestrator session decides how to delegate work to other sessions. + Orchestrator, + /// Orchestrator with iterative reflection: planβ†’dispatchβ†’collectβ†’evaluateβ†’repeat until goal met. + OrchestratorReflect +} + +/// Role of a session within a multi-agent group. +[JsonConverter(typeof(JsonStringEnumConverter))] +public enum MultiAgentRole +{ + /// Regular worker session that receives prompts. 
+ Worker, + /// Orchestrator session that delegates work (used in Orchestrator mode). + Orchestrator +} + public class OrganizationState { public List Groups { get; set; } = new() @@ -43,3 +118,6 @@ public class OrganizationState public List Sessions { get; set; } = new(); public SessionSortMode SortMode { get; set; } = SessionSortMode.LastActive; } + +// GroupReflectionState class removed and merged into ReflectionCycle + diff --git a/PolyPilot/Models/SquadDiscovery.cs b/PolyPilot/Models/SquadDiscovery.cs new file mode 100644 index 0000000000..5eabf9f446 --- /dev/null +++ b/PolyPilot/Models/SquadDiscovery.cs @@ -0,0 +1,174 @@ +using System.Text.RegularExpressions; + +namespace PolyPilot.Models; + +/// +/// Discovers bradygaster/squad team definitions from .squad/ or .ai-team/ directories. +/// Parses team.md, agent charters, routing.md, and decisions.md into GroupPreset(s). +/// Read-only: never writes to the .squad/ directory. +/// +public static class SquadDiscovery +{ + private const int MaxCharterLength = 4000; + private const int MaxDecisionsLength = 8000; + + /// Names of agents that are infrastructure, not workers. + private static readonly HashSet InfraAgents = new(StringComparer.OrdinalIgnoreCase) + { + "scribe", "_scribe", "coordinator", "_coordinator", "_alumni" + }; + + /// + /// Discover Squad team definitions from a worktree root. + /// Returns empty list if no .squad/ or .ai-team/ directory found. + /// + public static List Discover(string worktreeRoot) + { + try + { + var squadDir = FindSquadDirectory(worktreeRoot); + if (squadDir == null) return new(); + + var teamFile = Path.Combine(squadDir, "team.md"); + if (!File.Exists(teamFile)) return new(); + + var teamContent = File.ReadAllText(teamFile); + var agents = DiscoverAgents(squadDir); + + if (agents.Count == 0) return new(); + + var teamName = ParseTeamName(teamContent) ?? 
"Squad Team"; + var decisions = ReadOptionalFile(Path.Combine(squadDir, "decisions.md"), MaxDecisionsLength); + var routing = ReadOptionalFile(Path.Combine(squadDir, "routing.md"), MaxDecisionsLength); + + var preset = BuildPreset(teamName, agents, decisions, routing, squadDir); + return new List { preset }; + } + catch + { + return new(); + } + } + + /// + /// Find .squad/ or .ai-team/ directory. Prefers .squad/ if both exist. + /// + internal static string? FindSquadDirectory(string worktreeRoot) + { + var squadPath = Path.Combine(worktreeRoot, ".squad"); + if (Directory.Exists(squadPath)) return squadPath; + + var aiTeamPath = Path.Combine(worktreeRoot, ".ai-team"); + if (Directory.Exists(aiTeamPath)) return aiTeamPath; + + return null; + } + + /// + /// Discover agents from the agents/ subdirectory. + /// Each agent has a directory with charter.md inside. + /// Skips infrastructure agents (scribe, coordinator, _alumni). + /// + internal static List DiscoverAgents(string squadDir) + { + var agentsDir = Path.Combine(squadDir, "agents"); + if (!Directory.Exists(agentsDir)) return new(); + + var agents = new List(); + foreach (var dir in Directory.GetDirectories(agentsDir)) + { + var name = Path.GetFileName(dir); + if (InfraAgents.Contains(name)) continue; + + var charterPath = Path.Combine(dir, "charter.md"); + string? charter = null; + if (File.Exists(charterPath)) + { + charter = File.ReadAllText(charterPath); + if (charter.Length > MaxCharterLength) + charter = charter[..MaxCharterLength]; + } + + agents.Add(new SquadAgent(name, charter)); + } + + return agents; + } + + /// + /// Parse team name from team.md content. + /// Looks for: first H1 heading, or first line that looks like a title. + /// + internal static string? 
ParseTeamName(string teamContent) + { + foreach (var line in teamContent.Split('\n')) + { + var trimmed = line.Trim(); + if (trimmed.StartsWith("# ")) + return trimmed[2..].Trim(); + } + return null; + } + + /// + /// Parse agent roster from team.md table rows. + /// Returns member names from the first column of markdown tables. + /// + internal static List ParseRosterNames(string teamContent) + { + var names = new List(); + var tableRegex = new Regex(@"^\s*\|\s*([^\|\s]+)\s*\|", RegexOptions.Multiline); + foreach (Match m in tableRegex.Matches(teamContent)) + { + var name = m.Groups[1].Value.Trim(); + // Skip header row markers and header labels + if (name == "---" || name.All(c => c == '-') + || name.Equals("Member", StringComparison.OrdinalIgnoreCase) + || name.Equals("Name", StringComparison.OrdinalIgnoreCase)) + continue; + names.Add(name); + } + return names; + } + + private static string? ReadOptionalFile(string path, int maxLength) + { + if (!File.Exists(path)) return null; + try + { + var content = File.ReadAllText(path); + if (string.IsNullOrWhiteSpace(content)) return null; + return content.Length > maxLength ? content[..maxLength] : content; + } + catch { return null; } + } + + private static GroupPreset BuildPreset(string teamName, List agents, + string? decisions, string? routing, string squadDir) + { + // Use a sensible default model for all agents (user can override after creation) + var defaultModel = "claude-sonnet-4.6"; + var orchestratorModel = "claude-opus-4.6"; + + var workerModels = agents.Select(_ => defaultModel).ToArray(); + var systemPrompts = agents.Select(a => a.Charter).ToArray(); + + return new GroupPreset( + teamName, + $"Squad team from {Path.GetFileName(Path.GetDirectoryName(squadDir) ?? 
squadDir)}", + "🫑", + MultiAgentMode.OrchestratorReflect, + orchestratorModel, + workerModels) + { + IsRepoLevel = true, + SourcePath = squadDir, + WorkerSystemPrompts = systemPrompts, + SharedContext = decisions, + RoutingContext = routing, + }; + } + + /// Represents a discovered Squad agent with name and charter content. + internal record SquadAgent(string Name, string? Charter); +} diff --git a/PolyPilot/Models/SquadWriter.cs b/PolyPilot/Models/SquadWriter.cs new file mode 100644 index 0000000000..db2d032f61 --- /dev/null +++ b/PolyPilot/Models/SquadWriter.cs @@ -0,0 +1,131 @@ +using System.Text; + +namespace PolyPilot.Models; + +/// +/// Writes GroupPreset data as a bradygaster/squad .squad/ directory structure. +/// Produces: team.md, agents/{name}/charter.md, decisions.md (optional), routing.md (optional). +/// This is the inverse of SquadDiscovery β€” write what we read. +/// +public static class SquadWriter +{ + /// + /// Write a GroupPreset to .squad/ format in the given worktree root. + /// Creates .squad/ directory if it doesn't exist. Overwrites existing files. + /// + public static string WritePreset(string worktreeRoot, GroupPreset preset, + List<(string Name, string? SystemPrompt)> workers) + { + var squadDir = Path.Combine(worktreeRoot, ".squad"); + Directory.CreateDirectory(squadDir); + + WriteTeamFile(squadDir, preset.Name, workers); + WriteAgentCharters(squadDir, workers); + + if (!string.IsNullOrWhiteSpace(preset.SharedContext)) + File.WriteAllText(Path.Combine(squadDir, "decisions.md"), preset.SharedContext); + + if (!string.IsNullOrWhiteSpace(preset.RoutingContext)) + File.WriteAllText(Path.Combine(squadDir, "routing.md"), preset.RoutingContext); + + return squadDir; + } + + /// + /// Write a GroupPreset from live session data (orchestrator + workers with their system prompts and group context). 
+ /// + public static string WriteFromGroup(string worktreeRoot, string teamName, + SessionGroup group, List members, Func getEffectiveModel) + { + var workers = members + .Where(m => m.Role != MultiAgentRole.Orchestrator) + .Select(m => (Name: SanitizeAgentName(m.SessionName, teamName), SystemPrompt: m.SystemPrompt)) + .ToList(); + + var preset = new GroupPreset( + teamName, "", "🫑", group.OrchestratorMode, + getEffectiveModel(members.FirstOrDefault(m => m.Role == MultiAgentRole.Orchestrator)?.SessionName ?? ""), + members.Where(m => m.Role != MultiAgentRole.Orchestrator) + .Select(m => getEffectiveModel(m.SessionName)).ToArray()) + { + SharedContext = group.SharedContext, + RoutingContext = group.RoutingContext, + }; + + return WritePreset(worktreeRoot, preset, workers); + } + + private static void WriteTeamFile(string squadDir, string teamName, + List<(string Name, string? SystemPrompt)> workers) + { + var sb = new StringBuilder(); + sb.AppendLine($"# {teamName}"); + sb.AppendLine(); + sb.AppendLine("| Member | Role |"); + sb.AppendLine("|--------|------|"); + foreach (var (name, prompt) in workers) + { + var role = DeriveRole(name, prompt); + sb.AppendLine($"| {name} | {role} |"); + } + File.WriteAllText(Path.Combine(squadDir, "team.md"), sb.ToString()); + } + + private static void WriteAgentCharters(string squadDir, + List<(string Name, string? SystemPrompt)> workers) + { + var agentsDir = Path.Combine(squadDir, "agents"); + // Clean stale agent dirs before re-writing to prevent phantom agents on re-discovery + if (Directory.Exists(agentsDir)) + Directory.Delete(agentsDir, true); + Directory.CreateDirectory(agentsDir); + + foreach (var (name, prompt) in workers) + { + var agentDir = Path.Combine(agentsDir, name); + Directory.CreateDirectory(agentDir); + var charter = prompt ?? $"You are {name}. 
Complete assigned tasks thoroughly."; + File.WriteAllText(Path.Combine(agentDir, "charter.md"), charter); + } + } + + /// + /// Derive a short role description from the agent name or system prompt. + /// + private static string DeriveRole(string name, string? prompt) + { + if (!string.IsNullOrWhiteSpace(prompt)) + { + // Take first sentence of prompt as role, capped at 60 chars + var firstSentence = prompt.Split('.', '\n')[0].Trim(); + if (firstSentence.StartsWith("You are a ", StringComparison.OrdinalIgnoreCase)) + firstSentence = firstSentence[10..]; + else if (firstSentence.StartsWith("You are an ", StringComparison.OrdinalIgnoreCase)) + firstSentence = firstSentence[11..]; + if (firstSentence.Length > 60) + firstSentence = firstSentence[..57] + "..."; + if (!string.IsNullOrWhiteSpace(firstSentence)) + return firstSentence; + } + // Fall back to name-based role + return name.Replace("-", " ").Replace("_", " "); + } + + /// + /// Convert a session name like "Code Review Team-worker-1" into an agent name like "worker-1". + /// Strips the team name prefix and sanitizes for filesystem use. + /// + internal static string SanitizeAgentName(string sessionName, string teamName) + { + var name = sessionName; + // Strip team name prefix (e.g., "Code Review Team-worker-1" β†’ "worker-1") + if (name.StartsWith(teamName + "-", StringComparison.OrdinalIgnoreCase)) + name = name[(teamName.Length + 1)..]; + + // Replace invalid path chars with hyphens + foreach (var c in Path.GetInvalidFileNameChars()) + name = name.Replace(c, '-'); + + return name.Trim('-').ToLowerInvariant(); + } +} diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs index a5d28e442b..d59c609c46 100644 --- a/PolyPilot/Services/CopilotService.Events.cs +++ b/PolyPilot/Services/CopilotService.Events.cs @@ -210,13 +210,16 @@ private void HandleSessionEvent(SessionState state, SessionEvent evt) } // Warn if receiving events on an orphaned (replaced) state object. 
- // We don't early-return here: both old and new SessionState share the same Info object - // (reconnect copies Info to newState), so CompleteResponse on the orphaned state still - // correctly clears IsProcessing on the live session's shared Info. + // After the generation-carry fix, stale callbacks on orphaned state would have + // matching generations and could incorrectly complete the new turn. Gate all + // terminal/mutating events to only fire on the current (live) state. if (!isCurrentState) { Debug($"[EVT-WARN] '{sessionName}' event {evt.GetType().Name} delivered to ORPHANED state " + $"(not in _sessions). This handler should have been detached."); + // Block ALL events from orphaned state β€” stale deltas, tool events, and + // terminal events can all produce ghost mutations on shared Info.History. + return; } void Invoke(Action action) @@ -720,13 +723,18 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul if (!string.IsNullOrEmpty(state.Info.SessionId)) _ = _chatDb.AddMessageAsync(state.Info.SessionId, msg); } - state.ResponseCompletion?.TrySetResult(response); + // Clear IsProcessing BEFORE completing the TCS β€” if the continuation runs + // synchronously (e.g., in orchestrator reflection loops), the next SendPromptAsync + // call must see IsProcessing=false or it throws "already processing". state.CurrentResponse.Clear(); state.Info.IsProcessing = false; + state.Info.IsResumed = false; // After first successful completion, use normal watchdog timeouts + Interlocked.Exchange(ref state.SendingFlag, 0); // Release atomic send lock state.Info.ProcessingStartedAt = null; state.Info.ToolCallCount = 0; state.Info.ProcessingPhase = 0; state.Info.LastUpdatedAt = DateTime.Now; + state.ResponseCompletion?.TrySetResult(response); OnStateChanged?.Invoke(); // Fire completion notification @@ -780,12 +788,15 @@ private void CompleteResponse(SessionState state, long? 
expectedGeneration = nul state.Info.MessageQueue.RemoveAt(0); // Retrieve any queued image paths for this message List? nextImagePaths = null; - if (_queuedImagePaths.TryGetValue(state.Info.Name, out var imageQueue) && imageQueue.Count > 0) + lock (_imageQueueLock) { - nextImagePaths = imageQueue[0]; - imageQueue.RemoveAt(0); - if (imageQueue.Count == 0) - _queuedImagePaths.TryRemove(state.Info.Name, out _); + if (_queuedImagePaths.TryGetValue(state.Info.Name, out var imageQueue) && imageQueue.Count > 0) + { + nextImagePaths = imageQueue[0]; + imageQueue.RemoveAt(0); + if (imageQueue.Count == 0) + _queuedImagePaths.TryRemove(state.Info.Name, out _); + } } var skipHistory = state.Info.ReflectionCycle is { IsActive: true } && @@ -801,7 +812,7 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul await Task.Delay(100); if (_syncContext != null) { - var tcs = new TaskCompletionSource(); + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); _syncContext.Post(async _ => { try @@ -829,8 +840,11 @@ private void CompleteResponse(SessionState state, long? 
expectedGeneration = nul state.Info.MessageQueue.Insert(0, nextPrompt); if (nextImagePaths != null) { - var images = _queuedImagePaths.GetOrAdd(state.Info.Name, _ => new List>()); - images.Insert(0, nextImagePaths); + lock (_imageQueueLock) + { + var images = _queuedImagePaths.GetOrAdd(state.Info.Name, _ => new List>()); + images.Insert(0, nextImagePaths); + } } }); } @@ -902,7 +916,7 @@ private async Task EvaluateAndAdvanceAsync(string workerSessionName, string work while (evalState.Info.IsProcessing && !cts.Token.IsCancellationRequested) await Task.Delay(200, cts.Token); - evalState.ResponseCompletion = new TaskCompletionSource(); + evalState.ResponseCompletion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); await SendPromptAsync(evaluatorName, evalPrompt, cancellationToken: cts.Token, skipHistoryMessage: true); // Wait for the evaluator response @@ -1052,7 +1066,7 @@ private void HandleReflectionAdvanceResult(SessionState state, string response, await Task.Delay(100); if (_syncContext != null) { - var tcs = new TaskCompletionSource(); + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); _syncContext.Post(async _ => { try @@ -1215,6 +1229,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session // Flush any accumulated partial response before clearing processing state FlushCurrentResponse(state); state.Info.IsProcessing = false; + Interlocked.Exchange(ref state.SendingFlag, 0); state.Info.ProcessingStartedAt = null; state.Info.ToolCallCount = 0; state.Info.ProcessingPhase = 0; diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index e796e0857c..bc61b44112 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -1,12 +1,81 @@ +using System.Collections.Concurrent; using System.Text.Json; +using System.Text.RegularExpressions; using 
PolyPilot.Models; namespace PolyPilot.Services; +public enum OrchestratorPhase { Planning, Dispatching, WaitingForWorkers, Synthesizing, Complete } + public partial class CopilotService { + public event Action? OnOrchestratorPhaseChanged; // groupId, phase, detail + + // Per-session semaphores to prevent concurrent model switches during rapid dispatch + private readonly ConcurrentDictionary _modelSwitchLocks = new(); + #region Session Organization (groups, pinning, sorting) + public async Task CreateMultiAgentGroupAsync(string groupName, string orchestratorModel, string workerModel, int workerCount, MultiAgentMode mode, string? systemPrompt = null) + { + // 1. Create the group + var group = new SessionGroup + { + Id = Guid.NewGuid().ToString(), + Name = groupName, + IsMultiAgent = true, + OrchestratorMode = mode, + OrchestratorPrompt = systemPrompt, + DefaultOrchestratorModel = orchestratorModel, + DefaultWorkerModel = workerModel, + SortOrder = Organization.Groups.Max(g => g.SortOrder) + 1 + }; + Organization.Groups.Add(group); + + // 2. Create Orchestrator Session + var orchName = $"{groupName}-Orchestrator"; + // Ensure name uniqueness + int suffix = 1; + while (_sessions.ContainsKey(orchName) || Organization.Sessions.Any(s => s.SessionName == orchName)) + orchName = $"{groupName}-Orchestrator-{suffix++}"; + + var orchSession = await CreateSessionAsync(orchName, orchestratorModel, null); // Use default dir + var orchMeta = GetOrCreateSessionMeta(orchSession.Name); + orchMeta.GroupId = group.Id; + orchMeta.Role = MultiAgentRole.Orchestrator; + orchMeta.PreferredModel = orchestratorModel; + + // 3. 
Create Worker Sessions + for (int i = 1; i <= workerCount; i++) + { + var workerName = $"{groupName}-Worker-{i}"; + suffix = 1; + while (_sessions.ContainsKey(workerName) || Organization.Sessions.Any(s => s.SessionName == workerName)) + workerName = $"{groupName}-Worker-{i}-{suffix++}"; + + var workerSession = await CreateSessionAsync(workerName, workerModel, null); + var workerMeta = GetOrCreateSessionMeta(workerSession.Name); + workerMeta.GroupId = group.Id; + workerMeta.Role = MultiAgentRole.Worker; + workerMeta.PreferredModel = workerModel; + } + + SaveOrganization(); + OnStateChanged?.Invoke(); + return group.Id; + } + + private SessionMeta GetOrCreateSessionMeta(string sessionName) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + if (meta == null) + { + meta = new SessionMeta { SessionName = sessionName, GroupId = SessionGroup.DefaultId }; + Organization.Sessions.Add(meta); + } + return meta; + } + public void LoadOrganization() { try @@ -15,6 +84,7 @@ public void LoadOrganization() { var json = File.ReadAllText(OrganizationFile); Organization = JsonSerializer.Deserialize(json) ?? new OrganizationState(); + Debug($"LoadOrganization: loaded {Organization.Groups.Count} groups, {Organization.Sessions.Count} sessions"); } else { @@ -91,7 +161,7 @@ private void WriteOrgFile(string json) /// Skips work if the active session set hasn't changed since last reconciliation. 
/// private int _lastReconcileSessionHash; - private void ReconcileOrganization() + internal void ReconcileOrganization() { var activeNames = _sessions.Where(kv => !kv.Value.Info.IsHidden).Select(kv => kv.Key).ToHashSet(); @@ -102,6 +172,9 @@ private void ReconcileOrganization() _lastReconcileSessionHash = currentHash; bool changed = false; + // Build lookup of multi-agent group IDs so we can protect their sessions + var multiAgentGroupIds = Organization.Groups.Where(g => g.IsMultiAgent).Select(g => g.Id).ToHashSet(); + // Add missing sessions to default group and link to worktrees foreach (var name in activeNames) { @@ -116,6 +189,10 @@ private void ReconcileOrganization() Organization.Sessions.Add(meta); changed = true; } + + // Don't auto-reassign sessions that belong to a multi-agent group + if (multiAgentGroupIds.Contains(meta.GroupId)) + continue; // Auto-link session to worktree if working directory matches if (meta.WorktreeId == null && _sessions.TryGetValue(name, out var sessionState)) @@ -142,8 +219,11 @@ private void ReconcileOrganization() } } - // Ensure sessions with worktrees are in the correct repo group - if (meta.WorktreeId != null && meta.GroupId == SessionGroup.DefaultId) + // Ensure sessions with worktrees are in the correct repo group. + // Skip sessions that were part of a multi-agent team (identifiable by having + // an Orchestrator role or a PreferredModel set β€” regular sessions never have these). 
+ bool wasMultiAgent = meta.Role == MultiAgentRole.Orchestrator || meta.PreferredModel != null; + if (meta.WorktreeId != null && meta.GroupId == SessionGroup.DefaultId && !wasMultiAgent) { var worktree = _repoManager.Worktrees.FirstOrDefault(w => w.Id == meta.WorktreeId); if (worktree != null) @@ -165,6 +245,7 @@ private void ReconcileOrganization() { if (!groupIds.Contains(meta.GroupId)) { + Debug($"ReconcileOrganization: orphaned session '{meta.SessionName}' (GroupId={meta.GroupId}) β†’ _default"); meta.GroupId = SessionGroup.DefaultId; changed = true; } @@ -209,6 +290,9 @@ private void ReconcileOrganization() } // Remove metadata only for sessions that are truly gone (not in any known set) + var toRemove = Organization.Sessions.Where(m => !knownNames.Contains(m.SessionName)).ToList(); + if (toRemove.Count > 0) + Debug($"ReconcileOrganization: pruning {toRemove.Count} sessions: {string.Join(", ", toRemove.Select(m => m.SessionName))}"); Organization.Sessions.RemoveAll(m => !knownNames.Contains(m.SessionName)); if (changed) SaveOrganization(); @@ -275,10 +359,39 @@ public void DeleteGroup(string groupId) { if (groupId == SessionGroup.DefaultId) return; - // Move all sessions in this group to default - foreach (var meta in Organization.Sessions.Where(m => m.GroupId == groupId)) + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + var isMultiAgent = group?.IsMultiAgent ?? 
false; + + if (isMultiAgent) + { + // Multi-agent sessions are meaningless without their group β€” close them + var sessionNames = Organization.Sessions + .Where(m => m.GroupId == groupId) + .Select(m => m.SessionName) + .ToList(); + // Remove org metadata first so UI updates immediately + Organization.Sessions.RemoveAll(m => sessionNames.Contains(m.SessionName)); + // Mark sessions as hidden so ReconcileOrganization won't re-add them + // to the default group while CloseSessionAsync is still running + foreach (var name in sessionNames) + { + if (_sessions.TryGetValue(name, out var s)) + s.Info.IsHidden = true; + } + // Fire-and-forget: close sessions asynchronously + _ = Task.Run(async () => + { + foreach (var name in sessionNames) + await CloseSessionAsync(name); + }); + } + else { - meta.GroupId = SessionGroup.DefaultId; + // Non-multi-agent: move sessions to default group + foreach (var meta in Organization.Sessions.Where(m => m.GroupId == groupId)) + { + meta.GroupId = SessionGroup.DefaultId; + } } Organization.Groups.RemoveAll(g => g.Id == groupId); @@ -405,4 +518,1089 @@ public SessionGroup GetOrCreateRepoGroup(string repoId, string repoName) } #endregion + + #region Multi-Agent Orchestration + + /// + /// Create a multi-agent group and optionally move existing sessions into it. + /// + public SessionGroup CreateMultiAgentGroup(string name, MultiAgentMode mode = MultiAgentMode.Broadcast, string? orchestratorPrompt = null, List? sessionNames = null, string? worktreeId = null, string? repoId = null) + { + var group = new SessionGroup + { + Id = Guid.NewGuid().ToString(), + Name = name, + IsMultiAgent = true, + OrchestratorMode = mode, + OrchestratorPrompt = orchestratorPrompt, + WorktreeId = worktreeId, + RepoId = repoId, + SortOrder = Organization.Groups.Any() ? 
Organization.Groups.Max(g => g.SortOrder) + 1 : 0 + }; + Organization.Groups.Add(group); + + if (sessionNames != null) + { + foreach (var sessionName in sessionNames) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + if (meta != null) + { + meta.GroupId = group.Id; + if (worktreeId != null) + meta.WorktreeId = worktreeId; + } + } + } + + SaveOrganization(); + OnStateChanged?.Invoke(); + return group; + } + + /// + /// Convert an existing regular group into a multi-agent group. + /// + public void ConvertToMultiAgent(string groupId) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group == null || group.IsMultiAgent) return; + group.IsMultiAgent = true; + group.OrchestratorMode = MultiAgentMode.Broadcast; + SaveOrganization(); + OnStateChanged?.Invoke(); + } + + /// + /// Set the orchestration mode for a multi-agent group. + /// + public void SetMultiAgentMode(string groupId, MultiAgentMode mode) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group != null && group.IsMultiAgent) + { + group.OrchestratorMode = mode; + SaveOrganization(); + OnStateChanged?.Invoke(); + } + } + + /// + /// Set the role of a session within a multi-agent group. + /// When promoting to Orchestrator, any existing orchestrator in the same group is demoted to Worker. 
///
    /// <param name="sessionName">Session whose role changes; names without stored metadata are ignored.</param>
    /// <param name="role">Role to assign.</param>
    public void SetSessionRole(string sessionName, MultiAgentRole role)
    {
        var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName);
        if (meta == null) return;

        // Enforce single orchestrator per group
        if (role == MultiAgentRole.Orchestrator)
        {
            var group = Organization.Groups.FirstOrDefault(g => g.Id == meta.GroupId);
            if (group is { IsMultiAgent: true })
            {
                // Materialize first: the loop mutates the very property the filter reads.
                var currentOrchestrators = Organization.Sessions
                    .Where(m => m.GroupId == meta.GroupId
                                && m.SessionName != sessionName
                                && m.Role == MultiAgentRole.Orchestrator)
                    .ToList();
                foreach (var other in currentOrchestrators)
                    other.Role = MultiAgentRole.Worker;
            }
        }

        meta.Role = role;
        SaveOrganization();
        OnStateChanged?.Invoke();
    }

    /// <summary>
    /// Get all session names in a multi-agent group.
    /// </summary>
    /// <param name="groupId">Id of the group to list.</param>
    /// <returns>Names of every session whose metadata points at the group; empty when there are none.</returns>
    public List<string> GetMultiAgentGroupMembers(string groupId)
    {
        return Organization.Sessions
            .Where(m => m.GroupId == groupId)
            .Select(m => m.SessionName)
            .ToList();
    }

    /// <summary>
    /// Get the orchestrator session name for an orchestrator-mode group, if any.
    /// </summary>
    /// <param name="groupId">Id of the group to inspect.</param>
    /// <returns>The first session in the group with the Orchestrator role, or <c>null</c>.</returns>
    public string? GetOrchestratorSession(string groupId)
    {
        return Organization.Sessions
            .FirstOrDefault(m => m.GroupId == groupId && m.Role == MultiAgentRole.Orchestrator)
            ?.SessionName;
    }

    ///
    /// Send a prompt to all sessions in a multi-agent group based on its orchestration mode.
///
    /// <param name="groupId">Id of the multi-agent group; unknown or non-multi-agent ids are ignored.</param>
    /// <param name="prompt">User prompt to deliver.</param>
    /// <param name="cancellationToken">Cancels the dispatch.</param>
    public async Task SendToMultiAgentGroupAsync(string groupId, string prompt, CancellationToken cancellationToken = default)
    {
        var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId && g.IsMultiAgent);
        if (group == null) return;

        var members = GetMultiAgentGroupMembers(groupId);
        if (members.Count == 0) return;

        switch (group.OrchestratorMode)
        {
            case MultiAgentMode.Broadcast:
                await SendBroadcastAsync(group, members, prompt, cancellationToken);
                break;

            case MultiAgentMode.Sequential:
                await SendSequentialAsync(group, members, prompt, cancellationToken);
                break;

            case MultiAgentMode.Orchestrator:
                await SendViaOrchestratorAsync(groupId, members, prompt, cancellationToken);
                break;

            case MultiAgentMode.OrchestratorReflect:
                await SendViaOrchestratorReflectAsync(groupId, members, prompt, cancellationToken);
                break;
        }
    }

    /// <summary>
    /// Build a multi-agent context prefix for a session in a group.
    /// Includes model info for each member so agents know each other's capabilities.
    /// </summary>
    private string BuildMultiAgentPrefix(string sessionName, SessionGroup group, List<string> allMembers)
    {
        var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName);
        var role = meta?.Role ?? MultiAgentRole.Worker;
        var roleName = role == MultiAgentRole.Orchestrator ? "orchestrator" : "worker";
        var memberDetails = allMembers.Where(m => m != sessionName)
            .Select(m => $"'{m}' ({GetEffectiveModel(m)})")
            .ToList();
        var othersList = memberDetails.Count > 0 ? string.Join(", ", memberDetails) : "none";
        return $"[Multi-agent context: You are '{sessionName}' ({roleName}, {GetEffectiveModel(sessionName)}) in group '{group.Name}'. Other members: {othersList}.]\n\n";
    }

    /// <summary>
    /// Sends the prefixed prompt to every listed session concurrently. Sessions that are
    /// already processing get the prompt queued instead; per-session send failures are logged.
    /// </summary>
    private async Task SendBroadcastAsync(SessionGroup group, List<string> sessionNames, string prompt, CancellationToken cancellationToken)
    {
        var tasks = sessionNames.Select(async name =>
        {
            var session = GetSession(name);
            if (session == null) return;

            await EnsureSessionModelAsync(name, cancellationToken);
            var prefixedPrompt = BuildMultiAgentPrefix(name, group, sessionNames) + prompt;

            if (session.IsProcessing)
            {
                EnqueueMessage(name, prefixedPrompt);
                return;
            }

            try
            {
                await SendPromptAsync(name, prefixedPrompt, cancellationToken: cancellationToken);
            }
            catch (Exception ex)
            {
                Debug($"Broadcast send failed for '{name}': {ex.Message}");
            }
        });

        await Task.WhenAll(tasks);
    }

    /// <summary>
    /// Sends the prefixed prompt to each listed session one after another, stopping early
    /// when cancellation is requested. Busy sessions get the prompt queued instead.
    /// </summary>
    private async Task SendSequentialAsync(SessionGroup group, List<string> sessionNames, string prompt, CancellationToken cancellationToken)
    {
        foreach (var name in sessionNames)
        {
            if (cancellationToken.IsCancellationRequested) break;

            var session = GetSession(name);
            if (session == null) continue;

            await EnsureSessionModelAsync(name, cancellationToken);
            var prefixedPrompt = BuildMultiAgentPrefix(name, group, sessionNames) + prompt;

            if (session.IsProcessing)
            {
                EnqueueMessage(name, prefixedPrompt);
                continue;
            }

            try
            {
                await SendPromptAsync(name, prefixedPrompt, cancellationToken: cancellationToken);
            }
            catch (Exception ex)
            {
                Debug($"Sequential send failed for '{name}': {ex.Message}");
            }
        }
    }

    /// <summary>
    /// Single-pass orchestration: the orchestrator plans, its @worker assignments are run on
    /// the workers in parallel, and the results are sent back to the orchestrator for synthesis.
    /// Falls back to broadcast when the group has no orchestrator session.
    /// </summary>
    private async Task SendViaOrchestratorAsync(string groupId, List<string> members, string prompt, CancellationToken cancellationToken)
    {
        var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId);
        var orchestratorName = GetOrchestratorSession(groupId);
        if (orchestratorName == null)
        {
            // Fall back to broadcast if no orchestrator is designated
            if (group != null)
                await SendBroadcastAsync(group, members, prompt, cancellationToken);
            return;
        }

        var workerNames = members.Where(m => m != orchestratorName).ToList();

        // Phase 1: Planning — ask orchestrator to analyze and assign tasks
        InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Planning, null));

        var planningPrompt = BuildOrchestratorPlanningPrompt(prompt, workerNames, group?.OrchestratorPrompt, group?.RoutingContext);
        var planResponse = await SendPromptAndWaitAsync(orchestratorName, planningPrompt, cancellationToken);

        // Phase 2: Parse task assignments from orchestrator response
        var rawAssignments = ParseTaskAssignments(planResponse, workerNames);
        // Deduplicate: merge multiple tasks for the same worker into one prompt
        var assignments = rawAssignments
            .GroupBy(a => a.WorkerName, StringComparer.OrdinalIgnoreCase)
            .Select(g => new TaskAssignment(g.Key, string.Join("\n\n---\n\n", g.Select(a => a.Task))))
            .ToList();
        if (assignments.Count == 0)
        {
            // Orchestrator handled it without delegation — add a system note
            AddOrchestratorSystemMessage(orchestratorName, "ℹ️ Orchestrator handled the request directly (no tasks delegated to workers).");
            InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Complete, null));
            return;
        }

        // Phase 3: Dispatch tasks to workers in parallel
        InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Dispatching,
            $"Sending tasks to {assignments.Count} worker(s)"));

        InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.WaitingForWorkers, null));

        var workerTasks = assignments.Select(a =>
            ExecuteWorkerAsync(a.WorkerName, a.Task, prompt, cancellationToken));
        var results = await Task.WhenAll(workerTasks);

        // Phase 4: Synthesize — send worker results back to orchestrator
        InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Synthesizing, null));

        var synthesisPrompt = BuildSynthesisPrompt(prompt, results.ToList());
        await SendPromptAsync(orchestratorName, synthesisPrompt, cancellationToken: cancellationToken);

        InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Complete, null));
    }

    /// <summary>
    /// Builds the planning prompt for the orchestrator: the worker roster (with models and
    /// any per-worker system prompt), the user request, optional extra instructions and
    /// routing context, and the @worker/@end assignment format that
    /// <see cref="ParseTaskAssignments"/> understands.
    /// </summary>
    private string BuildOrchestratorPlanningPrompt(string userPrompt, List<string> workerNames, string? additionalInstructions, string? routingContext = null)
    {
        var sb = new System.Text.StringBuilder();
        sb.AppendLine($"You are the orchestrator of a multi-agent group. You have {workerNames.Count} worker agent(s) available:");
        foreach (var w in workerNames)
        {
            var meta = GetSessionMeta(w);
            var model = GetEffectiveModel(w);
            if (!string.IsNullOrEmpty(meta?.SystemPrompt))
                sb.AppendLine($" - '{w}' (model: {model}) — {meta.SystemPrompt}");
            else
                sb.AppendLine($" - '{w}' (model: {model})");
        }
        sb.AppendLine();
        sb.AppendLine("Route tasks to workers based on their specialization. If a worker has a described role, assign tasks that match their expertise.");
        sb.AppendLine();
        sb.AppendLine("## User Request");
        sb.AppendLine(userPrompt);
        if (!string.IsNullOrEmpty(additionalInstructions))
        {
            sb.AppendLine();
            sb.AppendLine("## Additional Orchestration Instructions");
            sb.AppendLine(additionalInstructions);
        }
        if (!string.IsNullOrEmpty(routingContext))
        {
            sb.AppendLine();
            sb.AppendLine("## Work Routing (from team definition)");
            sb.AppendLine(routingContext);
        }
        sb.AppendLine();
        sb.AppendLine("## Your Task");
        sb.AppendLine("Analyze the request and assign specific tasks to your workers. Use this exact format for each assignment:");
        sb.AppendLine();
        sb.AppendLine("@worker:worker-name");
        sb.AppendLine("Detailed task description for this worker.");
        sb.AppendLine("@end");
        sb.AppendLine();
        sb.AppendLine("You may include your analysis and reasoning as normal text. Only the @worker/@end blocks will be dispatched.");
        sb.AppendLine("If you can handle the request entirely yourself, just respond normally without any @worker blocks.");
        return sb.ToString();
    }

    /// <summary>A task the orchestrator assigned to one worker session.</summary>
    internal record TaskAssignment(string WorkerName, string Task);

    // Matches "@worker:<name> <task text>" up to "@end", the next "@worker:" or end of input.
    // Built once: a static Regex is safe to share and avoids re-parsing the pattern per call.
    private static readonly Regex WorkerAssignmentRegex = new(
        @"@worker:(\S+)\s*([\s\S]*?)(?:@end|(?=@worker:)|$)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Extracts @worker/@end task blocks from an orchestrator response and resolves each
    /// worker name against <paramref name="availableWorkers"/> (exact case-insensitive match
    /// first, then substring match in either direction). Blocks with an empty task or an
    /// unresolvable worker are dropped.
    /// </summary>
    /// <param name="orchestratorResponse">Raw orchestrator reply; null or blank yields no assignments.</param>
    /// <param name="availableWorkers">Worker session names that may receive tasks.</param>
    /// <returns>Assignments in the order they appear; never null.</returns>
    internal static List<TaskAssignment> ParseTaskAssignments(string orchestratorResponse, List<string> availableWorkers)
    {
        var assignments = new List<TaskAssignment>();

        // Nothing to parse or nobody to assign to — avoids ArgumentNullException from Regex.Matches.
        if (string.IsNullOrWhiteSpace(orchestratorResponse) || availableWorkers is not { Count: > 0 })
            return assignments;

        foreach (Match match in WorkerAssignmentRegex.Matches(orchestratorResponse))
        {
            var workerName = match.Groups[1].Value.Trim();
            var task = match.Groups[2].Value.Trim();
            if (string.IsNullOrEmpty(task)) continue;

            // Resolve worker name: exact match, then fuzzy
            var resolved = availableWorkers.FirstOrDefault(w =>
                w.Equals(workerName, StringComparison.OrdinalIgnoreCase));
            if (resolved == null)
            {
                resolved = availableWorkers.FirstOrDefault(w =>
                    w.Contains(workerName, StringComparison.OrdinalIgnoreCase) ||
                    workerName.Contains(w, StringComparison.OrdinalIgnoreCase));
            }
            if (resolved != null)
                assignments.Add(new TaskAssignment(resolved, task));
        }
        return assignments;
    }

    /// <summary>Outcome of one worker's task: its response or error, plus elapsed time.</summary>
    private record WorkerResult(string WorkerName, string? Response, bool Success, string? Error, TimeSpan Duration);

    /// <summary>
    /// Runs one assigned task on a worker session and waits for its reply. Exceptions from
    /// the send are captured in the returned <see cref="WorkerResult"/> rather than thrown.
    /// </summary>
    private async Task<WorkerResult> ExecuteWorkerAsync(string workerName, string task, string originalPrompt, CancellationToken cancellationToken)
    {
        var sw = System.Diagnostics.Stopwatch.StartNew();
        await EnsureSessionModelAsync(workerName, cancellationToken);

        // Use per-worker system prompt if set, otherwise generic.
        // Note: .github/copilot-instructions.md is auto-loaded by the SDK for each session's working directory,
        // so workers already inherit repo-level copilot instructions without explicit injection here.
        var meta = GetSessionMeta(workerName);
        var identity = !string.IsNullOrEmpty(meta?.SystemPrompt)
            ? meta.SystemPrompt
            : "You are a worker agent. Complete the following task thoroughly.";

        // Inject shared context (e.g., Squad decisions.md) if the group has it
        var group = meta != null ? Organization.Groups.FirstOrDefault(g => g.Id == meta.GroupId) : null;
        var sharedPrefix = !string.IsNullOrEmpty(group?.SharedContext)
            ? $"## Team Context (shared knowledge)\n{group.SharedContext}\n\n"
            : "";

        var workerPrompt = $"{identity}\n\nYour response will be collected and synthesized with other workers' responses.\n\n{sharedPrefix}## Original User Request (context)\n{originalPrompt}\n\n## Your Assigned Task\n{task}";

        try
        {
            var response = await SendPromptAndWaitAsync(workerName, workerPrompt, cancellationToken);
            return new WorkerResult(workerName, response, true, null, sw.Elapsed);
        }
        catch (Exception ex)
        {
            return new WorkerResult(workerName, null, false, ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Sends a prompt and returns the session's response, giving up after 10 minutes or when
    /// <paramref name="cancellationToken"/> is cancelled, whichever comes first.
    /// </summary>
    private async Task<string> SendPromptAndWaitAsync(string sessionName, string prompt, CancellationToken cancellationToken)
    {
        // Use SendPromptAsync directly — it already awaits ResponseCompletion internally.
        // Do NOT capture state and await its TCS separately: reconnection replaces the state
        // object, orphaning the old TCS and causing a 10-minute hang.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        cts.CancelAfter(TimeSpan.FromMinutes(10));
        return await SendPromptAsync(sessionName, prompt, cancellationToken: cts.Token);
    }

    /// <summary>
    /// Builds the prompt that hands all worker results (or their errors) back to the
    /// orchestrator and asks for a unified answer to the original request.
    /// </summary>
    private string BuildSynthesisPrompt(string originalPrompt, List<WorkerResult> results)
    {
        var sb = new System.Text.StringBuilder();
        sb.AppendLine("## Worker Results");
        sb.AppendLine();
        foreach (var result in results)
        {
            sb.AppendLine($"### {result.WorkerName} ({(result.Success ? "✅ completed" : "❌ failed")}, {result.Duration.TotalSeconds:F1}s)");
            if (result.Success)
                sb.AppendLine(result.Response);
            else
                sb.AppendLine($"*Error: {result.Error}*");
            sb.AppendLine();
        }
        sb.AppendLine("## Instructions");
        sb.AppendLine($"Original request: {originalPrompt}");
        sb.AppendLine();
        sb.AppendLine("Synthesize these worker responses into a coherent final answer. Note any tasks that failed. Provide a unified response addressing the original request.");
        return sb.ToString();
    }

    /// <summary>Appends a system message to a session's history (if the session exists) and notifies the UI.</summary>
    private void AddOrchestratorSystemMessage(string sessionName, string message)
    {
        var session = GetSession(sessionName);
        if (session != null)
        {
            session.History.Add(ChatMessage.SystemMessage(message));
            InvokeOnUI(() => OnStateChanged?.Invoke());
        }
    }

    /// <summary>
    /// Get the progress of a multi-agent group (how many sessions have completed their current turn).
    /// </summary>
    /// <returns>
    /// Total counts every member with stored metadata; members without a live session are
    /// counted in Total but in neither Completed nor Processing.
    /// </returns>
    public (int Total, int Completed, int Processing, List<string> CompletedNames) GetMultiAgentProgress(string groupId)
    {
        var members = GetMultiAgentGroupMembers(groupId);
        var completed = new List<string>();
        int processing = 0;

        foreach (var name in members)
        {
            var session = GetSession(name);
            if (session == null) continue;

            if (session.IsProcessing)
                processing++;
            else
                completed.Add(name);
        }

        return (members.Count, completed.Count, processing, completed);
    }

    #endregion

    #region Per-Agent Model Assignment

    /// <summary>
    /// Set the preferred model for a session in a multi-agent group.
    /// The model is applied at dispatch time via EnsureSessionModelAsync.
    /// </summary>
    /// <param name="sessionName">Session to update; names without stored metadata are ignored.</param>
    /// <param name="modelSlug">Model name or slug (normalized before storing); <c>null</c> clears the preference.</param>
    public void SetSessionPreferredModel(string sessionName, string? modelSlug)
    {
        var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName);
        if (meta == null) return;
        meta.PreferredModel = modelSlug != null ? Models.ModelHelper.NormalizeToSlug(modelSlug) : null;
        SaveOrganization();
        OnStateChanged?.Invoke();
    }

    /// <summary>
    /// Set the per-session system prompt used when the session acts as a worker.
    /// Null or whitespace clears it; other values are stored trimmed.
    /// </summary>
    public void SetSessionSystemPrompt(string sessionName, string? systemPrompt)
    {
        var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName);
        if (meta == null) return;
        meta.SystemPrompt = string.IsNullOrWhiteSpace(systemPrompt) ? null : systemPrompt.Trim();
        SaveOrganization();
        OnStateChanged?.Invoke();
    }

    /// <summary>
    /// Returns the model a session will use: PreferredModel if set, else live AgentSessionInfo.Model.
    /// Falls back to DefaultModel when the session is not found.
    /// </summary>
    public string GetEffectiveModel(string sessionName)
    {
        var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName);
        if (meta?.PreferredModel != null) return meta.PreferredModel;
        var session = GetSession(sessionName);
        return session?.Model ?? DefaultModel;
    }

    /// <summary>
    /// Create a multi-agent group from a preset template, creating sessions with assigned models.
    /// </summary>
    /// <remarks>
    /// A failure to create an individual session is logged and skipped, so the returned
    /// group may contain fewer sessions than the preset describes.
    /// </remarks>
    public async Task<SessionGroup?> CreateGroupFromPresetAsync(Models.GroupPreset preset, string? workingDirectory = null, string? worktreeId = null, string? repoId = null, CancellationToken ct = default)
    {
        var group = CreateMultiAgentGroup(preset.Name, preset.Mode, worktreeId: worktreeId, repoId: repoId);
        if (group == null) return null;

        // Store Squad context (routing, decisions) on the group for use during orchestration
        group.SharedContext = preset.SharedContext;
        group.RoutingContext = preset.RoutingContext;

        // Create orchestrator session
        var orchName = $"{preset.Name}-orchestrator";
        try
        {
            await CreateSessionAsync(orchName, preset.OrchestratorModel, workingDirectory, ct);
            MoveSession(orchName, group.Id);
            SetSessionRole(orchName, MultiAgentRole.Orchestrator);
            SetSessionPreferredModel(orchName, preset.OrchestratorModel);
            if (worktreeId != null)
            {
                var meta = GetSessionMeta(orchName);
                if (meta != null) meta.WorktreeId = worktreeId;
            }
        }
        catch (Exception ex)
        {
            Debug($"Failed to create orchestrator session: {ex.Message}");
        }

        // Create worker sessions
        for (int i = 0; i < preset.WorkerModels.Length; i++)
        {
            var workerName = $"{preset.Name}-worker-{i + 1}";
            var workerModel = preset.WorkerModels[i];
            try
            {
                await CreateSessionAsync(workerName, workerModel, workingDirectory, ct);
                MoveSession(workerName, group.Id);
                SetSessionPreferredModel(workerName, workerModel);
                // Apply per-worker system prompt from preset if available
                var systemPrompt = preset.WorkerSystemPrompts != null && i < preset.WorkerSystemPrompts.Length
                    ? preset.WorkerSystemPrompts[i] : null;
                var meta = GetSessionMeta(workerName);
                if (meta != null)
                {
                    if (worktreeId != null) meta.WorktreeId = worktreeId;
                    if (systemPrompt != null) meta.SystemPrompt = systemPrompt;
                }
            }
            catch (Exception ex)
            {
                Debug($"Failed to create worker session '{workerName}': {ex.Message}");
            }
        }

        SaveOrganization();
        OnStateChanged?.Invoke();
        return group;
    }

    /// <summary>
    /// Ensures a session's live model matches its PreferredModel before dispatch.
    /// Uses per-session semaphore to prevent concurrent model switches.
    /// No-op if PreferredModel is null or already matches.
    /// </summary>
    /// <remarks>
    /// Failures of the switch itself are logged, not thrown; cancellation while waiting for
    /// the semaphore does propagate.
    /// </remarks>
    private async Task EnsureSessionModelAsync(string sessionName, CancellationToken ct)
    {
        var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName);
        if (meta?.PreferredModel == null) return;

        var session = GetSession(sessionName);
        if (session == null) return;

        var currentSlug = Models.ModelHelper.NormalizeToSlug(session.Model);
        if (currentSlug == meta.PreferredModel) return;

        var semaphore = _modelSwitchLocks.GetOrAdd(sessionName, _ => new SemaphoreSlim(1, 1));
        await semaphore.WaitAsync(ct);
        try
        {
            // Re-check after acquiring lock — another dispatch may have already switched
            currentSlug = Models.ModelHelper.NormalizeToSlug(GetSession(sessionName)?.Model ?? "");
            if (currentSlug == meta.PreferredModel) return;

            await ChangeModelAsync(sessionName, meta.PreferredModel, ct);
            Debug($"Switched '{sessionName}' model to '{meta.PreferredModel}' for multi-agent dispatch");
        }
        catch (Exception ex)
        {
            Debug($"Failed to switch model for '{sessionName}': {ex.Message}");
        }
        finally
        {
            semaphore.Release();
        }
    }

    #endregion

    #region OrchestratorReflect Loop

    /// <summary>
    /// Start a reflection loop on a multi-agent group.
    /// </summary>
    /// <param name="groupId">Id of the multi-agent group; other ids are ignored.</param>
    /// <param name="goal">Goal the reflection cycle works toward.</param>
    /// <param name="maxIterations">Upper bound on plan/dispatch/evaluate iterations.</param>
    public void StartGroupReflection(string groupId, string goal, int maxIterations = 5)
    {
        var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId && g.IsMultiAgent);
        if (group == null) return;

        group.ReflectionState = ReflectionCycle.Create(goal, maxIterations);
        group.OrchestratorMode = MultiAgentMode.OrchestratorReflect;
        SaveOrganization();
        OnStateChanged?.Invoke();
    }

    /// <summary>
    /// Stop an active group reflection loop.
    /// </summary>
    public void StopGroupReflection(string groupId)
    {
        var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId);
        if (group?.ReflectionState == null) return;

        group.ReflectionState.IsActive = false;
        group.ReflectionState.IsCancelled = true;
        group.ReflectionState.CompletedAt = DateTime.Now;
        SaveOrganization();
        OnStateChanged?.Invoke();
    }

    ///
    /// Pause/resume a group reflection loop.
///
    /// <param name="groupId">Id of the group; groups without reflection state are ignored.</param>
    /// <param name="paused"><c>true</c> to pause, <c>false</c> to resume.</param>
    public void PauseGroupReflection(string groupId, bool paused)
    {
        var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId);
        if (group?.ReflectionState == null) return;
        group.ReflectionState.IsPaused = paused;
        SaveOrganization();
        OnStateChanged?.Invoke();
    }

    /// <summary>
    /// Iterative orchestration: plan → dispatch to workers → synthesize and evaluate, repeated
    /// until the goal is reported met, the loop stalls or errors out, it is paused or
    /// deactivated, or the iteration budget is exhausted.
    /// Falls back to <see cref="SendViaOrchestratorAsync"/> when no reflection cycle is active
    /// and to broadcast when the group has no orchestrator session.
    /// </summary>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled; the cycle is marked cancelled when this is raised
    /// from inside an iteration.
    /// </exception>
    private async Task SendViaOrchestratorReflectAsync(string groupId, List<string> members, string prompt, CancellationToken ct)
    {
        var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId);
        if (group == null) return;

        var reflectState = group.ReflectionState;
        if (reflectState == null || !reflectState.IsActive)
        {
            // Not in reflect mode — fall back to regular orchestrator
            await SendViaOrchestratorAsync(groupId, members, prompt, ct);
            return;
        }

        var orchestratorName = GetOrchestratorSession(groupId);
        if (orchestratorName == null)
        {
            await SendBroadcastAsync(group, members, prompt, ct);
            return;
        }

        var workerNames = members.Where(m => m != orchestratorName).ToList();

        while (reflectState.IsActive && !reflectState.IsPaused
            && reflectState.CurrentIteration < reflectState.MaxIterations)
        {
            ct.ThrowIfCancellationRequested();
            reflectState.CurrentIteration++;

            try
            {
                Debug($"Reflection loop: starting iteration {reflectState.CurrentIteration}/{reflectState.MaxIterations} " +
                    $"(IsActive={reflectState.IsActive}, IsPaused={reflectState.IsPaused})");
                // Phase 1: Plan (first iteration) or Re-plan (subsequent)
                var iterDetail = $"Iteration {reflectState.CurrentIteration}/{reflectState.MaxIterations}";
                InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Planning, iterDetail));

                string planPrompt;
                if (reflectState.CurrentIteration == 1)
                {
                    planPrompt = BuildOrchestratorPlanningPrompt(prompt, workerNames, group.OrchestratorPrompt, group.RoutingContext);
                }
                else
                {
                    planPrompt = BuildReplanPrompt(reflectState.LastEvaluation ?? "Continue iterating.", workerNames, prompt);
                }

                var planResponse = await SendPromptAndWaitAsync(orchestratorName, planPrompt, ct);
                var rawAssignments = ParseTaskAssignments(planResponse, workerNames);
                // Deduplicate: merge multiple tasks for the same worker into one prompt
                var assignments = rawAssignments
                    .GroupBy(a => a.WorkerName, StringComparer.OrdinalIgnoreCase)
                    .Select(g => new TaskAssignment(g.Key, string.Join("\n\n---\n\n", g.Select(a => a.Task))))
                    .ToList();

                if (assignments.Count == 0)
                {
                    if (reflectState.CurrentIteration == 1)
                    {
                        // First iteration with no assignments = orchestrator failed to delegate.
                        // Treat as error, not goal met, so we can retry.
                        AddOrchestratorSystemMessage(orchestratorName,
                            "⚠️ No @worker assignments parsed from orchestrator response. Retrying...");
                        reflectState.ConsecutiveErrors++;
                        if (reflectState.ConsecutiveErrors >= 3)
                        {
                            reflectState.IsStalled = true;
                            reflectState.IsCancelled = true;
                            break;
                        }
                        // Rewind so the retry runs as iteration 1 again. Without this the loop
                        // header advances to iteration 2, where an empty plan is read as
                        // "goal met" and the error limit above can never be reached.
                        reflectState.CurrentIteration--;
                        continue;
                    }
                    // Later iterations: orchestrator decided no more work needed
                    reflectState.GoalMet = true;
                    AddOrchestratorSystemMessage(orchestratorName, $"✅ Orchestrator completed without delegation (iteration {reflectState.CurrentIteration}).");
                    break;
                }

                // Phase 2-3: Dispatch + Collect
                InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Dispatching,
                    $"Sending tasks to {assignments.Count} worker(s) — {iterDetail}"));

                InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.WaitingForWorkers, iterDetail));

                var workerTasks = assignments.Select(a => ExecuteWorkerAsync(a.WorkerName, a.Task, prompt, ct));
                var results = await Task.WhenAll(workerTasks);

                // Phase 4: Synthesize + Evaluate
                InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Synthesizing, iterDetail));

                var synthEvalPrompt = BuildSynthesisWithEvalPrompt(prompt, results.ToList(), reflectState);

                // Use dedicated evaluator session if configured, otherwise orchestrator self-evaluates
                string evaluatorName = reflectState.EvaluatorSessionName ?? orchestratorName;
                string synthesisResponse;
                if (reflectState.EvaluatorSessionName != null && reflectState.EvaluatorSessionName != orchestratorName)
                {
                    // Send results to orchestrator for synthesis
                    var synthOnlyPrompt = BuildSynthesisOnlyPrompt(prompt, results.ToList());
                    synthesisResponse = await SendPromptAndWaitAsync(orchestratorName, synthOnlyPrompt, ct);

                    // Send to evaluator for independent scoring
                    var evalOnlyPrompt = BuildEvaluatorPrompt(prompt, synthesisResponse, reflectState);
                    var evalResponse = await SendPromptAndWaitAsync(evaluatorName, evalOnlyPrompt, ct);

                    // Parse score from evaluator
                    var (score, rationale) = ParseEvaluationScore(evalResponse);
                    var evaluatorModel = GetEffectiveModel(evaluatorName);
                    var trend = reflectState.RecordEvaluation(reflectState.CurrentIteration, score, rationale, evaluatorModel);

                    // Check if evaluator says complete
                    if (evalResponse.Contains("[[GROUP_REFLECT_COMPLETE]]", StringComparison.OrdinalIgnoreCase) || score >= 0.9)
                    {
                        reflectState.GoalMet = true;
                        reflectState.IsActive = false;
                        AddOrchestratorSystemMessage(orchestratorName, $"✅ {reflectState.BuildCompletionSummary()} (score: {score:F1})");
                        break;
                    }

                    reflectState.LastEvaluation = rationale;
                    if (trend == Models.QualityTrend.Degrading)
                        reflectState.PendingAdjustments.Add("📉 Quality degrading — consider changing worker models or refining the goal.");
                }
                else
                {
                    synthesisResponse = await SendPromptAndWaitAsync(orchestratorName, synthEvalPrompt, ct);

                    // Check completion sentinel
                    if (synthesisResponse.Contains("[[GROUP_REFLECT_COMPLETE]]", StringComparison.OrdinalIgnoreCase))
                    {
                        reflectState.GoalMet = true;
                        reflectState.IsActive = false;
                        AddOrchestratorSystemMessage(orchestratorName, $"✅ {reflectState.BuildCompletionSummary()}");
                        break;
                    }

                    // Extract evaluation for next iteration
                    reflectState.LastEvaluation = ExtractIterationEvaluation(synthesisResponse);

                    // Record a self-eval score (estimated from sentinel presence)
                    var selfScore = synthesisResponse.Contains("[[NEEDS_ITERATION]]", StringComparison.OrdinalIgnoreCase) ? 0.4 : 0.7;
                    reflectState.RecordEvaluation(reflectState.CurrentIteration, selfScore,
                        reflectState.LastEvaluation ?? "", GetEffectiveModel(orchestratorName));
                }

                // Auto-adjustment: analyze worker results and suggest/apply changes
                AutoAdjustFromFeedback(groupId, group, results.ToList(), reflectState);

                // Stall detection — use 2-consecutive tolerance like single-agent Advance()
                if (reflectState.CheckStall(synthesisResponse))
                {
                    reflectState.ConsecutiveStalls++;
                    if (reflectState.ConsecutiveStalls >= 2)
                    {
                        reflectState.IsStalled = true;
                        reflectState.IsCancelled = true;
                        AddOrchestratorSystemMessage(orchestratorName, $"⚠️ {reflectState.BuildCompletionSummary()}");
                        break;
                    }
                    // First stall: warn but continue
                    reflectState.PendingAdjustments.Add("⚠️ Output similarity detected — may be stalling. Will stop if it repeats.");
                }
                else
                {
                    reflectState.ConsecutiveStalls = 0;
                    reflectState.ConsecutiveErrors = 0;
                }

                SaveOrganization();
                InvokeOnUI(() => OnStateChanged?.Invoke());

            } // end try
            catch (OperationCanceledException)
            {
                reflectState.IsCancelled = true;
                throw;
            }
            catch (Exception ex)
            {
                Debug($"Reflection iteration {reflectState.CurrentIteration} error: {ex.GetType().Name}: {ex.Message}");
                // Decrement so we retry the same iteration, not skip ahead
                reflectState.CurrentIteration--;
                // But limit retries per iteration to 3 (uses separate error counter)
                reflectState.ConsecutiveErrors++;
                if (reflectState.ConsecutiveErrors >= 3)
                {
                    reflectState.IsStalled = true;
                    reflectState.IsCancelled = true;
                    AddOrchestratorSystemMessage(orchestratorName,
                        $"⚠️ Iteration failed after retries: {ex.Message}");
                    break;
                }
                AddOrchestratorSystemMessage(orchestratorName,
                    $"⚠️ Iteration {reflectState.CurrentIteration + 1} error: {ex.Message}. Retrying...");
                InvokeOnUI(() => OnStateChanged?.Invoke());
                await Task.Delay(2000, ct);
            }
        }

        if (!reflectState.GoalMet && !reflectState.IsStalled && !reflectState.IsPaused)
        {
            // Max-iteration exit without goal met — mark as cancelled so callers
            // can distinguish "ran out of iterations" from "succeeded".
            reflectState.IsCancelled = true;
            AddOrchestratorSystemMessage(orchestratorName, $"⏱️ {reflectState.BuildCompletionSummary()}");
        }

        // NOTE(review): a paused cycle also reaches this point and is marked inactive/completed;
        // confirm that resuming is expected to start a fresh dispatch rather than continue this one.
        reflectState.IsActive = false;
        reflectState.CompletedAt = DateTime.Now;
        SaveOrganization();
        InvokeOnUI(() =>
        {
            OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Complete, reflectState.BuildCompletionSummary());
            OnStateChanged?.Invoke();
        });
    }

+ + private string BuildSynthesisWithEvalPrompt(string originalPrompt, List results, ReflectionCycle state) + { + var sb = new System.Text.StringBuilder(); + sb.Append(BuildSynthesisPrompt(originalPrompt, results)); + sb.AppendLine(); + sb.AppendLine($"## Evaluation Check (Iteration {state.CurrentIteration}/{state.MaxIterations})"); + sb.AppendLine($"**Goal:** {state.Goal}"); + sb.AppendLine(); + sb.AppendLine("### Quality Assessment"); + sb.AppendLine("Before deciding, evaluate each worker's output:"); + sb.AppendLine("1. **Completeness** β€” Did they fully address their assigned task?"); + sb.AppendLine("2. **Correctness** β€” Is the output accurate and well-reasoned?"); + sb.AppendLine("3. **Relevance** β€” Does it contribute meaningfully toward the goal?"); + sb.AppendLine(); + if (state.CurrentIteration > 1 && state.LastEvaluation != null) + { + sb.AppendLine("### Previous Iteration Feedback"); + sb.AppendLine(state.LastEvaluation); + sb.AppendLine(); + sb.AppendLine("Check whether the identified gaps have been addressed in this iteration."); + sb.AppendLine(); + } + sb.AppendLine("### Decision"); + sb.AppendLine("- If the combined output **fully satisfies** the goal: Include `[[GROUP_REFLECT_COMPLETE]]` with a summary."); + sb.AppendLine("- If **not yet complete**: Include `[[NEEDS_ITERATION]]` followed by:"); + sb.AppendLine(" 1. What specific gaps remain (be precise)"); + sb.AppendLine(" 2. Whether quality improved, degraded, or stalled vs. previous iteration"); + sb.AppendLine(" 3. 
Revised `@worker:name` / `@end` blocks for the next iteration"); + if (state.CurrentIteration >= state.MaxIterations - 1) + { + sb.AppendLine(); + sb.AppendLine($"⚠️ This is iteration {state.CurrentIteration} of {state.MaxIterations}. If close to the goal, consider completing with what you have rather than requesting another iteration."); + } + return sb.ToString(); + } + + private string BuildReplanPrompt(string lastEvaluation, List workerNames, string originalPrompt) + { + var sb = new System.Text.StringBuilder(); + sb.AppendLine("## Previous Iteration Evaluation"); + sb.AppendLine(lastEvaluation); + sb.AppendLine(); + sb.AppendLine("## Original Request (context)"); + sb.AppendLine(originalPrompt); + sb.AppendLine(); + sb.AppendLine($"Available workers ({workerNames.Count}):"); + foreach (var w in workerNames) + sb.AppendLine($" - '{w}' (model: {GetEffectiveModel(w)})"); + sb.AppendLine(); + sb.AppendLine("Assign refined tasks using `@worker:name` / `@end` blocks to address the gaps identified above."); + return sb.ToString(); + } + + private static string ExtractIterationEvaluation(string response) + { + // Extract text after [[NEEDS_ITERATION]] marker, or use full response as evaluation + var idx = response.IndexOf("[[NEEDS_ITERATION]]", StringComparison.OrdinalIgnoreCase); + if (idx >= 0) + { + var afterMarker = response[(idx + "[[NEEDS_ITERATION]]".Length)..].Trim(); + // Take text up to first @worker block as the evaluation + var workerIdx = afterMarker.IndexOf("@worker:", StringComparison.OrdinalIgnoreCase); + return workerIdx >= 0 ? afterMarker[..workerIdx].Trim() : afterMarker; + } + // No marker β€” use last paragraph as evaluation + var lines = response.Split('\n'); + return string.Join('\n', lines.TakeLast(5)).Trim(); + } + + /// Build a synthesis-only prompt (no evaluation decision) for use with separate evaluator. 
+ private string BuildSynthesisOnlyPrompt(string originalPrompt, List results) + { + var sb = new System.Text.StringBuilder(); + sb.Append(BuildSynthesisPrompt(originalPrompt, results)); + sb.AppendLine(); + sb.AppendLine("Synthesize the worker outputs into a unified, coherent response. Do NOT make a completion decision β€” an independent evaluator will assess quality separately."); + return sb.ToString(); + } + + /// Build a prompt for an independent evaluator session to score synthesis quality. + private static string BuildEvaluatorPrompt(string originalGoal, string synthesisResponse, ReflectionCycle state) + { + var sb = new System.Text.StringBuilder(); + sb.AppendLine("## Independent Quality Evaluation"); + sb.AppendLine($"**Goal:** {state.Goal}"); + sb.AppendLine($"**Iteration:** {state.CurrentIteration}/{state.MaxIterations}"); + sb.AppendLine(); + sb.AppendLine("### Synthesized Output to Evaluate"); + sb.AppendLine(synthesisResponse); + sb.AppendLine(); + sb.AppendLine("### Scoring Rubric"); + sb.AppendLine("Rate the output on a 0.0–1.0 scale across these dimensions:"); + sb.AppendLine("1. **Completeness** (0-1): Does it fully address the goal?"); + sb.AppendLine("2. **Correctness** (0-1): Is it accurate and well-reasoned?"); + sb.AppendLine("3. **Coherence** (0-1): Is the synthesis well-organized?"); + sb.AppendLine("4. 
**Actionability** (0-1): Can the user act on this output?"); + sb.AppendLine(); + if (state.EvaluationHistory.Count > 0) + { + var last = state.EvaluationHistory.Last(); + sb.AppendLine($"Previous iteration scored: {last.Score:F1} β€” {last.Rationale}"); + sb.AppendLine("Indicate whether quality improved, degraded, or stayed flat."); + sb.AppendLine(); + } + sb.AppendLine("### Response Format"); + sb.AppendLine("SCORE: "); + sb.AppendLine("RATIONALE: <2-3 sentences explaining the score and gaps>"); + sb.AppendLine(); + sb.AppendLine("If score >= 0.9, include `[[GROUP_REFLECT_COMPLETE]]`."); + sb.AppendLine("If score < 0.9, include `[[NEEDS_ITERATION]]` and list specific improvements needed."); + return sb.ToString(); + } + + /// Parse a score and rationale from evaluator response. + internal static (double Score, string Rationale) ParseEvaluationScore(string evalResponse) + { + double score = 0.5; // default if parsing fails + string rationale = evalResponse; + + // Try to find "SCORE: X.X" pattern + var scoreMatch = System.Text.RegularExpressions.Regex.Match(evalResponse, @"SCORE:\s*(-?[\d.]+)", System.Text.RegularExpressions.RegexOptions.IgnoreCase); + if (scoreMatch.Success && double.TryParse(scoreMatch.Groups[1].Value, System.Globalization.NumberStyles.Float, + System.Globalization.CultureInfo.InvariantCulture, out var parsed)) + { + score = Math.Clamp(parsed, 0.0, 1.0); + } + + // Extract rationale + var rationaleMatch = System.Text.RegularExpressions.Regex.Match(evalResponse, @"RATIONALE:\s*(.+?)(?:\[\[|$)", System.Text.RegularExpressions.RegexOptions.IgnoreCase | System.Text.RegularExpressions.RegexOptions.Singleline); + if (rationaleMatch.Success) + rationale = rationaleMatch.Groups[1].Value.Trim(); + + return (score, rationale); + } + + /// + /// Auto-adjust agent configuration based on iteration feedback. + /// Called after each reflect iteration to detect quality issues and apply fixes. 
+ /// Surfaces adjustments both as orchestrator system messages and as PendingAdjustments on state (for UI banners). + /// + private void AutoAdjustFromFeedback(string groupId, SessionGroup group, List results, ReflectionCycle state) + { + var failedWorkers = results.Where(r => !r.Success).ToList(); + var adjustments = new List(); + + // Auto-reassign tasks from failed workers to successful ones + if (failedWorkers.Count > 0 && results.Any(r => r.Success)) + { + foreach (var failed in failedWorkers) + { + adjustments.Add($"πŸ”„ Worker '{failed.WorkerName}' failed ({failed.Error}). Its tasks will be reassigned in the next iteration."); + } + } + + // Detect workers with suspiciously short responses (quality issue) + foreach (var result in results.Where(r => r.Success)) + { + if (result.Response != null && result.Response.Length < 100 && state.CurrentIteration > 1) + { + var caps = Models.ModelCapabilities.GetCapabilities(GetEffectiveModel(result.WorkerName)); + if (caps.HasFlag(Models.ModelCapability.CostEfficient) && !caps.HasFlag(Models.ModelCapability.ReasoningExpert)) + { + adjustments.Add($"πŸ“ˆ Worker '{result.WorkerName}' produced a brief response. Consider upgrading from a cost-efficient model to improve quality."); + } + } + } + + // Detect quality degradation from evaluation history + if (state.EvaluationHistory.Count >= 2) + { + var lastTwo = state.EvaluationHistory.TakeLast(2).ToList(); + if (lastTwo[1].Score < lastTwo[0].Score - 0.15) + adjustments.Add("πŸ“‰ Quality degraded significantly vs. previous iteration. Review worker models or task clarity."); + } + + // Detect quality degradation: if consecutive stalls detected, suggest model changes + if (state.ConsecutiveStalls == 1) + { + adjustments.Add("⚠️ Output repetition detected. 
The orchestrator may benefit from a different model or clearer instructions."); + } + + // Surface adjustments for UI banners (non-blocking) + state.PendingAdjustments.Clear(); + state.PendingAdjustments.AddRange(adjustments); + + // Surface adjustments as system messages to orchestrator + if (adjustments.Count > 0) + { + var orchestratorName = GetOrchestratorSession(groupId); + if (orchestratorName != null) + { + AddOrchestratorSystemMessage(orchestratorName, + $"πŸ”§ Auto-analysis (iteration {state.CurrentIteration}):\n" + string.Join("\n", adjustments)); + } + } + } + + /// + /// Get diagnostics for a multi-agent group (model conflicts, capability gaps). + /// + public List GetGroupDiagnostics(string groupId) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group == null || !group.IsMultiAgent) return new(); + + var members = GetMultiAgentGroupMembers(groupId) + .Select(name => + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == name); + return (name, GetEffectiveModel(name), meta?.Role ?? MultiAgentRole.Worker); + }) + .ToList(); + + return Models.GroupModelAnalyzer.Analyze(group, members); + } + + /// + /// Save the current multi-agent group configuration as a reusable user preset. + /// + public Models.GroupPreset? SaveGroupAsPreset(string groupId, string name, string description, string emoji) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId && g.IsMultiAgent); + if (group == null) return null; + + var members = GetMultiAgentGroupMembers(groupId) + .Select(n => Organization.Sessions.FirstOrDefault(m => m.SessionName == n)) + .Where(m => m != null) + .ToList(); + + // Resolve worktree path for .squad/ write-back + string? 
worktreeRoot = null; + if (!string.IsNullOrEmpty(group.WorktreeId)) + { + var wt = _repoManager.Worktrees.FirstOrDefault(w => w.Id == group.WorktreeId); + if (wt != null) worktreeRoot = wt.Path; + } + + return Models.UserPresets.SaveGroupAsPreset(PolyPilotBaseDir, name, description, emoji, + group, members!, GetEffectiveModel, worktreeRoot); + } + + #endregion } diff --git a/PolyPilot/Services/CopilotService.Persistence.cs b/PolyPilot/Services/CopilotService.Persistence.cs index 03c1c02841..21613abef5 100644 --- a/PolyPilot/Services/CopilotService.Persistence.cs +++ b/PolyPilot/Services/CopilotService.Persistence.cs @@ -155,10 +155,32 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok Debug($"Restoring {entries.Count} previous sessions..."); IsRestoring = true; + // Collect evaluator session names referenced by active reflection cycles + var activeEvaluators = new HashSet(StringComparer.OrdinalIgnoreCase); + foreach (var g in Organization.Groups) + { + if (g.ReflectionState?.IsActive == true && !string.IsNullOrEmpty(g.ReflectionState.EvaluatorSessionName)) + activeEvaluators.Add(g.ReflectionState.EvaluatorSessionName); + } + foreach (var entry in entries) { try { + // Prune ghost evaluator sessions from crashed cycles + if (entry.DisplayName.StartsWith("__evaluator_") && !activeEvaluators.Contains(entry.DisplayName)) + { + Debug($"Pruning ghost evaluator session '{entry.DisplayName}' β€” not referenced by active cycle"); + _closedSessionIds[entry.SessionId] = 0; // prevent merge from re-adding + // Clean up persisted session directory + var ghostDir = Path.Combine(SessionStatePath, entry.SessionId); + if (Directory.Exists(ghostDir)) + { + try { Directory.Delete(ghostDir, recursive: true); } + catch (Exception delEx) { Debug($"Failed to delete ghost session dir: {delEx.Message}"); } + } + continue; + } // Skip if already active if (_sessions.ContainsKey(entry.DisplayName)) { diff --git a/PolyPilot/Services/CopilotService.cs 
b/PolyPilot/Services/CopilotService.cs index 269a04ac76..ab4c99ef2a 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -21,6 +21,7 @@ public partial class CopilotService : IAsyncDisposable private readonly ConcurrentDictionary _closedSessionIds = new(); // Image paths queued alongside messages when session is busy (keyed by session name, list per queued message) private readonly ConcurrentDictionary>> _queuedImagePaths = new(); + private readonly object _imageQueueLock = new(); private static readonly object _diagnosticLogLock = new(); // Debounce timers for disk I/O β€” coalesce rapid-fire saves into a single write private Timer? _saveSessionsDebounce; @@ -42,6 +43,7 @@ public partial class CopilotService : IAsyncDisposable private static string? _polyPilotBaseDir; private static string PolyPilotBaseDir => _polyPilotBaseDir ??= GetPolyPilotBaseDir(); + internal static string BaseDir => PolyPilotBaseDir; private static string GetCopilotBaseDir() { @@ -225,6 +227,12 @@ private class SessionState /// that produced the SessionIdleEvent (race between SEND and queued COMPLETE). /// public long ProcessingGeneration; + /// + /// Atomic flag for SendPromptAsync entry. Prevents TOCTOU race where two + /// concurrent callers both see IsProcessing=false and both enter. + /// 0 = idle, 1 = sending. Set via Interlocked.CompareExchange. + /// + public int SendingFlag; } private void Debug(string message) @@ -459,7 +467,10 @@ public async Task ReconnectAsync(ConnectionSettings settings, CancellationToken } _sessions.Clear(); _closedSessionIds.Clear(); - _queuedImagePaths.Clear(); + lock (_imageQueueLock) + { + _queuedImagePaths.Clear(); + } _activeSessionName = null; if (_client != null) @@ -1188,7 +1199,7 @@ public async Task ResumeSessionAsync(string sessionId, string // stuck sessions β€” no separate short timeout needed. 
if (isStillProcessing) { - state.ResponseCompletion = new TaskCompletionSource(); + state.ResponseCompletion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); Debug($"Session '{displayName}' is still processing (was mid-turn when app restarted)"); // Start the processing watchdog so the session doesn't get stuck @@ -1197,7 +1208,6 @@ public async Task ResumeSessionAsync(string sessionId, string } - if (!_sessions.TryAdd(displayName, state)) { try { await copilotSession.DisposeAsync(); } catch { } @@ -1495,6 +1505,13 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis if (state.Info.IsProcessing) throw new InvalidOperationException("Session is already processing a request."); + // Atomic check-and-set to prevent TOCTOU race: two callers could both see + // IsProcessing=false and both enter without this guard. + if (Interlocked.CompareExchange(ref state.SendingFlag, 1, 0) != 0) + throw new InvalidOperationException("Session is already processing a request."); + + try + { state.Info.IsProcessing = true; state.Info.ProcessingStartedAt = DateTime.UtcNow; state.Info.ToolCallCount = 0; @@ -1503,7 +1520,7 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis Interlocked.Exchange(ref state.ActiveToolCallCount, 0); // Reset stale tool count from previous turn state.HasUsedToolsThisTurn = false; // Reset stale tool flag from previous turn Debug($"[SEND] '{sessionName}' IsProcessing=true gen={Interlocked.Read(ref state.ProcessingGeneration)} (thread={Environment.CurrentManagedThreadId})"); - state.ResponseCompletion = new TaskCompletionSource(); + state.ResponseCompletion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); state.CurrentResponse.Clear(); StartProcessingWatchdog(state, sessionName); @@ -1577,7 +1594,12 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis Session = newSession, Info = state.Info }; - newState.ResponseCompletion = new 
TaskCompletionSource(); + newState.ResponseCompletion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + // Carry forward ProcessingGeneration so stale callbacks on the + // orphaned old state can't pass generation checks on the new state. + Interlocked.Exchange(ref newState.ProcessingGeneration, + Interlocked.Read(ref state.ProcessingGeneration)); + newState.HasUsedToolsThisTurn = state.HasUsedToolsThisTurn; newSession.On(evt => HandleSessionEvent(newState, evt)); _sessions[sessionName] = newState; state = newState; @@ -1641,6 +1663,13 @@ await state.Session.SendAsync(new MessageOptions if (state.ResponseCompletion == null) return ""; // Response already completed via events return await state.ResponseCompletion.Task; + } + catch + { + // Reset atomic send flag on any exception so the session isn't permanently locked + Interlocked.Exchange(ref state.SendingFlag, 0); + throw; + } } public async Task AbortSessionAsync(string sessionName) @@ -1732,11 +1761,13 @@ public void EnqueueMessage(string sessionName, string prompt, List? 
imag // Track image paths alongside the queued message if (imagePaths != null && imagePaths.Count > 0) { - var queue = _queuedImagePaths.GetOrAdd(sessionName, _ => new List>()); - // Pad with empty lists for any prior messages without images - while (queue.Count < state.Info.MessageQueue.Count - 1) - queue.Add(new List()); - queue.Add(imagePaths); + lock (_imageQueueLock) + { + var queue = _queuedImagePaths.GetOrAdd(sessionName, _ => new List>()); + while (queue.Count < state.Info.MessageQueue.Count - 1) + queue.Add(new List()); + queue.Add(imagePaths); + } } OnStateChanged?.Invoke(); @@ -1789,11 +1820,14 @@ public void RemoveQueuedMessage(string sessionName, int index) { state.Info.MessageQueue.RemoveAt(index); // Keep queued image paths in sync - if (_queuedImagePaths.TryGetValue(sessionName, out var imageQueue) && index < imageQueue.Count) + lock (_imageQueueLock) { - imageQueue.RemoveAt(index); - if (imageQueue.Count == 0) - _queuedImagePaths.TryRemove(sessionName, out _); + if (_queuedImagePaths.TryGetValue(sessionName, out var imageQueue) && index < imageQueue.Count) + { + imageQueue.RemoveAt(index); + if (imageQueue.Count == 0) + _queuedImagePaths.TryRemove(sessionName, out _); + } } OnStateChanged?.Invoke(); } @@ -1804,7 +1838,10 @@ public void ClearQueue(string sessionName) if (_sessions.TryGetValue(sessionName, out var state)) { state.Info.MessageQueue.Clear(); - _queuedImagePaths.TryRemove(sessionName, out _); + lock (_imageQueueLock) + { + _queuedImagePaths.TryRemove(sessionName, out _); + } OnStateChanged?.Invoke(); } } @@ -1864,6 +1901,8 @@ public void StopReflectionCycle(string sessionName) { var evaluatorName = state.Info.ReflectionCycle.EvaluatorSessionName; state.Info.ReflectionCycle.IsActive = false; + state.Info.ReflectionCycle.IsCancelled = true; + state.Info.ReflectionCycle.CompletedAt = DateTime.Now; // Purge any queued reflection follow-up prompts to prevent zombie iterations state.Info.MessageQueue.RemoveAll(p => 
ReflectionCycle.IsReflectionFollowUpPrompt(p)); Debug($"Reflection cycle stopped for '{sessionName}'"); @@ -1951,8 +1990,11 @@ public bool RenameSession(string oldName, string newName) state.Info.Name = newName; // Move queued image paths to new name - if (_queuedImagePaths.TryRemove(oldName, out var imageQueue)) - _queuedImagePaths[newName] = imageQueue; + lock (_imageQueueLock) + { + if (_queuedImagePaths.TryRemove(oldName, out var imageQueue)) + _queuedImagePaths[newName] = imageQueue; + } if (!_sessions.TryAdd(newName, state)) { @@ -2019,7 +2061,14 @@ public async Task CloseSessionAsync(string name) return false; // Clean up any queued image paths for this session - _queuedImagePaths.TryRemove(name, out _); + lock (_imageQueueLock) + { + _queuedImagePaths.TryRemove(name, out _); + } + + // Clean up per-session model switch lock + if (_modelSwitchLocks.TryRemove(name, out var sem)) + sem.Dispose(); // Track as explicitly closed so merge doesn't re-add from file if (state.Info.SessionId != null) diff --git a/PolyPilot/Services/WsBridgeClient.cs b/PolyPilot/Services/WsBridgeClient.cs index ad11c3a431..ec3213554c 100644 --- a/PolyPilot/Services/WsBridgeClient.cs +++ b/PolyPilot/Services/WsBridgeClient.cs @@ -211,6 +211,14 @@ await SendAsync(BridgeMessage.Create(BridgeMessageTypes.RenameSession, public async Task SendOrganizationCommandAsync(OrganizationCommandPayload cmd, CancellationToken ct = default) => await SendAsync(BridgeMessage.Create(BridgeMessageTypes.OrganizationCommand, cmd), ct); + public async Task SendMultiAgentBroadcastAsync(string groupId, string message, CancellationToken ct = default) => + await SendAsync(BridgeMessage.Create(BridgeMessageTypes.MultiAgentBroadcast, + new MultiAgentBroadcastPayload { GroupId = groupId, Message = message }), ct); + + public async Task CreateMultiAgentGroupAsync(string name, string mode = "Broadcast", string? orchestratorPrompt = null, List? 
sessionNames = null, CancellationToken ct = default) => + await SendAsync(BridgeMessage.Create(BridgeMessageTypes.MultiAgentCreateGroup, + new MultiAgentCreateGroupPayload { Name = name, Mode = mode, OrchestratorPrompt = orchestratorPrompt, SessionNames = sessionNames }), ct); + private readonly System.Collections.Concurrent.ConcurrentDictionary> _dirListRequests = new(); private readonly System.Collections.Concurrent.ConcurrentDictionary> _addRepoRequests = new(); private readonly System.Collections.Concurrent.ConcurrentDictionary> _repoProgressCallbacks = new(); diff --git a/PolyPilot/Services/WsBridgeServer.cs b/PolyPilot/Services/WsBridgeServer.cs index f4cade2967..24c79bc88c 100644 --- a/PolyPilot/Services/WsBridgeServer.cs +++ b/PolyPilot/Services/WsBridgeServer.cs @@ -566,6 +566,32 @@ await SendToClientAsync(clientId, ws, BridgeMessage.Create(BridgeMessageTypes.DirectoriesList, dirResult), ct); break; + case BridgeMessageTypes.MultiAgentBroadcast: + var maReq = msg.GetPayload(); + if (maReq != null && _copilot != null) + { + _ = _copilot.SendToMultiAgentGroupAsync(maReq.GroupId, maReq.Message, ct); + } + break; + + case BridgeMessageTypes.MultiAgentCreateGroup: + var maCreateReq = msg.GetPayload(); + if (maCreateReq != null && _copilot != null) + { + var mode = Enum.TryParse(maCreateReq.Mode, out var m) ? m : MultiAgentMode.Broadcast; + _copilot.CreateMultiAgentGroup(maCreateReq.Name, mode, maCreateReq.OrchestratorPrompt, maCreateReq.SessionNames); + } + break; + + case BridgeMessageTypes.MultiAgentSetRole: + var maRoleReq = msg.GetPayload(); + if (maRoleReq != null && _copilot != null) + { + var role = Enum.TryParse(maRoleReq.Role, out var r) ? 
r : MultiAgentRole.Worker; + _copilot.SetSessionRole(maRoleReq.SessionName, role); + } + break; + case BridgeMessageTypes.FetchImage: var imgReq = msg.GetPayload(); if (imgReq != null) diff --git a/docs/multi-agent-orchestration.md b/docs/multi-agent-orchestration.md new file mode 100644 index 0000000000..cd973d8469 --- /dev/null +++ b/docs/multi-agent-orchestration.md @@ -0,0 +1,444 @@ +# Multi-Agent Orchestration β€” Architecture Spec + +> **Read this before modifying orchestration, sentinel protocol, session reconciliation, or reflection loops.** + +## Overview + +PolyPilot's multi-agent system lets you create a **team of AI sessions** that work together. Each session can use a different AI model. An orchestrator coordinates work dispatch, response collection, and quality evaluation. + +### Key Files + +| File | Purpose | +|------|---------| +| `PolyPilot/Services/CopilotService.Organization.cs` | Orchestration engine (dispatch, reflection loop, reconciliation, group deletion) | +| `PolyPilot/Models/SessionOrganization.cs` | `SessionGroup`, `SessionMeta`, `MultiAgentMode`, `MultiAgentRole` | +| `PolyPilot/Models/ReflectionCycle.cs` | Reflection state, stall detection, sentinel parsing, evaluator prompts | +| `PolyPilot/Models/ModelCapabilities.cs` | `GroupPreset`, `UserPresets` (three-tier merge), built-in presets | +| `PolyPilot/Models/SquadDiscovery.cs` | Squad directory parser (`.squad/` β†’ `GroupPreset`) | +| `PolyPilot/Models/SquadWriter.cs` | Squad directory writer (`GroupPreset` β†’ `.squad/`) | +| `PolyPilot/Services/CopilotService.Events.cs` | TCS completion (IsProcessing β†’ TrySetResult ordering) | +| `PolyPilot/Components/Layout/SessionSidebar.razor` | Preset picker UI (sectioned: From Repo / Built-in / My Presets) | +| `PolyPilot.Tests/MultiAgentRegressionTests.cs` | 37 regression tests covering all known bugs | +| `PolyPilot.Tests/SessionOrganizationTests.cs` | 15 grouping stability tests | +| `PolyPilot.Tests/SquadDiscoveryTests.cs` | 22 Squad 
discovery tests | +| `PolyPilot.Tests/SquadWriterTests.cs` | 15 Squad write-back tests | +| `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json` | Executable CDP test scenarios | + +--- + +## Orchestration Modes + +### Broadcast +Same prompt sent to **all sessions simultaneously**. No orchestrator. Each session responds independently. Use for: comparing model outputs, getting diverse perspectives. + +### Sequential +Prompt sent to sessions **one at a time**. Each session sees previous responses. Use for: chain-of-thought across models, iterative refinement. + +### Orchestrator (Single-Pass) +One orchestrator session plans and delegates: +1. **Plan** — Orchestrator receives user prompt + list of available workers with their models +2. **Dispatch** — Orchestrator emits `@worker:name task` assignments, parsed by `ParseTaskAssignments` +3. **Collect** — Workers execute in parallel (`Task.WhenAll`), each with 10-min timeout +4. **Synthesize** — Worker results sent back to orchestrator for final synthesis + +No iteration. One pass through the loop. + +### OrchestratorReflect (Iterative — The Main Mode) +Same as Orchestrator but **loops** until the goal is met, quality stalls, or max iterations reached. This is the primary mode for serious multi-agent work. + +--- + +## OrchestratorReflect — Detailed Loop + +### Participants +- **1 Orchestrator** — Plans, delegates, synthesizes. Set via `SessionMeta.Role = Orchestrator` +- **N Workers** — Execute assigned tasks in parallel. 
Each can use a different model (`SessionMeta.PreferredModel`) and have a **system prompt** (`SessionMeta.SystemPrompt`) that defines their specialization +- **1 Evaluator** (optional) — Independent quality judge on a separate model (`ReflectionCycle.EvaluatorSessionName`) + +### The Loop (runs in `SendViaOrchestratorReflectAsync`) + +``` +while (IsActive && !IsPaused && CurrentIteration < MaxIterations): + CurrentIteration++ + + Phase 1: PLAN + ├── Iteration 1: BuildOrchestratorPlanningPrompt(userPrompt, workerNames) + └── Iteration 2+: BuildReplanPrompt(lastEvaluation, workerNames, userPrompt) + + Orchestrator responds with task assignments: + @worker:worker-1 Implement the auth module + @worker:worker-2 Write tests for the auth module + + ParseTaskAssignments extracts these → List + If no assignments AND iteration == 1 → error (retry up to 3 times) + If no assignments AND iteration > 1 → orchestrator decided goal is met → break + + Phase 2: DISPATCH + └── Send each assignment to its worker in parallel (Task.WhenAll) + Each worker gets: "You are a worker agent..." 
+ original prompt + assigned task + + Phase 3: COLLECT + └── Wait for all workers (SendPromptAndWaitAsync, 10-min timeout per worker) + Returns List (response, success, duration) + + Phase 4: EVALUATE (two paths) + ├── WITH dedicated evaluator: + │ ├── Orchestrator synthesizes worker results + │ ├── Evaluator scores quality (0.0–1.0) with rationale + │ ├── Score ≥ 0.9 or [[GROUP_REFLECT_COMPLETE]] → goal met → break + │ └── RecordEvaluation tracks trend (Improving/Stable/Degrading) + │ + └── SELF-evaluation (no evaluator): + ├── Orchestrator gets combined synthesis + eval prompt + ├── [[GROUP_REFLECT_COMPLETE]] sentinel → goal met → break + └── [[NEEDS_ITERATION]] sentinel → scored as 0.4, continue + + Phase 5: STALL DETECTION + ├── CheckStall() compares synthesis response to previous + ├── Jaccard token similarity > 0.9 → stall detected + ├── 1st consecutive stall: warn but continue + └── 2nd consecutive stall: IsStalled = true → break + + Phase 6: AUTO-ADJUST + └── AutoAdjustFromFeedback analyzes worker results, may suggest model changes + + SaveOrganization() after each iteration +``` + +### Exit Conditions (whichever hits first) + +| Condition | How Detected | State | +|-----------|-------------|-------| +| ✅ Goal met | Evaluator score ≥ 0.9 or `[[GROUP_REFLECT_COMPLETE]]` sentinel | `GoalMet = true` | +| ⏱️ Max iterations | `CurrentIteration >= MaxIterations` | `IsCancelled = true` | +| ⚠️ Stalled | 2 consecutive responses with >90% Jaccard similarity | `IsStalled = true, IsCancelled = true` | +| ⚠️ Error budget | 3 consecutive errors within a single iteration | `IsStalled = true, IsCancelled = true` | +| 🛑 Cancelled | CancellationToken triggered or user `StopGroupReflection` | `IsCancelled = true` | +| ⏸️ Paused | User set `IsPaused = true` | Loop condition fails | + +**IsCancelled invariant:** Every non-success exit MUST set `IsCancelled = true`. 
This allows `BuildCompletionSummary()` to distinguish successful completion from abnormal termination. `GoalMet = true` paths must NOT set `IsCancelled`. + +--- + +## Invariants — What Breaks If You Violate These + +### 1. TCS Ordering: `IsProcessing = false` BEFORE `TrySetResult` + +**Where:** `CopilotService.Events.cs` → `CompleteResponse()` and `SessionErrorEvent` handler + +**The rule:** When completing a response via the TaskCompletionSource (TCS), you MUST set `IsProcessing = false` BEFORE calling `TrySetResult()` or `TrySetException()`. + +**Why:** In reflection loops, the TCS continuation runs **synchronously**. The next `SendPromptAsync` in the loop checks `IsProcessing` — if it's still `true`, it throws "already processing". This killed reflection loops after 1 iteration. + +```csharp +// ✅ CORRECT ORDER +state.IsProcessing = false; // 1. Clear flag first +state.ResponseCompletion?.TrySetResult(response); // 2. Then signal completion + +// ❌ WRONG — breaks reflection loops +state.ResponseCompletion?.TrySetResult(response); // Continuation runs NOW +state.IsProcessing = false; // Too late — next SendPromptAsync already threw +``` + +**Same rule applies to error paths** (`TrySetException`). + +### 2. Reconciliation Must Not Scatter Multi-Agent Sessions + +**Where:** `CopilotService.Organization.cs` → `ReconcileOrganization()` + +**The rule:** Sessions that belong to multi-agent groups must NOT be auto-moved to repo groups during reconciliation. Two protections: + +1. **Active group members**: If a session's `GroupId` matches any `IsMultiAgent` group, skip it +2. **Orphaned multi-agent sessions** (group was deleted): If `Role == Orchestrator` or `PreferredModel != null`, don't auto-move to repo groups — these markers indicate the session was part of a multi-agent group + +**Why:** Reconciliation runs twice on startup (once in `LoadOrganization`, once after `RestorePreviousSessionsAsync`). 
Without protection, it redistributes multi-agent sessions across repo-based groups, destroying the team. + +### 3. Never Edit `organization.json` While the App Is Running + +**Why:** The app calls `SaveOrganization()` from ~30 places, constantly overwriting the file with its in-memory state. Any external edits are lost within seconds. To fix organization state: kill app → edit file → relaunch. + +### 4. Sentinel Protocol Is Case-Insensitive But Must Be on Its Own Line + +**Sentinels:** +- `[[GROUP_REFLECT_COMPLETE]]` — Goal achieved, stop iterating +- `[[NEEDS_ITERATION]]` — More work needed, continue +- `[[REFLECTION_COMPLETE]]` — Single-agent reflection goal met + +**Detection:** The multi-agent sentinels (`[[GROUP_REFLECT_COMPLETE]]`, `[[NEEDS_ITERATION]]`) are matched case-insensitively using `StringComparison.OrdinalIgnoreCase`. The single-agent sentinel is matched with the strict multiline regex `^\s*\[\[REFLECTION_COMPLETE\]\]\s*$`, so it must appear alone on its own line. + +### 5. Worker Prompt Must Include Original User Request + +**Where:** `ExecuteWorkerAsync` (line ~772) + +**Why:** Workers receive only their assigned subtask from the orchestrator. Without the original user request as context, they can't understand the broader goal. The prompt format is: + +``` +You are a worker agent. Complete the following task thoroughly. + +## Original User Request (context) +{originalPrompt} + +## Your Assigned Task +{task} +``` + +### 6. Orphaned Event Handlers Must Not Mutate State + +**Where:** `CopilotService.Events.cs` → `HandleSessionEvent`, `isCurrentState` gate + +**The rule:** When a session is reconnected, the old session's event handler becomes orphaned. ALL events from orphaned handlers must be blocked (not just terminal events). The `isCurrentState` check compares the captured state object with `_sessions[sessionName]` — if they don't match, the handler is orphaned. + +**Why:** Orphaned handlers can produce ghost text deltas, phantom tool executions, and stale history entries that corrupt the current session's state. + +### 7. 
Session Reconnect: Swap `_sessions` Before Wiring Handler + +**Where:** `CopilotService.cs` → reconnect logic + +**The rule:** `_sessions[sessionName] = newState` MUST execute BEFORE `newSession.On(evt => HandleSessionEvent(newState, evt))`. If the handler is wired first, early events from the new session see `isCurrentState=false` (because `_sessions` still points to old state) and get incorrectly dropped. + +### 8. Image Queue: ALL Mutations Under `_imageQueueLock` + +**Where:** `CopilotService.cs` and `CopilotService.Events.cs` — all `_queuedImagePaths` access + +**The rule:** Every mutation of `_queuedImagePaths` (enqueue, dequeue, remove, clear, rename, close) must be inside `lock (_imageQueueLock)`. The inner `List<T>` collections are not thread-safe. + +### 9. `IsResumed` Must Be Cleared on ALL Terminal Paths + +**Where:** `CopilotService.Events.cs` → `CompleteResponse`, `SessionErrorEvent`, watchdog timeout + +**The rule:** `state.Info.IsResumed = false` must be set in every code path that sets `IsProcessing = false`. Otherwise, subsequent turns inherit the resumed session's 600s tool timeout. + +### 10. All TCS Must Use `RunContinuationsAsynchronously` + +**Where:** All `new TaskCompletionSource<T>()` in `CopilotService.Events.cs` + +**The rule:** Always use `new TaskCompletionSource<T>(TaskCreationOptions.RunContinuationsAsynchronously)`. Without this, TCS continuations can run inline on the completing thread, causing reentrancy and stack overflows in reflection loops. + +--- + +## Stall Detection + +Two mechanisms, both in `ReflectionCycle.CheckStall()`: + +1. **Exact string match** — Sliding window of last 5 full response strings. If current response matches any (full string equality, no hash) → stall. +2. **Jaccard token similarity** — Tokenize current and previous response by whitespace. If intersection/union > 0.9 → stall. + +**Tolerance:** 2 consecutive stalls required before stopping. First stall generates a warning. 
This prevents false positives from models that happen to produce similar phrasing once. + +**Reset:** `ResetStallDetection()` clears history. Called when resuming from pause. + +--- + +## Quality Trend Tracking + +`ReflectionCycle.EvaluationHistory` records per-iteration: +- `Score` (0.0–1.0) +- `Rationale` (string) +- `EvaluatorModel` (which model evaluated) +- `Timestamp` + +`RecordEvaluation()` returns a `QualityTrend`: +- **Improving** — Latest score > previous + 0.1 +- **Stable** — Latest score within ±0.1 of previous +- **Degrading** — Latest score < previous - 0.1 + +Degrading trend triggers a `PendingAdjustments` warning suggesting model changes. + +--- + +## Session Organization & Persistence + +### Data Model + +``` +OrganizationState +├── Groups: List<SessionGroup> +│ ├── Id (GUID string) +│ ├── Name +│ ├── IsMultiAgent (bool) +│ ├── OrchestratorMode (Broadcast/Sequential/Orchestrator/OrchestratorReflect) +│ ├── OrchestratorPrompt (optional system prompt for orchestrator) +│ ├── ReflectionState: ReflectionCycle? 
(active cycle state) +│ ├── SharedContext (from decisions.md — prepended to worker prompts) +│ ├── RoutingContext (from routing.md — injected into orchestrator planning) +│ ├── WorktreeId, RepoId (links to repo/worktree) +│ └── SortOrder +│ +└── Sessions: List<SessionMeta> + ├── SessionName + ├── GroupId (→ SessionGroup.Id) + ├── Role (Worker/Orchestrator) + ├── PreferredModel (e.g., "claude-opus-4.6") + ├── SystemPrompt (worker specialization, e.g., "You are a security auditor...") + ├── WorktreeId + └── IsPinned, ManualOrder +``` + +### Persistence Flow +- **File:** `~/.polypilot/organization.json` +- **Save:** `SaveOrganization()` called from ~30 places (group CRUD, session moves, reflection state updates) +- **Load:** `LoadOrganization()` on startup → deserialize → `ReconcileOrganization()` +- **Reconciliation:** Matches sessions to repo groups by `WorktreeId`/`RepoId`, prunes stale groups, protects multi-agent sessions + +### Group Presets +`CreateGroupFromPresetAsync(GroupPreset)` creates a full team: +1. Creates `SessionGroup` with mode and metadata +2. Creates orchestrator session with `Role = Orchestrator`, `PreferredModel` set +3. Creates N worker sessions with `PreferredModel` and `SystemPrompt` set per worker +4. All sessions get `WorktreeId` if provided + +**Worker System Prompts:** Each worker can have a `SystemPrompt` defining its specialization. This prompt is: +- Included in `BuildOrchestratorPlanningPrompt` so the orchestrator knows each worker's expertise and routes tasks accordingly +- Prepended to the worker's task in `ExecuteWorkerAsync` (replaces the generic "You are a worker agent" prompt) +- Set via `SetSessionSystemPrompt(sessionName, prompt)` or via `GroupPreset.WorkerSystemPrompts` + +**Critical:** Both `Role` and `PreferredModel` must be set on all sessions. These are the markers that `ReconcileOrganization` uses to identify multi-agent sessions. 
Without them, sessions get scattered on restart. + +### Group Deletion + +Deleting a group via `DeleteGroup(groupId)` behaves differently based on group type: + +- **Multi-agent groups (`IsMultiAgent == true`):** All sessions in the group are **removed from the organization and closed asynchronously**. Multi-agent sessions are meaningless without their group — they have orchestrator/worker roles, preferred models, and system prompts that only make sense within the team context. Leaving them orphaned in the default group (the old behavior) caused confusion in the sidebar. + +- **Regular groups (repo groups, etc.):** Sessions are **moved to the default group**. These are standalone sessions that the user may still want to access. + +**Invariant:** After `DeleteGroup` on a multi-agent group, `Organization.Sessions` must contain zero entries with the deleted group's ID. The async close fires `CloseSessionAsync` on each session (disposing the SDK session, cleaning up image queues, and tracking closed session IDs to prevent merge re-addition). + +--- + +## Error Handling in Reflection Loops + +``` +try { + // ... full iteration (plan → dispatch → collect → evaluate) +} +catch (OperationCanceledException) { + IsCancelled = true; // Mark as cancelled for BuildCompletionSummary + throw; // User cancellation propagates +} +catch (Exception ex) { + CurrentIteration--; // Retry same iteration, don't skip ahead + ConsecutiveErrors++; // Separate error counter (ConsecutiveStalls tracks repetition) + if (ConsecutiveErrors >= 3) { + IsStalled = true; // Give up on the 3rd consecutive error + IsCancelled = true; // Non-success termination + break; + } + await Task.Delay(2000); // Back off before retry +} +``` + +This prevents a single transient error (network hiccup, model timeout) from killing the entire reflection cycle. `ConsecutiveErrors` resets to 0 on successful iterations (alongside `ConsecutiveStalls`), so errors must be truly consecutive. 
+ +--- + +## Task Assignment Protocol + +The orchestrator's planning prompt tells it to emit assignments in this format: + +``` +@worker:worker-name-1 Description of the task for this worker +@worker:worker-name-2 Description of the task for this worker +``` + +`ParseTaskAssignments` uses regex `@worker:(\S+)\s*([\s\S]*?)(?:@end|(?=@worker:)|$)` to extract these; a task description ends at an optional `@end` marker, at the next `@worker:` tag, or at the end of the response. Workers are matched against the `availableWorkers` list (case-insensitive, fuzzy-matched). + +If no `@worker:` assignments are found, the orchestrator is treated as having handled the request directly and the loop exits. + +--- + +## Testing + +### Unit Tests +- **`MultiAgentRegressionTests.cs`** (37 tests) — JSON corruption, reconciliation scattering, preset markers, mode enums, reflection loop logic, TCS ordering, lifecycle scenarios, persona tests +- **`SessionOrganizationTests.cs`** → `GroupingStabilityTests` (15 tests) — JSON round-trips, delete+cleanup, orphan handling, multi-agent vs regular group deletion +- **`SquadDiscoveryTests.cs`** (22 tests) — Squad directory discovery, team.md parsing, charter→system-prompt, decisions/routing context, three-tier merge, legacy `.ai-team/` compat +- **`ScenarioReferenceTests.cs`** — Validates scenario JSON structure, unique IDs, Squad integration scenario presence + +### Executable Scenarios +- **`PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`** — CDP-based scenarios for MauiDevFlow testing against a running app + +### What to Test After Changes +1. **Changed orchestration logic?** → Run `MultiAgentRegressionTests` +2. **Changed reconciliation?** → Run `GroupingStabilityTests` +3. **Changed TCS/event handling?** → Run `ProcessingWatchdogTests` + verify reflection loop completes +4. **Changed sentinel parsing?** → Run `ReflectionCycleTests` +5. 
**Changed session persistence?** → Run full suite, verify `organization.json` survives restart + +--- + +## Squad Integration — Repo-Level Team Discovery + +### Overview + +PolyPilot can discover and load team definitions from [bradygaster/squad](https://github.com/bradygaster/squad) format directories (`.squad/` or the legacy `.ai-team/`). Any repository that has been "squadified" automatically gets its teams available as presets in PolyPilot's multi-agent group creation flow. + +### How Squad Maps to PolyPilot + +| Squad File | PolyPilot Concept | How It's Used | +|------------|-------------------|---------------| +| `.squad/team.md` | `SessionGroup` + workers | Roster parsed for agent names and roles | +| `.squad/agents/{name}/charter.md` | `SessionMeta.SystemPrompt` | Charter content becomes worker system prompt | +| `.squad/routing.md` | Orchestrator planning context | Injected into `BuildOrchestratorPlanningPrompt` | +| `.squad/decisions.md` | Shared worker context | Prepended to all worker prompts as shared team knowledge | +| Squad coordinator | `MultiAgentMode.OrchestratorReflect` | Squad's iterative coordinator maps to PolyPilot's reflect loop | + +### Discovery Flow + +1. User clicks **🤖 Multi** → selects a worktree +2. `SquadDiscovery.Discover(worktreePath)` scans for `.squad/` or `.ai-team/` +3. If found, parses `team.md` + agent charters → builds a `GroupPreset` +4. Preset appears in the picker under the **"📂 From Repo (Squad)"** section, above built-in presets +5. User clicks the Squad preset → `CreateGroupFromPresetAsync` creates the group with all agents and their charters as system prompts + +### Preset Priority (Three-Tier Cascade) + +``` +Built-in presets < User presets (~/.polypilot/presets.json) < Repo teams (.squad/) +``` + +Repo teams shadow built-in/user presets with the same name when working in that repo's worktree. 
+ +### Squad Write-Back + +When a user saves a multi-agent group as a preset and the group is associated with a worktree, PolyPilot writes the team definition back to `.squad/` format in the worktree root: + +1. **`SaveGroupAsPreset`** resolves the worktree path from the group's `WorktreeId` +2. **`SquadWriter.WriteFromGroup`** converts the live `SessionGroup` + `SessionMeta` into Squad files: + - `.squad/team.md` — Team name + agent roster table (Member | Role) + - `.squad/agents/{name}/charter.md` — Worker system prompt as charter + - `.squad/decisions.md` — Shared context (from `GroupPreset.SharedContext`) + - `.squad/routing.md` — Routing context (from `GroupPreset.RoutingContext`) +3. The preset is also saved to `presets.json` as a personal backup + +Agent names are sanitized: team-name prefixes are stripped (e.g., "Code Review Team-worker-1" → "worker-1"), names are lowercased, and non-alphanumeric characters are replaced with hyphens. Roles are derived from the first sentence of the system prompt, with any leading "You are a/an" prefix stripped. + +This enables round-tripping: discover a Squad team → modify it in PolyPilot → save back → others can use the updated team definition from the repo. 
+ +### What PolyPilot Does NOT Do with Squad + +- **No `history.md` persistence** — Squad agents accumulate learnings; PolyPilot sessions are stateless across restarts +- **No Scribe agent** — Squad's silent decision-logger is not replicated +- **No GitHub Actions integration** — Squad's label triage workflows are out of scope +- **No casting system** — Squad's thematic name universes are not replicated; PolyPilot uses agent names as-is + +### Security + +- Agent charters (system prompts) are capped at 4,000 characters +- Model slugs are validated against `ModelCapabilities.AllModels`; unknown slugs fall back to app default +- Repo presets show a **📂** source badge so users know the definition came from the repo +- No file-read directives or code execution from parsed files + +### GroupPreset Extensions for Squad Support + +```csharp +public record GroupPreset(...) +{ + public bool IsUserDefined { get; init; } + public bool IsRepoLevel { get; init; } // Loaded from .squad/ + public string? SourcePath { get; init; } // Path to .squad/ dir + public string?[]? WorkerSystemPrompts { get; init; } + public string? SharedContext { get; init; } // From decisions.md + public string? RoutingContext { get; init; } // From routing.md +} +``` diff --git a/recommendation.md b/recommendation.md new file mode 100644 index 0000000000..df27853302 --- /dev/null +++ b/recommendation.md @@ -0,0 +1,41 @@ +# Recommendation: Hybrid Architecture (Option C) + +I recommend adopting **Option C (Hybrid)** as the architectural target, implemented in two phases. + +## Phase 1 (Immediate PR): "Team Context" +Implement **Option A** behavior using the **Option C** data model. +* **Mechanism:** When a user assigns a Repository/Worktree to a `SessionGroup`, propagate that `WorktreeId` to the `SessionMeta` of **every agent** in that group. +* **Result:** All agents share the same directory and branch. +* **User Experience:** "I assign this team to feature-branch-x." 
+ +## Phase 2 (Future): "Agent Independence" +Expose the existing per-agent `WorktreeId` in the UI for advanced scenarios. +* **Mechanism:** Allow power users to override the `WorktreeId` for specific agents (e.g., "Reviewer Agent" checks out `main` while "Coder Agent" is on `feature-branch`). +* **User Experience:** "I want this specific agent to look at a different version of the code." + +## Reasoning & Tradeoffs + +1. **Future-Proofing (Why not A):** `SessionMeta` already has `WorktreeId`. Hardcoding a single `WorktreeId` on `SessionGroup` would restrict us later. By using the per-session field (even if they all point to the same ID initially), we keep the architecture flexible for free. +2. **Complexity Management (Why not B):** Forcing per-agent worktrees now creates massive complexity (merging, disk space, synchronization). Shared worktrees are sufficient for 90% of current use cases (collaborative coding, pair programming). +3. **Correct Abstraction:** A "Team" usually works on a "Project" (Repo/Branch). It is the natural default. Divergence is an exception. + +## Implementation Plan + +1. **Update `CreateMultiAgentGroupAsync`:** + * Accept an optional `repoId` and `worktreeId`. + * If provided, assign `WorktreeId` to the `SessionMeta` of the Orchestrator and all Workers. + * Ensure the `SessionGroup` also stores the `RepoId` for context. + +2. **Update `RepoManager.LinkSessionToWorktree`:** + * Ensure it can handle multiple sessions linking to the same worktree (currently it has a single `SessionName` field, which might be a limitation if strict 1:1 mapping is enforced). **Crucial Check:** `WorktreeInfo.SessionName` is a single string. This needs to change to support multiple sessions (or be ignored for multi-agent groups). + +## Interaction with Reflection/Orchestration + +* **Orchestrator Mode:** The Orchestrator agent typically plans and delegates. Sharing a worktree means the Orchestrator sees the *exact state* the workers are producing in real-time. 
This is generally beneficial for immediate feedback loops. +* **OrchestratorReflect Mode:** In this mode, the system might benefit from an "isolation sandbox" where a worker tries a change in a separate worktree, runs tests, and only merges if successful. This is a strong argument for **Option C (Hybrid)** in the long term. A shared worktree (Option A) risks breaking the build for the whole team during experimental changes. +* **Recommendation:** Start with shared worktrees for simplicity. For advanced reflection cycles that require safe experimentation, leverage the **Option C** capability later to spawn ephemeral worktrees for specific worker tasks. + +## Critical Code Change Required +`WorktreeInfo` currently has `public string? SessionName { get; set; }`. +* **Issue:** This implies 1 worktree = 1 session. +* **Fix:** For Phase 1, treat `SessionName` as the "Primary/Owner" session (e.g., the Orchestrator). The UI should rely on `SessionMeta.WorktreeId` to find *all* sessions associated with a worktree, rather than relying on the back-pointer in `WorktreeInfo`.