From a7414f01d0e32c0a3a6fa316baf0b43d54b95dcb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Feb 2026 05:08:01 +0000 Subject: [PATCH 01/48] Initial plan From 4bd6b79aedfe162008dc5ad3190fce92ee3cd3ec Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Feb 2026 05:17:08 +0000 Subject: [PATCH 02/48] Add multi-agent orchestration mode support with model, service, bridge, dashboard UI, and tests Co-authored-by: PureWeen <5375137+PureWeen@users.noreply.github.com> --- PolyPilot.Tests/BridgeMessageTests.cs | 70 ++++++ PolyPilot.Tests/SessionOrganizationTests.cs | 142 +++++++++++ PolyPilot/Components/Pages/Dashboard.razor | 96 +++++++- .../Components/Pages/Dashboard.razor.css | 77 ++++++ PolyPilot/Models/BridgeMessages.cs | 28 +++ PolyPilot/Models/SessionOrganization.cs | 34 +++ .../Services/CopilotService.Organization.cs | 224 ++++++++++++++++++ PolyPilot/Services/WsBridgeServer.cs | 17 ++ 8 files changed, 687 insertions(+), 1 deletion(-) diff --git a/PolyPilot.Tests/BridgeMessageTests.cs b/PolyPilot.Tests/BridgeMessageTests.cs index 304e8bd662..20c8cea00d 100644 --- a/PolyPilot.Tests/BridgeMessageTests.cs +++ b/PolyPilot.Tests/BridgeMessageTests.cs @@ -436,4 +436,74 @@ public void AttentionNeededPayload_AllReasons_RoundTrip(AttentionReason reason) Assert.Equal(reason, restored!.Reason); } + + [Fact] + public void MultiAgentBroadcastPayload_RoundTrips() + { + var payload = new MultiAgentBroadcastPayload + { + GroupId = "group-123", + Message = "Build the feature" + }; + var msg = BridgeMessage.Create(BridgeMessageTypes.MultiAgentBroadcast, payload); + var json = msg.Serialize(); + var restored = BridgeMessage.Deserialize(json)!.GetPayload(); + + Assert.NotNull(restored); + Assert.Equal("group-123", restored!.GroupId); + Assert.Equal("Build the feature", restored.Message); + } + + [Fact] + public void MultiAgentCreateGroupPayload_RoundTrips() + 
{ + var payload = new MultiAgentCreateGroupPayload + { + Name = "Dev Team", + Mode = "Orchestrator", + OrchestratorPrompt = "Coordinate the workers", + SessionNames = new List { "session-1", "session-2" } + }; + var msg = BridgeMessage.Create(BridgeMessageTypes.MultiAgentCreateGroup, payload); + var json = msg.Serialize(); + var restored = BridgeMessage.Deserialize(json)!.GetPayload(); + + Assert.NotNull(restored); + Assert.Equal("Dev Team", restored!.Name); + Assert.Equal("Orchestrator", restored.Mode); + Assert.Equal("Coordinate the workers", restored.OrchestratorPrompt); + Assert.Equal(2, restored.SessionNames!.Count); + Assert.Contains("session-1", restored.SessionNames); + } + + [Fact] + public void MultiAgentProgressPayload_RoundTrips() + { + var payload = new MultiAgentProgressPayload + { + GroupId = "group-1", + TotalSessions = 3, + CompletedSessions = 1, + ProcessingSessions = 2, + CompletedSessionNames = new List { "worker-1" } + }; + var msg = BridgeMessage.Create(BridgeMessageTypes.MultiAgentProgress, payload); + var json = msg.Serialize(); + var restored = BridgeMessage.Deserialize(json)!.GetPayload(); + + Assert.NotNull(restored); + Assert.Equal("group-1", restored!.GroupId); + Assert.Equal(3, restored.TotalSessions); + Assert.Equal(1, restored.CompletedSessions); + Assert.Equal(2, restored.ProcessingSessions); + Assert.Single(restored.CompletedSessionNames); + } + + [Fact] + public void MultiAgentMessageTypes_AreCorrectStrings() + { + Assert.Equal("multi_agent_broadcast", BridgeMessageTypes.MultiAgentBroadcast); + Assert.Equal("multi_agent_create_group", BridgeMessageTypes.MultiAgentCreateGroup); + Assert.Equal("multi_agent_progress", BridgeMessageTypes.MultiAgentProgress); + } } diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 5200c71e40..15927d08a4 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -115,4 +115,146 @@ public void 
OrganizationCommandPayload_Serializes() Assert.Equal("pin", deserialized!.Command); Assert.Equal("test-session", deserialized.SessionName); } + + [Fact] + public void SessionGroup_MultiAgent_DefaultsToFalse() + { + var group = new SessionGroup { Name = "Test" }; + Assert.False(group.IsMultiAgent); + Assert.Equal(MultiAgentMode.Broadcast, group.OrchestratorMode); + Assert.Null(group.OrchestratorPrompt); + } + + [Fact] + public void SessionGroup_MultiAgent_Serializes() + { + var group = new SessionGroup + { + Name = "Multi-Agent Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator, + OrchestratorPrompt = "You are the lead coordinator." + }; + + var json = JsonSerializer.Serialize(group); + var deserialized = JsonSerializer.Deserialize(json); + + Assert.NotNull(deserialized); + Assert.True(deserialized!.IsMultiAgent); + Assert.Equal(MultiAgentMode.Orchestrator, deserialized.OrchestratorMode); + Assert.Equal("You are the lead coordinator.", deserialized.OrchestratorPrompt); + } + + [Fact] + public void SessionMeta_Role_DefaultsToWorker() + { + var meta = new SessionMeta { SessionName = "test" }; + Assert.Equal(MultiAgentRole.Worker, meta.Role); + } + + [Fact] + public void SessionMeta_Role_SerializesAsString() + { + var meta = new SessionMeta + { + SessionName = "leader", + Role = MultiAgentRole.Orchestrator + }; + var json = JsonSerializer.Serialize(meta); + Assert.Contains("\"Orchestrator\"", json); + + var deserialized = JsonSerializer.Deserialize(json); + Assert.NotNull(deserialized); + Assert.Equal(MultiAgentRole.Orchestrator, deserialized!.Role); + } + + [Fact] + public void MultiAgentMode_AllValues() + { + Assert.Equal(3, Enum.GetValues().Length); + Assert.True(Enum.IsDefined(MultiAgentMode.Broadcast)); + Assert.True(Enum.IsDefined(MultiAgentMode.Sequential)); + Assert.True(Enum.IsDefined(MultiAgentMode.Orchestrator)); + } + + [Fact] + public void MultiAgentMode_SerializesAsString() + { + var group = new SessionGroup + { + Name = 
"test", + OrchestratorMode = MultiAgentMode.Sequential + }; + var json = JsonSerializer.Serialize(group); + Assert.Contains("\"Sequential\"", json); + } + + [Fact] + public void OrganizationState_MultiAgentGroup_RoundTrips() + { + var state = new OrganizationState(); + var maGroup = new SessionGroup + { + Id = "ma-group-1", + Name = "Dev Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator, + OrchestratorPrompt = "Coordinate the workers", + SortOrder = 1 + }; + state.Groups.Add(maGroup); + state.Sessions.Add(new SessionMeta + { + SessionName = "orchestrator-session", + GroupId = "ma-group-1", + Role = MultiAgentRole.Orchestrator + }); + state.Sessions.Add(new SessionMeta + { + SessionName = "worker-1", + GroupId = "ma-group-1", + Role = MultiAgentRole.Worker + }); + + var json = JsonSerializer.Serialize(state); + var deserialized = JsonSerializer.Deserialize(json); + + Assert.NotNull(deserialized); + var group = deserialized!.Groups.Find(g => g.Id == "ma-group-1"); + Assert.NotNull(group); + Assert.True(group!.IsMultiAgent); + Assert.Equal(MultiAgentMode.Orchestrator, group.OrchestratorMode); + Assert.Equal("Coordinate the workers", group.OrchestratorPrompt); + + var orchSession = deserialized.Sessions.Find(s => s.SessionName == "orchestrator-session"); + Assert.NotNull(orchSession); + Assert.Equal(MultiAgentRole.Orchestrator, orchSession!.Role); + + var workerSession = deserialized.Sessions.Find(s => s.SessionName == "worker-1"); + Assert.NotNull(workerSession); + Assert.Equal(MultiAgentRole.Worker, workerSession!.Role); + } + + [Fact] + public void LegacyState_WithoutMultiAgent_DeserializesGracefully() + { + // Simulates loading organization.json from before multi-agent was added + var json = """ + { + "Groups": [ + {"Id": "_default", "Name": "Sessions", "SortOrder": 0} + ], + "Sessions": [ + {"SessionName": "old-session", "GroupId": "_default", "IsPinned": false} + ], + "SortMode": "LastActive" + } + """; + var state = 
JsonSerializer.Deserialize(json); + Assert.NotNull(state); + Assert.False(state!.Groups[0].IsMultiAgent); + Assert.Equal(MultiAgentMode.Broadcast, state.Groups[0].OrchestratorMode); + Assert.Null(state.Groups[0].OrchestratorPrompt); + Assert.Equal(MultiAgentRole.Worker, state.Sessions[0].Role); + } } diff --git a/PolyPilot/Components/Pages/Dashboard.razor b/PolyPilot/Components/Pages/Dashboard.razor index 0a9a9eed2d..54c3414fe1 100644 --- a/PolyPilot/Components/Pages/Dashboard.razor +++ b/PolyPilot/Components/Pages/Dashboard.razor @@ -113,9 +113,17 @@ @onblur="CommitDashGroup" @onkeydown="HandleDashGroupKeyDown" /> } + else if (isAddingMultiAgentGroup) + { + + } else { + } + + }
@foreach (var session in groupSessions) { @@ -227,6 +269,7 @@ private string? cardMenuSession; private string? cardRenamingSession; private bool isAddingDashGroup; + private bool isAddingMultiAgentGroup; private string? _focusedInputId; private string? _lastActiveSession; private int _cursorStart; @@ -1639,6 +1682,57 @@ else if (e.Key == "Escape") isAddingDashGroup = false; } + private async Task CommitMultiAgentGroup() + { + var name = await JS.InvokeAsync("getElementValue", "dashNewMultiAgentGroupInput"); + isAddingMultiAgentGroup = false; + if (!string.IsNullOrWhiteSpace(name)) + { + CopilotService.CreateMultiAgentGroup(name.Trim()); + } + } + + private async Task HandleMultiAgentGroupKeyDown(KeyboardEventArgs e) + { + if (e.Key == "Enter") await CommitMultiAgentGroup(); + else if (e.Key == "Escape") isAddingMultiAgentGroup = false; + } + + private void OnMultiAgentModeChanged(string groupId, ChangeEventArgs e) + { + if (e.Value is string val && Enum.TryParse(val, out var mode)) + { + CopilotService.SetMultiAgentMode(groupId, mode); + } + } + + private async Task SendToMultiAgentGroup(string groupId) + { + var inputId = $"ma-input-{groupId}"; + var prompt = await JS.InvokeAsync("getElementValue", inputId); + if (string.IsNullOrWhiteSpace(prompt)) return; + + await JS.InvokeVoidAsync("clearElementValue", inputId); + + try + { + _ = CopilotService.SendToMultiAgentGroupAsync(groupId, prompt.Trim()).ContinueWith(t => + { + if (t.IsFaulted) + { + InvokeAsync(() => + { + Console.WriteLine($"Error sending to multi-agent group: {t.Exception?.InnerException?.Message}"); + }); + } + }); + } + catch (Exception ex) + { + Console.WriteLine($"Error sending to multi-agent group: {ex.Message}"); + } + } + private async Task SaveDraftsAndCursor() { var json = await JS.InvokeAsync("eval", @" diff --git a/PolyPilot/Components/Pages/Dashboard.razor.css b/PolyPilot/Components/Pages/Dashboard.razor.css index eb43b82654..bced1eac1d 100644 --- 
a/PolyPilot/Components/Pages/Dashboard.razor.css +++ b/PolyPilot/Components/Pages/Dashboard.razor.css @@ -372,6 +372,83 @@ color: var(--text-muted); } +/* Multi-agent group styles */ +.group-divider.multi-agent-group { + border-bottom-color: rgba(99, 102, 241, 0.3); + background: linear-gradient(90deg, rgba(99, 102, 241, 0.05) 0%, transparent 100%); +} +.group-divider.multi-agent-group:hover { + border-bottom-color: rgba(99, 102, 241, 0.5); +} +.group-divider-badge { + font-size: var(--type-callout); + flex-shrink: 0; +} +.group-divider-mode { + flex-shrink: 0; +} +.ma-mode-select { + background: var(--control-bg); + color: var(--text-dim); + border: 1px solid var(--border-color); + border-radius: 4px; + font-size: var(--type-caption1); + padding: 0.15rem 0.35rem; + cursor: pointer; +} +.group-divider-progress { + font-size: var(--type-caption1); + color: rgba(99, 102, 241, 0.8); + font-weight: 500; +} +.multi-agent-input-bar { + display: flex; + align-items: flex-end; + gap: 0.5rem; + padding: 0.5rem 0.25rem; + margin-bottom: 0.5rem; +} +.ma-broadcast-input { + flex: 1; + background: var(--control-bg); + color: var(--text-primary); + border: 1px solid var(--border-color); + border-radius: 8px; + padding: 0.5rem 0.75rem; + font-size: var(--type-body); + font-family: inherit; + resize: none; + min-height: 2.2rem; + line-height: 1.4; +} +.ma-broadcast-input:focus { + outline: none; + border-color: rgba(99, 102, 241, 0.6); + box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.15); +} +.ma-broadcast-input::placeholder { + color: var(--text-muted); +} +.ma-send-btn { + background: rgba(99, 102, 241, 0.15); + color: rgba(99, 102, 241, 0.9); + border: 1px solid rgba(99, 102, 241, 0.3); + border-radius: 8px; + padding: 0.45rem 0.85rem; + font-size: var(--type-callout); + font-weight: 500; + cursor: pointer; + white-space: nowrap; + transition: all 0.15s ease; +} +.ma-send-btn:hover { + background: rgba(99, 102, 241, 0.25); + border-color: rgba(99, 102, 241, 0.5); +} 
+.multi-agent-btn { + color: rgba(99, 102, 241, 0.9) !important; +} + /* Pinned card styles */ .session-card.pinned { border-color: rgba(251,191,36,0.25); diff --git a/PolyPilot/Models/BridgeMessages.cs b/PolyPilot/Models/BridgeMessages.cs index ab55281bdb..831832f8bf 100644 --- a/PolyPilot/Models/BridgeMessages.cs +++ b/PolyPilot/Models/BridgeMessages.cs @@ -81,9 +81,12 @@ public static class BridgeMessageTypes public const string AbortSession = "abort_session"; public const string OrganizationCommand = "organization_command"; public const string ListDirectories = "list_directories"; + public const string MultiAgentBroadcast = "multi_agent_broadcast"; + public const string MultiAgentCreateGroup = "multi_agent_create_group"; // Server → Client (response) public const string DirectoriesList = "directories_list"; + public const string MultiAgentProgress = "multi_agent_progress"; } // --- Server → Client payloads --- @@ -281,3 +284,28 @@ public class AttentionNeededPayload public AttentionReason Reason { get; set; } public string Summary { get; set; } = ""; } + +// --- Multi-agent orchestration payloads --- + +public class MultiAgentBroadcastPayload +{ + public string GroupId { get; set; } = ""; + public string Message { get; set; } = ""; +} + +public class MultiAgentCreateGroupPayload +{ + public string Name { get; set; } = ""; + public string Mode { get; set; } = "Broadcast"; + public string? OrchestratorPrompt { get; set; } + public List? 
SessionNames { get; set; } +} + +public class MultiAgentProgressPayload +{ + public string GroupId { get; set; } = ""; + public int TotalSessions { get; set; } + public int CompletedSessions { get; set; } + public int ProcessingSessions { get; set; } + public List CompletedSessionNames { get; set; } = new(); +} diff --git a/PolyPilot/Models/SessionOrganization.cs b/PolyPilot/Models/SessionOrganization.cs index b6a3451e04..134fca77c4 100644 --- a/PolyPilot/Models/SessionOrganization.cs +++ b/PolyPilot/Models/SessionOrganization.cs @@ -13,6 +13,15 @@ public class SessionGroup public bool IsCollapsed { get; set; } /// If set, this group auto-tracks a repository managed by RepoManager. public string? RepoId { get; set; } + + /// When true, this group operates as a multi-agent orchestration group. + public bool IsMultiAgent { get; set; } + + /// The orchestration mode for multi-agent groups. + public MultiAgentMode OrchestratorMode { get; set; } = MultiAgentMode.Broadcast; + + /// Optional system prompt appended to all sessions in this multi-agent group. + public string? OrchestratorPrompt { get; set; } } public class SessionMeta @@ -23,6 +32,9 @@ public class SessionMeta public int ManualOrder { get; set; } /// Worktree ID if this session was created from a worktree. public string? WorktreeId { get; set; } + + /// Role of this session within a multi-agent group. + public MultiAgentRole Role { get; set; } = MultiAgentRole.Worker; } [JsonConverter(typeof(JsonStringEnumConverter))] @@ -34,6 +46,28 @@ public enum SessionSortMode Manual } +/// How prompts are distributed in a multi-agent group. +[JsonConverter(typeof(JsonStringEnumConverter))] +public enum MultiAgentMode +{ + /// Send the same prompt to all sessions simultaneously. + Broadcast, + /// Send the prompt to sessions one at a time in order. + Sequential, + /// An orchestrator session decides how to delegate work to other sessions. + Orchestrator +} + +/// Role of a session within a multi-agent group. 
+[JsonConverter(typeof(JsonStringEnumConverter))] +public enum MultiAgentRole +{ + /// Regular worker session that receives prompts. + Worker, + /// Orchestrator session that delegates work (used in Orchestrator mode). + Orchestrator +} + public class OrganizationState { public List Groups { get; set; } = new() diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 82b0068b41..fa477a0fa5 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -318,4 +318,228 @@ public SessionGroup GetOrCreateRepoGroup(string repoId, string repoName) } #endregion + + #region Multi-Agent Orchestration + + /// + /// Create a multi-agent group and optionally move existing sessions into it. + /// + public SessionGroup CreateMultiAgentGroup(string name, MultiAgentMode mode = MultiAgentMode.Broadcast, string? orchestratorPrompt = null, List? sessionNames = null) + { + var group = new SessionGroup + { + Id = Guid.NewGuid().ToString(), + Name = name, + IsMultiAgent = true, + OrchestratorMode = mode, + OrchestratorPrompt = orchestratorPrompt, + SortOrder = Organization.Groups.Max(g => g.SortOrder) + 1 + }; + Organization.Groups.Add(group); + + if (sessionNames != null) + { + foreach (var sessionName in sessionNames) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + if (meta != null) + { + meta.GroupId = group.Id; + } + } + } + + SaveOrganization(); + OnStateChanged?.Invoke(); + return group; + } + + /// + /// Set the orchestration mode for a multi-agent group. + /// + public void SetMultiAgentMode(string groupId, MultiAgentMode mode) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group != null && group.IsMultiAgent) + { + group.OrchestratorMode = mode; + SaveOrganization(); + OnStateChanged?.Invoke(); + } + } + + /// + /// Set the role of a session within a multi-agent group. 
+ /// + public void SetSessionRole(string sessionName, MultiAgentRole role) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + if (meta != null) + { + meta.Role = role; + SaveOrganization(); + OnStateChanged?.Invoke(); + } + } + + /// + /// Get all session names in a multi-agent group. + /// + public List GetMultiAgentGroupMembers(string groupId) + { + return Organization.Sessions + .Where(m => m.GroupId == groupId) + .Select(m => m.SessionName) + .ToList(); + } + + /// + /// Get the orchestrator session name for an orchestrator-mode group, if any. + /// + public string? GetOrchestratorSession(string groupId) + { + return Organization.Sessions + .FirstOrDefault(m => m.GroupId == groupId && m.Role == MultiAgentRole.Orchestrator) + ?.SessionName; + } + + /// + /// Send a prompt to all sessions in a multi-agent group based on its orchestration mode. + /// + public async Task SendToMultiAgentGroupAsync(string groupId, string prompt, CancellationToken cancellationToken = default) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId && g.IsMultiAgent); + if (group == null) return; + + var members = GetMultiAgentGroupMembers(groupId); + if (members.Count == 0) return; + + switch (group.OrchestratorMode) + { + case MultiAgentMode.Broadcast: + await SendBroadcastAsync(members, prompt, cancellationToken); + break; + + case MultiAgentMode.Sequential: + await SendSequentialAsync(members, prompt, cancellationToken); + break; + + case MultiAgentMode.Orchestrator: + await SendViaOrchestratorAsync(groupId, members, prompt, cancellationToken); + break; + } + } + + private async Task SendBroadcastAsync(List sessionNames, string prompt, CancellationToken cancellationToken) + { + var tasks = sessionNames.Select(name => + { + var session = GetSession(name); + if (session == null) return Task.CompletedTask; + + if (session.IsProcessing) + { + EnqueueMessage(name, prompt); + return Task.CompletedTask; + } + + return 
SendPromptAsync(name, prompt, cancellationToken: cancellationToken) + .ContinueWith(t => + { + if (t.IsFaulted) + Debug($"Broadcast send failed for '{name}': {t.Exception?.InnerException?.Message}"); + }, TaskScheduler.Default); + }); + + await Task.WhenAll(tasks); + } + + private async Task SendSequentialAsync(List sessionNames, string prompt, CancellationToken cancellationToken) + { + foreach (var name in sessionNames) + { + if (cancellationToken.IsCancellationRequested) break; + + var session = GetSession(name); + if (session == null) continue; + + if (session.IsProcessing) + { + EnqueueMessage(name, prompt); + continue; + } + + try + { + await SendPromptAsync(name, prompt, cancellationToken: cancellationToken); + } + catch (Exception ex) + { + Debug($"Sequential send failed for '{name}': {ex.Message}"); + } + } + } + + private async Task SendViaOrchestratorAsync(string groupId, List members, string prompt, CancellationToken cancellationToken) + { + var orchestratorName = GetOrchestratorSession(groupId); + if (orchestratorName == null) + { + // Fall back to broadcast if no orchestrator is designated + await SendBroadcastAsync(members, prompt, cancellationToken); + return; + } + + var workerNames = members.Where(m => m != orchestratorName).ToList(); + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + + // Build the orchestrator prompt with context about available workers + var orchestratorPrompt = $""" + You are the orchestrator of a multi-agent group. You have {workerNames.Count} worker agent(s) available: {string.Join(", ", workerNames.Select(w => $"'{w}'"))}. + + The user's request is: + {prompt} + + {(group?.OrchestratorPrompt != null ? $"Additional orchestration instructions: {group.OrchestratorPrompt}" : "")} + + Analyze the request and respond with your plan. The user will manually delegate specific tasks to the worker sessions based on your plan. 
+ """; + + var orchestratorSession = GetSession(orchestratorName); + if (orchestratorSession == null) return; + + try + { + await SendPromptAsync(orchestratorName, orchestratorPrompt, cancellationToken: cancellationToken); + } + catch (Exception ex) + { + Debug($"Orchestrator send failed: {ex.Message}"); + } + } + + /// + /// Get the progress of a multi-agent group (how many sessions have completed their current turn). + /// + public (int Total, int Completed, int Processing, List CompletedNames) GetMultiAgentProgress(string groupId) + { + var members = GetMultiAgentGroupMembers(groupId); + var completed = new List(); + int processing = 0; + + foreach (var name in members) + { + var session = GetSession(name); + if (session == null) continue; + + if (session.IsProcessing) + processing++; + else + completed.Add(name); + } + + return (members.Count, completed.Count, processing, completed); + } + + #endregion } diff --git a/PolyPilot/Services/WsBridgeServer.cs b/PolyPilot/Services/WsBridgeServer.cs index d222a592f1..d52c5abcb1 100644 --- a/PolyPilot/Services/WsBridgeServer.cs +++ b/PolyPilot/Services/WsBridgeServer.cs @@ -484,6 +484,23 @@ await SendToClientAsync(clientId, ws, await SendToClientAsync(clientId, ws, BridgeMessage.Create(BridgeMessageTypes.DirectoriesList, dirResult), ct); break; + + case BridgeMessageTypes.MultiAgentBroadcast: + var maReq = msg.GetPayload(); + if (maReq != null && _copilotService != null) + { + _ = _copilotService.SendToMultiAgentGroupAsync(maReq.GroupId, maReq.Message, ct); + } + break; + + case BridgeMessageTypes.MultiAgentCreateGroup: + var maCreateReq = msg.GetPayload(); + if (maCreateReq != null && _copilotService != null) + { + var mode = Enum.TryParse(maCreateReq.Mode, out var m) ? 
m : MultiAgentMode.Broadcast; + _copilotService.CreateMultiAgentGroup(maCreateReq.Name, mode, maCreateReq.OrchestratorPrompt, maCreateReq.SessionNames); + } + break; } } catch (Exception ex) From 87a20030545723c94b5033e2cade4a932ed169b6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Feb 2026 05:19:30 +0000 Subject: [PATCH 03/48] Address code review feedback: guard against empty Groups list, fix orchestrator prompt formatting Co-authored-by: PureWeen <5375137+PureWeen@users.noreply.github.com> --- .../Services/CopilotService.Organization.cs | 25 +++++++++++-------- PolyPilot/Services/WsBridgeClient.cs | 8 ++++++ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index fa477a0fa5..9c0762e965 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -333,7 +333,7 @@ public SessionGroup CreateMultiAgentGroup(string name, MultiAgentMode mode = Mul IsMultiAgent = true, OrchestratorMode = mode, OrchestratorPrompt = orchestratorPrompt, - SortOrder = Organization.Groups.Max(g => g.SortOrder) + 1 + SortOrder = Organization.Groups.Any() ? Organization.Groups.Max(g => g.SortOrder) + 1 : 0 }; Organization.Groups.Add(group); @@ -494,16 +494,19 @@ private async Task SendViaOrchestratorAsync(string groupId, List members var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); // Build the orchestrator prompt with context about available workers - var orchestratorPrompt = $""" - You are the orchestrator of a multi-agent group. You have {workerNames.Count} worker agent(s) available: {string.Join(", ", workerNames.Select(w => $"'{w}'"))}. - - The user's request is: - {prompt} - - {(group?.OrchestratorPrompt != null ? 
$"Additional orchestration instructions: {group.OrchestratorPrompt}" : "")} - - Analyze the request and respond with your plan. The user will manually delegate specific tasks to the worker sessions based on your plan. - """; + var promptBuilder = new System.Text.StringBuilder(); + promptBuilder.AppendLine($"You are the orchestrator of a multi-agent group. You have {workerNames.Count} worker agent(s) available: {string.Join(", ", workerNames.Select(w => $"'{w}'"))}."); + promptBuilder.AppendLine(); + promptBuilder.AppendLine($"The user's request is:"); + promptBuilder.AppendLine(prompt); + if (!string.IsNullOrEmpty(group?.OrchestratorPrompt)) + { + promptBuilder.AppendLine(); + promptBuilder.AppendLine($"Additional orchestration instructions: {group.OrchestratorPrompt}"); + } + promptBuilder.AppendLine(); + promptBuilder.AppendLine("Analyze the request and respond with your plan. The user will manually delegate specific tasks to the worker sessions based on your plan."); + var orchestratorPrompt = promptBuilder.ToString(); var orchestratorSession = GetSession(orchestratorName); if (orchestratorSession == null) return; diff --git a/PolyPilot/Services/WsBridgeClient.cs b/PolyPilot/Services/WsBridgeClient.cs index 8cc5299a53..ce2b9ec1ec 100644 --- a/PolyPilot/Services/WsBridgeClient.cs +++ b/PolyPilot/Services/WsBridgeClient.cs @@ -193,6 +193,14 @@ await SendAsync(BridgeMessage.Create(BridgeMessageTypes.AbortSession, public async Task SendOrganizationCommandAsync(OrganizationCommandPayload cmd, CancellationToken ct = default) => await SendAsync(BridgeMessage.Create(BridgeMessageTypes.OrganizationCommand, cmd), ct); + public async Task SendMultiAgentBroadcastAsync(string groupId, string message, CancellationToken ct = default) => + await SendAsync(BridgeMessage.Create(BridgeMessageTypes.MultiAgentBroadcast, + new MultiAgentBroadcastPayload { GroupId = groupId, Message = message }), ct); + + public async Task CreateMultiAgentGroupAsync(string name, string mode = 
"Broadcast", string? orchestratorPrompt = null, List? sessionNames = null, CancellationToken ct = default) => + await SendAsync(BridgeMessage.Create(BridgeMessageTypes.MultiAgentCreateGroup, + new MultiAgentCreateGroupPayload { Name = name, Mode = mode, OrchestratorPrompt = orchestratorPrompt, SessionNames = sessionNames }), ct); + private TaskCompletionSource? _dirListTcs; public async Task ListDirectoriesAsync(string? path = null, CancellationToken ct = default) From 68bbbe8ffc46325376300e59ea738660334e2f1a Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Tue, 17 Feb 2026 09:47:08 -0600 Subject: [PATCH 04/48] Enhance multi-agent orchestration: single-orchestrator invariant, context prefix, bridge role support - SetSessionRole enforces max 1 orchestrator per group (auto-demotes previous) - Broadcast/Sequential dispatch prepends multi-agent context prefix with role and team info - Add multi_agent_set_role bridge message type, payload, and WsBridgeServer handler - Fix _copilotService -> _copilot field reference bug in bridge handlers - Add .card-role-badge CSS styles for orchestrator/worker badges - Add 3 new tests (orchestrator invariant, payload serialization, message type) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 57 +++++++++++++++++ PolyPilot/Components/SessionCard.razor.css | 14 +++++ PolyPilot/Models/BridgeMessages.cs | 7 +++ .../Services/CopilotService.Organization.cs | 62 ++++++++++++++----- PolyPilot/Services/WsBridgeServer.cs | 17 +++-- 5 files changed, 139 insertions(+), 18 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 15927d08a4..ad149c0670 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -257,4 +257,61 @@ public void LegacyState_WithoutMultiAgent_DeserializesGracefully() Assert.Null(state.Groups[0].OrchestratorPrompt); 
Assert.Equal(MultiAgentRole.Worker, state.Sessions[0].Role); } + + [Fact] + public void OrchestratorInvariant_PromotingNewOrchestrator_DemotesPrevious() + { + var state = new OrganizationState(); + var group = new SessionGroup + { + Id = "ma-group-1", + Name = "Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator + }; + state.Groups.Add(group); + + var session1 = new SessionMeta { SessionName = "s1", GroupId = "ma-group-1", Role = MultiAgentRole.Orchestrator }; + var session2 = new SessionMeta { SessionName = "s2", GroupId = "ma-group-1", Role = MultiAgentRole.Worker }; + var session3 = new SessionMeta { SessionName = "s3", GroupId = "ma-group-1", Role = MultiAgentRole.Worker }; + state.Sessions.Add(session1); + state.Sessions.Add(session2); + state.Sessions.Add(session3); + + // Simulate the demotion logic from SetSessionRole + foreach (var other in state.Sessions.Where(m => m.GroupId == "ma-group-1" && m.SessionName != "s2" && m.Role == MultiAgentRole.Orchestrator)) + { + other.Role = MultiAgentRole.Worker; + } + session2.Role = MultiAgentRole.Orchestrator; + + Assert.Equal(MultiAgentRole.Worker, session1.Role); + Assert.Equal(MultiAgentRole.Orchestrator, session2.Role); + Assert.Equal(MultiAgentRole.Worker, session3.Role); + Assert.Single(state.Sessions, s => s.GroupId == "ma-group-1" && s.Role == MultiAgentRole.Orchestrator); + } + + [Fact] + public void MultiAgentSetRolePayload_Serializes() + { + var payload = new MultiAgentSetRolePayload + { + SessionName = "worker-1", + Role = "Orchestrator" + }; + var json = JsonSerializer.Serialize(payload, BridgeJson.Options); + Assert.Contains("worker-1", json); + Assert.Contains("Orchestrator", json); + + var deserialized = JsonSerializer.Deserialize(json, BridgeJson.Options); + Assert.NotNull(deserialized); + Assert.Equal("worker-1", deserialized!.SessionName); + Assert.Equal("Orchestrator", deserialized.Role); + } + + [Fact] + public void MultiAgentSetRole_BridgeMessageType_Exists() + { + 
Assert.Equal("multi_agent_set_role", BridgeMessageTypes.MultiAgentSetRole); + } } diff --git a/PolyPilot/Components/SessionCard.razor.css b/PolyPilot/Components/SessionCard.razor.css index 60f2bb47d1..226139e3f2 100644 --- a/PolyPilot/Components/SessionCard.razor.css +++ b/PolyPilot/Components/SessionCard.razor.css @@ -467,3 +467,17 @@ .card-input button:hover:not(:disabled) { background: var(--hover-bg); } + +.card-role-badge { + font-size: var(--type-caption1, 0.7rem); + flex-shrink: 0; + opacity: 0.85; +} + +.card-role-badge.orchestrator { + color: rgba(99, 102, 241, 0.9); +} + +.card-role-badge.worker { + color: var(--text-dim); +} diff --git a/PolyPilot/Models/BridgeMessages.cs b/PolyPilot/Models/BridgeMessages.cs index 831832f8bf..e95cc5e21c 100644 --- a/PolyPilot/Models/BridgeMessages.cs +++ b/PolyPilot/Models/BridgeMessages.cs @@ -83,6 +83,7 @@ public static class BridgeMessageTypes public const string ListDirectories = "list_directories"; public const string MultiAgentBroadcast = "multi_agent_broadcast"; public const string MultiAgentCreateGroup = "multi_agent_create_group"; + public const string MultiAgentSetRole = "multi_agent_set_role"; // Server → Client (response) public const string DirectoriesList = "directories_list"; @@ -309,3 +310,9 @@ public class MultiAgentProgressPayload public int ProcessingSessions { get; set; } public List CompletedSessionNames { get; set; } = new(); } + +public class MultiAgentSetRolePayload +{ + public string SessionName { get; set; } = ""; + public string Role { get; set; } = "Worker"; +} diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 9c0762e965..4707cdfb47 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -370,16 +370,32 @@ public void SetMultiAgentMode(string groupId, MultiAgentMode mode) /// /// Set the role of a session within a multi-agent group. 
+ /// When promoting to Orchestrator, any existing orchestrator in the same group is demoted to Worker. /// public void SetSessionRole(string sessionName, MultiAgentRole role) { var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); - if (meta != null) + if (meta == null) return; + + var oldRole = meta.Role; + + // Enforce single orchestrator per group + if (role == MultiAgentRole.Orchestrator) { - meta.Role = role; - SaveOrganization(); - OnStateChanged?.Invoke(); + var group = Organization.Groups.FirstOrDefault(g => g.Id == meta.GroupId); + if (group is { IsMultiAgent: true }) + { + foreach (var other in Organization.Sessions + .Where(m => m.GroupId == meta.GroupId && m.SessionName != sessionName && m.Role == MultiAgentRole.Orchestrator)) + { + other.Role = MultiAgentRole.Worker; + } + } } + + meta.Role = role; + SaveOrganization(); + OnStateChanged?.Invoke(); } /// @@ -417,11 +433,11 @@ public async Task SendToMultiAgentGroupAsync(string groupId, string prompt, Canc switch (group.OrchestratorMode) { case MultiAgentMode.Broadcast: - await SendBroadcastAsync(members, prompt, cancellationToken); + await SendBroadcastAsync(group, members, prompt, cancellationToken); break; case MultiAgentMode.Sequential: - await SendSequentialAsync(members, prompt, cancellationToken); + await SendSequentialAsync(group, members, prompt, cancellationToken); break; case MultiAgentMode.Orchestrator: @@ -430,20 +446,35 @@ public async Task SendToMultiAgentGroupAsync(string groupId, string prompt, Canc } } - private async Task SendBroadcastAsync(List sessionNames, string prompt, CancellationToken cancellationToken) + /// + /// Build a multi-agent context prefix for a session in a group. + /// + private string BuildMultiAgentPrefix(string sessionName, SessionGroup group, List allMembers) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + var role = meta?.Role ?? 
MultiAgentRole.Worker; + var roleName = role == MultiAgentRole.Orchestrator ? "orchestrator" : "worker"; + var others = allMembers.Where(m => m != sessionName).ToList(); + var othersList = others.Count > 0 ? string.Join(", ", others) : "none"; + return $"[Multi-agent context: You are '{sessionName}' ({roleName}) in group '{group.Name}'. Other members: {othersList}.]\n\n"; + } + + private async Task SendBroadcastAsync(SessionGroup group, List sessionNames, string prompt, CancellationToken cancellationToken) { var tasks = sessionNames.Select(name => { var session = GetSession(name); if (session == null) return Task.CompletedTask; + var prefixedPrompt = BuildMultiAgentPrefix(name, group, sessionNames) + prompt; + if (session.IsProcessing) { - EnqueueMessage(name, prompt); + EnqueueMessage(name, prefixedPrompt); return Task.CompletedTask; } - return SendPromptAsync(name, prompt, cancellationToken: cancellationToken) + return SendPromptAsync(name, prefixedPrompt, cancellationToken: cancellationToken) .ContinueWith(t => { if (t.IsFaulted) @@ -454,7 +485,7 @@ private async Task SendBroadcastAsync(List sessionNames, string prompt, await Task.WhenAll(tasks); } - private async Task SendSequentialAsync(List sessionNames, string prompt, CancellationToken cancellationToken) + private async Task SendSequentialAsync(SessionGroup group, List sessionNames, string prompt, CancellationToken cancellationToken) { foreach (var name in sessionNames) { @@ -463,15 +494,17 @@ private async Task SendSequentialAsync(List sessionNames, string prompt, var session = GetSession(name); if (session == null) continue; + var prefixedPrompt = BuildMultiAgentPrefix(name, group, sessionNames) + prompt; + if (session.IsProcessing) { - EnqueueMessage(name, prompt); + EnqueueMessage(name, prefixedPrompt); continue; } try { - await SendPromptAsync(name, prompt, cancellationToken: cancellationToken); + await SendPromptAsync(name, prefixedPrompt, cancellationToken: cancellationToken); } catch (Exception ex) { 
@@ -482,16 +515,17 @@ private async Task SendSequentialAsync(List sessionNames, string prompt, private async Task SendViaOrchestratorAsync(string groupId, List members, string prompt, CancellationToken cancellationToken) { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); var orchestratorName = GetOrchestratorSession(groupId); if (orchestratorName == null) { // Fall back to broadcast if no orchestrator is designated - await SendBroadcastAsync(members, prompt, cancellationToken); + if (group != null) + await SendBroadcastAsync(group, members, prompt, cancellationToken); return; } var workerNames = members.Where(m => m != orchestratorName).ToList(); - var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); // Build the orchestrator prompt with context about available workers var promptBuilder = new System.Text.StringBuilder(); diff --git a/PolyPilot/Services/WsBridgeServer.cs b/PolyPilot/Services/WsBridgeServer.cs index d52c5abcb1..56818265dd 100644 --- a/PolyPilot/Services/WsBridgeServer.cs +++ b/PolyPilot/Services/WsBridgeServer.cs @@ -487,18 +487,27 @@ await SendToClientAsync(clientId, ws, case BridgeMessageTypes.MultiAgentBroadcast: var maReq = msg.GetPayload(); - if (maReq != null && _copilotService != null) + if (maReq != null && _copilot != null) { - _ = _copilotService.SendToMultiAgentGroupAsync(maReq.GroupId, maReq.Message, ct); + _ = _copilot.SendToMultiAgentGroupAsync(maReq.GroupId, maReq.Message, ct); } break; case BridgeMessageTypes.MultiAgentCreateGroup: var maCreateReq = msg.GetPayload(); - if (maCreateReq != null && _copilotService != null) + if (maCreateReq != null && _copilot != null) { var mode = Enum.TryParse(maCreateReq.Mode, out var m) ? 
m : MultiAgentMode.Broadcast; - _copilotService.CreateMultiAgentGroup(maCreateReq.Name, mode, maCreateReq.OrchestratorPrompt, maCreateReq.SessionNames); + _copilot.CreateMultiAgentGroup(maCreateReq.Name, mode, maCreateReq.OrchestratorPrompt, maCreateReq.SessionNames); + } + break; + + case BridgeMessageTypes.MultiAgentSetRole: + var maRoleReq = msg.GetPayload(); + if (maRoleReq != null && _copilot != null) + { + var role = Enum.TryParse(maRoleReq.Role, out var r) ? r : MultiAgentRole.Worker; + _copilot.SetSessionRole(maRoleReq.SessionName, role); } break; } From dce31349df694f931bf1997aa0483008cbaca147 Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Tue, 17 Feb 2026 10:22:51 -0600 Subject: [PATCH 05/48] Add full orchestrator loop and sidebar multi-agent support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Orchestrator mode now runs a complete planβ†’dispatchβ†’collectβ†’synthesize loop: - Orchestrator receives prompt and assigns tasks using @worker:name markers - Tasks dispatched to workers in parallel with SendPromptAndWaitAsync - Worker results collected and sent back to orchestrator for synthesis - Phase events (Planning/Dispatching/WaitingForWorkers/Synthesizing/Complete) Sidebar improvements: - Add 'πŸ€– + Multi-Agent' button to create multi-agent groups from sidebar - Add 'πŸ€– Convert to Multi-Agent' option in group context menu - Show πŸ€– badge on multi-agent group headers in sidebar - Add ConvertToMultiAgent method to CopilotService Tests: 5 new tests for ParseTaskAssignments and ConvertToMultiAgent Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 76 ++++++++ .../Components/Layout/SessionSidebar.razor | 31 ++- .../Services/CopilotService.Organization.cs | 179 ++++++++++++++++-- 3 files changed, 267 insertions(+), 19 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 
679a78c75c..2d460598fe 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -664,4 +664,80 @@ public void Reconcile_MultipleSessionsDifferentRepos_AllGetReassigned() Assert.Equal(groupA.Id, metaA.GroupId); Assert.Equal(groupB.Id, metaB.GroupId); } + + [Fact] + public void ParseTaskAssignments_ExtractsWorkerTasks() + { + var response = @"Here's my plan: + +@worker:session-a +Implement the login form with email and password fields. +@end + +@worker:session-b +Create the API endpoint for user authentication. +@end + +That covers the full task."; + + var workers = new List { "session-a", "session-b" }; + var assignments = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Equal(2, assignments.Count); + Assert.Equal("session-a", assignments[0].WorkerName); + Assert.Contains("login form", assignments[0].Task); + Assert.Equal("session-b", assignments[1].WorkerName); + Assert.Contains("API endpoint", assignments[1].Task); + } + + [Fact] + public void ParseTaskAssignments_FuzzyMatchesWorkerNames() + { + var response = @"@worker:session +Do the work. +@end"; + + var workers = new List { "session-alpha", "session-beta" }; + var assignments = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Single(assignments); + Assert.Equal("session-alpha", assignments[0].WorkerName); + } + + [Fact] + public void ParseTaskAssignments_ReturnsEmpty_WhenNoMarkers() + { + var response = "I'll handle this myself. No need to delegate to workers."; + var workers = new List { "session-a", "session-b" }; + var assignments = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Empty(assignments); + } + + [Fact] + public void ParseTaskAssignments_IgnoresUnknownWorkers() + { + var response = @"@worker:unknown-worker +Do something. 
+@end"; + + var workers = new List { "session-a", "session-b" }; + var assignments = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Empty(assignments); + } + + [Fact] + public void ConvertToMultiAgent_SetsIsMultiAgentTrue() + { + var svc = CreateService(); + svc.CreateGroup("TestGroup"); + var group = svc.Organization.Groups.First(g => g.Name == "TestGroup"); + Assert.False(group.IsMultiAgent); + + svc.ConvertToMultiAgent(group.Id); + + Assert.True(group.IsMultiAgent); + Assert.Equal(MultiAgentMode.Broadcast, group.OrchestratorMode); + } } diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index df73fdd0c4..0824e8f86a 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -180,7 +180,7 @@ else @if (isAddingGroup) { } @@ -188,6 +188,7 @@ else {
+
} @@ -265,6 +266,10 @@ else { βŒ₯ } + @if (group.IsMultiAgent) + { + πŸ€– + } @group.Name @groupSessions.Count @if (group.IsCollapsed && groupSessions.Any(s => s.IsProcessing)) @@ -308,6 +313,13 @@ else } else { + @if (!group.IsMultiAgent) + { + +
+ } @@ -424,6 +436,7 @@ else private string? resumeError = null; private string currentPage = "/"; private bool isAddingGroup = false; + private bool isAddingMultiAgentGroup = false; private string? openMenuSession = null; private string? openGroupMenuId = null; private CreateSessionForm? createSessionFormRef; @@ -798,22 +811,34 @@ else private void StartAddGroup() { isAddingGroup = true; + isAddingMultiAgentGroup = false; + } + + private void StartAddMultiAgentGroup() + { + isAddingGroup = true; + isAddingMultiAgentGroup = true; } private async Task CommitNewGroup() { var name = await JS.InvokeAsync("getElementValue", "newGroupInput"); + var wasMultiAgent = isAddingMultiAgentGroup; isAddingGroup = false; + isAddingMultiAgentGroup = false; if (!string.IsNullOrWhiteSpace(name)) { - CopilotService.CreateGroup(name.Trim()); + if (wasMultiAgent) + CopilotService.CreateMultiAgentGroup(name.Trim()); + else + CopilotService.CreateGroup(name.Trim()); } } private async Task HandleNewGroupKeyDown(KeyboardEventArgs e) { if (e.Key == "Enter") await CommitNewGroup(); - else if (e.Key == "Escape") isAddingGroup = false; + else if (e.Key == "Escape") { isAddingGroup = false; isAddingMultiAgentGroup = false; } } private void ToggleSessionMenu(string sessionName) diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 66b1090d8a..1886cce09e 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -1,10 +1,15 @@ using System.Text.Json; +using System.Text.RegularExpressions; using PolyPilot.Models; namespace PolyPilot.Services; +public enum OrchestratorPhase { Planning, Dispatching, WaitingForWorkers, Synthesizing, Complete } + public partial class CopilotService { + public event Action? 
OnOrchestratorPhaseChanged; // groupId, phase, detail + #region Session Organization (groups, pinning, sorting) public void LoadOrganization() @@ -377,6 +382,19 @@ public SessionGroup CreateMultiAgentGroup(string name, MultiAgentMode mode = Mul return group; } + /// + /// Convert an existing regular group into a multi-agent group. + /// + public void ConvertToMultiAgent(string groupId) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group == null || group.IsMultiAgent) return; + group.IsMultiAgent = true; + group.OrchestratorMode = MultiAgentMode.Broadcast; + SaveOrganization(); + OnStateChanged?.Invoke(); + } + /// /// Set the orchestration mode for a multi-agent group. /// @@ -550,31 +568,160 @@ private async Task SendViaOrchestratorAsync(string groupId, List members var workerNames = members.Where(m => m != orchestratorName).ToList(); - // Build the orchestrator prompt with context about available workers - var promptBuilder = new System.Text.StringBuilder(); - promptBuilder.AppendLine($"You are the orchestrator of a multi-agent group. 
You have {workerNames.Count} worker agent(s) available: {string.Join(", ", workerNames.Select(w => $"'{w}'"))}."); - promptBuilder.AppendLine(); - promptBuilder.AppendLine($"The user's request is:"); - promptBuilder.AppendLine(prompt); - if (!string.IsNullOrEmpty(group?.OrchestratorPrompt)) + // Phase 1: Planning β€” ask orchestrator to analyze and assign tasks + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Planning, null)); + + var planningPrompt = BuildOrchestratorPlanningPrompt(prompt, workerNames, group?.OrchestratorPrompt); + var planResponse = await SendPromptAndWaitAsync(orchestratorName, planningPrompt, cancellationToken); + + // Phase 2: Parse task assignments from orchestrator response + var assignments = ParseTaskAssignments(planResponse, workerNames); + if (assignments.Count == 0) { - promptBuilder.AppendLine(); - promptBuilder.AppendLine($"Additional orchestration instructions: {group.OrchestratorPrompt}"); + // Orchestrator handled it without delegation β€” add a system note + AddOrchestratorSystemMessage(orchestratorName, "ℹ️ Orchestrator handled the request directly (no tasks delegated to workers)."); + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Complete, null)); + return; } - promptBuilder.AppendLine(); - promptBuilder.AppendLine("Analyze the request and respond with your plan. 
The user will manually delegate specific tasks to the worker sessions based on your plan."); - var orchestratorPrompt = promptBuilder.ToString(); - var orchestratorSession = GetSession(orchestratorName); - if (orchestratorSession == null) return; + // Phase 3: Dispatch tasks to workers in parallel + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Dispatching, + $"Sending tasks to {assignments.Count} worker(s)")); + + var workerTasks = assignments.Select(a => + ExecuteWorkerAsync(a.WorkerName, a.Task, prompt, cancellationToken)); + var results = await Task.WhenAll(workerTasks); + + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.WaitingForWorkers, null)); + + // Phase 4: Synthesize β€” send worker results back to orchestrator + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Synthesizing, null)); + + var synthesisPrompt = BuildSynthesisPrompt(prompt, results.ToList()); + await SendPromptAsync(orchestratorName, synthesisPrompt, cancellationToken: cancellationToken); + + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Complete, null)); + } + + private string BuildOrchestratorPlanningPrompt(string userPrompt, List workerNames, string? additionalInstructions) + { + var sb = new System.Text.StringBuilder(); + sb.AppendLine($"You are the orchestrator of a multi-agent group. You have {workerNames.Count} worker agent(s) available:"); + foreach (var w in workerNames) + sb.AppendLine($" - '{w}'"); + sb.AppendLine(); + sb.AppendLine("## User Request"); + sb.AppendLine(userPrompt); + if (!string.IsNullOrEmpty(additionalInstructions)) + { + sb.AppendLine(); + sb.AppendLine("## Additional Orchestration Instructions"); + sb.AppendLine(additionalInstructions); + } + sb.AppendLine(); + sb.AppendLine("## Your Task"); + sb.AppendLine("Analyze the request and assign specific tasks to your workers. 
Use this exact format for each assignment:"); + sb.AppendLine(); + sb.AppendLine("@worker:worker-name"); + sb.AppendLine("Detailed task description for this worker."); + sb.AppendLine("@end"); + sb.AppendLine(); + sb.AppendLine("You may include your analysis and reasoning as normal text. Only the @worker/@end blocks will be dispatched."); + sb.AppendLine("If you can handle the request entirely yourself, just respond normally without any @worker blocks."); + return sb.ToString(); + } + + internal record TaskAssignment(string WorkerName, string Task); + + internal static List ParseTaskAssignments(string orchestratorResponse, List availableWorkers) + { + var assignments = new List(); + var pattern = @"@worker:(\S+)\s*([\s\S]*?)(?:@end|(?=@worker:)|$)"; + + foreach (Match match in Regex.Matches(orchestratorResponse, pattern, RegexOptions.IgnoreCase)) + { + var workerName = match.Groups[1].Value.Trim(); + var task = match.Groups[2].Value.Trim(); + if (string.IsNullOrEmpty(task)) continue; + + // Resolve worker name: exact match, then fuzzy + var resolved = availableWorkers.FirstOrDefault(w => + w.Equals(workerName, StringComparison.OrdinalIgnoreCase)); + if (resolved == null) + { + resolved = availableWorkers.FirstOrDefault(w => + w.Contains(workerName, StringComparison.OrdinalIgnoreCase) || + workerName.Contains(w, StringComparison.OrdinalIgnoreCase)); + } + if (resolved != null) + assignments.Add(new TaskAssignment(resolved, task)); + } + return assignments; + } + + private record WorkerResult(string WorkerName, string? Response, bool Success, string? Error, TimeSpan Duration); + + private async Task ExecuteWorkerAsync(string workerName, string task, string originalPrompt, CancellationToken cancellationToken) + { + var sw = System.Diagnostics.Stopwatch.StartNew(); + var workerPrompt = $"You are a worker agent. Complete the following task thoroughly. 
Your response will be collected and synthesized with other workers' responses.\n\n## Original User Request (context)\n{originalPrompt}\n\n## Your Assigned Task\n{task}"; try { - await SendPromptAsync(orchestratorName, orchestratorPrompt, cancellationToken: cancellationToken); + var response = await SendPromptAndWaitAsync(workerName, workerPrompt, cancellationToken); + return new WorkerResult(workerName, response, true, null, sw.Elapsed); } catch (Exception ex) { - Debug($"Orchestrator send failed: {ex.Message}"); + return new WorkerResult(workerName, null, false, ex.Message, sw.Elapsed); + } + } + + private async Task SendPromptAndWaitAsync(string sessionName, string prompt, CancellationToken cancellationToken) + { + if (!_sessions.TryGetValue(sessionName, out var state)) + throw new InvalidOperationException($"Session '{sessionName}' not found."); + + await SendPromptAsync(sessionName, prompt, cancellationToken: cancellationToken); + + // Wait for the response to complete via the existing ResponseCompletion TCS + if (state.ResponseCompletion != null) + { + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + cts.CancelAfter(TimeSpan.FromMinutes(10)); + return await state.ResponseCompletion.Task.WaitAsync(cts.Token); + } + return ""; + } + + private string BuildSynthesisPrompt(string originalPrompt, List results) + { + var sb = new System.Text.StringBuilder(); + sb.AppendLine("## Worker Results"); + sb.AppendLine(); + foreach (var result in results) + { + sb.AppendLine($"### {result.WorkerName} ({(result.Success ? "βœ… completed" : "❌ failed")}, {result.Duration.TotalSeconds:F1}s)"); + if (result.Success) + sb.AppendLine(result.Response); + else + sb.AppendLine($"*Error: {result.Error}*"); + sb.AppendLine(); + } + sb.AppendLine("## Instructions"); + sb.AppendLine($"Original request: {originalPrompt}"); + sb.AppendLine(); + sb.AppendLine("Synthesize these worker responses into a coherent final answer. Note any tasks that failed. 
Provide a unified response addressing the original request."); + return sb.ToString(); + } + + private void AddOrchestratorSystemMessage(string sessionName, string message) + { + var session = GetSession(sessionName); + if (session != null) + { + session.History.Add(ChatMessage.SystemMessage(message)); + InvokeOnUI(() => OnStateChanged?.Invoke()); } } From 2cfd5b3e7e3051c0b1cdea228fca1178774a0196 Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Tue, 17 Feb 2026 10:29:04 -0600 Subject: [PATCH 06/48] Add role toggle to session context menu in sidebar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sessions in multi-agent groups now show: - 🎯 Set as Orchestrator / πŸ‘· Set as Worker in the β‹― menu - 🎯 badge next to the session name when it's the orchestrator Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Components/Layout/SessionListItem.razor | 24 ++++++++++++++++++- .../Layout/SessionListItem.razor.css | 6 +++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/PolyPilot/Components/Layout/SessionListItem.razor b/PolyPilot/Components/Layout/SessionListItem.razor index 3bbb1f4325..afdd14ecdc 100644 --- a/PolyPilot/Components/Layout/SessionListItem.razor +++ b/PolyPilot/Components/Layout/SessionListItem.razor @@ -47,6 +47,10 @@ else { @Session.Name + @if (Meta?.Role == MultiAgentRole.Orchestrator) + { + 🎯 + } @if (Session.UnreadCount > 0) { @Session.UnreadCount @@ -119,8 +123,26 @@ }
} + @{ + var currentGroup = Groups?.FirstOrDefault(g => g.Id == Meta?.GroupId); + } + @if (currentGroup is { IsMultiAgent: true }) + { + + @if (Meta?.Role == MultiAgentRole.Orchestrator) + { + + } + else + { + + } + } - @if (!string.IsNullOrEmpty(sessionDir)) { + + + } } @if (!group.IsCollapsed || !showGroupHeaders) @@ -820,6 +836,36 @@ else isAddingMultiAgentGroup = true; } + private void OnSidebarMultiAgentModeChanged(string groupId, ChangeEventArgs e) + { + if (e.Value is string val && Enum.TryParse(val, out var mode)) + { + CopilotService.SetMultiAgentMode(groupId, mode); + } + } + + private async Task SendToSidebarMultiAgentGroup(string groupId) + { + var inputId = $"sidebar-ma-input-{groupId}"; + var prompt = await JS.InvokeAsync("getElementValue", inputId); + if (string.IsNullOrWhiteSpace(prompt)) return; + + await JS.InvokeVoidAsync("clearElementValue", inputId); + + try + { + _ = CopilotService.SendToMultiAgentGroupAsync(groupId, prompt.Trim()).ContinueWith(t => + { + if (t.IsFaulted) + InvokeAsync(() => Console.WriteLine($"Error sending to multi-agent group: {t.Exception?.InnerException?.Message}")); + }); + } + catch (Exception ex) + { + Console.WriteLine($"Error sending to multi-agent group: {ex.Message}"); + } + } + private async Task CommitNewGroup() { var name = await JS.InvokeAsync("getElementValue", "newGroupInput"); diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index 3a188ff503..8266594268 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -362,6 +362,82 @@ margin-right: 0.15rem; } +/* Sidebar multi-agent controls */ +.sidebar-ma-controls { + display: flex; + flex-direction: column; + gap: 0.3rem; + padding: 0.3rem 0.75rem 0.4rem; + background: rgba(59, 130, 246, 0.05); + border-bottom: 1px solid var(--control-border); +} + +.sidebar-ma-mode-select { + width: 100%; + padding: 0.25rem 0.4rem; + 
border: 1px solid var(--control-border); + border-radius: 5px; + background: var(--control-bg); + color: var(--text-primary); + font-size: var(--type-caption1); + -webkit-appearance: none; + appearance: none; + background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='10' viewBox='0 0 24 24' fill='none' stroke='%23a0b4cc' stroke-width='2.5' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'/%3E%3C/svg%3E"); + background-repeat: no-repeat; + background-position: right 0.35rem center; +} + +.sidebar-ma-mode-select option { + background: var(--bg-primary); + color: var(--text-primary); +} + +.sidebar-ma-input-bar { + display: flex; + gap: 0.25rem; + align-items: flex-end; +} + +.sidebar-ma-input { + flex: 1; + min-width: 0; + padding: 0.25rem 0.4rem; + border: 1px solid var(--control-border); + border-radius: 5px; + background: var(--control-bg); + color: var(--text-primary); + font-size: var(--type-caption1); + font-family: inherit; + resize: vertical; + max-height: 80px; + box-sizing: border-box; +} + +.sidebar-ma-input::placeholder { + color: var(--text-dim); +} + +.sidebar-ma-input:focus { + outline: none; + border-color: var(--accent-primary); +} + +.sidebar-ma-send-btn { + all: unset; + padding: 0.25rem 0.4rem; + border-radius: 5px; + background: var(--accent-primary); + color: #fff; + cursor: pointer; + font-size: var(--type-caption1); + line-height: 1; + flex-shrink: 0; +} + +.sidebar-ma-send-btn:hover { + filter: brightness(1.15); +} + .group-worktree-btn { all: unset; font-size: var(--type-callout); From 9db4aa0ac4507239e7657be927e14fde8985d65b Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Tue, 17 Feb 2026 13:06:26 -0600 Subject: [PATCH 08/48] Add mini orchestration toolbar to expanded session view for multi-agent groups When a session belonging to a multi-agent group is expanded (full-screen), the dashboard grid and its multi-agent controls become hidden. 
This adds a compact sticky toolbar at the top of the expanded view showing: - Group name with multi-agent badge - Mode selector dropdown (Broadcast/Sequential/Orchestrator) - Inline text input with Send All button - Phase progress indicator when orchestration is running Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot/Components/Pages/Dashboard.razor | 53 +++++++++++++++++++ .../Components/Pages/Dashboard.razor.css | 46 ++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/PolyPilot/Components/Pages/Dashboard.razor b/PolyPilot/Components/Pages/Dashboard.razor index c5d0352bb8..1f4c933ecd 100644 --- a/PolyPilot/Components/Pages/Dashboard.razor +++ b/PolyPilot/Components/Pages/Dashboard.razor @@ -65,6 +65,32 @@ } else if (expandedSession != null) { + var expandedMeta = CopilotService.Organization.Sessions.FirstOrDefault(m => m.SessionName == expandedSession); + var expandedGroup = expandedMeta != null ? CopilotService.Organization.Groups.FirstOrDefault(g => g.Id == expandedMeta.GroupId && g.IsMultiAgent) : null; + @if (expandedGroup != null) + { + var grpProgress = CopilotService.GetMultiAgentProgress(expandedGroup.Id); + var expandedGroupId = expandedGroup.Id; +
+ πŸ€– @expandedGroup.Name + + @if (grpProgress.Processing > 0) + { + @grpProgress.Completed/@grpProgress.Total done + } + + +
+ } @* Keep-alive: render all active sessions, JS owns 'active' class for instant switching *@ @foreach (var session in sessions) { @@ -1984,6 +2010,33 @@ } } + private async Task SendToExpandedMultiAgentGroup(string groupId) + { + var inputId = $"ma-input-expanded-{groupId}"; + var prompt = await JS.InvokeAsync("getElementValue", inputId); + if (string.IsNullOrWhiteSpace(prompt)) return; + + await JS.InvokeVoidAsync("clearElementValue", inputId); + + try + { + _ = CopilotService.SendToMultiAgentGroupAsync(groupId, prompt.Trim()).ContinueWith(t => + { + if (t.IsFaulted) + { + InvokeAsync(() => + { + Console.WriteLine($"Error sending to multi-agent group: {t.Exception?.InnerException?.Message}"); + }); + } + }); + } + catch (Exception ex) + { + Console.WriteLine($"Error sending to multi-agent group: {ex.Message}"); + } + } + private async Task SaveDraftsAndCursor() { var json = await JS.InvokeAsync("eval", @" diff --git a/PolyPilot/Components/Pages/Dashboard.razor.css b/PolyPilot/Components/Pages/Dashboard.razor.css index c1ae74c0e9..c63a3a4597 100644 --- a/PolyPilot/Components/Pages/Dashboard.razor.css +++ b/PolyPilot/Components/Pages/Dashboard.razor.css @@ -1352,3 +1352,49 @@ height: 100%; width: 100%; } + +/* Multi-agent expanded toolbar */ +.ma-expanded-toolbar { + display: flex; + align-items: center; + gap: 0.5rem; + padding: 0.35rem 0.75rem; + height: 40px; + position: sticky; + top: 0; + z-index: 10; + background: rgba(99, 102, 241, 0.08); + border-bottom: 1px solid rgba(99, 102, 241, 0.2); + flex-shrink: 0; +} +.ma-expanded-toolbar-label { + font-size: var(--type-callout); + font-weight: 600; + color: rgba(99, 102, 241, 0.9); + white-space: nowrap; +} +.ma-expanded-toolbar-progress { + font-size: var(--type-caption1); + color: rgba(99, 102, 241, 0.8); + font-weight: 500; + white-space: nowrap; +} +.ma-expanded-toolbar-input { + flex: 1; + background: var(--control-bg); + color: var(--text-primary); + border: 1px solid var(--border-color); + border-radius: 
6px; + padding: 0.25rem 0.5rem; + font-size: var(--type-callout); + font-family: inherit; + min-width: 0; +} +.ma-expanded-toolbar-input:focus { + outline: none; + border-color: rgba(99, 102, 241, 0.6); + box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.15); +} +.ma-expanded-toolbar-input::placeholder { + color: var(--text-muted); +} From 1e5ef23672eeda837a9928d86fa16bbcdb236372 Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Tue, 17 Feb 2026 13:10:50 -0600 Subject: [PATCH 09/48] Add multi-agent controls to sidebar and expanded view MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users can now access multi-agent orchestration without leaving the expanded session view: Sidebar: - Mode selector (Broadcast/Sequential/Orchestrator) under group header - Compact Send All input bar with textarea + send button - Real-time orchestrator phase progress indicator Expanded session view: - Sticky toolbar when active session is in a multi-agent group - Shows group name, mode selector, Send All input - Phase progress indicator during orchestration loop Phase indicators show animated status: - 🎯 Planning... β†’ πŸ“‘ Dispatching... β†’ ⏳ Waiting... β†’ πŸ”„ Synthesizing... 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Components/Layout/SessionSidebar.razor | 27 +++++++++++++++++ .../Layout/SessionSidebar.razor.css | 12 ++++++++ PolyPilot/Components/Pages/Dashboard.razor | 29 +++++++++++++++++++ .../Components/Pages/Dashboard.razor.css | 13 +++++++++ 4 files changed, 81 insertions(+) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index 0864eb45a7..b7ee4efb71 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -4,6 +4,7 @@ @inject RepoManager RepoManager @inject IJSRuntime JS @inject NavigationManager Nav +@implements IDisposable @if (IsMobileTopBar) { @@ -156,6 +157,17 @@ else [Parameter] public int FontSize { get; set; } = 20; [Parameter] public EventCallback OnFontSizeChange { get; set; } + private Dictionary _groupPhases = new(); + + private static string PhaseLabel(OrchestratorPhase phase, string? detail) => phase switch + { + OrchestratorPhase.Planning => "🎯 Planning...", + OrchestratorPhase.Dispatching => "πŸ“‘ Dispatching..." + (detail != null ? 
$" {detail}" : ""), + OrchestratorPhase.WaitingForWorkers => "⏳ Waiting for workers...", + OrchestratorPhase.Synthesizing => "πŸ”„ Synthesizing...", + _ => "" + }; + private async Task IncreaseFontSize() { if (FontSize < 24) @@ -344,6 +356,10 @@ else + @if (_groupPhases.TryGetValue(maGroupId, out var sidebarPhase)) + { + @PhaseLabel(sidebarPhase.Phase, sidebarPhase.Detail) + } } } @@ -513,6 +529,7 @@ else CopilotService.OnStateChanged += RefreshSessions; CopilotService.OnSessionComplete += HandleSessionComplete; CopilotService.OnUsageInfoChanged += HandleUsageInfoChanged; + CopilotService.OnOrchestratorPhaseChanged += HandleOrchestratorPhaseChanged; RepoManager.Load(); RefreshSessions(); LoadPersistedSessions(); @@ -561,6 +578,15 @@ else }); } + private void HandleOrchestratorPhaseChanged(string groupId, OrchestratorPhase phase, string? detail) + { + if (phase == OrchestratorPhase.Complete) + _groupPhases.Remove(groupId); + else + _groupPhases[groupId] = (phase, detail); + InvokeAsync(StateHasChanged); + } + protected override async Task OnAfterRenderAsync(bool firstRender) { if (firstRender) @@ -1315,5 +1341,6 @@ Important conventions: CopilotService.OnStateChanged -= RefreshSessions; CopilotService.OnSessionComplete -= HandleSessionComplete; CopilotService.OnUsageInfoChanged -= HandleUsageInfoChanged; + CopilotService.OnOrchestratorPhaseChanged -= HandleOrchestratorPhaseChanged; } } diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index 8266594268..10c1461377 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -372,6 +372,18 @@ border-bottom: 1px solid var(--control-border); } +/* Orchestrator phase indicator */ +.phase-indicator { + font-size: 0.75rem; + color: var(--accent-primary); + animation: phase-pulse 2s ease-in-out infinite; +} + +@keyframes phase-pulse { + 0%, 100% { opacity: 0.7; } + 50% { opacity: 1; } +} 
+ .sidebar-ma-mode-select { width: 100%; padding: 0.25rem 0.4rem; diff --git a/PolyPilot/Components/Pages/Dashboard.razor b/PolyPilot/Components/Pages/Dashboard.razor index 1f4c933ecd..b3592c5ac4 100644 --- a/PolyPilot/Components/Pages/Dashboard.razor +++ b/PolyPilot/Components/Pages/Dashboard.razor @@ -82,6 +82,10 @@ { @grpProgress.Completed/@grpProgress.Total done } + @if (_groupPhases.TryGetValue(expandedGroupId, out var expPhase)) + { + @PhaseLabel(expPhase.Phase, expPhase.Detail) + } } + @if (_groupPhases.TryGetValue(group.Id, out var gridPhase)) + { + @PhaseLabel(gridPhase.Phase, gridPhase.Detail) + } } @(group.IsCollapsed ? "β–Ά" : "β–Ό") @@ -331,6 +339,7 @@ private string? initError; private bool _initializationComplete = false; private readonly Dictionary _fiestaStreamingMessages = new(StringComparer.Ordinal); + private Dictionary _groupPhases = new(); protected override async Task OnInitializedAsync() { @@ -356,6 +365,7 @@ CopilotService.OnError += HandleError; CopilotService.OnTurnStart += HandleTurnStart; CopilotService.OnTurnEnd += HandleTurnEnd; + CopilotService.OnOrchestratorPhaseChanged += HandleOrchestratorPhaseChanged; FiestaService.OnStateChanged += HandleFiestaStateChanged; FiestaService.OnHostTaskUpdate += HandleFiestaTaskUpdate; @@ -1025,6 +1035,24 @@ ScheduleRender(); } + private void HandleOrchestratorPhaseChanged(string groupId, OrchestratorPhase phase, string? detail) + { + if (phase == OrchestratorPhase.Complete) + _groupPhases.Remove(groupId); + else + _groupPhases[groupId] = (phase, detail); + ScheduleRender(); + } + + private static string PhaseLabel(OrchestratorPhase phase, string? detail) => phase switch + { + OrchestratorPhase.Planning => "🎯 Planning...", + OrchestratorPhase.Dispatching => "πŸ“‘ Dispatching..." + (detail != null ? 
$" {detail}" : ""), + OrchestratorPhase.WaitingForWorkers => "⏳ Waiting for workers...", + OrchestratorPhase.Synthesizing => "πŸ”„ Synthesizing...", + _ => "" + }; + private void DismissError(string sessionName) { errorBySession.Remove(sessionName); @@ -2482,6 +2510,7 @@ CopilotService.OnError -= HandleError; CopilotService.OnTurnStart -= HandleTurnStart; CopilotService.OnTurnEnd -= HandleTurnEnd; + CopilotService.OnOrchestratorPhaseChanged -= HandleOrchestratorPhaseChanged; FiestaService.OnStateChanged -= HandleFiestaStateChanged; FiestaService.OnHostTaskUpdate -= HandleFiestaTaskUpdate; _renderTimer?.Dispose(); diff --git a/PolyPilot/Components/Pages/Dashboard.razor.css b/PolyPilot/Components/Pages/Dashboard.razor.css index c63a3a4597..1a8ac257d5 100644 --- a/PolyPilot/Components/Pages/Dashboard.razor.css +++ b/PolyPilot/Components/Pages/Dashboard.razor.css @@ -420,6 +420,19 @@ color: rgba(99, 102, 241, 0.8); font-weight: 500; } + +/* Orchestrator phase indicator */ +.phase-indicator { + font-size: 0.75rem; + color: var(--accent-primary); + white-space: nowrap; + animation: phase-pulse 2s ease-in-out infinite; +} + +@keyframes phase-pulse { + 0%, 100% { opacity: 0.7; } + 50% { opacity: 1; } +} .multi-agent-input-bar { display: flex; align-items: flex-end; From d38dc70dd62a3bf7c8b902755dc56a8424636cef Mon Sep 17 00:00:00 2001 From: Shane Date: Tue, 17 Feb 2026 22:14:53 -0600 Subject: [PATCH 10/48] Fix mobile model selector dropdown positioning - Use fixed positioning instead of absolute for mobile to prevent off-screen clipping - Center dropdown on screen with translateX(-50%) - Increase touch target size to 44px minimum (iOS guideline) - Improve max-height calculation for small screens (50vh) - Add touch event handling support (@ontouchstart:preventDefault) Fixes model dropdown being cut off at edges of mobile screens. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot/Components/ModelSelector.razor | 2 +- PolyPilot/Components/ModelSelector.razor.css | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/PolyPilot/Components/ModelSelector.razor b/PolyPilot/Components/ModelSelector.razor index 431277f392..8b2a83f0f6 100644 --- a/PolyPilot/Components/ModelSelector.razor +++ b/PolyPilot/Components/ModelSelector.razor @@ -11,7 +11,7 @@ @if (isOpen) { -
+
@foreach (var model in Models) { var info = GetDisplayInfo(model); diff --git a/PolyPilot/Components/ModelSelector.razor.css b/PolyPilot/Components/ModelSelector.razor.css index 12804df67b..650cbf9654 100644 --- a/PolyPilot/Components/ModelSelector.razor.css +++ b/PolyPilot/Components/ModelSelector.razor.css @@ -107,8 +107,21 @@ font-size: var(--type-callout); } + .model-selector-dropdown { + position: fixed; + top: auto; + bottom: auto; + left: 50%; + transform: translateX(-50%); + width: 90vw; + max-width: 280px; + max-height: 50vh; + z-index: 2000; + } + .model-option { - padding: 0.35rem 0.5rem; + padding: 0.5rem 0.6rem; font-size: var(--type-callout); + min-height: 44px; } } From 3ee6fc6c294cb0488aa2a4f1ccfccbed40db0763 Mon Sep 17 00:00:00 2001 From: Shane Date: Tue, 17 Feb 2026 22:43:45 -0600 Subject: [PATCH 11/48] Add per-agent model assignment and OrchestratorReflect mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-Agent Model Assignment: - Add PreferredModel to SessionMeta for per-session model override - Add DefaultWorkerModel/DefaultOrchestratorModel to SessionGroup - Add SetSessionPreferredModel() and GetEffectiveModel() to CopilotService - Add EnsureSessionModelAsync() called before all dispatch paths (broadcast, sequential, orchestrator) to switch models at dispatch time - Include model info in BuildMultiAgentPrefix and planning prompts so orchestrators know each worker's capabilities - Add inline model picker in SessionListItem context menu for multi-agent groups - Show model override indicator (⚑) in session metadata row OrchestratorReflect Mode: - Add OrchestratorReflect enum value to MultiAgentMode - Add GroupReflectionState class with goal, iteration tracking, stall detection - Implement SendViaOrchestratorReflectAsync with iterative loop: Plan -> Dispatch -> Collect -> Synthesize+Evaluate -> repeat until [[GROUP_REFLECT_COMPLETE]] sentinel or stall/max iterations - Add 
StartGroupReflection/StopGroupReflection/PauseGroupReflection methods - Add OrchestratorReflect option in sidebar mode selector - Add group reflection status bar with iteration counter, goal, pause/stop Tests (20 new, 650 total passing): - PerAgentModelAssignmentTests: store/clear/effective model, serialization - GroupReflectionStateTests: creation, stall detection, completion summaries, serialization, evaluation extraction Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 262 +++++++++++++++- .../Components/Layout/SessionListItem.razor | 23 +- .../Layout/SessionListItem.razor.css | 27 ++ .../Components/Layout/SessionSidebar.razor | 10 + .../Layout/SessionSidebar.razor.css | 33 ++ PolyPilot/Models/SessionOrganization.cs | 82 ++++- .../Services/CopilotService.Organization.cs | 289 +++++++++++++++++- 7 files changed, 710 insertions(+), 16 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 2d460598fe..00c97b1133 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -173,10 +173,11 @@ public void SessionMeta_Role_SerializesAsString() [Fact] public void MultiAgentMode_AllValues() { - Assert.Equal(3, Enum.GetValues().Length); + Assert.Equal(4, Enum.GetValues().Length); Assert.True(Enum.IsDefined(MultiAgentMode.Broadcast)); Assert.True(Enum.IsDefined(MultiAgentMode.Sequential)); Assert.True(Enum.IsDefined(MultiAgentMode.Orchestrator)); + Assert.True(Enum.IsDefined(MultiAgentMode.OrchestratorReflect)); } [Fact] @@ -741,3 +742,262 @@ public void ConvertToMultiAgent_SetsIsMultiAgentTrue() Assert.Equal(MultiAgentMode.Broadcast, group.OrchestratorMode); } } + +public class PerAgentModelAssignmentTests +{ + private readonly StubChatDatabase _chatDb = new(); + private readonly StubServerManager _serverManager = new(); + private readonly StubWsBridgeClient _bridgeClient = new(); + 
private readonly StubDemoService _demoService = new(); + private readonly IServiceProvider _serviceProvider; + + public PerAgentModelAssignmentTests() + { + var services = new ServiceCollection(); + _serviceProvider = services.BuildServiceProvider(); + } + + private CopilotService CreateService() => + new CopilotService(_chatDb, _serverManager, _bridgeClient, new RepoManager(), _serviceProvider, _demoService); + + [Fact] + public void SessionMeta_PreferredModel_DefaultsToNull() + { + var meta = new SessionMeta { SessionName = "test" }; + Assert.Null(meta.PreferredModel); + } + + [Fact] + public void SetSessionPreferredModel_StoresModel() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1" }); + + svc.SetSessionPreferredModel("worker1", "gpt-4.1"); + + var meta = svc.Organization.Sessions.First(m => m.SessionName == "worker1"); + Assert.Equal("gpt-4.1", meta.PreferredModel); + } + + [Fact] + public void SetSessionPreferredModel_Null_ClearsOverride() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1", PreferredModel = "gpt-4.1" }); + + svc.SetSessionPreferredModel("worker1", null); + + var meta = svc.Organization.Sessions.First(m => m.SessionName == "worker1"); + Assert.Null(meta.PreferredModel); + } + + [Fact] + public void GetEffectiveModel_ReturnsPreferredModel_WhenSet() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1", PreferredModel = "claude-opus-4.6" }); + + var model = svc.GetEffectiveModel("worker1"); + Assert.Equal("claude-opus-4.6", model); + } + + [Fact] + public void GetEffectiveModel_ReturnsDefaultModel_WhenNoPreference() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1" }); + + var model = svc.GetEffectiveModel("worker1"); + Assert.Equal(svc.DefaultModel, model); + } + + [Fact] + public void 
SessionGroup_DefaultWorkerModel_DefaultsToNull() + { + var group = new SessionGroup { Name = "Test" }; + Assert.Null(group.DefaultWorkerModel); + Assert.Null(group.DefaultOrchestratorModel); + } + + [Fact] + public void PreferredModel_SurvivesSerialization() + { + var state = new OrganizationState(); + state.Sessions.Add(new SessionMeta { SessionName = "worker1", PreferredModel = "gemini-3-pro" }); + + var json = JsonSerializer.Serialize(state); + var restored = JsonSerializer.Deserialize(json)!; + + Assert.Equal("gemini-3-pro", restored.Sessions[0].PreferredModel); + } + + [Fact] + public void SessionGroup_ModelDefaults_SurviveSerialization() + { + var state = new OrganizationState(); + state.Groups.Add(new SessionGroup + { + Name = "Test", + IsMultiAgent = true, + DefaultWorkerModel = "gpt-4.1", + DefaultOrchestratorModel = "claude-opus-4.6" + }); + + var json = JsonSerializer.Serialize(state); + var restored = JsonSerializer.Deserialize(json)!; + + var group = restored.Groups.First(g => g.Name == "Test"); + Assert.Equal("gpt-4.1", group.DefaultWorkerModel); + Assert.Equal("claude-opus-4.6", group.DefaultOrchestratorModel); + } + + [Fact] + public void Legacy_Deserialization_GracefullyHandlesNoPreferredModel() + { + // Simulate legacy JSON without PreferredModel + var json = """{"SessionName":"old-session","GroupId":"_default","IsPinned":false,"ManualOrder":0,"Role":"Worker"}"""; + var meta = JsonSerializer.Deserialize(json)!; + Assert.Null(meta.PreferredModel); + Assert.Equal("old-session", meta.SessionName); + } +} + +public class GroupReflectionStateTests +{ + [Fact] + public void Create_InitializesCorrectly() + { + var state = GroupReflectionState.Create("Build a REST API", 10); + + Assert.Equal("Build a REST API", state.Goal); + Assert.Equal(10, state.MaxIterations); + Assert.Equal(0, state.CurrentIteration); + Assert.True(state.IsActive); + Assert.False(state.GoalMet); + Assert.False(state.IsStalled); + Assert.False(state.IsPaused); + 
Assert.NotNull(state.StartedAt); + } + + [Fact] + public void CheckStall_ReturnsFalse_ForUniqueResponses() + { + var state = GroupReflectionState.Create("test"); + + Assert.False(state.CheckStall("response 1")); + Assert.False(state.CheckStall("response 2")); + Assert.False(state.CheckStall("response 3")); + } + + [Fact] + public void CheckStall_DetectsRepeatedResponses() + { + var state = GroupReflectionState.Create("test"); + + state.CheckStall("same response"); + state.CheckStall("same response"); // 1st stall + var stalled = state.CheckStall("same response"); // 2nd stall + + Assert.True(stalled); + Assert.True(state.IsStalled); + } + + [Fact] + public void CheckStall_ResetsOnProgress() + { + var state = GroupReflectionState.Create("test"); + + state.CheckStall("response A"); + state.CheckStall("response A"); // 1st stall + state.CheckStall("response B"); // different β€” resets + + Assert.False(state.IsStalled); + Assert.Equal(0, state.ConsecutiveStalls); + } + + [Fact] + public void CompletionSummary_GoalMet() + { + var state = GroupReflectionState.Create("test"); + state.CurrentIteration = 3; + state.GoalMet = true; + + Assert.Contains("βœ…", state.CompletionSummary); + Assert.Contains("3", state.CompletionSummary); + } + + [Fact] + public void CompletionSummary_Stalled() + { + var state = GroupReflectionState.Create("test"); + state.CurrentIteration = 4; + state.IsStalled = true; + + Assert.Contains("⚠️", state.CompletionSummary); + } + + [Fact] + public void CompletionSummary_MaxReached() + { + var state = GroupReflectionState.Create("test", 5); + state.CurrentIteration = 5; + + Assert.Contains("⏱️", state.CompletionSummary); + Assert.Contains("5", state.CompletionSummary); + } + + [Fact] + public void OrchestratorReflect_ModeEnumValue_Exists() + { + var mode = MultiAgentMode.OrchestratorReflect; + Assert.Equal("OrchestratorReflect", mode.ToString()); + } + + [Fact] + public void OrchestratorReflect_SurvivesSerialization() + { + var group = new 
SessionGroup + { + Name = "Test", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + ReflectionState = GroupReflectionState.Create("Build it", 10) + }; + + var json = JsonSerializer.Serialize(group); + var restored = JsonSerializer.Deserialize(json)!; + + Assert.Equal(MultiAgentMode.OrchestratorReflect, restored.OrchestratorMode); + Assert.NotNull(restored.ReflectionState); + Assert.Equal("Build it", restored.ReflectionState!.Goal); + Assert.Equal(10, restored.ReflectionState.MaxIterations); + Assert.True(restored.ReflectionState.IsActive); + } + + [Fact] + public void ExtractIterationEvaluation_ParsesNeedsIterationMarker() + { + var response = "The synthesis looks good but [[NEEDS_ITERATION]] Missing error handling in the API layer. @worker:alice\nAdd error handling.\n@end"; + + // Use reflection to test internal method + var method = typeof(CopilotService).GetMethod("ExtractIterationEvaluation", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Static); + Assert.NotNull(method); + + var result = (string)method!.Invoke(null, new object[] { response })!; + Assert.Contains("Missing error handling", result); + Assert.DoesNotContain("@worker", result); + } + + [Fact] + public void ExtractIterationEvaluation_FallsBackToLastLines() + { + var response = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nThe final evaluation."; + + var method = typeof(CopilotService).GetMethod("ExtractIterationEvaluation", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Static); + var result = (string)method!.Invoke(null, new object[] { response })!; + + Assert.Contains("The final evaluation", result); + } +} diff --git a/PolyPilot/Components/Layout/SessionListItem.razor b/PolyPilot/Components/Layout/SessionListItem.razor index afdd14ecdc..86282ebac1 100644 --- a/PolyPilot/Components/Layout/SessionListItem.razor +++ b/PolyPilot/Components/Layout/SessionListItem.razor @@ -59,7 +59,11 @@
- @Session.MessageCount msgs@(Session.Model != "resumed" ? $" β€’ {Session.Model}" : "") + @Session.MessageCount msgs@(Session.Model != "resumed" ? $" β€’ {CopilotService.GetEffectiveModel(Session.Name)}" : "") + @if (Meta?.PreferredModel != null && Meta.PreferredModel != Session.Model) + { + ⚑ + } @if (Session.IsProcessing) { β€’ Working @@ -141,6 +145,17 @@ 🎯 Set as Orchestrator } + + 🧠 Model +
+ +
} { @@ -186,6 +201,12 @@ else if (e.Key == "Escape") await OnCloseMenu.InvokeAsync(); } + private void OnPreferredModelChanged(ChangeEventArgs e) + { + var slug = e.Value?.ToString(); + CopilotService.SetSessionPreferredModel(Session.Name, string.IsNullOrEmpty(slug) ? null : slug); + } + private static string GetShortPath(string path) { var parts = path.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); diff --git a/PolyPilot/Components/Layout/SessionListItem.razor.css b/PolyPilot/Components/Layout/SessionListItem.razor.css index d91e60cdad..abe586e75d 100644 --- a/PolyPilot/Components/Layout/SessionListItem.razor.css +++ b/PolyPilot/Components/Layout/SessionListItem.razor.css @@ -278,3 +278,30 @@ margin-left: 4px; flex-shrink: 0; } + +.model-override { + color: #fbbf24; + font-size: 0.65rem; + margin-left: 2px; +} + +.model-picker-inline { + padding: 0.25rem 0.65rem; +} + +.model-picker-inline select { + width: 100%; + padding: 0.3rem 0.4rem; + border: 1px solid var(--control-border); + border-radius: 5px; + background: var(--control-bg); + color: var(--text-primary); + font-size: var(--type-footnote); + font-family: inherit; + cursor: pointer; + outline: none; +} + +.model-picker-inline select:focus { + border-color: var(--accent-primary); +} diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index 780bcc7514..aac3bff996 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -352,6 +352,7 @@ else + } } diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index 10c1461377..a1d3bd3994 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -384,6 +384,39 @@ 50% { opacity: 1; } } +.group-reflect-status { + display: flex; + align-items: center; + gap: 0.4rem; + padding: 0.25rem 0; + 
font-size: var(--type-footnote); + color: var(--text-dim); +} + +.reflect-iter { + font-weight: 600; + color: var(--accent-primary); + white-space: nowrap; +} + +.reflect-goal { + flex: 1; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.reflect-stop-btn, .reflect-pause-btn { + all: unset; + cursor: pointer; + padding: 0.15rem 0.3rem; + border-radius: 4px; + font-size: 0.7rem; +} +.reflect-stop-btn:hover, .reflect-pause-btn:hover { + background: var(--control-bg); +} + .sidebar-ma-mode-select { width: 100%; padding: 0.25rem 0.4rem; diff --git a/PolyPilot/Models/SessionOrganization.cs b/PolyPilot/Models/SessionOrganization.cs index 134fca77c4..19413dda69 100644 --- a/PolyPilot/Models/SessionOrganization.cs +++ b/PolyPilot/Models/SessionOrganization.cs @@ -22,6 +22,15 @@ public class SessionGroup /// Optional system prompt appended to all sessions in this multi-agent group. public string? OrchestratorPrompt { get; set; } + + /// Default model for new worker sessions added to this group. Null = use app default. + public string? DefaultWorkerModel { get; set; } + + /// Default model for the orchestrator role. Null = use app default. + public string? DefaultOrchestratorModel { get; set; } + + /// Active reflection state for OrchestratorReflect mode. Null when not in a reflect loop. + public GroupReflectionState? ReflectionState { get; set; } } public class SessionMeta @@ -35,6 +44,13 @@ public class SessionMeta /// Role of this session within a multi-agent group. public MultiAgentRole Role { get; set; } = MultiAgentRole.Worker; + + /// + /// Preferred model for this session in multi-agent context. + /// Null = use whatever model the session was created with (no override). + /// When set, the model is switched before dispatch via EnsureSessionModelAsync. + /// + public string? 
PreferredModel { get; set; } } [JsonConverter(typeof(JsonStringEnumConverter))] @@ -55,7 +71,9 @@ public enum MultiAgentMode /// Send the prompt to sessions one at a time in order. Sequential, /// An orchestrator session decides how to delegate work to other sessions. - Orchestrator + Orchestrator, + /// Orchestrator with iterative reflection: planβ†’dispatchβ†’collectβ†’evaluateβ†’repeat until goal met. + OrchestratorReflect } /// Role of a session within a multi-agent group. @@ -77,3 +95,65 @@ public class OrganizationState public List Sessions { get; set; } = new(); public SessionSortMode SortMode { get; set; } = SessionSortMode.LastActive; } + +/// +/// Tracks iterative orchestration state for a multi-agent group in OrchestratorReflect mode. +/// The orchestrator evaluates worker results against a goal and re-dispatches until satisfied. +/// +public class GroupReflectionState +{ + public string Goal { get; set; } = ""; + public int MaxIterations { get; set; } = 5; + public int CurrentIteration { get; set; } + public bool IsActive { get; set; } + public bool GoalMet { get; set; } + public bool IsStalled { get; set; } + public bool IsPaused { get; set; } + public DateTime? StartedAt { get; set; } + public DateTime? CompletedAt { get; set; } + + /// The orchestrator's evaluation from the last iteration. + public string? LastEvaluation { get; set; } + + /// Hash window for stall detection (last N response hashes). + [System.Text.Json.Serialization.JsonIgnore] + internal List ResponseHashes { get; } = new(); + internal const int StallWindowSize = 3; + internal int ConsecutiveStalls { get; set; } + + public static GroupReflectionState Create(string goal, int maxIterations = 5) => new() + { + Goal = goal, + MaxIterations = maxIterations, + IsActive = true, + StartedAt = DateTime.Now + }; + + /// Check if the latest synthesis is repeating (stall detection). 
+ public bool CheckStall(string synthesisResponse) + { + var hash = synthesisResponse.GetHashCode(); + if (ResponseHashes.Contains(hash)) + { + ConsecutiveStalls++; + if (ConsecutiveStalls >= 2) + { + IsStalled = true; + return true; + } + } + else + { + ConsecutiveStalls = 0; + } + ResponseHashes.Add(hash); + if (ResponseHashes.Count > StallWindowSize) + ResponseHashes.RemoveAt(0); + return false; + } + + public string CompletionSummary => + GoalMet ? $"βœ… Goal met after {CurrentIteration} iteration(s)" + : IsStalled ? $"⚠️ Stalled after {CurrentIteration} iteration(s)" + : $"⏱️ Reached max iterations ({MaxIterations})"; +} diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 66af1eef02..dee9a03fd2 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -484,43 +484,53 @@ public async Task SendToMultiAgentGroupAsync(string groupId, string prompt, Canc case MultiAgentMode.Orchestrator: await SendViaOrchestratorAsync(groupId, members, prompt, cancellationToken); break; + + case MultiAgentMode.OrchestratorReflect: + await SendViaOrchestratorReflectAsync(groupId, members, prompt, cancellationToken); + break; } } /// /// Build a multi-agent context prefix for a session in a group. + /// Includes model info for each member so agents know each other's capabilities. /// private string BuildMultiAgentPrefix(string sessionName, SessionGroup group, List allMembers) { var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); var role = meta?.Role ?? MultiAgentRole.Worker; var roleName = role == MultiAgentRole.Orchestrator ? "orchestrator" : "worker"; - var others = allMembers.Where(m => m != sessionName).ToList(); - var othersList = others.Count > 0 ? string.Join(", ", others) : "none"; - return $"[Multi-agent context: You are '{sessionName}' ({roleName}) in group '{group.Name}'. 
Other members: {othersList}.]\n\n"; + var memberDetails = allMembers.Where(m => m != sessionName) + .Select(m => $"'{m}' ({GetEffectiveModel(m)})") + .ToList(); + var othersList = memberDetails.Count > 0 ? string.Join(", ", memberDetails) : "none"; + return $"[Multi-agent context: You are '{sessionName}' ({roleName}, {GetEffectiveModel(sessionName)}) in group '{group.Name}'. Other members: {othersList}.]\n\n"; } private async Task SendBroadcastAsync(SessionGroup group, List sessionNames, string prompt, CancellationToken cancellationToken) { - var tasks = sessionNames.Select(name => + var tasks = sessionNames.Select(async name => { var session = GetSession(name); - if (session == null) return Task.CompletedTask; + if (session == null) return; + await EnsureSessionModelAsync(name, cancellationToken); var prefixedPrompt = BuildMultiAgentPrefix(name, group, sessionNames) + prompt; if (session.IsProcessing) { EnqueueMessage(name, prefixedPrompt); - return Task.CompletedTask; + return; } - return SendPromptAsync(name, prefixedPrompt, cancellationToken: cancellationToken) - .ContinueWith(t => - { - if (t.IsFaulted) - Debug($"Broadcast send failed for '{name}': {t.Exception?.InnerException?.Message}"); - }, TaskScheduler.Default); + try + { + await SendPromptAsync(name, prefixedPrompt, cancellationToken: cancellationToken); + } + catch (Exception ex) + { + Debug($"Broadcast send failed for '{name}': {ex.Message}"); + } }); await Task.WhenAll(tasks); @@ -535,6 +545,7 @@ private async Task SendSequentialAsync(SessionGroup group, List sessionN var session = GetSession(name); if (session == null) continue; + await EnsureSessionModelAsync(name, cancellationToken); var prefixedPrompt = BuildMultiAgentPrefix(name, group, sessionNames) + prompt; if (session.IsProcessing) @@ -608,7 +619,7 @@ private string BuildOrchestratorPlanningPrompt(string userPrompt, List w var sb = new System.Text.StringBuilder(); sb.AppendLine($"You are the orchestrator of a multi-agent group. 
You have {workerNames.Count} worker agent(s) available:"); foreach (var w in workerNames) - sb.AppendLine($" - '{w}'"); + sb.AppendLine($" - '{w}' (model: {GetEffectiveModel(w)})"); sb.AppendLine(); sb.AppendLine("## User Request"); sb.AppendLine(userPrompt); @@ -664,6 +675,7 @@ private record WorkerResult(string WorkerName, string? Response, bool Success, s private async Task ExecuteWorkerAsync(string workerName, string task, string originalPrompt, CancellationToken cancellationToken) { var sw = System.Diagnostics.Stopwatch.StartNew(); + await EnsureSessionModelAsync(workerName, cancellationToken); var workerPrompt = $"You are a worker agent. Complete the following task thoroughly. Your response will be collected and synthesized with other workers' responses.\n\n## Original User Request (context)\n{originalPrompt}\n\n## Your Assigned Task\n{task}"; try @@ -749,4 +761,255 @@ private void AddOrchestratorSystemMessage(string sessionName, string message) } #endregion + + #region Per-Agent Model Assignment + + /// + /// Set the preferred model for a session in a multi-agent group. + /// The model is applied at dispatch time via EnsureSessionModelAsync. + /// + public void SetSessionPreferredModel(string sessionName, string? modelSlug) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + if (meta == null) return; + meta.PreferredModel = modelSlug != null ? Models.ModelHelper.NormalizeToSlug(modelSlug) : null; + SaveOrganization(); + OnStateChanged?.Invoke(); + } + + /// + /// Returns the model a session will use: PreferredModel if set, else live AgentSessionInfo.Model. + /// + public string GetEffectiveModel(string sessionName) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + if (meta?.PreferredModel != null) return meta.PreferredModel; + var session = GetSession(sessionName); + return session?.Model ?? 
DefaultModel; + } + + /// + /// Ensures a session's live model matches its PreferredModel before dispatch. + /// No-op if PreferredModel is null or already matches. + /// + private async Task EnsureSessionModelAsync(string sessionName, CancellationToken ct) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + if (meta?.PreferredModel == null) return; + + var session = GetSession(sessionName); + if (session == null) return; + + var currentSlug = Models.ModelHelper.NormalizeToSlug(session.Model); + if (currentSlug == meta.PreferredModel) return; + + try + { + await ChangeModelAsync(sessionName, meta.PreferredModel, ct); + Debug($"Switched '{sessionName}' model to '{meta.PreferredModel}' for multi-agent dispatch"); + } + catch (Exception ex) + { + Debug($"Failed to switch model for '{sessionName}': {ex.Message}"); + } + } + + #endregion + + #region OrchestratorReflect Loop + + /// + /// Start a reflection loop on a multi-agent group. + /// + public void StartGroupReflection(string groupId, string goal, int maxIterations = 5) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId && g.IsMultiAgent); + if (group == null) return; + + group.ReflectionState = GroupReflectionState.Create(goal, maxIterations); + group.OrchestratorMode = MultiAgentMode.OrchestratorReflect; + SaveOrganization(); + OnStateChanged?.Invoke(); + } + + /// + /// Stop an active group reflection loop. + /// + public void StopGroupReflection(string groupId) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group?.ReflectionState == null) return; + + group.ReflectionState.IsActive = false; + group.ReflectionState.CompletedAt = DateTime.Now; + SaveOrganization(); + OnStateChanged?.Invoke(); + } + + /// + /// Pause/resume a group reflection loop. 
+ /// + public void PauseGroupReflection(string groupId, bool paused) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group?.ReflectionState == null) return; + group.ReflectionState.IsPaused = paused; + SaveOrganization(); + OnStateChanged?.Invoke(); + } + + private async Task SendViaOrchestratorReflectAsync(string groupId, List members, string prompt, CancellationToken ct) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group == null) return; + + var reflectState = group.ReflectionState; + if (reflectState == null || !reflectState.IsActive) + { + // Not in reflect mode β€” fall back to regular orchestrator + await SendViaOrchestratorAsync(groupId, members, prompt, ct); + return; + } + + var orchestratorName = GetOrchestratorSession(groupId); + if (orchestratorName == null) + { + await SendBroadcastAsync(group, members, prompt, ct); + return; + } + + var workerNames = members.Where(m => m != orchestratorName).ToList(); + + while (reflectState.IsActive && !reflectState.IsPaused + && reflectState.CurrentIteration < reflectState.MaxIterations) + { + ct.ThrowIfCancellationRequested(); + reflectState.CurrentIteration++; + + // Phase 1: Plan (first iteration) or Re-plan (subsequent) + var iterDetail = $"Iteration {reflectState.CurrentIteration}/{reflectState.MaxIterations}"; + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Planning, iterDetail)); + + string planPrompt; + if (reflectState.CurrentIteration == 1) + { + planPrompt = BuildOrchestratorPlanningPrompt(prompt, workerNames, group.OrchestratorPrompt); + } + else + { + planPrompt = BuildReplanPrompt(reflectState.LastEvaluation ?? 
"Continue iterating.", workerNames, prompt); + } + + var planResponse = await SendPromptAndWaitAsync(orchestratorName, planPrompt, ct); + var assignments = ParseTaskAssignments(planResponse, workerNames); + + if (assignments.Count == 0) + { + // Orchestrator decided no more work needed + reflectState.GoalMet = true; + AddOrchestratorSystemMessage(orchestratorName, $"βœ… Orchestrator completed without delegation (iteration {reflectState.CurrentIteration})."); + break; + } + + // Phase 2-3: Dispatch + Collect + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Dispatching, + $"Sending tasks to {assignments.Count} worker(s) β€” {iterDetail}")); + + var workerTasks = assignments.Select(a => ExecuteWorkerAsync(a.WorkerName, a.Task, prompt, ct)); + var results = await Task.WhenAll(workerTasks); + + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.WaitingForWorkers, iterDetail)); + + // Phase 4: Synthesize + Evaluate + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Synthesizing, iterDetail)); + + var synthEvalPrompt = BuildSynthesisWithEvalPrompt(prompt, results.ToList(), reflectState); + var synthesisResponse = await SendPromptAndWaitAsync(orchestratorName, synthEvalPrompt, ct); + + // Check completion sentinel + if (synthesisResponse.Contains("[[GROUP_REFLECT_COMPLETE]]", StringComparison.OrdinalIgnoreCase)) + { + reflectState.GoalMet = true; + reflectState.IsActive = false; + AddOrchestratorSystemMessage(orchestratorName, $"βœ… {reflectState.CompletionSummary}"); + break; + } + + // Extract evaluation for next iteration + reflectState.LastEvaluation = ExtractIterationEvaluation(synthesisResponse); + + // Stall detection + if (reflectState.CheckStall(synthesisResponse)) + { + AddOrchestratorSystemMessage(orchestratorName, $"⚠️ {reflectState.CompletionSummary}"); + break; + } + + SaveOrganization(); + InvokeOnUI(() => OnStateChanged?.Invoke()); + } + + if (!reflectState.GoalMet && 
!reflectState.IsStalled && !reflectState.IsPaused) + { + AddOrchestratorSystemMessage(orchestratorName, $"⏱️ {reflectState.CompletionSummary}"); + } + + reflectState.IsActive = false; + reflectState.CompletedAt = DateTime.Now; + SaveOrganization(); + InvokeOnUI(() => + { + OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Complete, reflectState.CompletionSummary); + OnStateChanged?.Invoke(); + }); + } + + private string BuildSynthesisWithEvalPrompt(string originalPrompt, List results, GroupReflectionState state) + { + var sb = new System.Text.StringBuilder(); + sb.Append(BuildSynthesisPrompt(originalPrompt, results)); + sb.AppendLine(); + sb.AppendLine($"## Evaluation Check (Iteration {state.CurrentIteration}/{state.MaxIterations})"); + sb.AppendLine($"**Goal:** {state.Goal}"); + sb.AppendLine(); + sb.AppendLine("Evaluate whether the combined output satisfies the goal."); + sb.AppendLine("- If **YES**: Include `[[GROUP_REFLECT_COMPLETE]]` in your response with a final summary."); + sb.AppendLine("- If **NO**: Include `[[NEEDS_ITERATION]]` explaining what's missing, then provide revised `@worker` blocks for the next iteration."); + return sb.ToString(); + } + + private string BuildReplanPrompt(string lastEvaluation, List workerNames, string originalPrompt) + { + var sb = new System.Text.StringBuilder(); + sb.AppendLine("## Previous Iteration Evaluation"); + sb.AppendLine(lastEvaluation); + sb.AppendLine(); + sb.AppendLine("## Original Request (context)"); + sb.AppendLine(originalPrompt); + sb.AppendLine(); + sb.AppendLine($"Available workers ({workerNames.Count}):"); + foreach (var w in workerNames) + sb.AppendLine($" - '{w}' (model: {GetEffectiveModel(w)})"); + sb.AppendLine(); + sb.AppendLine("Assign refined tasks using `@worker:name` / `@end` blocks to address the gaps identified above."); + return sb.ToString(); + } + + private static string ExtractIterationEvaluation(string response) + { + // Extract text after [[NEEDS_ITERATION]] marker, or use 
full response as evaluation + var idx = response.IndexOf("[[NEEDS_ITERATION]]", StringComparison.OrdinalIgnoreCase); + if (idx >= 0) + { + var afterMarker = response[(idx + "[[NEEDS_ITERATION]]".Length)..].Trim(); + // Take text up to first @worker block as the evaluation + var workerIdx = afterMarker.IndexOf("@worker:", StringComparison.OrdinalIgnoreCase); + return workerIdx >= 0 ? afterMarker[..workerIdx].Trim() : afterMarker; + } + // No marker β€” use last paragraph as evaluation + var lines = response.Split('\n'); + return string.Join('\n', lines.TakeLast(5)).Trim(); + } + + #endregion } From 676f9c4ab6e921d4c83d6ccbb0fc454bd804c131 Mon Sep 17 00:00:00 2001 From: Shane Date: Tue, 17 Feb 2026 22:56:01 -0600 Subject: [PATCH 12/48] Add model capability warnings, group presets, and race-safe dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model Capability System: - Add ModelCapabilities static registry with capability flags per model family (CodeExpert, ReasoningExpert, Fast, CostEfficient, ToolUse, Vision, LargeContext) - GetRoleWarnings() warns when assigning cheap models as orchestrator or non-tool-use models as workers - GetStrengths() returns human-readable model description - Warnings displayed inline in SessionListItem model picker Group Presets (one-click multi-agent creation): - Add GroupPreset with 4 built-in templates: Code Review Team, Multi-Perspective Analysis, Fast Iteration Squad, Deep Research - Each preset defines orchestrator model, worker models, and dispatch mode - πŸš€ Preset button in sidebar toolbar opens picker panel - CreateGroupFromPresetAsync creates group + sessions with correct roles/models Race-Safe Model Switching: - Add per-session SemaphoreSlim in EnsureSessionModelAsync via ConcurrentDictionary - Double-check pattern: re-verify model after acquiring lock - Prevents concurrent dispatches from racing on model switch Richer Evaluation Prompts: - BuildSynthesisWithEvalPrompt now 
includes quality assessment criteria (completeness, correctness, relevance) and iteration-aware urgency hints - Cross-iteration feedback tracking: previous evaluation included in next iteration Tests (10 new, 660 total passing): - ModelCapabilitiesTests: known/unknown models, fuzzy match, role warnings - GroupPresetTests: built-in validation, OrchestratorReflect mode coverage Architecture consulted with: Claude Opus 4.6 (system design), Gemini 3 Pro (UX/prompt design), GPT-5 (extensibility/concurrency patterns). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/PolyPilot.Tests.csproj | 1 + PolyPilot.Tests/SessionOrganizationTests.cs | 85 ++++++++++ .../Components/Layout/SessionListItem.razor | 18 ++- .../Layout/SessionListItem.razor.css | 14 ++ .../Components/Layout/SessionSidebar.razor | 66 ++++++++ .../Layout/SessionSidebar.razor.css | 53 +++++++ PolyPilot/Models/ModelCapabilities.cs | 136 ++++++++++++++++ .../Services/CopilotService.Organization.cs | 147 +++++++++++++++++- 8 files changed, 516 insertions(+), 4 deletions(-) create mode 100644 PolyPilot/Models/ModelCapabilities.cs diff --git a/PolyPilot.Tests/PolyPilot.Tests.csproj b/PolyPilot.Tests/PolyPilot.Tests.csproj index f13b0b4d7a..dfae41d76d 100644 --- a/PolyPilot.Tests/PolyPilot.Tests.csproj +++ b/PolyPilot.Tests/PolyPilot.Tests.csproj @@ -51,6 +51,7 @@ + diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 00c97b1133..2878ac0048 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -1001,3 +1001,88 @@ public void ExtractIterationEvaluation_FallsBackToLastLines() Assert.Contains("The final evaluation", result); } } + +public class ModelCapabilitiesTests +{ + [Fact] + public void GetCapabilities_KnownModel_ReturnsFlags() + { + var caps = ModelCapabilities.GetCapabilities("claude-opus-4.6"); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + 
Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + } + + [Fact] + public void GetCapabilities_UnknownModel_ReturnsNone() + { + var caps = ModelCapabilities.GetCapabilities("totally-unknown-model"); + Assert.Equal(ModelCapability.None, caps); + } + + [Fact] + public void GetCapabilities_FuzzyMatch_Works() + { + // "claude-opus-4.6-fast" should fuzzy-match "claude-opus-4.6" + var caps = ModelCapabilities.GetCapabilities("gpt-4.1"); + Assert.True(caps.HasFlag(ModelCapability.Fast)); + Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); + } + + [Fact] + public void GetRoleWarnings_CheapOrchestratorModel_WarnsAboutReasoning() + { + var warnings = ModelCapabilities.GetRoleWarnings("gpt-4.1", MultiAgentRole.Orchestrator); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Contains("reasoning", StringComparison.OrdinalIgnoreCase)); + } + + [Fact] + public void GetRoleWarnings_StrongOrchestratorModel_NoWarnings() + { + var warnings = ModelCapabilities.GetRoleWarnings("claude-opus-4.6", MultiAgentRole.Orchestrator); + Assert.Empty(warnings); + } + + [Fact] + public void GetRoleWarnings_WorkerWithToolUse_NoWarnings() + { + var warnings = ModelCapabilities.GetRoleWarnings("gpt-4.1", MultiAgentRole.Worker); + Assert.Empty(warnings); + } + + [Fact] + public void GetStrengths_ReturnsDescription() + { + var strengths = ModelCapabilities.GetStrengths("claude-opus-4.6"); + Assert.NotEqual("Unknown model", strengths); + Assert.Contains("reasoning", strengths, StringComparison.OrdinalIgnoreCase); + } +} + +public class GroupPresetTests +{ + [Fact] + public void BuiltInPresets_AllHaveRequiredFields() + { + foreach (var preset in GroupPreset.BuiltIn) + { + Assert.False(string.IsNullOrEmpty(preset.Name)); + Assert.False(string.IsNullOrEmpty(preset.Description)); + Assert.False(string.IsNullOrEmpty(preset.OrchestratorModel)); + Assert.NotEmpty(preset.WorkerModels); + Assert.True(preset.WorkerModels.All(m => !string.IsNullOrEmpty(m))); + } + } + + [Fact] + public 
void BuiltInPresets_ContainExpectedCount() + { + Assert.True(GroupPreset.BuiltIn.Length >= 3, "Should have at least 3 built-in presets"); + } + + [Fact] + public void BuiltInPresets_IncludeOrchestratorReflect() + { + Assert.Contains(GroupPreset.BuiltIn, p => p.Mode == MultiAgentMode.OrchestratorReflect); + } +} diff --git a/PolyPilot/Components/Layout/SessionListItem.razor b/PolyPilot/Components/Layout/SessionListItem.razor index 86282ebac1..c638fc6602 100644 --- a/PolyPilot/Components/Layout/SessionListItem.razor +++ b/PolyPilot/Components/Layout/SessionListItem.razor @@ -152,9 +152,25 @@ @foreach (var model in CopilotService.AvailableModels) { - + var strengths = ModelCapabilities.GetStrengths(model); + } + @{ + var effectiveModel = CopilotService.GetEffectiveModel(Session.Name); + var warnings = ModelCapabilities.GetRoleWarnings(effectiveModel, Meta?.Role ?? MultiAgentRole.Worker); + } + @if (warnings.Count > 0) + { + @foreach (var w in warnings) + { +
@w
+ } + } + else + { +
@ModelCapabilities.GetStrengths(effectiveModel)
+ }
} diff --git a/PolyPilot/Components/Layout/SessionListItem.razor.css b/PolyPilot/Components/Layout/SessionListItem.razor.css index abe586e75d..6a76ddf420 100644 --- a/PolyPilot/Components/Layout/SessionListItem.razor.css +++ b/PolyPilot/Components/Layout/SessionListItem.razor.css @@ -305,3 +305,17 @@ .model-picker-inline select:focus { border-color: var(--accent-primary); } + +.model-warning { + font-size: var(--type-caption1); + color: #fbbf24; + padding: 0.2rem 0; + line-height: 1.3; +} + +.model-strengths { + font-size: var(--type-caption1); + color: var(--text-dim); + padding: 0.15rem 0; + opacity: 0.8; +} diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index aac3bff996..996b8b4c3d 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -37,6 +37,7 @@ else if (IsFlyoutPanel) IsCreating="isCreating" CreateError="@createError" OnCreate="HandleCreateSession" + OnCreateGroup="HandleCreateGroup" OnBrowseDirectory="OpenDirectoryPicker" />
@@ -75,6 +76,7 @@ else IsCreating="isCreating" CreateError="@createError" OnCreate="HandleCreateSession" + OnCreateGroup="HandleCreateGroup" OnBrowseDirectory="OpenDirectoryPicker" />
@@ -201,9 +203,27 @@ else
+
+ @if (showPresetPicker) + { +
+ @foreach (var preset in GroupPreset.BuiltIn) + { + var p = preset; + + } +
+ } } }; @@ -661,6 +681,35 @@ else showPersistedSessions = !showPersistedSessions; } + private async Task HandleCreateGroup(GroupPreset preset) + { + if (isCreating) return; + isCreating = true; + createError = null; + try + { + var groupId = await CopilotService.CreateMultiAgentGroupAsync( + preset.Name, + preset.OrchestratorModel, + preset.WorkerModel, + preset.WorkerCount, + preset.Mode, + preset.SystemPrompt); + + // Expand the new group + CopilotService.ToggleGroupCollapsed(groupId); + CopilotService.SaveUiState(currentPage); + } + catch (Exception ex) + { + createError = ex.Message; + } + finally + { + isCreating = false; + } + } + private async Task HandleCreateSession((string Name, string Model, string Directory, string? WorktreeId, string? InitialPrompt) args) { if (isCreating) return; @@ -878,6 +927,23 @@ else { isAddingGroup = true; isAddingMultiAgentGroup = true; + showPresetPicker = false; + } + + private bool showPresetPicker; + + private async Task CreateFromPreset(GroupPreset preset) + { + showPresetPicker = false; + StateHasChanged(); + try + { + await CopilotService.CreateGroupFromPresetAsync(preset, CopilotService.GetActiveSessionWorkingDirectory()); + } + catch (Exception ex) + { + Console.WriteLine($"Failed to create from preset: {ex.Message}"); + } } private void OnSidebarMultiAgentModeChanged(string groupId, ChangeEventArgs e) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index a1d3bd3994..332cb7ba39 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -1384,3 +1384,56 @@ } .bug-report-submit:hover { opacity: 0.85; } .bug-report-submit:disabled { opacity: 0.5; cursor: not-allowed; } + +/* === Preset Picker === */ +.preset-picker { + display: flex; + flex-direction: column; + gap: 0.35rem; + padding: 0.5rem; + background: var(--bg-tertiary); + border: 1px solid var(--control-border); + 
border-radius: 8px; + margin-top: 0.25rem; +} + +.preset-item { + all: unset; + display: flex; + align-items: flex-start; + gap: 0.5rem; + padding: 0.5rem; + border-radius: 6px; + cursor: pointer; + transition: background 0.15s; +} +.preset-item:hover { background: var(--control-bg); } + +.preset-emoji { font-size: 1.2rem; flex-shrink: 0; margin-top: 0.1rem; } + +.preset-info { + display: flex; + flex-direction: column; + gap: 0.15rem; + min-width: 0; +} + +.preset-name { + font-size: var(--type-callout); + font-weight: 600; + color: var(--text-primary); +} + +.preset-desc { + font-size: var(--type-footnote); + color: var(--text-dim); +} + +.preset-models { + font-size: var(--type-caption1); + color: var(--text-dim); + opacity: 0.8; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} diff --git a/PolyPilot/Models/ModelCapabilities.cs b/PolyPilot/Models/ModelCapabilities.cs new file mode 100644 index 0000000000..d4ba04a92e --- /dev/null +++ b/PolyPilot/Models/ModelCapabilities.cs @@ -0,0 +1,136 @@ +namespace PolyPilot.Models; + +/// +/// Lightweight model capability flags for multi-agent role assignment warnings. +/// No external API calls β€” purely static metadata based on known model families. +/// +[Flags] +public enum ModelCapability +{ + None = 0, + CodeExpert = 1 << 0, + ReasoningExpert = 1 << 1, + Fast = 1 << 2, + CostEfficient = 1 << 3, + ToolUse = 1 << 4, + Vision = 1 << 5, + LargeContext = 1 << 6, +} + +/// +/// Static registry of model capabilities for UX warnings during agent assignment. 
+/// +public static class ModelCapabilities +{ + private static readonly Dictionary _registry = new(StringComparer.OrdinalIgnoreCase) + { + // Anthropic + ["claude-opus-4.6"] = (ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.LargeContext, "Best reasoning, complex orchestration"), + ["claude-opus-4.5"] = (ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.LargeContext, "Deep reasoning, creative coding"), + ["claude-sonnet-4.5"] = (ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast, "Fast coding, good balance"), + ["claude-sonnet-4"] = (ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast, "Fast coding, good balance"), + ["claude-haiku-4.5"] = (ModelCapability.Fast | ModelCapability.CostEfficient | ModelCapability.ToolUse, "Quick tasks, cost-efficient"), + + // OpenAI + ["gpt-5"] = (ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.LargeContext, "Strong reasoning and coding"), + ["gpt-5.1"] = (ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.LargeContext, "Strong reasoning and coding"), + ["gpt-5.1-codex"] = (ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast, "Optimized for code generation"), + ["gpt-5.1-codex-mini"] = (ModelCapability.CodeExpert | ModelCapability.Fast | ModelCapability.CostEfficient, "Fast code, cost-efficient"), + ["gpt-4.1"] = (ModelCapability.Fast | ModelCapability.CostEfficient | ModelCapability.ToolUse, "Fast and cheap, good for evaluation"), + ["gpt-5-mini"] = (ModelCapability.Fast | ModelCapability.CostEfficient, "Quick tasks, budget-friendly"), + + // Google + ["gemini-3-pro"] = (ModelCapability.ReasoningExpert | ModelCapability.LargeContext | ModelCapability.Vision, "Strong reasoning, large context, multimodal"), + ["gemini-3-pro-preview"] = 
(ModelCapability.ReasoningExpert | ModelCapability.LargeContext | ModelCapability.Vision, "Strong reasoning, large context, multimodal"), + }; + + /// Get capabilities for a model. Returns None for unknown models. + public static ModelCapability GetCapabilities(string modelSlug) + { + if (string.IsNullOrEmpty(modelSlug)) return ModelCapability.None; + if (_registry.TryGetValue(modelSlug, out var entry)) return entry.Caps; + + // Fuzzy match by prefix + foreach (var (key, val) in _registry) + if (modelSlug.StartsWith(key, StringComparison.OrdinalIgnoreCase) || + key.StartsWith(modelSlug, StringComparison.OrdinalIgnoreCase)) + return val.Caps; + + return ModelCapability.None; + } + + /// Get a short description of model strengths. + public static string GetStrengths(string modelSlug) + { + if (_registry.TryGetValue(modelSlug, out var entry)) return entry.Strengths; + + foreach (var (key, val) in _registry) + if (modelSlug.StartsWith(key, StringComparison.OrdinalIgnoreCase) || + key.StartsWith(modelSlug, StringComparison.OrdinalIgnoreCase)) + return val.Strengths; + + return "Unknown model"; + } + + /// + /// Get warnings when assigning a model to a multi-agent role. + /// Returns empty list if no issues detected. + /// + public static List GetRoleWarnings(string modelSlug, MultiAgentRole role) + { + var warnings = new List(); + var caps = GetCapabilities(modelSlug); + + if (caps == ModelCapability.None) + { + warnings.Add($"Unknown model '{modelSlug}' β€” capabilities not verified"); + return warnings; + } + + if (role == MultiAgentRole.Orchestrator) + { + if (!caps.HasFlag(ModelCapability.ReasoningExpert)) + warnings.Add("⚠️ This model may lack strong reasoning for orchestration. Consider claude-opus or gpt-5."); + if (caps.HasFlag(ModelCapability.CostEfficient) && !caps.HasFlag(ModelCapability.ReasoningExpert)) + warnings.Add("πŸ’° Cost-efficient models may produce shallow plans. 
Best for workers, not orchestrators."); + } + + if (role == MultiAgentRole.Worker) + { + if (!caps.HasFlag(ModelCapability.ToolUse) && !caps.HasFlag(ModelCapability.CodeExpert)) + warnings.Add("⚠️ This model may not support tool use well. Worker tasks may require tool interaction."); + } + + return warnings; + } +} + +/// +/// Pre-configured multi-agent group templates for quick setup. +/// +public record GroupPreset(string Name, string Description, string Emoji, MultiAgentMode Mode, + string OrchestratorModel, string[] WorkerModels) +{ + public static readonly GroupPreset[] BuiltIn = new[] + { + new GroupPreset( + "Code Review Team", "Opus orchestrates, fast workers execute", + "πŸ”", MultiAgentMode.Orchestrator, + "claude-opus-4.6", new[] { "gpt-5.1-codex", "claude-sonnet-4.5" }), + + new GroupPreset( + "Multi-Perspective Analysis", "Different models analyze the same problem", + "πŸ”¬", MultiAgentMode.Broadcast, + "claude-opus-4.6", new[] { "gpt-5", "gemini-3-pro", "claude-sonnet-4.5" }), + + new GroupPreset( + "Fast Iteration Squad", "Cheap workers + smart evaluator for reflect loops", + "πŸ”„", MultiAgentMode.OrchestratorReflect, + "claude-opus-4.6", new[] { "gpt-4.1", "gpt-4.1", "gpt-5.1-codex-mini" }), + + new GroupPreset( + "Deep Research", "Strong reasoning models collaborate on complex problems", + "🧠", MultiAgentMode.Orchestrator, + "claude-opus-4.6", new[] { "gpt-5.1", "gemini-3-pro" }), + }; +} diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index dee9a03fd2..6f29a72d03 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -1,3 +1,4 @@ +using System.Collections.Concurrent; using System.Text.Json; using System.Text.RegularExpressions; using PolyPilot.Models; @@ -10,8 +11,71 @@ public partial class CopilotService { public event Action? 
OnOrchestratorPhaseChanged; // groupId, phase, detail + // Per-session semaphores to prevent concurrent model switches during rapid dispatch + private readonly ConcurrentDictionary _modelSwitchLocks = new(); + #region Session Organization (groups, pinning, sorting) + public async Task CreateMultiAgentGroupAsync(string groupName, string orchestratorModel, string workerModel, int workerCount, MultiAgentMode mode, string? systemPrompt = null) + { + // 1. Create the group + var group = new SessionGroup + { + Id = Guid.NewGuid().ToString(), + Name = groupName, + IsMultiAgent = true, + OrchestratorMode = mode, + OrchestratorPrompt = systemPrompt, + DefaultOrchestratorModel = orchestratorModel, + DefaultWorkerModel = workerModel, + SortOrder = Organization.Groups.Max(g => g.SortOrder) + 1 + }; + Organization.Groups.Add(group); + + // 2. Create Orchestrator Session + var orchName = $"{groupName}-Orchestrator"; + // Ensure name uniqueness + int suffix = 1; + while (_sessions.ContainsKey(orchName) || Organization.Sessions.Any(s => s.SessionName == orchName)) + orchName = $"{groupName}-Orchestrator-{suffix++}"; + + var orchSession = await CreateSessionAsync(orchName, orchestratorModel, null); // Use default dir + var orchMeta = GetOrCreateSessionMeta(orchSession.Name); + orchMeta.GroupId = group.Id; + orchMeta.Role = MultiAgentRole.Orchestrator; + orchMeta.PreferredModel = orchestratorModel; + + // 3. 
Create Worker Sessions + for (int i = 1; i <= workerCount; i++) + { + var workerName = $"{groupName}-Worker-{i}"; + suffix = 1; + while (_sessions.ContainsKey(workerName) || Organization.Sessions.Any(s => s.SessionName == workerName)) + workerName = $"{groupName}-Worker-{i}-{suffix++}"; + + var workerSession = await CreateSessionAsync(workerName, workerModel, null); + var workerMeta = GetOrCreateSessionMeta(workerSession.Name); + workerMeta.GroupId = group.Id; + workerMeta.Role = MultiAgentRole.Worker; + workerMeta.PreferredModel = workerModel; + } + + SaveOrganization(); + OnStateChanged?.Invoke(); + return group.Id; + } + + private SessionMeta GetOrCreateSessionMeta(string sessionName) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + if (meta == null) + { + meta = new SessionMeta { SessionName = sessionName, GroupId = SessionGroup.DefaultId }; + Organization.Sessions.Add(meta); + } + return meta; + } + public void LoadOrganization() { try @@ -788,8 +852,53 @@ public string GetEffectiveModel(string sessionName) return session?.Model ?? DefaultModel; } + /// + /// Create a multi-agent group from a preset template, creating sessions with assigned models. + /// + public async Task CreateGroupFromPresetAsync(Models.GroupPreset preset, string? 
workingDirectory = null, CancellationToken ct = default) + { + var group = CreateMultiAgentGroup(preset.Name, preset.Mode); + if (group == null) return null; + + // Create orchestrator session + var orchName = $"{preset.Name}-orchestrator"; + try + { + await CreateSessionAsync(orchName, preset.OrchestratorModel, workingDirectory, ct); + MoveSession(orchName, group.Id); + SetSessionRole(orchName, MultiAgentRole.Orchestrator); + SetSessionPreferredModel(orchName, preset.OrchestratorModel); + } + catch (Exception ex) + { + Debug($"Failed to create orchestrator session: {ex.Message}"); + } + + // Create worker sessions + for (int i = 0; i < preset.WorkerModels.Length; i++) + { + var workerName = $"{preset.Name}-worker-{i + 1}"; + var workerModel = preset.WorkerModels[i]; + try + { + await CreateSessionAsync(workerName, workerModel, workingDirectory, ct); + MoveSession(workerName, group.Id); + SetSessionPreferredModel(workerName, workerModel); + } + catch (Exception ex) + { + Debug($"Failed to create worker session '{workerName}': {ex.Message}"); + } + } + + SaveOrganization(); + OnStateChanged?.Invoke(); + return group; + } + /// /// Ensures a session's live model matches its PreferredModel before dispatch. + /// Uses per-session semaphore to prevent concurrent model switches. /// No-op if PreferredModel is null or already matches. /// private async Task EnsureSessionModelAsync(string sessionName, CancellationToken ct) @@ -803,8 +912,14 @@ private async Task EnsureSessionModelAsync(string sessionName, CancellationToken var currentSlug = Models.ModelHelper.NormalizeToSlug(session.Model); if (currentSlug == meta.PreferredModel) return; + var semaphore = _modelSwitchLocks.GetOrAdd(sessionName, _ => new SemaphoreSlim(1, 1)); + await semaphore.WaitAsync(ct); try { + // Re-check after acquiring lock β€” another dispatch may have already switched + currentSlug = Models.ModelHelper.NormalizeToSlug(GetSession(sessionName)?.Model ?? 
""); + if (currentSlug == meta.PreferredModel) return; + await ChangeModelAsync(sessionName, meta.PreferredModel, ct); Debug($"Switched '{sessionName}' model to '{meta.PreferredModel}' for multi-agent dispatch"); } @@ -812,6 +927,10 @@ private async Task EnsureSessionModelAsync(string sessionName, CancellationToken { Debug($"Failed to switch model for '{sessionName}': {ex.Message}"); } + finally + { + semaphore.Release(); + } } #endregion @@ -972,9 +1091,31 @@ private string BuildSynthesisWithEvalPrompt(string originalPrompt, List 1 && state.LastEvaluation != null) + { + sb.AppendLine("### Previous Iteration Feedback"); + sb.AppendLine(state.LastEvaluation); + sb.AppendLine(); + sb.AppendLine("Check whether the identified gaps have been addressed in this iteration."); + sb.AppendLine(); + } + sb.AppendLine("### Decision"); + sb.AppendLine("- If the combined output **fully satisfies** the goal: Include `[[GROUP_REFLECT_COMPLETE]]` with a summary."); + sb.AppendLine("- If **not yet complete**: Include `[[NEEDS_ITERATION]]` followed by:"); + sb.AppendLine(" 1. What specific gaps remain (be precise)"); + sb.AppendLine(" 2. Whether quality improved, degraded, or stalled vs. previous iteration"); + sb.AppendLine(" 3. Revised `@worker:name` / `@end` blocks for the next iteration"); + if (state.CurrentIteration >= state.MaxIterations - 1) + { + sb.AppendLine(); + sb.AppendLine($"⚠️ This is iteration {state.CurrentIteration} of {state.MaxIterations}. 
If close to the goal, consider completing with what you have rather than requesting another iteration."); + } return sb.ToString(); } From df9e4934cf43cf840f8d8b1f7f25d209353780bd Mon Sep 17 00:00:00 2001 From: Shane Date: Tue, 17 Feb 2026 23:10:36 -0600 Subject: [PATCH 13/48] Add dedicated evaluator support, evaluation scoring, name-pattern inference, and auto-adjust banners - GroupReflectionState: EvaluationResult history, QualityTrend tracking, optional EvaluatorSession, PendingAdjustments for UI banners - Dedicated evaluator: separate session scores each iteration independently via SCORE/RATIONALE format - ModelCapabilities: InferFromName() for unknown model variants (opus/sonnet/haiku/codex/mini/max patterns) - ParseEvaluationScore: robust 0-1 score extraction with clamping - AutoAdjustFromFeedback: quality degradation from eval history, PendingAdjustments for banner UX - Sidebar: evaluation score display, adjustment banner with warning styling - 690 tests passing (21 new: EvaluationTracking, ModelNameInference, ParseEvaluationScore) - Consulted: Gemini 3 Pro (UX), GPT-5 (extensibility architecture) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 354 ++++++++++++++++++ .../Components/Layout/SessionSidebar.razor | 53 ++- .../Layout/SessionSidebar.razor.css | 50 +++ PolyPilot/Models/ModelCapabilities.cs | 147 +++++++- PolyPilot/Models/SessionOrganization.cs | 49 ++- .../Services/CopilotService.Organization.cs | 225 ++++++++++- 6 files changed, 864 insertions(+), 14 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 2878ac0048..9979e1deda 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -1086,3 +1086,357 @@ public void BuiltInPresets_IncludeOrchestratorReflect() Assert.Contains(GroupPreset.BuiltIn, p => p.Mode == MultiAgentMode.OrchestratorReflect); } } + +public 
class GroupModelAnalyzerTests +{ + [Fact] + public void Analyze_OrchestratorModeWithoutOrchestrator_ReturnsError() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Orchestrator }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("w1", "gpt-4.1", MultiAgentRole.Worker), + ("w2", "gpt-4.1", MultiAgentRole.Worker), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.Contains(diags, d => d.Level == "error" && d.Message.Contains("Orchestrator role")); + } + + [Fact] + public void Analyze_WeakOrchestratorModel_ReturnsWarning() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Orchestrator }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("orch", "gpt-4.1", MultiAgentRole.Orchestrator), + ("w1", "gpt-5", MultiAgentRole.Worker), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.Contains(diags, d => d.Level == "warning" && d.Message.Contains("reasoning")); + } + + [Fact] + public void Analyze_StrongOrchestrator_NoErrors() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Orchestrator }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("orch", "claude-opus-4.6", MultiAgentRole.Orchestrator), + ("w1", "gpt-4.1", MultiAgentRole.Worker), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.DoesNotContain(diags, d => d.Level == "error"); + } + + [Fact] + public void Analyze_AllSameModelBroadcast_SuggestsDiversity() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Broadcast }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("w1", "gpt-4.1", MultiAgentRole.Worker), + ("w2", "gpt-4.1", MultiAgentRole.Worker), + ("w3", "gpt-4.1", MultiAgentRole.Worker), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); 
+ Assert.Contains(diags, d => d.Level == "info" && d.Message.Contains("diverse")); + } + + [Fact] + public void Analyze_OrchestratorReflectWithoutWorkers_ReturnsError() + { + var group = new SessionGroup { IsMultiAgent = true, OrchestratorMode = MultiAgentMode.OrchestratorReflect }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("orch", "claude-opus-4.6", MultiAgentRole.Orchestrator), + }; + + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.Contains(diags, d => d.Level == "error" && d.Message.Contains("worker")); + } +} + +public class UserPresetsTests +{ + [Fact] + public void GetAll_IncludesBuiltInPresets() + { + // Use a temp dir that won't have presets.json + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + var all = UserPresets.GetAll(tempDir); + Assert.Equal(GroupPreset.BuiltIn.Length, all.Length); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + } + } + + [Fact] + public void SaveAndLoad_RoundTrips() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + var preset = new GroupPreset("My Team", "Custom desc", "🎯", + MultiAgentMode.Orchestrator, "claude-opus-4.6", new[] { "gpt-4.1" }) + { IsUserDefined = true }; + + UserPresets.Save(tempDir, new List { preset }); + var loaded = UserPresets.Load(tempDir); + + Assert.Single(loaded); + Assert.Equal("My Team", loaded[0].Name); + Assert.True(loaded[0].IsUserDefined); + Assert.Equal("claude-opus-4.6", loaded[0].OrchestratorModel); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + } + } + + [Fact] + public void GetAll_CombinesBuiltInAndUser() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + var userPreset = new GroupPreset("Custom", "Mine", "⭐", + MultiAgentMode.Broadcast, "gpt-5", new[] { "gpt-4.1" }) + { IsUserDefined = true }; + + UserPresets.Save(tempDir, new List { 
userPreset }); + var all = UserPresets.GetAll(tempDir); + + Assert.Equal(GroupPreset.BuiltIn.Length + 1, all.Length); + Assert.Contains(all, p => p.Name == "Custom" && p.IsUserDefined); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + } + } + + [Fact] + public void SaveGroupAsPreset_CreatesFromMembers() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + var group = new SessionGroup { Name = "Test", IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Orchestrator }; + var members = new List + { + new() { SessionName = "orch", Role = MultiAgentRole.Orchestrator }, + new() { SessionName = "w1", Role = MultiAgentRole.Worker }, + }; + + var preset = UserPresets.SaveGroupAsPreset(tempDir, "Test Preset", "desc", "πŸ”₯", + group, members, name => name == "orch" ? "claude-opus-4.6" : "gpt-4.1"); + + Assert.NotNull(preset); + Assert.Equal("claude-opus-4.6", preset!.OrchestratorModel); + Assert.Single(preset.WorkerModels); + Assert.Equal("gpt-4.1", preset.WorkerModels[0]); + Assert.True(preset.IsUserDefined); + + // Verify persisted + var loaded = UserPresets.Load(tempDir); + Assert.Single(loaded); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + } + } +} + +public class EvaluationTrackingTests +{ + [Fact] + public void RecordEvaluation_FirstEntry_ReturnsStable() + { + var state = GroupReflectionState.Create("test goal"); + var trend = state.RecordEvaluation(1, 0.6, "Needs work", "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend); + Assert.Single(state.EvaluationHistory); + } + + [Fact] + public void RecordEvaluation_ImprovingScores_ReturnsImproving() + { + var state = GroupReflectionState.Create("test goal"); + state.RecordEvaluation(1, 0.4, "Poor", "gpt-4.1"); + var trend = state.RecordEvaluation(2, 0.7, "Better", "gpt-4.1"); + Assert.Equal(QualityTrend.Improving, trend); + } + + [Fact] + public void RecordEvaluation_DegradingScores_ReturnsDegrading() + 
{ + var state = GroupReflectionState.Create("test goal"); + state.RecordEvaluation(1, 0.8, "Good", "gpt-4.1"); + var trend = state.RecordEvaluation(2, 0.5, "Got worse", "gpt-4.1"); + Assert.Equal(QualityTrend.Degrading, trend); + } + + [Fact] + public void RecordEvaluation_SimilarScores_ReturnsStable() + { + var state = GroupReflectionState.Create("test goal"); + state.RecordEvaluation(1, 0.6, "Ok", "gpt-4.1"); + var trend = state.RecordEvaluation(2, 0.65, "Similar", "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend); + } + + [Fact] + public void EvaluatorSession_CanBeConfigured() + { + var state = GroupReflectionState.Create("goal", 5, "eval-session"); + Assert.Equal("eval-session", state.EvaluatorSession); + } + + [Fact] + public void PendingAdjustments_InitiallyEmpty() + { + var state = GroupReflectionState.Create("goal"); + Assert.Empty(state.PendingAdjustments); + } + + [Fact] + public void EvaluationHistory_TracksMultipleIterations() + { + var state = GroupReflectionState.Create("goal"); + state.RecordEvaluation(1, 0.3, "Bad", "claude-haiku-4.5"); + state.RecordEvaluation(2, 0.5, "Improving", "claude-haiku-4.5"); + state.RecordEvaluation(3, 0.8, "Good", "claude-haiku-4.5"); + + Assert.Equal(3, state.EvaluationHistory.Count); + Assert.Equal(0.3, state.EvaluationHistory[0].Score); + Assert.Equal(0.8, state.EvaluationHistory[2].Score); + Assert.All(state.EvaluationHistory, e => Assert.Equal("claude-haiku-4.5", e.EvaluatorModel)); + } +} + +public class ModelNameInferenceTests +{ + [Fact] + public void InferFromName_OpusVariant_HasReasoningExpert() + { + var caps = ModelCapabilities.InferFromName("claude-opus-5.0"); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + } + + [Fact] + public void InferFromName_SonnetVariant_HasCodeExpert() + { + var caps = ModelCapabilities.InferFromName("claude-sonnet-5.0"); + Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + 
Assert.True(caps.HasFlag(ModelCapability.Fast)); + } + + [Fact] + public void InferFromName_HaikuVariant_HasFastAndCheap() + { + var caps = ModelCapabilities.InferFromName("claude-haiku-5.0"); + Assert.True(caps.HasFlag(ModelCapability.Fast)); + Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); + } + + [Fact] + public void InferFromName_CodexVariant_HasCodeExpert() + { + var caps = ModelCapabilities.InferFromName("gpt-6-codex"); + Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + } + + [Fact] + public void InferFromName_MiniVariant_HasFastAndCheap() + { + var caps = ModelCapabilities.InferFromName("gpt-6-mini"); + Assert.True(caps.HasFlag(ModelCapability.Fast)); + Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); + } + + [Fact] + public void InferFromName_MaxVariant_HasReasoningExpert() + { + var caps = ModelCapabilities.InferFromName("gpt-6-codex-max"); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + } + + [Fact] + public void InferFromName_GeminiVariant_HasVision() + { + var caps = ModelCapabilities.InferFromName("gemini-4-ultra"); + Assert.True(caps.HasFlag(ModelCapability.Vision)); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + } + + [Fact] + public void InferFromName_UnknownModel_ReturnsNone() + { + var caps = ModelCapabilities.InferFromName("totally-unknown-model"); + Assert.Equal(ModelCapability.None, caps); + } + + [Fact] + public void GetCapabilities_NewOpusVersion_InfersFromName() + { + // Not in registry, but should be inferred + var caps = ModelCapabilities.GetCapabilities("claude-opus-99.0"); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + } +} + +public class ParseEvaluationScoreTests +{ + [Fact] + public void ParseScore_ValidFormat_ExtractsCorrectly() + { + var response = "SCORE: 0.75\nRATIONALE: Good progress but missing edge cases.\n[[NEEDS_ITERATION]]"; + var (score, rationale) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(0.75, score); + 
Assert.Contains("Good progress", rationale); + } + + [Fact] + public void ParseScore_HighScore_ExtractsCorrectly() + { + var response = "SCORE: 0.95\nRATIONALE: Excellent output, fully addresses the goal.\n[[GROUP_REFLECT_COMPLETE]]"; + var (score, rationale) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(0.95, score); + Assert.Contains("Excellent", rationale); + } + + [Fact] + public void ParseScore_NoScoreMarker_ReturnsDefault() + { + var response = "The output looks good but could improve."; + var (score, _) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(0.5, score); // default + } + + [Fact] + public void ParseScore_ClampAboveOne_Returns1() + { + var response = "SCORE: 1.5\nRATIONALE: Overshot."; + var (score, _) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(1.0, score); + } + + [Fact] + public void ParseScore_NegativeScore_ReturnsZero() + { + var response = "SCORE: -0.5\nRATIONALE: Terrible."; + var (score, _) = CopilotService.ParseEvaluationScore(response); + Assert.Equal(0.0, score); + } +} diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index 996b8b4c3d..23e92087a1 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -210,13 +210,13 @@ else @if (showPresetPicker) {
- @foreach (var preset in GroupPreset.BuiltIn) + @foreach (var preset in UserPresets.GetAll(CopilotService.PolyPilotBaseDir)) { var p = preset;
+ @if (rs.PendingAdjustments.Count > 0) + { +
+ @foreach (var adj in rs.PendingAdjustments) + { + @adj + } +
+ } } + @{ + var diags = CopilotService.GetGroupDiagnostics(maGroupId); + } + @if (diags.Count > 0) + { +
+ @foreach (var d in diags) + { + @d.Message + } +
+ } + } } @@ -946,6 +973,28 @@ else } } + private void PromptSaveAsPreset(string groupId) + { + var group = CopilotService.Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group == null) return; + + // Save with the group's current name and a generated description + var members = CopilotService.GetMultiAgentGroupMembers(groupId); + var desc = $"{group.OrchestratorMode} group with {members.Count} agent(s)"; + var preset = CopilotService.SaveGroupAsPreset(groupId, group.Name, desc, "⭐"); + if (preset != null) + { + var orchestratorName = CopilotService.GetOrchestratorSession(groupId); + if (orchestratorName != null) + { + var session = CopilotService.GetSession(orchestratorName); + if (session != null) + session.History.Add(ChatMessage.SystemMessage($"πŸ’Ύ Group saved as preset: \"{preset.Name}\" (🎯 {preset.OrchestratorModel} + πŸ‘· {string.Join(", ", preset.WorkerModels)})")); + } + } + StateHasChanged(); + } + private void OnSidebarMultiAgentModeChanged(string groupId, ChangeEventArgs e) { if (e.Value is string val && Enum.TryParse(val, out var mode)) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index 332cb7ba39..6731c9f699 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -1437,3 +1437,53 @@ overflow: hidden; text-overflow: ellipsis; } + +/* === Group Diagnostics === */ +.group-diagnostics { + display: flex; + flex-direction: column; + gap: 0.15rem; + padding: 0.2rem 0; +} + +.group-diagnostics span { + font-size: var(--type-caption1); + line-height: 1.3; +} + +.diag-error { color: var(--accent-primary); } +.diag-warning { color: #fbbf24; } +.diag-info { color: var(--text-dim); } + +.save-preset-btn { + all: unset; + font-size: var(--type-caption1); + color: var(--text-dim); + cursor: pointer; + padding: 0.2rem 0; + opacity: 0.7; +} +.save-preset-btn:hover { opacity: 1; color: 
var(--accent-primary); } + +.reflect-score { + font-size: var(--type-caption1); + color: #4ade80; + white-space: nowrap; +} + +.adjust-banner { + display: flex; + flex-direction: column; + gap: 0.1rem; + padding: 0.2rem 0.4rem; + margin: 0.1rem 0; + background: rgba(251, 191, 36, 0.1); + border-left: 2px solid #fbbf24; + border-radius: 0.2rem; + font-size: var(--type-caption1); + color: var(--text-secondary); +} + +.adjust-banner span { + line-height: 1.3; +} diff --git a/PolyPilot/Models/ModelCapabilities.cs b/PolyPilot/Models/ModelCapabilities.cs index d4ba04a92e..512a7e0671 100644 --- a/PolyPilot/Models/ModelCapabilities.cs +++ b/PolyPilot/Models/ModelCapabilities.cs @@ -56,7 +56,31 @@ public static ModelCapability GetCapabilities(string modelSlug) key.StartsWith(modelSlug, StringComparison.OrdinalIgnoreCase)) return val.Caps; - return ModelCapability.None; + // Name-pattern inference for new/unknown models + return InferFromName(modelSlug); + } + + /// + /// Infer capabilities from model name patterns for unknown models. + /// Handles new model releases gracefully without registry updates. 
+ /// + internal static ModelCapability InferFromName(string slug) + { + var lower = slug.ToLowerInvariant(); + var caps = ModelCapability.None; + + // Family inference + if (lower.Contains("opus")) caps |= ModelCapability.ReasoningExpert | ModelCapability.CodeExpert | ModelCapability.ToolUse; + else if (lower.Contains("sonnet")) caps |= ModelCapability.CodeExpert | ModelCapability.ToolUse | ModelCapability.Fast; + else if (lower.Contains("haiku")) caps |= ModelCapability.Fast | ModelCapability.CostEfficient; + else if (lower.Contains("gemini")) caps |= ModelCapability.ReasoningExpert | ModelCapability.LargeContext | ModelCapability.Vision; + + // Variant inference + if (lower.Contains("codex")) caps |= ModelCapability.CodeExpert; + if (lower.Contains("mini")) caps |= ModelCapability.Fast | ModelCapability.CostEfficient; + if (lower.Contains("max")) caps |= ModelCapability.ReasoningExpert; + + return caps; } /// Get a short description of model strengths. @@ -111,6 +135,9 @@ public static List GetRoleWarnings(string modelSlug, MultiAgentRole role public record GroupPreset(string Name, string Description, string Emoji, MultiAgentMode Mode, string OrchestratorModel, string[] WorkerModels) { + /// Whether this is a user-created preset (vs built-in). + public bool IsUserDefined { get; init; } + public static readonly GroupPreset[] BuiltIn = new[] { new GroupPreset( @@ -134,3 +161,121 @@ public record GroupPreset(string Name, string Description, string Emoji, MultiAg "claude-opus-4.6", new[] { "gpt-5.1", "gemini-3-pro" }), }; } + +/// +/// Manages user-defined presets: save/load from ~/.polypilot/presets.json. +/// +public static class UserPresets +{ + private const string FileName = "presets.json"; + + public static List Load(string baseDir) + { + try + { + var path = Path.Combine(baseDir, FileName); + if (!File.Exists(path)) return new List(); + var json = File.ReadAllText(path); + return System.Text.Json.JsonSerializer.Deserialize>(json) ?? 
new(); + } + catch { return new List(); } + } + + public static void Save(string baseDir, List presets) + { + try + { + Directory.CreateDirectory(baseDir); + var json = System.Text.Json.JsonSerializer.Serialize(presets, + new System.Text.Json.JsonSerializerOptions { WriteIndented = true }); + File.WriteAllText(Path.Combine(baseDir, FileName), json); + } + catch { /* best-effort persistence */ } + } + + /// Get all presets: built-in + user-defined. + public static GroupPreset[] GetAll(string baseDir) + { + var user = Load(baseDir); + return GroupPreset.BuiltIn.Concat(user).ToArray(); + } + + /// Save the current multi-agent group as a reusable preset. + public static GroupPreset? SaveGroupAsPreset(string baseDir, string name, string description, + string emoji, SessionGroup group, List members, Func getEffectiveModel) + { + var orchestrator = members.FirstOrDefault(m => m.Role == MultiAgentRole.Orchestrator); + var workers = members.Where(m => m.Role != MultiAgentRole.Orchestrator).ToList(); + + if (orchestrator == null && workers.Count == 0) return null; + + var preset = new GroupPreset( + name, description, emoji, group.OrchestratorMode, + orchestrator != null ? getEffectiveModel(orchestrator.SessionName) : "claude-opus-4.6", + workers.Select(w => getEffectiveModel(w.SessionName)).ToArray()) + { IsUserDefined = true }; + + var existing = Load(baseDir); + existing.RemoveAll(p => p.Name == name); // replace if same name + existing.Add(preset); + Save(baseDir, existing); + return preset; + } +} + +/// +/// Detects conflicts and issues within a multi-agent group's model configuration. +/// +public static class GroupModelAnalyzer +{ + public record GroupDiagnostic(string Level, string Message); // Level: "error", "warning", "info" + + /// + /// Analyze a multi-agent group for model conflicts and capability gaps. 
+ /// + public static List Analyze(SessionGroup group, List<(string Name, string Model, MultiAgentRole Role)> members) + { + var diags = new List(); + if (members.Count == 0) return diags; + + var orchestrators = members.Where(m => m.Role == MultiAgentRole.Orchestrator).ToList(); + var workers = members.Where(m => m.Role == MultiAgentRole.Worker).ToList(); + + // Check: orchestrator mode without orchestrator + if ((group.OrchestratorMode == MultiAgentMode.Orchestrator || group.OrchestratorMode == MultiAgentMode.OrchestratorReflect) + && orchestrators.Count == 0) + { + diags.Add(new("error", "β›” Orchestrator mode requires at least one session with the Orchestrator role.")); + } + + // Check: orchestrator using weak model + foreach (var orch in orchestrators) + { + var caps = ModelCapabilities.GetCapabilities(orch.Model); + if (!caps.HasFlag(ModelCapability.ReasoningExpert)) + diags.Add(new("warning", $"⚠️ Orchestrator '{orch.Name}' uses {orch.Model} which lacks strong reasoning. Consider claude-opus or gpt-5.")); + } + + // Check: all workers same model in broadcast (less diverse perspectives) + if (group.OrchestratorMode == MultiAgentMode.Broadcast && workers.Count > 1) + { + var uniqueModels = workers.Select(w => w.Model).Distinct().Count(); + if (uniqueModels == 1) + diags.Add(new("info", "πŸ’‘ All workers use the same model. For diverse perspectives, assign different models.")); + } + + // Check: expensive models as workers when cheaper ones suffice + foreach (var w in workers) + { + var caps = ModelCapabilities.GetCapabilities(w.Model); + if (caps.HasFlag(ModelCapability.ReasoningExpert) && !caps.HasFlag(ModelCapability.Fast)) + diags.Add(new("info", $"πŸ’° Worker '{w.Name}' uses premium model {w.Model}. 
Consider a faster/cheaper model for worker tasks.")); + } + + // Check: OrchestratorReflect without enough workers + if (group.OrchestratorMode == MultiAgentMode.OrchestratorReflect && workers.Count == 0) + diags.Add(new("error", "β›” OrchestratorReflect needs at least one worker to iterate on.")); + + return diags; + } +} diff --git a/PolyPilot/Models/SessionOrganization.cs b/PolyPilot/Models/SessionOrganization.cs index 19413dda69..4b389b464f 100644 --- a/PolyPilot/Models/SessionOrganization.cs +++ b/PolyPilot/Models/SessionOrganization.cs @@ -115,18 +115,29 @@ public class GroupReflectionState /// The orchestrator's evaluation from the last iteration. public string? LastEvaluation { get; set; } + /// Per-iteration evaluation results for trend tracking. + public List EvaluationHistory { get; set; } = new(); + + /// Optional: session name of a dedicated evaluator (different from orchestrator). + public string? EvaluatorSession { get; set; } + + /// Auto-adjustment suggestions surfaced to the user. + [System.Text.Json.Serialization.JsonIgnore] + public List PendingAdjustments { get; } = new(); + /// Hash window for stall detection (last N response hashes). [System.Text.Json.Serialization.JsonIgnore] internal List ResponseHashes { get; } = new(); internal const int StallWindowSize = 3; internal int ConsecutiveStalls { get; set; } - public static GroupReflectionState Create(string goal, int maxIterations = 5) => new() + public static GroupReflectionState Create(string goal, int maxIterations = 5, string? evaluatorSession = null) => new() { Goal = goal, MaxIterations = maxIterations, IsActive = true, - StartedAt = DateTime.Now + StartedAt = DateTime.Now, + EvaluatorSession = evaluatorSession }; /// Check if the latest synthesis is repeating (stall detection). @@ -152,8 +163,42 @@ public bool CheckStall(string synthesisResponse) return false; } + /// Record an evaluation result and return the quality trend. 
+ public QualityTrend RecordEvaluation(int iteration, double score, string rationale, string evaluatorModel) + { + EvaluationHistory.Add(new EvaluationResult + { + Iteration = iteration, + Score = score, + Rationale = rationale, + EvaluatorModel = evaluatorModel, + Timestamp = DateTime.Now + }); + + if (EvaluationHistory.Count < 2) return QualityTrend.Stable; + + var recent = EvaluationHistory.TakeLast(3).Select(e => e.Score).ToList(); + if (recent.Count >= 2 && recent.Last() > recent[^2] + 0.1) return QualityTrend.Improving; + if (recent.Count >= 2 && recent.Last() < recent[^2] - 0.1) return QualityTrend.Degrading; + return QualityTrend.Stable; + } + public string CompletionSummary => GoalMet ? $"βœ… Goal met after {CurrentIteration} iteration(s)" : IsStalled ? $"⚠️ Stalled after {CurrentIteration} iteration(s)" : $"⏱️ Reached max iterations ({MaxIterations})"; } + +/// Quality trend across iterations. +public enum QualityTrend { Improving, Stable, Degrading } + +/// Structured evaluation result from one reflect iteration. +public class EvaluationResult +{ + public int Iteration { get; set; } + /// Quality score 0.0-1.0. 
+ public double Score { get; set; } + public string Rationale { get; set; } = ""; + public string EvaluatorModel { get; set; } = ""; + public DateTime Timestamp { get; set; } +} diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 6f29a72d03..a2fd07eaa8 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -1043,19 +1043,62 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Synthesizing, iterDetail)); var synthEvalPrompt = BuildSynthesisWithEvalPrompt(prompt, results.ToList(), reflectState); - var synthesisResponse = await SendPromptAndWaitAsync(orchestratorName, synthEvalPrompt, ct); - // Check completion sentinel - if (synthesisResponse.Contains("[[GROUP_REFLECT_COMPLETE]]", StringComparison.OrdinalIgnoreCase)) + // Use dedicated evaluator session if configured, otherwise orchestrator self-evaluates + string evaluatorName = reflectState.EvaluatorSession ?? 
orchestratorName; + string synthesisResponse; + if (reflectState.EvaluatorSession != null && reflectState.EvaluatorSession != orchestratorName) { - reflectState.GoalMet = true; - reflectState.IsActive = false; - AddOrchestratorSystemMessage(orchestratorName, $"βœ… {reflectState.CompletionSummary}"); - break; + // Send results to orchestrator for synthesis + var synthOnlyPrompt = BuildSynthesisOnlyPrompt(prompt, results.ToList()); + synthesisResponse = await SendPromptAndWaitAsync(orchestratorName, synthOnlyPrompt, ct); + + // Send to evaluator for independent scoring + var evalOnlyPrompt = BuildEvaluatorPrompt(prompt, synthesisResponse, reflectState); + var evalResponse = await SendPromptAndWaitAsync(evaluatorName, evalOnlyPrompt, ct); + + // Parse score from evaluator + var (score, rationale) = ParseEvaluationScore(evalResponse); + var evaluatorModel = GetEffectiveModel(evaluatorName); + var trend = reflectState.RecordEvaluation(reflectState.CurrentIteration, score, rationale, evaluatorModel); + + // Check if evaluator says complete + if (evalResponse.Contains("[[GROUP_REFLECT_COMPLETE]]", StringComparison.OrdinalIgnoreCase) || score >= 0.9) + { + reflectState.GoalMet = true; + reflectState.IsActive = false; + AddOrchestratorSystemMessage(orchestratorName, $"βœ… {reflectState.CompletionSummary} (score: {score:F1})"); + break; + } + + reflectState.LastEvaluation = rationale; + if (trend == Models.QualityTrend.Degrading) + reflectState.PendingAdjustments.Add("πŸ“‰ Quality degrading β€” consider changing worker models or refining the goal."); + } + else + { + synthesisResponse = await SendPromptAndWaitAsync(orchestratorName, synthEvalPrompt, ct); + + // Check completion sentinel + if (synthesisResponse.Contains("[[GROUP_REFLECT_COMPLETE]]", StringComparison.OrdinalIgnoreCase)) + { + reflectState.GoalMet = true; + reflectState.IsActive = false; + AddOrchestratorSystemMessage(orchestratorName, $"βœ… {reflectState.CompletionSummary}"); + break; + } + + // Extract 
evaluation for next iteration + reflectState.LastEvaluation = ExtractIterationEvaluation(synthesisResponse); + + // Record a self-eval score (estimated from sentinel presence) + var selfScore = synthesisResponse.Contains("[[NEEDS_ITERATION]]", StringComparison.OrdinalIgnoreCase) ? 0.4 : 0.7; + reflectState.RecordEvaluation(reflectState.CurrentIteration, selfScore, + reflectState.LastEvaluation ?? "", GetEffectiveModel(orchestratorName)); } - // Extract evaluation for next iteration - reflectState.LastEvaluation = ExtractIterationEvaluation(synthesisResponse); + // Auto-adjustment: analyze worker results and suggest/apply changes + AutoAdjustFromFeedback(groupId, group, results.ToList(), reflectState); // Stall detection if (reflectState.CheckStall(synthesisResponse)) @@ -1152,5 +1195,169 @@ private static string ExtractIterationEvaluation(string response) return string.Join('\n', lines.TakeLast(5)).Trim(); } + /// Build a synthesis-only prompt (no evaluation decision) for use with separate evaluator. + private string BuildSynthesisOnlyPrompt(string originalPrompt, List results) + { + var sb = new System.Text.StringBuilder(); + sb.Append(BuildSynthesisPrompt(originalPrompt, results)); + sb.AppendLine(); + sb.AppendLine("Synthesize the worker outputs into a unified, coherent response. Do NOT make a completion decision β€” an independent evaluator will assess quality separately."); + return sb.ToString(); + } + + /// Build a prompt for an independent evaluator session to score synthesis quality. 
+ private static string BuildEvaluatorPrompt(string originalGoal, string synthesisResponse, Models.GroupReflectionState state) + { + var sb = new System.Text.StringBuilder(); + sb.AppendLine("## Independent Quality Evaluation"); + sb.AppendLine($"**Goal:** {state.Goal}"); + sb.AppendLine($"**Iteration:** {state.CurrentIteration}/{state.MaxIterations}"); + sb.AppendLine(); + sb.AppendLine("### Synthesized Output to Evaluate"); + sb.AppendLine(synthesisResponse); + sb.AppendLine(); + sb.AppendLine("### Scoring Rubric"); + sb.AppendLine("Rate the output on a 0.0–1.0 scale across these dimensions:"); + sb.AppendLine("1. **Completeness** (0-1): Does it fully address the goal?"); + sb.AppendLine("2. **Correctness** (0-1): Is it accurate and well-reasoned?"); + sb.AppendLine("3. **Coherence** (0-1): Is the synthesis well-organized?"); + sb.AppendLine("4. **Actionability** (0-1): Can the user act on this output?"); + sb.AppendLine(); + if (state.EvaluationHistory.Count > 0) + { + var last = state.EvaluationHistory.Last(); + sb.AppendLine($"Previous iteration scored: {last.Score:F1} β€” {last.Rationale}"); + sb.AppendLine("Indicate whether quality improved, degraded, or stayed flat."); + sb.AppendLine(); + } + sb.AppendLine("### Response Format"); + sb.AppendLine("SCORE: "); + sb.AppendLine("RATIONALE: <2-3 sentences explaining the score and gaps>"); + sb.AppendLine(); + sb.AppendLine("If score >= 0.9, include `[[GROUP_REFLECT_COMPLETE]]`."); + sb.AppendLine("If score < 0.9, include `[[NEEDS_ITERATION]]` and list specific improvements needed."); + return sb.ToString(); + } + + /// Parse a score and rationale from evaluator response. 
+ internal static (double Score, string Rationale) ParseEvaluationScore(string evalResponse) + { + double score = 0.5; // default if parsing fails + string rationale = evalResponse; + + // Try to find "SCORE: X.X" pattern + var scoreMatch = System.Text.RegularExpressions.Regex.Match(evalResponse, @"SCORE:\s*(-?[\d.]+)", System.Text.RegularExpressions.RegexOptions.IgnoreCase); + if (scoreMatch.Success && double.TryParse(scoreMatch.Groups[1].Value, System.Globalization.NumberStyles.Float, + System.Globalization.CultureInfo.InvariantCulture, out var parsed)) + { + score = Math.Clamp(parsed, 0.0, 1.0); + } + + // Extract rationale + var rationaleMatch = System.Text.RegularExpressions.Regex.Match(evalResponse, @"RATIONALE:\s*(.+?)(?:\[\[|$)", System.Text.RegularExpressions.RegexOptions.IgnoreCase | System.Text.RegularExpressions.RegexOptions.Singleline); + if (rationaleMatch.Success) + rationale = rationaleMatch.Groups[1].Value.Trim(); + + return (score, rationale); + } + + /// + /// Auto-adjust agent configuration based on iteration feedback. + /// Called after each reflect iteration to detect quality issues and apply fixes. + /// Surfaces adjustments both as orchestrator system messages and as PendingAdjustments on state (for UI banners). + /// + private void AutoAdjustFromFeedback(string groupId, SessionGroup group, List results, GroupReflectionState state) + { + var failedWorkers = results.Where(r => !r.Success).ToList(); + var adjustments = new List(); + + // Auto-reassign tasks from failed workers to successful ones + if (failedWorkers.Count > 0 && results.Any(r => r.Success)) + { + foreach (var failed in failedWorkers) + { + adjustments.Add($"πŸ”„ Worker '{failed.WorkerName}' failed ({failed.Error}). 
Its tasks will be reassigned in the next iteration."); + } + } + + // Detect workers with suspiciously short responses (quality issue) + foreach (var result in results.Where(r => r.Success)) + { + if (result.Response != null && result.Response.Length < 100 && state.CurrentIteration > 1) + { + var caps = Models.ModelCapabilities.GetCapabilities(GetEffectiveModel(result.WorkerName)); + if (caps.HasFlag(Models.ModelCapability.CostEfficient) && !caps.HasFlag(Models.ModelCapability.ReasoningExpert)) + { + adjustments.Add($"πŸ“ˆ Worker '{result.WorkerName}' produced a brief response. Consider upgrading from a cost-efficient model to improve quality."); + } + } + } + + // Detect quality degradation from evaluation history + if (state.EvaluationHistory.Count >= 2) + { + var lastTwo = state.EvaluationHistory.TakeLast(2).ToList(); + if (lastTwo[1].Score < lastTwo[0].Score - 0.15) + adjustments.Add("πŸ“‰ Quality degraded significantly vs. previous iteration. Review worker models or task clarity."); + } + + // Detect quality degradation: if consecutive stalls detected, suggest model changes + if (state.ConsecutiveStalls == 1) + { + adjustments.Add("⚠️ Output repetition detected. The orchestrator may benefit from a different model or clearer instructions."); + } + + // Surface adjustments for UI banners (non-blocking) + state.PendingAdjustments.Clear(); + state.PendingAdjustments.AddRange(adjustments); + + // Surface adjustments as system messages to orchestrator + if (adjustments.Count > 0) + { + var orchestratorName = GetOrchestratorSession(groupId); + if (orchestratorName != null) + { + AddOrchestratorSystemMessage(orchestratorName, + $"πŸ”§ Auto-analysis (iteration {state.CurrentIteration}):\n" + string.Join("\n", adjustments)); + } + } + } + + /// + /// Get diagnostics for a multi-agent group (model conflicts, capability gaps). 
+ /// + public List GetGroupDiagnostics(string groupId) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + if (group == null || !group.IsMultiAgent) return new(); + + var members = GetMultiAgentGroupMembers(groupId) + .Select(name => + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == name); + return (name, GetEffectiveModel(name), meta?.Role ?? MultiAgentRole.Worker); + }) + .ToList(); + + return Models.GroupModelAnalyzer.Analyze(group, members); + } + + /// + /// Save the current multi-agent group configuration as a reusable user preset. + /// + public Models.GroupPreset? SaveGroupAsPreset(string groupId, string name, string description, string emoji) + { + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId && g.IsMultiAgent); + if (group == null) return null; + + var members = GetMultiAgentGroupMembers(groupId) + .Select(n => Organization.Sessions.FirstOrDefault(m => m.SessionName == n)) + .Where(m => m != null) + .ToList(); + + return Models.UserPresets.SaveGroupAsPreset(PolyPilotBaseDir, name, description, emoji, + group, members!, GetEffectiveModel); + } + #endregion } From 7d75b4bf26691b2577dce6c38169d5da0c50038b Mon Sep 17 00:00:00 2001 From: Shane Date: Tue, 17 Feb 2026 23:16:08 -0600 Subject: [PATCH 14/48] Add end-to-end scenario tests and enhanced model name inference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 9 scenario tests that serve as executable documentation of complete user flows: - Scenario_CreateGroupFromPreset: preset picker β†’ group creation β†’ role/model validation - Scenario_WeakOrchestratorWarnings: model picker β†’ role warnings β†’ group diagnostics - Scenario_FullReflectCycleWithScoring: 4-iteration cycle with eval scores (0.4β†’0.7β†’0.65β†’0.92) - Scenario_AutoAdjustDetectsIssuesAndSurfacesBanner: quality degradation β†’ amber banner - Scenario_SaveAndReuseCustomPreset: save β†’ persist β†’ reload with user badge - 
Scenario_DedicatedEvaluatorScoring: SCORE/RATIONALE parsing β†’ completion detection - Scenario_StallDetectionStopsLoop: hash-based repeat detection β†’ auto-stop - Scenario_NewModelReleasesHandledGracefully: name-pattern inference for future models - Scenario_DiagnosticsGuideMisconfiguration: error/warning/info diagnostics flow Also enhanced GetStrengths to generate descriptions from inferred capabilities. 699 tests passing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 456 ++++++++++++++++++++ PolyPilot/Models/ModelCapabilities.cs | 14 + 2 files changed, 470 insertions(+) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 9979e1deda..db8da4c7a0 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -1440,3 +1440,459 @@ public void ParseScore_NegativeScore_ReturnsZero() Assert.Equal(0.0, score); } } + +/// +/// End-to-end scenario tests demonstrating complete multi-agent user flows. +/// These serve as executable documentation of the feature's user experience. +/// +public class MultiAgentScenarioTests +{ + /// + /// Scenario: User creates a "Code Review Team" from a built-in preset. + /// + /// User flow: + /// 1. Click πŸš€ Preset in sidebar toolbar + /// 2. Preset picker appears showing 4 built-in templates + /// 3. Select "Code Review Team" (πŸ”) + /// 4. System creates: Orchestrator (claude-opus-4.6) + 2 Workers (gpt-5.1-codex, claude-sonnet-4.5) + /// 5. Sidebar shows group with mode selector set to "🎯 Orchestrator" + /// 6. 
Each session shows its model assignment and role badge + /// + [Fact] + public void Scenario_CreateGroupFromPreset() + { + // Step 1-2: User sees built-in presets + var presets = GroupPreset.BuiltIn; + Assert.Equal(4, presets.Length); + + // Step 3: User picks "Code Review Team" + var codeReview = presets.First(p => p.Name == "Code Review Team"); + Assert.Equal("πŸ”", codeReview.Emoji); + Assert.Equal(MultiAgentMode.Orchestrator, codeReview.Mode); + Assert.Equal("claude-opus-4.6", codeReview.OrchestratorModel); + Assert.Equal(2, codeReview.WorkerModels.Length); + + // Step 4: System creates the group - verify the preset structure + // (CopilotService.CreateGroupFromPresetAsync does the actual creation at runtime) + Assert.Equal("gpt-5.1-codex", codeReview.WorkerModels[0]); + Assert.Equal("claude-sonnet-4.5", codeReview.WorkerModels[1]); + + // Step 5-6: Each member has appropriate capabilities + var orchCaps = ModelCapabilities.GetCapabilities(codeReview.OrchestratorModel); + Assert.True(orchCaps.HasFlag(ModelCapability.ReasoningExpert)); + + var warnings = ModelCapabilities.GetRoleWarnings(codeReview.OrchestratorModel, MultiAgentRole.Orchestrator); + Assert.Empty(warnings); // opus is a great orchestrator, no warnings + + foreach (var workerModel in codeReview.WorkerModels) + { + var wCaps = ModelCapabilities.GetCapabilities(workerModel); + Assert.True(wCaps.HasFlag(ModelCapability.CodeExpert)); // both are code-capable + } + } + + /// + /// Scenario: User assigns a weak model to the Orchestrator role and sees warnings. + /// + /// User flow: + /// 1. Long-press/right-click a session in a multi-agent group β†’ context menu + /// 2. See "🎯 Set as Orchestrator" button β†’ click it + /// 3. Under "🧠 Model", pick "gpt-4.1" from dropdown + /// 4. Warning appears: "⚠️ This model may lack strong reasoning for orchestration" + /// 5. Warning appears: "πŸ’° Cost-efficient models may produce shallow plans" + /// 6. 
User also sees diagnostics in the group header: + /// "⚠️ Orchestrator 'session1' uses gpt-4.1 which lacks strong reasoning" + /// + [Fact] + public void Scenario_WeakOrchestratorWarnings() + { + // Step 3-5: User picks gpt-4.1 for orchestrator role + var warnings = ModelCapabilities.GetRoleWarnings("gpt-4.1", MultiAgentRole.Orchestrator); + Assert.Equal(2, warnings.Count); + Assert.Contains(warnings, w => w.Contains("reasoning")); + Assert.Contains(warnings, w => w.Contains("Cost-efficient")); + + // Step 6: Group diagnostics also flag the issue + var group = new SessionGroup + { + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator + }; + var members = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("session1", "gpt-4.1", MultiAgentRole.Orchestrator), + ("session2", "gpt-5", MultiAgentRole.Worker), + }; + var diags = GroupModelAnalyzer.Analyze(group, members); + Assert.Contains(diags, d => d.Level == "warning" && d.Message.Contains("gpt-4.1")); + + // Compare: strong orchestrator shows no role warnings + var strongWarnings = ModelCapabilities.GetRoleWarnings("claude-opus-4.6", MultiAgentRole.Orchestrator); + Assert.Empty(strongWarnings); + } + + /// + /// Scenario: Full OrchestratorReflect iteration cycle with evaluation scoring. + /// + /// User flow: + /// 1. User selects "πŸ”„ Orchestrator + Reflect" from mode dropdown + /// 2. Types goal in the multi-agent input bar and clicks πŸ“‘ + /// 3. Sidebar shows: πŸ”„ 1/5 with goal text + /// 4. After iteration 1, evaluator scores 0.4 β†’ sidebar shows "πŸ“Š 0.4 (gpt-4.1)" + /// 5. AutoAdjust detects no issues yet β†’ no banner + /// 6. After iteration 2, evaluator scores 0.7 β†’ trend = Improving + /// 7. After iteration 3, evaluator scores 0.65 β†’ trend = Stable (slight drop) + /// 8. After iteration 4, evaluator scores 0.92 β†’ goal met, loop stops + /// 9. 
Sidebar shows: "βœ… Goal met after 4 iteration(s)" + /// + [Fact] + public void Scenario_FullReflectCycleWithScoring() + { + // Step 1-2: User starts OrchestratorReflect + var state = GroupReflectionState.Create("Implement a REST API with CRUD endpoints", maxIterations: 5); + Assert.True(state.IsActive); + Assert.Equal(0, state.CurrentIteration); + Assert.NotNull(state.StartedAt); + + // Step 3-4: Iteration 1 β€” low quality initial attempt + state.CurrentIteration = 1; + var trend1 = state.RecordEvaluation(1, 0.4, "Missing error handling and input validation. Only GET endpoint implemented.", "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend1); // only one data point + Assert.Single(state.EvaluationHistory); + + // Sidebar would show: πŸ”„ 1/5 πŸ“Š 0.4 (gpt-4.1) + var lastEval = state.EvaluationHistory.Last(); + Assert.Equal("0.4", lastEval.Score.ToString("F1")); + Assert.Equal("gpt-4.1", lastEval.EvaluatorModel); + + // Step 6: Iteration 2 β€” significant improvement + state.CurrentIteration = 2; + var trend2 = state.RecordEvaluation(2, 0.7, "All CRUD endpoints present. Error handling added but tests incomplete.", "gpt-4.1"); + Assert.Equal(QualityTrend.Improving, trend2); + + // Step 7: Iteration 3 β€” slight regression + state.CurrentIteration = 3; + var trend3 = state.RecordEvaluation(3, 0.65, "Tests added but some CRUD operations regressed. 
PUT endpoint missing validation.", "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend3); // within 0.1 threshold + + // Step 8: Iteration 4 β€” goal met + state.CurrentIteration = 4; + var trend4 = state.RecordEvaluation(4, 0.92, "All endpoints complete with validation, error handling, and comprehensive tests.", "gpt-4.1"); + Assert.Equal(QualityTrend.Improving, trend4); + + // Score >= 0.9 would trigger goal completion + state.GoalMet = true; + state.IsActive = false; + state.CompletedAt = DateTime.Now; + + // Step 9: Final summary + Assert.Equal("βœ… Goal met after 4 iteration(s)", state.CompletionSummary); + Assert.Equal(4, state.EvaluationHistory.Count); + + // Verify the quality trajectory is tracked + var scores = state.EvaluationHistory.Select(e => e.Score).ToList(); + Assert.Equal(new[] { 0.4, 0.7, 0.65, 0.92 }, scores); + } + + /// + /// Scenario: AutoAdjust detects quality degradation and surfaces a banner. + /// + /// User flow: + /// 1. Reflect loop running with 3 workers + /// 2. Iteration 2 scores 0.7, iteration 3 scores 0.45 (sharp drop) + /// 3. AutoAdjust detects degradation in evaluation history + /// 4. Sidebar shows amber banner: "πŸ“‰ Quality degraded significantly vs. previous iteration" + /// 5. Worker "fast-coder" using gpt-4.1 produced only 50 chars on iteration 3 + /// 6. Banner also shows: "πŸ“ˆ Worker 'fast-coder' produced a brief response. Consider upgrading..." + /// 7. 
User can see these suggestions and decide to change the worker's model + /// + [Fact] + public void Scenario_AutoAdjustDetectsIssuesAndSurfacesBanner() + { + var state = GroupReflectionState.Create("Build a microservice"); + state.CurrentIteration = 3; + + // Steps 2-3: Record scores showing degradation + state.RecordEvaluation(1, 0.5, "Initial attempt", "gpt-4.1"); + state.RecordEvaluation(2, 0.7, "Good progress", "gpt-4.1"); + state.RecordEvaluation(3, 0.45, "Quality dropped", "gpt-4.1"); + + // The last two evals show a significant drop (0.7 β†’ 0.45 = -0.25 > 0.15 threshold) + var lastTwo = state.EvaluationHistory.TakeLast(2).ToList(); + var degradation = lastTwo[0].Score - lastTwo[1].Score; + Assert.True(degradation > 0.15); // threshold for "significant" degradation + + // Step 4-6: AutoAdjust would populate PendingAdjustments + // Simulating what AutoAdjustFromFeedback does: + state.PendingAdjustments.Clear(); + state.PendingAdjustments.Add("πŸ“‰ Quality degraded significantly vs. previous iteration. Review worker models or task clarity."); + state.PendingAdjustments.Add("πŸ“ˆ Worker 'fast-coder' produced a brief response. Consider upgrading from a cost-efficient model to improve quality."); + + // Verify the banner would display + Assert.Equal(2, state.PendingAdjustments.Count); + Assert.Contains(state.PendingAdjustments, a => a.Contains("πŸ“‰")); + Assert.Contains(state.PendingAdjustments, a => a.Contains("fast-coder")); + + // Step 7: User changes the model β€” verify gpt-4.1 is flagged as cost-efficient + var caps = ModelCapabilities.GetCapabilities("gpt-4.1"); + Assert.True(caps.HasFlag(ModelCapability.CostEfficient)); + Assert.False(caps.HasFlag(ModelCapability.ReasoningExpert)); + } + + /// + /// Scenario: User saves their tuned multi-agent group as a reusable preset. + /// + /// User flow: + /// 1. User has a working Orchestrator group: opus orchestrator, 2 workers + /// 2. They've tweaked models over several iterations and are happy + /// 3. 
Click "πŸ’Ύ Save as Preset" button in sidebar + /// 4. System saves to ~/.polypilot/presets.json + /// 5. Next time user clicks πŸš€ Preset, their custom preset appears with πŸ‘€ badge + /// 6. User-defined presets appear after built-in ones + /// + [Fact] + public void Scenario_SaveAndReuseCustomPreset() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + // Step 1: User has a working group + var group = new SessionGroup + { + Name = "My API Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect + }; + var members = new List + { + new() { SessionName = "planner", Role = MultiAgentRole.Orchestrator }, + new() { SessionName = "coder", Role = MultiAgentRole.Worker }, + new() { SessionName = "reviewer", Role = MultiAgentRole.Worker }, + }; + + // Step 3-4: Save as preset + var preset = UserPresets.SaveGroupAsPreset( + tempDir, "My API Team", "OrchestratorReflect with reviewer", "πŸ—οΈ", + group, members, + name => name switch + { + "planner" => "claude-opus-4.6", + "coder" => "gpt-5.1-codex", + "reviewer" => "claude-sonnet-4.5", + _ => "gpt-4.1" + }); + + Assert.NotNull(preset); + Assert.True(preset!.IsUserDefined); + Assert.Equal("claude-opus-4.6", preset.OrchestratorModel); + Assert.Equal(2, preset.WorkerModels.Length); + Assert.Equal(MultiAgentMode.OrchestratorReflect, preset.Mode); + + // Step 5-6: Next time, preset picker shows built-in + user presets + var allPresets = UserPresets.GetAll(tempDir); + Assert.Equal(GroupPreset.BuiltIn.Length + 1, allPresets.Length); + + // User-defined presets come after built-in ones + var userPresets = allPresets.Where(p => p.IsUserDefined).ToArray(); + Assert.Single(userPresets); + Assert.Equal("My API Team", userPresets[0].Name); + + // The preset correctly captures the model assignments + Assert.Contains("gpt-5.1-codex", preset.WorkerModels); + Assert.Contains("claude-sonnet-4.5", preset.WorkerModels); + } + finally + { + if (Directory.Exists(tempDir)) 
Directory.Delete(tempDir, true); + } + } + + /// + /// Scenario: Dedicated evaluator session provides independent scoring. + /// + /// User flow: + /// 1. User creates a "Fast Iteration Squad" from presets (OrchestratorReflect) + /// 2. Group has: opus orchestrator + 3 cheap workers + /// 3. User adds a 4th session, sets role to Worker, assigns gpt-4.1 + /// 4. In code, EvaluatorSession is set to this 4th session + /// 5. Orchestrator synthesizes, then evaluator independently scores + /// 6. Evaluator responds with structured format: "SCORE: 0.75\nRATIONALE: ..." + /// 7. System parses score, records it, shows in sidebar + /// + [Fact] + public void Scenario_DedicatedEvaluatorScoring() + { + // Step 1-4: Group with evaluator + var state = GroupReflectionState.Create("Refactor auth module", maxIterations: 5, evaluatorSession: "eval-agent"); + Assert.Equal("eval-agent", state.EvaluatorSession); + + // Step 6-7: Evaluator responds with structured format + var evalResponse = """ + ## Evaluation + + SCORE: 0.75 + RATIONALE: The auth module refactoring covers JWT validation and middleware setup, but session management is incomplete and there are no integration tests. The code structure is clean but error handling paths need work. + + [[NEEDS_ITERATION]] + - Add session persistence layer + - Add integration tests for login/logout flow + - Improve error handling in token refresh + """; + + var (score, rationale) = CopilotService.ParseEvaluationScore(evalResponse); + Assert.Equal(0.75, score); + Assert.Contains("session management is incomplete", rationale); + + // Record it + var trend = state.RecordEvaluation(1, score, rationale, "gpt-4.1"); + Assert.Equal(QualityTrend.Stable, trend); + + // Sidebar shows: πŸ“Š 0.8 (gpt-4.1) + Assert.Equal(0.75, state.EvaluationHistory.Last().Score); + + // Next iteration: evaluator says done + var evalResponse2 = """ + SCORE: 0.93 + RATIONALE: All requirements met. 
Session persistence added, integration tests pass, error handling is comprehensive. + + [[GROUP_REFLECT_COMPLETE]] + """; + + var (score2, _) = CopilotService.ParseEvaluationScore(evalResponse2); + Assert.Equal(0.93, score2); + Assert.True(score2 >= 0.9); // triggers completion + Assert.Contains("[[GROUP_REFLECT_COMPLETE]]", evalResponse2); + + state.RecordEvaluation(2, score2, "All requirements met.", "gpt-4.1"); + state.GoalMet = true; + Assert.Contains("Goal met", state.CompletionSummary); + } + + /// + /// Scenario: Stall detection stops a reflect loop that's going in circles. + /// + /// User flow: + /// 1. Reflect loop is running, iteration 3 + /// 2. Workers keep producing similar output to iterations 1-2 + /// 3. Hash-based stall detector triggers after 2 consecutive matches + /// 4. Sidebar shows: "⚠️ Stalled after 3 iteration(s)" + /// 5. AutoAdjust banner: "⚠️ Output repetition detected..." + /// + [Fact] + public void Scenario_StallDetectionStopsLoop() + { + var state = GroupReflectionState.Create("Optimize database queries"); + + // Iterations 1-2: different responses + state.CurrentIteration = 1; + Assert.False(state.CheckStall("First attempt: added indexes on user_id column")); + + state.CurrentIteration = 2; + Assert.False(state.CheckStall("Second attempt: refactored joins to use CTEs")); + + // Iteration 3: same as iteration 2 β€” first repeat detected + state.CurrentIteration = 3; + Assert.False(state.CheckStall("Second attempt: refactored joins to use CTEs")); + Assert.Equal(1, state.ConsecutiveStalls); + Assert.False(state.IsStalled); // need 2 consecutive + + // Iteration 4: still repeating β€” stall confirmed + state.CurrentIteration = 4; + Assert.True(state.CheckStall("Second attempt: refactored joins to use CTEs")); + Assert.True(state.IsStalled); + Assert.Equal("⚠️ Stalled after 4 iteration(s)", state.CompletionSummary); + } + + /// + /// Scenario: Model name inference handles a brand-new model release gracefully. 
+ /// + /// User flow: + /// 1. A new model "claude-opus-5.0" is released + /// 2. Copilot server makes it available in AvailableModels + /// 3. User assigns it to an orchestrator via the model picker + /// 4. ModelCapabilities doesn't have it in the registry + /// 5. InferFromName detects "opus" β†’ ReasoningExpert + CodeExpert + ToolUse + /// 6. No "weak model" warning appears for orchestrator role + /// 7. User also assigns "gpt-6-codex-mini" to a worker + /// 8. InferFromName detects "codex" + "mini" β†’ CodeExpert + Fast + CostEfficient + /// + [Fact] + public void Scenario_NewModelReleasesHandledGracefully() + { + // Step 3-6: New opus model, not in registry + var opusCaps = ModelCapabilities.GetCapabilities("claude-opus-5.0"); + Assert.True(opusCaps.HasFlag(ModelCapability.ReasoningExpert)); + Assert.True(opusCaps.HasFlag(ModelCapability.CodeExpert)); + + var orchWarnings = ModelCapabilities.GetRoleWarnings("claude-opus-5.0", MultiAgentRole.Orchestrator); + // Should not warn about reasoning since inference detects it + Assert.DoesNotContain(orchWarnings, w => w.Contains("reasoning")); + + // Step 7-8: New codex-mini model + var codexMiniCaps = ModelCapabilities.GetCapabilities("gpt-6-codex-mini"); + Assert.True(codexMiniCaps.HasFlag(ModelCapability.CodeExpert)); + Assert.True(codexMiniCaps.HasFlag(ModelCapability.Fast)); + Assert.True(codexMiniCaps.HasFlag(ModelCapability.CostEfficient)); + + // Worker role should work fine with this model + var workerWarnings = ModelCapabilities.GetRoleWarnings("gpt-6-codex-mini", MultiAgentRole.Worker); + Assert.Empty(workerWarnings); // codex has CodeExpert, no warning + + // Strengths description works via inference for unknown models + var strengths = ModelCapabilities.GetStrengths("claude-opus-5.0"); + Assert.StartsWith("Inferred:", strengths); + Assert.Contains("reasoning", strengths); + Assert.Contains("code", strengths); + } + + /// + /// Scenario: Full diagnostics flow for a misconfigured group. 
+ /// + /// User flow: + /// 1. User creates Orchestrator group but forgets to assign an orchestrator role + /// 2. All 3 sessions are Workers using the same cheap model + /// 3. Diagnostics panel shows: + /// β›” "Orchestrator mode requires at least one session with the Orchestrator role." + /// πŸ’‘ "All workers use the same model. For diverse perspectives, assign different models." + /// 4. User fixes: assigns one session as Orchestrator with opus + /// 5. Diagnostics update to clear the error, but show: + /// πŸ’° "Worker 'deep-thinker' uses premium model gpt-5.1. Consider a faster/cheaper model." + /// + [Fact] + public void Scenario_DiagnosticsGuideMisconfiguration() + { + // Step 1-3: Misconfigured group + var group = new SessionGroup + { + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.Orchestrator + }; + var badMembers = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("agent1", "gpt-4.1", MultiAgentRole.Worker), + ("agent2", "gpt-4.1", MultiAgentRole.Worker), + ("agent3", "gpt-4.1", MultiAgentRole.Worker), + }; + + var diags1 = GroupModelAnalyzer.Analyze(group, badMembers); + Assert.Contains(diags1, d => d.Level == "error" && d.Message.Contains("Orchestrator role")); + + // In broadcast mode, same-model workers get a diversity hint + group.OrchestratorMode = MultiAgentMode.Broadcast; + var diags1b = GroupModelAnalyzer.Analyze(group, badMembers); + Assert.Contains(diags1b, d => d.Level == "info" && d.Message.Contains("diverse")); + + // Step 4-5: User fixes by adding orchestrator with strong model, worker with premium + group.OrchestratorMode = MultiAgentMode.Orchestrator; + var fixedMembers = new List<(string Name, string Model, MultiAgentRole Role)> + { + ("planner", "claude-opus-4.6", MultiAgentRole.Orchestrator), + ("fast-worker", "gpt-4.1", MultiAgentRole.Worker), + ("deep-thinker", "gpt-5.1", MultiAgentRole.Worker), + }; + + var diags2 = GroupModelAnalyzer.Analyze(group, fixedMembers); + Assert.DoesNotContain(diags2, d => 
d.Level == "error"); // no more errors + Assert.Contains(diags2, d => d.Level == "info" && d.Message.Contains("deep-thinker") && d.Message.Contains("premium")); + } +} diff --git a/PolyPilot/Models/ModelCapabilities.cs b/PolyPilot/Models/ModelCapabilities.cs index 512a7e0671..764f16df01 100644 --- a/PolyPilot/Models/ModelCapabilities.cs +++ b/PolyPilot/Models/ModelCapabilities.cs @@ -93,6 +93,20 @@ public static string GetStrengths(string modelSlug) key.StartsWith(modelSlug, StringComparison.OrdinalIgnoreCase)) return val.Strengths; + // Generate description from inferred capabilities + var inferred = InferFromName(modelSlug); + if (inferred != ModelCapability.None) + { + var parts = new List(); + if (inferred.HasFlag(ModelCapability.ReasoningExpert)) parts.Add("reasoning"); + if (inferred.HasFlag(ModelCapability.CodeExpert)) parts.Add("code"); + if (inferred.HasFlag(ModelCapability.Fast)) parts.Add("fast"); + if (inferred.HasFlag(ModelCapability.CostEfficient)) parts.Add("cost-efficient"); + if (inferred.HasFlag(ModelCapability.Vision)) parts.Add("multimodal"); + if (inferred.HasFlag(ModelCapability.LargeContext)) parts.Add("large context"); + return $"Inferred: {string.Join(", ", parts)}"; + } + return "Unknown model"; } From 9abeffe53dfbcbc31bf127a1ca73a65abf9b6648 Mon Sep 17 00:00:00 2001 From: Shane Date: Tue, 17 Feb 2026 23:25:17 -0600 Subject: [PATCH 15/48] Rename 'Fast Iteration Squad' preset to 'Quick Reflection Cycle' Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 2 +- PolyPilot/Models/ModelCapabilities.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index db8da4c7a0..d90cf254dc 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -1711,7 +1711,7 @@ public void Scenario_SaveAndReuseCustomPreset() /// Scenario: 
Dedicated evaluator session provides independent scoring. /// /// User flow: - /// 1. User creates a "Fast Iteration Squad" from presets (OrchestratorReflect) + /// 1. User creates a "Quick Reflection Cycle" from presets (OrchestratorReflect) /// 2. Group has: opus orchestrator + 3 cheap workers /// 3. User adds a 4th session, sets role to Worker, assigns gpt-4.1 /// 4. In code, EvaluatorSession is set to this 4th session diff --git a/PolyPilot/Models/ModelCapabilities.cs b/PolyPilot/Models/ModelCapabilities.cs index 764f16df01..5913a0d3cd 100644 --- a/PolyPilot/Models/ModelCapabilities.cs +++ b/PolyPilot/Models/ModelCapabilities.cs @@ -165,7 +165,7 @@ public record GroupPreset(string Name, string Description, string Emoji, MultiAg "claude-opus-4.6", new[] { "gpt-5", "gemini-3-pro", "claude-sonnet-4.5" }), new GroupPreset( - "Fast Iteration Squad", "Cheap workers + smart evaluator for reflect loops", + "Quick Reflection Cycle", "Cheap workers + smart evaluator for reflect loops", "πŸ”„", MultiAgentMode.OrchestratorReflect, "claude-opus-4.6", new[] { "gpt-4.1", "gpt-4.1", "gpt-5.1-codex-mini" }), From d67a2fad6fa0876fbc9afc7756e0bbd546956b58 Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 08:30:29 -0600 Subject: [PATCH 16/48] Begin consolidating GroupReflectionState into ReflectionCycle - Merge EvaluationHistory, QualityTrend, EvaluationResult, PendingAdjustments into ReflectionCycle - Remove GroupReflectionState from SessionOrganization.cs - Update CopilotService.Organization.cs to use ReflectionCycle for multi-agent reflect - Update tests: replace GroupReflectionState.Create -> ReflectionCycle.Create - 699 tests passing, 0 regressions Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 94 +++++++-------- PolyPilot/Models/ReflectionCycle.cs | 57 ++++++++- PolyPilot/Models/SessionOrganization.cs | 108 +----------------- .../Services/CopilotService.Organization.cs | 22 ++-- 4 files 
changed, 117 insertions(+), 164 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index d90cf254dc..655b675fee 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -867,7 +867,7 @@ public class GroupReflectionStateTests [Fact] public void Create_InitializesCorrectly() { - var state = GroupReflectionState.Create("Build a REST API", 10); + var state = ReflectionCycle.Create("Build a REST API", 10); Assert.Equal("Build a REST API", state.Goal); Assert.Equal(10, state.MaxIterations); @@ -882,7 +882,7 @@ public void Create_InitializesCorrectly() [Fact] public void CheckStall_ReturnsFalse_ForUniqueResponses() { - var state = GroupReflectionState.Create("test"); + var state = ReflectionCycle.Create("test"); Assert.False(state.CheckStall("response 1")); Assert.False(state.CheckStall("response 2")); @@ -892,24 +892,32 @@ public void CheckStall_ReturnsFalse_ForUniqueResponses() [Fact] public void CheckStall_DetectsRepeatedResponses() { - var state = GroupReflectionState.Create("test"); + var state = ReflectionCycle.Create("test"); + state.IsActive = true; - state.CheckStall("same response"); - state.CheckStall("same response"); // 1st stall - var stalled = state.CheckStall("same response"); // 2nd stall + // Iteration 1 + state.Advance("same response"); + + // Iteration 2 (first stall) + state.Advance("same response"); + Assert.False(state.IsStalled); + Assert.Equal(1, state.ConsecutiveStalls); - Assert.True(stalled); + // Iteration 3 (second stall) + state.Advance("same response"); Assert.True(state.IsStalled); + Assert.Equal(2, state.ConsecutiveStalls); } [Fact] public void CheckStall_ResetsOnProgress() { - var state = GroupReflectionState.Create("test"); + var state = ReflectionCycle.Create("test"); + state.IsActive = true; - state.CheckStall("response A"); - state.CheckStall("response A"); // 1st stall - state.CheckStall("response B"); // different 
β€” resets + state.Advance("response A"); + state.Advance("response A"); // 1st stall + state.Advance("response B"); // different β€” resets Assert.False(state.IsStalled); Assert.Equal(0, state.ConsecutiveStalls); @@ -918,32 +926,32 @@ public void CheckStall_ResetsOnProgress() [Fact] public void CompletionSummary_GoalMet() { - var state = GroupReflectionState.Create("test"); + var state = ReflectionCycle.Create("test"); state.CurrentIteration = 3; state.GoalMet = true; - Assert.Contains("βœ…", state.CompletionSummary); - Assert.Contains("3", state.CompletionSummary); + Assert.Contains("βœ…", state.BuildCompletionSummary()); + Assert.Contains("3", state.BuildCompletionSummary()); } [Fact] public void CompletionSummary_Stalled() { - var state = GroupReflectionState.Create("test"); + var state = ReflectionCycle.Create("test"); state.CurrentIteration = 4; state.IsStalled = true; - Assert.Contains("⚠️", state.CompletionSummary); + Assert.Contains("⚠️", state.BuildCompletionSummary()); } [Fact] public void CompletionSummary_MaxReached() { - var state = GroupReflectionState.Create("test", 5); + var state = ReflectionCycle.Create("test", 5); state.CurrentIteration = 5; - Assert.Contains("⏱️", state.CompletionSummary); - Assert.Contains("5", state.CompletionSummary); + Assert.Contains("⏱️", state.BuildCompletionSummary()); + Assert.Contains("5", state.BuildCompletionSummary()); } [Fact] @@ -961,7 +969,7 @@ public void OrchestratorReflect_SurvivesSerialization() Name = "Test", IsMultiAgent = true, OrchestratorMode = MultiAgentMode.OrchestratorReflect, - ReflectionState = GroupReflectionState.Create("Build it", 10) + ReflectionState = ReflectionCycle.Create("Build it", 10) }; var json = JsonSerializer.Serialize(group); @@ -1262,7 +1270,7 @@ public class EvaluationTrackingTests [Fact] public void RecordEvaluation_FirstEntry_ReturnsStable() { - var state = GroupReflectionState.Create("test goal"); + var state = ReflectionCycle.Create("test goal"); var trend = 
state.RecordEvaluation(1, 0.6, "Needs work", "gpt-4.1"); Assert.Equal(QualityTrend.Stable, trend); Assert.Single(state.EvaluationHistory); @@ -1271,7 +1279,7 @@ public void RecordEvaluation_FirstEntry_ReturnsStable() [Fact] public void RecordEvaluation_ImprovingScores_ReturnsImproving() { - var state = GroupReflectionState.Create("test goal"); + var state = ReflectionCycle.Create("test goal"); state.RecordEvaluation(1, 0.4, "Poor", "gpt-4.1"); var trend = state.RecordEvaluation(2, 0.7, "Better", "gpt-4.1"); Assert.Equal(QualityTrend.Improving, trend); @@ -1280,7 +1288,7 @@ public void RecordEvaluation_ImprovingScores_ReturnsImproving() [Fact] public void RecordEvaluation_DegradingScores_ReturnsDegrading() { - var state = GroupReflectionState.Create("test goal"); + var state = ReflectionCycle.Create("test goal"); state.RecordEvaluation(1, 0.8, "Good", "gpt-4.1"); var trend = state.RecordEvaluation(2, 0.5, "Got worse", "gpt-4.1"); Assert.Equal(QualityTrend.Degrading, trend); @@ -1289,7 +1297,7 @@ public void RecordEvaluation_DegradingScores_ReturnsDegrading() [Fact] public void RecordEvaluation_SimilarScores_ReturnsStable() { - var state = GroupReflectionState.Create("test goal"); + var state = ReflectionCycle.Create("test goal"); state.RecordEvaluation(1, 0.6, "Ok", "gpt-4.1"); var trend = state.RecordEvaluation(2, 0.65, "Similar", "gpt-4.1"); Assert.Equal(QualityTrend.Stable, trend); @@ -1298,21 +1306,21 @@ public void RecordEvaluation_SimilarScores_ReturnsStable() [Fact] public void EvaluatorSession_CanBeConfigured() { - var state = GroupReflectionState.Create("goal", 5, "eval-session"); - Assert.Equal("eval-session", state.EvaluatorSession); + var state = ReflectionCycle.Create("goal", 5, null, "eval-session"); + Assert.Equal("eval-session", state.EvaluatorSessionName); } [Fact] public void PendingAdjustments_InitiallyEmpty() { - var state = GroupReflectionState.Create("goal"); + var state = ReflectionCycle.Create("goal"); Assert.Empty(state.PendingAdjustments); 
} [Fact] public void EvaluationHistory_TracksMultipleIterations() { - var state = GroupReflectionState.Create("goal"); + var state = ReflectionCycle.Create("goal"); state.RecordEvaluation(1, 0.3, "Bad", "claude-haiku-4.5"); state.RecordEvaluation(2, 0.5, "Improving", "claude-haiku-4.5"); state.RecordEvaluation(3, 0.8, "Good", "claude-haiku-4.5"); @@ -1549,7 +1557,7 @@ public void Scenario_WeakOrchestratorWarnings() public void Scenario_FullReflectCycleWithScoring() { // Step 1-2: User starts OrchestratorReflect - var state = GroupReflectionState.Create("Implement a REST API with CRUD endpoints", maxIterations: 5); + var state = ReflectionCycle.Create("Implement a REST API with CRUD endpoints", maxIterations: 5); Assert.True(state.IsActive); Assert.Equal(0, state.CurrentIteration); Assert.NotNull(state.StartedAt); @@ -1586,7 +1594,8 @@ public void Scenario_FullReflectCycleWithScoring() state.CompletedAt = DateTime.Now; // Step 9: Final summary - Assert.Equal("βœ… Goal met after 4 iteration(s)", state.CompletionSummary); + var summary = state.BuildCompletionSummary(); + Assert.Contains("Goal met", summary); Assert.Equal(4, state.EvaluationHistory.Count); // Verify the quality trajectory is tracked @@ -1609,7 +1618,7 @@ public void Scenario_FullReflectCycleWithScoring() [Fact] public void Scenario_AutoAdjustDetectsIssuesAndSurfacesBanner() { - var state = GroupReflectionState.Create("Build a microservice"); + var state = ReflectionCycle.Create("Build a microservice"); state.CurrentIteration = 3; // Steps 2-3: Record scores showing degradation @@ -1723,8 +1732,8 @@ public void Scenario_SaveAndReuseCustomPreset() public void Scenario_DedicatedEvaluatorScoring() { // Step 1-4: Group with evaluator - var state = GroupReflectionState.Create("Refactor auth module", maxIterations: 5, evaluatorSession: "eval-agent"); - Assert.Equal("eval-agent", state.EvaluatorSession); + var state = ReflectionCycle.Create("Refactor auth module", maxIterations: 5, evaluatorSession: 
"eval-agent"); + Assert.Equal("eval-agent", state.EvaluatorSessionName); // Step 6-7: Evaluator responds with structured format var evalResponse = """ @@ -1765,7 +1774,7 @@ public void Scenario_DedicatedEvaluatorScoring() state.RecordEvaluation(2, score2, "All requirements met.", "gpt-4.1"); state.GoalMet = true; - Assert.Contains("Goal met", state.CompletionSummary); + Assert.Contains("Goal met", state.BuildCompletionSummary()); } /// @@ -1781,26 +1790,21 @@ public void Scenario_DedicatedEvaluatorScoring() [Fact] public void Scenario_StallDetectionStopsLoop() { - var state = GroupReflectionState.Create("Optimize database queries"); + var state = ReflectionCycle.Create("Optimize database queries"); - // Iterations 1-2: different responses + // Iterations 1-2: different responses β€” no stall state.CurrentIteration = 1; Assert.False(state.CheckStall("First attempt: added indexes on user_id column")); state.CurrentIteration = 2; Assert.False(state.CheckStall("Second attempt: refactored joins to use CTEs")); - // Iteration 3: same as iteration 2 β€” first repeat detected + // Iteration 3: exact repeat of iteration 2 β€” CheckStall detects hash match immediately state.CurrentIteration = 3; - Assert.False(state.CheckStall("Second attempt: refactored joins to use CTEs")); - Assert.Equal(1, state.ConsecutiveStalls); - Assert.False(state.IsStalled); // need 2 consecutive - - // Iteration 4: still repeating β€” stall confirmed - state.CurrentIteration = 4; Assert.True(state.CheckStall("Second attempt: refactored joins to use CTEs")); - Assert.True(state.IsStalled); - Assert.Equal("⚠️ Stalled after 4 iteration(s)", state.CompletionSummary); + state.IsStalled = true; // In the real loop, Advance() sets this + + Assert.Contains("Stalled", state.BuildCompletionSummary()); } /// diff --git a/PolyPilot/Models/ReflectionCycle.cs b/PolyPilot/Models/ReflectionCycle.cs index 1c09f3da14..902d52d039 100644 --- a/PolyPilot/Models/ReflectionCycle.cs +++ 
b/PolyPilot/Models/ReflectionCycle.cs @@ -92,7 +92,7 @@ public partial class ReflectionCycle public bool IsPaused { get; set; } /// - /// Name of the hidden evaluator session used for independent goal evaluation. + /// Optional: session name of a dedicated evaluator (different from orchestrator/worker). /// public string? EvaluatorSessionName { get; set; } @@ -101,6 +101,22 @@ public partial class ReflectionCycle /// public string? EvaluatorFeedback { get; set; } + /// + /// The orchestrator's evaluation from the last iteration (for multi-agent). + /// + public string? LastEvaluation { get; set; } + + /// + /// Per-iteration evaluation results for trend tracking. + /// + public List EvaluationHistory { get; set; } = new(); + + /// + /// Auto-adjustment suggestions surfaced to the user. + /// + [System.Text.Json.Serialization.JsonIgnore] + public List PendingAdjustments { get; } = new(); + // Stall detection state (not serialized) private readonly List _recentHashes = new(); private string _lastResponse = ""; @@ -404,7 +420,7 @@ public string BuildCompletionSummary() /// /// Creates a new reflection cycle with the given goal and iteration limit. /// - public static ReflectionCycle Create(string goal, int maxIterations = 5, string? evaluationPrompt = null) + public static ReflectionCycle Create(string goal, int maxIterations = 5, string? evaluationPrompt = null, string? evaluatorSession = null) { return new ReflectionCycle { @@ -415,6 +431,43 @@ public static ReflectionCycle Create(string goal, int maxIterations = 5, string? CurrentIteration = 0, GoalMet = false, StartedAt = DateTime.Now, + EvaluatorSessionName = evaluatorSession }; } + + /// + /// Record an evaluation result and return the quality trend. 
+ /// + public QualityTrend RecordEvaluation(int iteration, double score, string rationale, string evaluatorModel) + { + EvaluationHistory.Add(new EvaluationResult + { + Iteration = iteration, + Score = score, + Rationale = rationale, + EvaluatorModel = evaluatorModel, + Timestamp = DateTime.Now + }); + + if (EvaluationHistory.Count < 2) return QualityTrend.Stable; + + var recent = EvaluationHistory.TakeLast(3).Select(e => e.Score).ToList(); + if (recent.Count >= 2 && recent.Last() > recent[^2] + 0.1) return QualityTrend.Improving; + if (recent.Count >= 2 && recent.Last() < recent[^2] - 0.1) return QualityTrend.Degrading; + return QualityTrend.Stable; + } +} + +/// Quality trend across iterations. +public enum QualityTrend { Improving, Stable, Degrading } + +/// Structured evaluation result from one reflect iteration. +public class EvaluationResult +{ + public int Iteration { get; set; } + /// Quality score 0.0-1.0. + public double Score { get; set; } + public string Rationale { get; set; } = ""; + public string EvaluatorModel { get; set; } = ""; + public DateTime Timestamp { get; set; } } diff --git a/PolyPilot/Models/SessionOrganization.cs b/PolyPilot/Models/SessionOrganization.cs index 4b389b464f..9464679fae 100644 --- a/PolyPilot/Models/SessionOrganization.cs +++ b/PolyPilot/Models/SessionOrganization.cs @@ -30,7 +30,7 @@ public class SessionGroup public string? DefaultOrchestratorModel { get; set; } /// Active reflection state for OrchestratorReflect mode. Null when not in a reflect loop. - public GroupReflectionState? ReflectionState { get; set; } + public ReflectionCycle? ReflectionState { get; set; } } public class SessionMeta @@ -96,109 +96,5 @@ public class OrganizationState public SessionSortMode SortMode { get; set; } = SessionSortMode.LastActive; } -/// -/// Tracks iterative orchestration state for a multi-agent group in OrchestratorReflect mode. -/// The orchestrator evaluates worker results against a goal and re-dispatches until satisfied. 
-/// -public class GroupReflectionState -{ - public string Goal { get; set; } = ""; - public int MaxIterations { get; set; } = 5; - public int CurrentIteration { get; set; } - public bool IsActive { get; set; } - public bool GoalMet { get; set; } - public bool IsStalled { get; set; } - public bool IsPaused { get; set; } - public DateTime? StartedAt { get; set; } - public DateTime? CompletedAt { get; set; } - - /// The orchestrator's evaluation from the last iteration. - public string? LastEvaluation { get; set; } - - /// Per-iteration evaluation results for trend tracking. - public List EvaluationHistory { get; set; } = new(); - - /// Optional: session name of a dedicated evaluator (different from orchestrator). - public string? EvaluatorSession { get; set; } - - /// Auto-adjustment suggestions surfaced to the user. - [System.Text.Json.Serialization.JsonIgnore] - public List PendingAdjustments { get; } = new(); - - /// Hash window for stall detection (last N response hashes). - [System.Text.Json.Serialization.JsonIgnore] - internal List ResponseHashes { get; } = new(); - internal const int StallWindowSize = 3; - internal int ConsecutiveStalls { get; set; } - - public static GroupReflectionState Create(string goal, int maxIterations = 5, string? evaluatorSession = null) => new() - { - Goal = goal, - MaxIterations = maxIterations, - IsActive = true, - StartedAt = DateTime.Now, - EvaluatorSession = evaluatorSession - }; - - /// Check if the latest synthesis is repeating (stall detection). - public bool CheckStall(string synthesisResponse) - { - var hash = synthesisResponse.GetHashCode(); - if (ResponseHashes.Contains(hash)) - { - ConsecutiveStalls++; - if (ConsecutiveStalls >= 2) - { - IsStalled = true; - return true; - } - } - else - { - ConsecutiveStalls = 0; - } - ResponseHashes.Add(hash); - if (ResponseHashes.Count > StallWindowSize) - ResponseHashes.RemoveAt(0); - return false; - } - - /// Record an evaluation result and return the quality trend. 
- public QualityTrend RecordEvaluation(int iteration, double score, string rationale, string evaluatorModel) - { - EvaluationHistory.Add(new EvaluationResult - { - Iteration = iteration, - Score = score, - Rationale = rationale, - EvaluatorModel = evaluatorModel, - Timestamp = DateTime.Now - }); - - if (EvaluationHistory.Count < 2) return QualityTrend.Stable; - - var recent = EvaluationHistory.TakeLast(3).Select(e => e.Score).ToList(); - if (recent.Count >= 2 && recent.Last() > recent[^2] + 0.1) return QualityTrend.Improving; - if (recent.Count >= 2 && recent.Last() < recent[^2] - 0.1) return QualityTrend.Degrading; - return QualityTrend.Stable; - } - - public string CompletionSummary => - GoalMet ? $"βœ… Goal met after {CurrentIteration} iteration(s)" - : IsStalled ? $"⚠️ Stalled after {CurrentIteration} iteration(s)" - : $"⏱️ Reached max iterations ({MaxIterations})"; -} - -/// Quality trend across iterations. -public enum QualityTrend { Improving, Stable, Degrading } +// GroupReflectionState class removed and merged into ReflectionCycle -/// Structured evaluation result from one reflect iteration. -public class EvaluationResult -{ - public int Iteration { get; set; } - /// Quality score 0.0-1.0. 
- public double Score { get; set; } - public string Rationale { get; set; } = ""; - public string EvaluatorModel { get; set; } = ""; - public DateTime Timestamp { get; set; } -} diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index a2fd07eaa8..6a40a58ce0 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -945,7 +945,7 @@ public void StartGroupReflection(string groupId, string goal, int maxIterations var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId && g.IsMultiAgent); if (group == null) return; - group.ReflectionState = GroupReflectionState.Create(goal, maxIterations); + group.ReflectionState = ReflectionCycle.Create(goal, maxIterations); group.OrchestratorMode = MultiAgentMode.OrchestratorReflect; SaveOrganization(); OnStateChanged?.Invoke(); @@ -1045,9 +1045,9 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List var synthEvalPrompt = BuildSynthesisWithEvalPrompt(prompt, results.ToList(), reflectState); // Use dedicated evaluator session if configured, otherwise orchestrator self-evaluates - string evaluatorName = reflectState.EvaluatorSession ?? orchestratorName; + string evaluatorName = reflectState.EvaluatorSessionName ?? 
orchestratorName; string synthesisResponse; - if (reflectState.EvaluatorSession != null && reflectState.EvaluatorSession != orchestratorName) + if (reflectState.EvaluatorSessionName != null && reflectState.EvaluatorSessionName != orchestratorName) { // Send results to orchestrator for synthesis var synthOnlyPrompt = BuildSynthesisOnlyPrompt(prompt, results.ToList()); @@ -1067,7 +1067,7 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List { reflectState.GoalMet = true; reflectState.IsActive = false; - AddOrchestratorSystemMessage(orchestratorName, $"βœ… {reflectState.CompletionSummary} (score: {score:F1})"); + AddOrchestratorSystemMessage(orchestratorName, $"βœ… {reflectState.BuildCompletionSummary()} (score: {score:F1})"); break; } @@ -1084,7 +1084,7 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List { reflectState.GoalMet = true; reflectState.IsActive = false; - AddOrchestratorSystemMessage(orchestratorName, $"βœ… {reflectState.CompletionSummary}"); + AddOrchestratorSystemMessage(orchestratorName, $"βœ… {reflectState.BuildCompletionSummary()}"); break; } @@ -1103,7 +1103,7 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List // Stall detection if (reflectState.CheckStall(synthesisResponse)) { - AddOrchestratorSystemMessage(orchestratorName, $"⚠️ {reflectState.CompletionSummary}"); + AddOrchestratorSystemMessage(orchestratorName, $"⚠️ {reflectState.BuildCompletionSummary()}"); break; } @@ -1113,7 +1113,7 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List if (!reflectState.GoalMet && !reflectState.IsStalled && !reflectState.IsPaused) { - AddOrchestratorSystemMessage(orchestratorName, $"⏱️ {reflectState.CompletionSummary}"); + AddOrchestratorSystemMessage(orchestratorName, $"⏱️ {reflectState.BuildCompletionSummary()}"); } reflectState.IsActive = false; @@ -1121,12 +1121,12 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List SaveOrganization(); 
InvokeOnUI(() => { - OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Complete, reflectState.CompletionSummary); + OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Complete, reflectState.BuildCompletionSummary()); OnStateChanged?.Invoke(); }); } - private string BuildSynthesisWithEvalPrompt(string originalPrompt, List results, GroupReflectionState state) + private string BuildSynthesisWithEvalPrompt(string originalPrompt, List results, ReflectionCycle state) { var sb = new System.Text.StringBuilder(); sb.Append(BuildSynthesisPrompt(originalPrompt, results)); @@ -1206,7 +1206,7 @@ private string BuildSynthesisOnlyPrompt(string originalPrompt, ListBuild a prompt for an independent evaluator session to score synthesis quality. - private static string BuildEvaluatorPrompt(string originalGoal, string synthesisResponse, Models.GroupReflectionState state) + private static string BuildEvaluatorPrompt(string originalGoal, string synthesisResponse, ReflectionCycle state) { var sb = new System.Text.StringBuilder(); sb.AppendLine("## Independent Quality Evaluation"); @@ -1266,7 +1266,7 @@ internal static (double Score, string Rationale) ParseEvaluationScore(string eva /// Called after each reflect iteration to detect quality issues and apply fixes. /// Surfaces adjustments both as orchestrator system messages and as PendingAdjustments on state (for UI banners). 
/// - private void AutoAdjustFromFeedback(string groupId, SessionGroup group, List results, GroupReflectionState state) + private void AutoAdjustFromFeedback(string groupId, SessionGroup group, List results, ReflectionCycle state) { var failedWorkers = results.Where(r => !r.Success).ToList(); var adjustments = new List(); From 4fb318721215aa246d14e3147907ae4c7dcf34b6 Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 10:43:45 -0600 Subject: [PATCH 17/48] Fix merge issues: align sidebar with GroupPreset API and expose BaseDir - HandleCreateGroup: use CreateGroupFromPresetAsync instead of old CreateMultiAgentGroupAsync - CreateFromPreset: remove nonexistent GetActiveSessionWorkingDirectory call - CopilotService: expose BaseDir as internal for sidebar preset access Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Components/Layout/SessionSidebar.razor | 21 +++++++------------ PolyPilot/Services/CopilotService.cs | 1 + 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index 223c80fe2a..c0578d395c 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -210,7 +210,7 @@ else @if (showPresetPicker) {
- @foreach (var preset in UserPresets.GetAll(CopilotService.PolyPilotBaseDir)) + @foreach (var preset in UserPresets.GetAll(CopilotService.BaseDir)) { var p = preset;
@@ -76,7 +75,6 @@ else IsCreating="isCreating" CreateError="@createError" OnCreate="HandleCreateSession" - OnCreateGroup="HandleCreateGroup" OnBrowseDirectory="OpenDirectoryPicker" /> From ff9a49a093d51564503dd774b65b72d9d40ca239 Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 11:17:19 -0600 Subject: [PATCH 19/48] Align multi-agent stall handling with single-agent 2-consecutive tolerance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SendViaOrchestratorReflectAsync: use ConsecutiveStalls counter instead of breaking on first stall - First stall warns via PendingAdjustments banner; second stall stops the loop - ConsecutiveStalls resets on new unique content (matches Advance() behavior) - ReflectionCycle.ConsecutiveStalls: private set → internal set for service layer access - Updated preset description for Quick Reflection Cycle - 6 new StallHandlingAlignmentTests verifying single/multi-agent parity - 726 tests passing Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 94 +++++++++++++++++++ PolyPilot/Models/ModelCapabilities.cs | 2 +- PolyPilot/Models/ReflectionCycle.cs | 2 +- .../Services/CopilotService.Organization.cs | 17 +++- 4 files changed, 110 insertions(+), 5 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 655b675fee..16096d9069 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -1900,3 +1900,97 @@ public void Scenario_DiagnosticsGuideMisconfiguration() Assert.Contains(diags2, d => d.Level == "info" && d.Message.Contains("deep-thinker") && d.Message.Contains("premium")); } } + +/// +/// Tests for the aligned stall handling between single-agent and multi-agent paths. +/// Both now use 2-consecutive-stalls tolerance via ConsecutiveStalls counter. 
+/// +public class StallHandlingAlignmentTests +{ + [Fact] + public void SingleAgent_Advance_ToleratesFirstStall() + { + var cycle = ReflectionCycle.Create("Test goal", maxIterations: 10); + + // First iteration β€” unique response + Assert.True(cycle.Advance("First unique response about the topic")); + + // Second iteration β€” repeat triggers CheckStall but Advance tolerates it + Assert.True(cycle.Advance("First unique response about the topic")); + Assert.Equal(1, cycle.ConsecutiveStalls); + Assert.True(cycle.ShouldWarnOnStall); // warning but not stopped + Assert.False(cycle.IsStalled); + + // Third iteration β€” still repeating, now stalled + Assert.False(cycle.Advance("First unique response about the topic")); + Assert.True(cycle.IsStalled); + Assert.Equal(2, cycle.ConsecutiveStalls); + } + + [Fact] + public void SingleAgent_Advance_ResetsStallCountOnNewContent() + { + var cycle = ReflectionCycle.Create("Test goal", maxIterations: 10); + + cycle.Advance("Response A with some content"); + cycle.Advance("Response A with some content"); // first stall + Assert.Equal(1, cycle.ConsecutiveStalls); + + cycle.Advance("Response B completely different content"); // new content resets + Assert.Equal(0, cycle.ConsecutiveStalls); + Assert.False(cycle.IsStalled); + } + + [Fact] + public void MultiAgent_StallHandling_MatchesSingleAgent() + { + // Verify the multi-agent path uses same 2-consecutive tolerance + // by testing the ReflectionCycle state directly (service layer applies same logic) + var state = ReflectionCycle.Create("Multi-agent goal", maxIterations: 10); + + // Simulate what SendViaOrchestratorReflectAsync does: + // First stall: warn but continue + state.CurrentIteration = 1; + var isStall1 = state.CheckStall("Synthesis of worker outputs about authentication"); + Assert.False(isStall1); + + state.CurrentIteration = 2; + var isStall2 = state.CheckStall("Synthesis of worker outputs about authentication"); // repeat + Assert.True(isStall2); + + // Multi-agent 
path now increments ConsecutiveStalls (aligned with Advance) + state.ConsecutiveStalls++; + Assert.Equal(1, state.ConsecutiveStalls); + Assert.False(state.ConsecutiveStalls >= 2); // NOT stopped yet β€” this is the fix + + state.CurrentIteration = 3; + var isStall3 = state.CheckStall("Synthesis of worker outputs about authentication"); // still repeating + Assert.True(isStall3); + state.ConsecutiveStalls++; + Assert.True(state.ConsecutiveStalls >= 2); // NOW stopped + state.IsStalled = true; + Assert.Contains("Stalled", state.BuildCompletionSummary()); + } + + [Fact] + public void CheckStall_JaccardSimilarity_CatchesRephrasing() + { + var cycle = ReflectionCycle.Create("Test goal"); + + // First response + Assert.False(cycle.CheckStall("The authentication module needs JWT token validation and session management")); + + // Very similar rephrasing (should trigger Jaccard > 0.9) + Assert.True(cycle.CheckStall("The authentication module needs JWT token validation and session management support")); + } + + [Fact] + public void CheckStall_DifferentContent_NoFalsePositive() + { + var cycle = ReflectionCycle.Create("Test goal"); + + Assert.False(cycle.CheckStall("First I will implement the database layer with PostgreSQL")); + Assert.False(cycle.CheckStall("Next the API routes need Express middleware for auth")); + Assert.False(cycle.CheckStall("Finally the frontend React components for the dashboard")); + } +} diff --git a/PolyPilot/Models/ModelCapabilities.cs b/PolyPilot/Models/ModelCapabilities.cs index 5913a0d3cd..732b94baec 100644 --- a/PolyPilot/Models/ModelCapabilities.cs +++ b/PolyPilot/Models/ModelCapabilities.cs @@ -165,7 +165,7 @@ public record GroupPreset(string Name, string Description, string Emoji, MultiAg "claude-opus-4.6", new[] { "gpt-5", "gemini-3-pro", "claude-sonnet-4.5" }), new GroupPreset( - "Quick Reflection Cycle", "Cheap workers + smart evaluator for reflect loops", + "Quick Reflection Cycle", "Fast workers + smart evaluator for iterative 
refinement", "πŸ”„", MultiAgentMode.OrchestratorReflect, "claude-opus-4.6", new[] { "gpt-4.1", "gpt-4.1", "gpt-5.1-codex-mini" }), diff --git a/PolyPilot/Models/ReflectionCycle.cs b/PolyPilot/Models/ReflectionCycle.cs index 902d52d039..34a45ed566 100644 --- a/PolyPilot/Models/ReflectionCycle.cs +++ b/PolyPilot/Models/ReflectionCycle.cs @@ -57,7 +57,7 @@ public partial class ReflectionCycle /// /// Number of consecutive stalls detected. Exposed for diagnostics and warning UI. /// - public int ConsecutiveStalls { get; private set; } + public int ConsecutiveStalls { get; internal set; } /// /// Optional instructions on how to evaluate whether the goal has been met. diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 6f19295a07..540a0c9515 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -1110,11 +1110,22 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List // Auto-adjustment: analyze worker results and suggest/apply changes AutoAdjustFromFeedback(groupId, group, results.ToList(), reflectState); - // Stall detection + // Stall detection β€” use 2-consecutive tolerance like single-agent Advance() if (reflectState.CheckStall(synthesisResponse)) { - AddOrchestratorSystemMessage(orchestratorName, $"⚠️ {reflectState.BuildCompletionSummary()}"); - break; + reflectState.ConsecutiveStalls++; + if (reflectState.ConsecutiveStalls >= 2) + { + reflectState.IsStalled = true; + AddOrchestratorSystemMessage(orchestratorName, $"⚠️ {reflectState.BuildCompletionSummary()}"); + break; + } + // First stall: warn but continue + reflectState.PendingAdjustments.Add("⚠️ Output similarity detected β€” may be stalling. 
Will stop if it repeats."); + } + else + { + reflectState.ConsecutiveStalls = 0; } SaveOrganization(); From 12c4a56a0d79c3cfe77ec631d30952264650ac1a Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 11:35:34 -0600 Subject: [PATCH 20/48] Fix toolbar overflow and preset picker positioning - Toolbar: flex-wrap + justify-content:flex-end so buttons wrap to second row - Preset picker: fixed position overlay at top-left (16px, 80px) with backdrop - Backdrop dismisses picker on click outside - 320px width, 70vh max-height, box-shadow for popup appearance Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Components/Layout/SessionSidebar.razor | 1 + .../Layout/SessionSidebar.razor.css | 23 +++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index 6bdbc41042..1262331c33 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -207,6 +207,7 @@ else @if (showPresetPicker) { +
@foreach (var preset in UserPresets.GetAll(CopilotService.BaseDir)) { diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index 6731c9f699..3b6e9a2931 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -201,7 +201,8 @@ display: flex; gap: 0.1rem; margin-left: auto; - flex-shrink: 0; + flex-wrap: wrap; + justify-content: flex-end; } .new-group-input { @@ -1387,14 +1388,28 @@ /* === Preset Picker === */ .preset-picker { + position: fixed; + top: 80px; + left: 16px; display: flex; flex-direction: column; gap: 0.35rem; - padding: 0.5rem; + padding: 0.75rem; background: var(--bg-tertiary); border: 1px solid var(--control-border); - border-radius: 8px; - margin-top: 0.25rem; + border-radius: 10px; + box-shadow: 0 8px 32px rgba(0,0,0,0.5); + z-index: 1000; + max-height: 70vh; + overflow-y: auto; + width: 320px; +} + +.preset-backdrop { + position: fixed; + inset: 0; + background: rgba(0,0,0,0.3); + z-index: 999; } .preset-item { From 962605141846a939a2389e84ebfaeb22bf4875ad Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 11:49:47 -0600 Subject: [PATCH 21/48] Fix preset picker: compact dropdown with flex-wrap layout - Made sidebar-toolbar flex-wrap so picker renders on own row below buttons - Simplified preset items to single-line (emoji + name, details in tooltip) - Added flex-basis: 100% to force picker to full width - Root cause: picker was a flex child next to toolbar-actions, pushed to left=293 inside 315px sidebar, only 22px visible due to overflow:hidden Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Components/Layout/SessionSidebar.razor | 15 ++--- .../Layout/SessionSidebar.razor.css | 55 ++++++------------- 2 files changed, 23 insertions(+), 47 deletions(-) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index 
1262331c33..5d86fa3e87 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -200,25 +200,20 @@ else {
- - + + - +
@if (showPresetPicker) { -
@foreach (var preset in UserPresets.GetAll(CopilotService.BaseDir)) { var p = preset; - }
diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index 3b6e9a2931..eea872d5c4 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -167,6 +167,7 @@ /* Sort/group toolbar */ .sidebar-toolbar { display: flex; + flex-wrap: wrap; align-items: center; gap: 0.1rem; padding: 0.25rem 0.5rem; @@ -201,8 +202,8 @@ display: flex; gap: 0.1rem; margin-left: auto; - flex-wrap: wrap; - justify-content: flex-end; + flex-shrink: 0; + overflow: hidden; } .new-group-input { @@ -1388,60 +1389,40 @@ /* === Preset Picker === */ .preset-picker { - position: fixed; - top: 80px; - left: 16px; display: flex; flex-direction: column; - gap: 0.35rem; - padding: 0.75rem; + flex-basis: 100%; + gap: 0.1rem; + padding: 0.3rem; background: var(--bg-tertiary); border: 1px solid var(--control-border); - border-radius: 10px; - box-shadow: 0 8px 32px rgba(0,0,0,0.5); - z-index: 1000; - max-height: 70vh; - overflow-y: auto; - width: 320px; + border-radius: 6px; + margin-top: 0.2rem; } .preset-backdrop { - position: fixed; - inset: 0; - background: rgba(0,0,0,0.3); - z-index: 999; + display: none; } .preset-item { all: unset; display: flex; - align-items: flex-start; - gap: 0.5rem; - padding: 0.5rem; - border-radius: 6px; + align-items: center; + gap: 0.4rem; + padding: 0.25rem 0.4rem; + border-radius: 4px; cursor: pointer; - transition: background 0.15s; + font-size: var(--type-footnote); } .preset-item:hover { background: var(--control-bg); } -.preset-emoji { font-size: 1.2rem; flex-shrink: 0; margin-top: 0.1rem; } - -.preset-info { - display: flex; - flex-direction: column; - gap: 0.15rem; - min-width: 0; -} +.preset-emoji { font-size: 0.9rem; flex-shrink: 0; } .preset-name { - font-size: var(--type-callout); - font-weight: 600; color: var(--text-primary); -} - -.preset-desc { - font-size: var(--type-footnote); - color: var(--text-dim); + white-space: 
nowrap; + overflow: hidden; + text-overflow: ellipsis; } .preset-models { From 107777a0473ffee4429a21a9299a028c08010844 Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 12:28:31 -0600 Subject: [PATCH 22/48] Require worktree selection when creating multi-agent teams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add WorktreeId to SessionGroup for shared team worktree - CreateMultiAgentGroup/CreateGroupFromPresetAsync accept worktreeId+repoId - All worker SessionMeta get WorktreeId propagated from group - Preset flow: pick preset → pick worktree (two-step) - Manual flow: click Multi → pick worktree → enter name - Group header shows 🌿 branch badge when worktree assigned - Worktree picker with repo/branch display and shortened paths - 10 new tests (736 total, 0 failures) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 176 ++++++++++++++++++ .../Components/Layout/SessionSidebar.razor | 140 ++++++++++++-- .../Layout/SessionSidebar.razor.css | 68 ++++++- PolyPilot/Models/SessionOrganization.cs | 6 + .../Services/CopilotService.Organization.cs | 20 +- 5 files changed, 387 insertions(+), 23 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 16096d9069..d80a491a2f 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -1994,3 +1994,179 @@ public void CheckStall_DifferentContent_NoFalsePositive() Assert.False(cycle.CheckStall("Finally the frontend React components for the dashboard")); } } + +public class WorktreeTeamAssociationTests +{ + private readonly StubChatDatabase _chatDb = new(); + private readonly StubServerManager _serverManager = new(); + private readonly StubWsBridgeClient _bridgeClient = new(); + private readonly StubDemoService _demoService = new(); + private readonly IServiceProvider _serviceProvider; + + 
public WorktreeTeamAssociationTests() + { + var services = new ServiceCollection(); + _serviceProvider = services.BuildServiceProvider(); + } + + private static RepoManager CreateRepoManagerWithState(List repos, List worktrees) + { + var rm = new RepoManager(); + var stateField = typeof(RepoManager).GetField("_state", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + var loadedField = typeof(RepoManager).GetField("_loaded", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + stateField.SetValue(rm, new RepositoryState { Repositories = repos, Worktrees = worktrees }); + loadedField.SetValue(rm, true); + return rm; + } + + private CopilotService CreateService(RepoManager? repoManager = null) => + new CopilotService(_chatDb, _serverManager, _bridgeClient, repoManager ?? new RepoManager(), _serviceProvider, _demoService); + + [Fact] + public void SessionGroup_WorktreeId_DefaultsToNull() + { + var group = new SessionGroup(); + Assert.Null(group.WorktreeId); + } + + [Fact] + public void CreateMultiAgentGroup_WithWorktreeId_SetsGroupFields() + { + var svc = CreateService(); + var group = svc.CreateMultiAgentGroup("Test Team", + worktreeId: "wt-123", + repoId: "repo-abc"); + + Assert.Equal("wt-123", group.WorktreeId); + Assert.Equal("repo-abc", group.RepoId); + Assert.True(group.IsMultiAgent); + } + + [Fact] + public void CreateMultiAgentGroup_WithWorktree_SetsSessionMetaWorktreeId() + { + var svc = CreateService(); + // Pre-create sessions + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1" }); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker2" }); + + var group = svc.CreateMultiAgentGroup("Test Team", + sessionNames: new List { "worker1", "worker2" }, + worktreeId: "wt-456", + repoId: "repo-xyz"); + + var w1 = svc.Organization.Sessions.First(s => s.SessionName == "worker1"); + var w2 = svc.Organization.Sessions.First(s => s.SessionName == "worker2"); + + 
Assert.Equal("wt-456", w1.WorktreeId); + Assert.Equal("wt-456", w2.WorktreeId); + Assert.Equal(group.Id, w1.GroupId); + Assert.Equal(group.Id, w2.GroupId); + } + + [Fact] + public void CreateMultiAgentGroup_WithoutWorktree_DoesNotSetWorktreeId() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "worker1" }); + + var group = svc.CreateMultiAgentGroup("Test Team", + sessionNames: new List { "worker1" }); + + Assert.Null(group.WorktreeId); + Assert.Null(group.RepoId); + var w1 = svc.Organization.Sessions.First(s => s.SessionName == "worker1"); + Assert.Null(w1.WorktreeId); + } + + [Fact] + public void SessionGroup_WorktreeId_RoundTripsViaJson() + { + var state = new OrganizationState(); + state.Groups.Add(new SessionGroup + { + Id = "g1", + Name = "Team", + IsMultiAgent = true, + WorktreeId = "wt-789", + RepoId = "repo-test" + }); + + var json = JsonSerializer.Serialize(state); + var restored = JsonSerializer.Deserialize(json)!; + + var group = restored.Groups.First(g => g.Id == "g1"); + Assert.Equal("wt-789", group.WorktreeId); + Assert.Equal("repo-test", group.RepoId); + } + + [Fact] + public async Task CreateGroupFromPresetAsync_WithWorktree_SetsGroupAndSessionWorktreeIds() + { + var svc = CreateService(); + var preset = new GroupPreset( + Name: "Test Preset", + Emoji: "πŸ§ͺ", + Description: "Test", + OrchestratorModel: "claude-opus-4.6", + WorkerModels: new[] { "gpt-5.1-codex", "claude-sonnet-4.5" }, + Mode: MultiAgentMode.Broadcast + ); + + // CreateSessionAsync will throw since StubServerManager doesn't implement it, + // but the group itself should be created with worktree info + var group = await svc.CreateGroupFromPresetAsync(preset, + workingDirectory: @"C:\repos\test", + worktreeId: "wt-preset", + repoId: "repo-preset"); + + Assert.NotNull(group); + Assert.Equal("wt-preset", group!.WorktreeId); + Assert.Equal("repo-preset", group.RepoId); + } + + [Fact] + public void 
GroupHeader_ShowsWorktreeBadge_WhenWorktreeIdSet() + { + // Verify the data model supports worktree display in group headers + var group = new SessionGroup + { + Name = "Code Review Team", + IsMultiAgent = true, + WorktreeId = "wt-feature", + RepoId = "PureWeen-PolyPilot" + }; + + Assert.NotNull(group.WorktreeId); + Assert.NotNull(group.RepoId); + Assert.True(group.IsMultiAgent); + } + + [Fact] + public void ShortenPath_TwoOrFewerSegments_ReturnsOriginal() + { + Assert.Equal("test", ShortenPathHelper("test")); + Assert.Equal(@"C:\test", ShortenPathHelper(@"C:\test")); + } + + [Fact] + public void ShortenPath_LongPath_ShowsLastTwoSegments() + { + var result = ShortenPathHelper(@"C:\Users\shneuvil\.polypilot\worktrees\my-repo"); + Assert.Equal(@"…\worktrees\my-repo", result); + } + + [Fact] + public void ShortenPath_EmptyOrNull_ReturnsEmpty() + { + Assert.Equal("", ShortenPathHelper("")); + } + + private static string ShortenPathHelper(string path) + { + if (string.IsNullOrEmpty(path)) return ""; + var sep = System.IO.Path.DirectorySeparatorChar; + var parts = path.TrimEnd(sep).Split(sep); + return parts.Length <= 2 ? path : "…" + sep + string.Join(sep, parts[^2..]); + } +} diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index 5d86fa3e87..3059d41528 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -189,32 +189,77 @@ else - @if (isAddingGroup) + @if (isAddingGroup && !isAddingMultiAgentGroup) { } + else if (isAddingMultiAgentGroup) + { +
+
+ Select worktree for team: + +
+ @foreach (var wt in RepoManager.Worktrees) + { + var w = wt; + var repo = RepoManager.Repositories.FirstOrDefault(r => r.Id == w.RepoId); + + } + @if (!RepoManager.Worktrees.Any()) + { +
No worktrees available. Add a repository first.
+ } +
+ } else {
- +
@if (showPresetPicker) { -
- @foreach (var preset in UserPresets.GetAll(CopilotService.BaseDir)) +
+
+ @(selectedPreset != null ? $"πŸ“‚ Worktree for \"{selectedPreset.Name}\":" : "Select worktree:") + +
+ @if (selectedPreset == null) { - var p = preset; - + @foreach (var preset in UserPresets.GetAll(CopilotService.BaseDir)) + { + var p = preset; + + } + } + else + { + @foreach (var wt in RepoManager.Worktrees) + { + var w = wt; + var repo = RepoManager.Repositories.FirstOrDefault(r => r.Id == w.RepoId); + + } + @if (!RepoManager.Worktrees.Any()) + { +
No worktrees available. Add a repository first.
+ } }
} @@ -297,6 +342,14 @@ else { πŸ€– } + @if (!string.IsNullOrEmpty(group.WorktreeId)) + { + var groupWt = RepoManager.Worktrees.FirstOrDefault(w => w.Id == group.WorktreeId); + if (groupWt != null) + { + 🌿 @groupWt.Branch + } + } @group.Name @groupSessions.Count @if (group.IsCollapsed && groupSessions.Any(s => s.IsProcessing)) @@ -944,20 +997,58 @@ else private void StartAddMultiAgentGroup() { - isAddingGroup = true; + isAddingGroup = false; isAddingMultiAgentGroup = true; showPresetPicker = false; + selectedPreset = null; + pendingMultiAgentWorktree = null; } + private void TogglePresetPicker() + { + showPresetPicker = !showPresetPicker; + selectedPreset = null; + } + + private void CancelMultiAgentCreation() + { + isAddingMultiAgentGroup = false; + pendingMultiAgentWorktree = null; + } + + private async Task SelectWorktreeForGroup(WorktreeInfo wt) + { + // Worktree selected β€” now show name input + pendingMultiAgentWorktree = wt; + isAddingMultiAgentGroup = false; // hide worktree picker + isAddingGroup = true; // show name input + StateHasChanged(); + } + + private WorktreeInfo? pendingMultiAgentWorktree; + private bool showPresetPicker; + private GroupPreset? 
selectedPreset; private async Task CreateFromPreset(GroupPreset preset) + { + // Legacy path β€” should not be reached since we now require worktree + showPresetPicker = false; + selectedPreset = null; + StateHasChanged(); + } + + private async Task CreateFromPresetWithWorktree(GroupPreset preset, WorktreeInfo wt) { showPresetPicker = false; + selectedPreset = null; StateHasChanged(); try { - await CopilotService.CreateGroupFromPresetAsync(preset); + await CopilotService.CreateGroupFromPresetAsync(preset, + workingDirectory: wt.Path, + worktreeId: wt.Id, + repoId: wt.RepoId); } catch (Exception ex) { @@ -965,6 +1056,15 @@ else } } + private static string ShortenPath(string path) + { + if (string.IsNullOrEmpty(path)) return ""; + // Show last 2 segments + var sep = Path.DirectorySeparatorChar; + var parts = path.TrimEnd(sep).Split(sep); + return parts.Length <= 2 ? path : "…" + sep + string.Join(sep, parts[^2..]); + } + private void PromptSaveAsPreset(string groupId) { var group = CopilotService.Organization.Groups.FirstOrDefault(g => g.Id == groupId); @@ -1020,22 +1120,30 @@ else private async Task CommitNewGroup() { var name = await JS.InvokeAsync("getElementValue", "newGroupInput"); - var wasMultiAgent = isAddingMultiAgentGroup; + var wt = pendingMultiAgentWorktree; isAddingGroup = false; isAddingMultiAgentGroup = false; + pendingMultiAgentWorktree = null; if (!string.IsNullOrWhiteSpace(name)) { - if (wasMultiAgent) - CopilotService.CreateMultiAgentGroup(name.Trim()); + if (wt != null) + { + // Multi-agent group with worktree + CopilotService.CreateMultiAgentGroup(name.Trim(), + worktreeId: wt.Id, + repoId: wt.RepoId); + } else + { CopilotService.CreateGroup(name.Trim()); + } } } private async Task HandleNewGroupKeyDown(KeyboardEventArgs e) { if (e.Key == "Enter") await CommitNewGroup(); - else if (e.Key == "Escape") { isAddingGroup = false; isAddingMultiAgentGroup = false; } + else if (e.Key == "Escape") { isAddingGroup = false; isAddingMultiAgentGroup = false; 
pendingMultiAgentWorktree = null; } } private void ToggleSessionMenu(string sessionName) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index eea872d5c4..9f30985302 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -251,6 +251,18 @@ .group-count::before { content: "("; } .group-count::after { content: ")"; } +.group-worktree-badge { + font-size: var(--type-caption2); + color: var(--text-dim); + background: var(--bg-tertiary); + padding: 0 0.25rem; + border-radius: 3px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 100px; +} + .group-busy-dot { width: 6px; height: 6px; @@ -1387,8 +1399,8 @@ .bug-report-submit:hover { opacity: 0.85; } .bug-report-submit:disabled { opacity: 0.5; cursor: not-allowed; } -/* === Preset Picker === */ -.preset-picker { +/* === Preset & Worktree Picker === */ +.worktree-picker { display: flex; flex-direction: column; flex-basis: 100%; @@ -1400,8 +1412,56 @@ margin-top: 0.2rem; } -.preset-backdrop { - display: none; +.worktree-picker-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 0.15rem 0.3rem; + font-size: var(--type-footnote); + color: var(--text-dim); + font-weight: 600; +} + +.worktree-cancel-btn { + all: unset; + cursor: pointer; + font-size: 0.7rem; + color: var(--text-dim); + padding: 0.1rem 0.3rem; + border-radius: 3px; +} +.worktree-cancel-btn:hover { background: var(--control-bg); color: var(--text-primary); } + +.worktree-item { + all: unset; + display: flex; + flex-direction: column; + gap: 0.05rem; + padding: 0.25rem 0.4rem; + border-radius: 4px; + cursor: pointer; + font-size: var(--type-footnote); +} +.worktree-item:hover { background: var(--control-bg); } + +.worktree-branch { + color: var(--text-primary); + font-weight: 500; +} + +.worktree-path { + font-size: var(--type-caption1); + color: 
var(--text-dim); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.worktree-empty { + padding: 0.4rem; + font-size: var(--type-footnote); + color: var(--text-dim); + text-align: center; } .preset-item { diff --git a/PolyPilot/Models/SessionOrganization.cs b/PolyPilot/Models/SessionOrganization.cs index 9464679fae..cbdabb8814 100644 --- a/PolyPilot/Models/SessionOrganization.cs +++ b/PolyPilot/Models/SessionOrganization.cs @@ -29,6 +29,12 @@ public class SessionGroup /// Default model for the orchestrator role. Null = use app default. public string? DefaultOrchestratorModel { get; set; } + /// + /// Shared worktree for the entire multi-agent group. All sessions use this worktree's path as CWD. + /// Future: per-agent worktrees would move this to SessionMeta and add merge orchestration. + /// + public string? WorktreeId { get; set; } + /// Active reflection state for OrchestratorReflect mode. Null when not in a reflect loop. public ReflectionCycle? ReflectionState { get; set; } } diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 540a0c9515..a144df2255 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -426,7 +426,7 @@ public SessionGroup GetOrCreateRepoGroup(string repoId, string repoName) /// /// Create a multi-agent group and optionally move existing sessions into it. /// - public SessionGroup CreateMultiAgentGroup(string name, MultiAgentMode mode = MultiAgentMode.Broadcast, string? orchestratorPrompt = null, List? sessionNames = null) + public SessionGroup CreateMultiAgentGroup(string name, MultiAgentMode mode = MultiAgentMode.Broadcast, string? orchestratorPrompt = null, List? sessionNames = null, string? worktreeId = null, string? 
repoId = null) { var group = new SessionGroup { @@ -435,6 +435,8 @@ public SessionGroup CreateMultiAgentGroup(string name, MultiAgentMode mode = Mul IsMultiAgent = true, OrchestratorMode = mode, OrchestratorPrompt = orchestratorPrompt, + WorktreeId = worktreeId, + RepoId = repoId, SortOrder = Organization.Groups.Any() ? Organization.Groups.Max(g => g.SortOrder) + 1 : 0 }; Organization.Groups.Add(group); @@ -447,6 +449,8 @@ public SessionGroup CreateMultiAgentGroup(string name, MultiAgentMode mode = Mul if (meta != null) { meta.GroupId = group.Id; + if (worktreeId != null) + meta.WorktreeId = worktreeId; } } } @@ -865,9 +869,9 @@ public string GetEffectiveModel(string sessionName) /// /// Create a multi-agent group from a preset template, creating sessions with assigned models. /// - public async Task CreateGroupFromPresetAsync(Models.GroupPreset preset, string? workingDirectory = null, CancellationToken ct = default) + public async Task CreateGroupFromPresetAsync(Models.GroupPreset preset, string? workingDirectory = null, string? worktreeId = null, string? 
repoId = null, CancellationToken ct = default) { - var group = CreateMultiAgentGroup(preset.Name, preset.Mode); + var group = CreateMultiAgentGroup(preset.Name, preset.Mode, worktreeId: worktreeId, repoId: repoId); if (group == null) return null; // Create orchestrator session @@ -878,6 +882,11 @@ public string GetEffectiveModel(string sessionName) MoveSession(orchName, group.Id); SetSessionRole(orchName, MultiAgentRole.Orchestrator); SetSessionPreferredModel(orchName, preset.OrchestratorModel); + if (worktreeId != null) + { + var meta = GetSessionMeta(orchName); + if (meta != null) meta.WorktreeId = worktreeId; + } } catch (Exception ex) { @@ -894,6 +903,11 @@ public string GetEffectiveModel(string sessionName) await CreateSessionAsync(workerName, workerModel, workingDirectory, ct); MoveSession(workerName, group.Id); SetSessionPreferredModel(workerName, workerModel); + if (worktreeId != null) + { + var meta = GetSessionMeta(workerName); + if (meta != null) meta.WorktreeId = worktreeId; + } } catch (Exception ex) { From 358aca199bc47273c6444bd33f817febcfd3933e Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 12:32:26 -0600 Subject: [PATCH 23/48] Fix team name input: full-width on own row instead of margin-left:auto Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot/Components/Layout/SessionSidebar.razor.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index 9f30985302..af6e24087a 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -207,7 +207,7 @@ } .new-group-input { - margin-left: auto; + flex-basis: 100%; font-size: var(--type-caption1); color: var(--text-primary); background: var(--control-border); From 490c12419a49b412c017fc91acc2128310f886f5 Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 14:47:01 -0600 
Subject: [PATCH 24/48] =?UTF-8?q?Unify=20multi-agent=20creation:=20worktre?= =?UTF-8?q?e=20=E2=86=92=20presets=20+=20custom=20name?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merged the separate 🤖 Multi and 🚀 Preset flows into one: 1. Click 🤖 Multi → pick worktree 2. Choose a preset template OR enter a custom team name Removed the standalone 🚀 button since presets are now part of the multi-agent flow. Added preset-divider styling. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Components/Layout/SessionSidebar.razor | 95 ++++++------------- .../Layout/SessionSidebar.razor.css | 9 ++ 2 files changed, 40 insertions(+), 64 deletions(-) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index a0eb2a1456..cf165a3dd5 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -189,15 +189,16 @@ else - @if (isAddingGroup && !isAddingMultiAgentGroup) + @if (isAddingGroup && !isAddingMultiAgentGroup && pendingMultiAgentWorktree == null) { } - else if (isAddingMultiAgentGroup) + else if (isAddingMultiAgentGroup && pendingMultiAgentWorktree == null) { + @* Step 1: Pick a worktree *@
Select worktree for team: @@ -218,51 +219,37 @@ else }
} + else if (pendingMultiAgentWorktree != null) + { + @* Step 2: Pick a preset or enter a custom name *@ +
+
+ 🌿 @pendingMultiAgentWorktree.Branch + +
+ @foreach (var preset in UserPresets.GetAll(CopilotService.BaseDir)) + { + var p = preset; + + } +
or create empty team:
+ +
+ } else {
-
- @if (showPresetPicker) - { -
-
- @(selectedPreset != null ? $"πŸ“‚ Worktree for \"{selectedPreset.Name}\":" : "Select worktree:") - -
- @if (selectedPreset == null) - { - @foreach (var preset in UserPresets.GetAll(CopilotService.BaseDir)) - { - var p = preset; - - } - } - else - { - @foreach (var wt in RepoManager.Worktrees) - { - var w = wt; - var repo = RepoManager.Repositories.FirstOrDefault(r => r.Id == w.RepoId); - - } - @if (!RepoManager.Worktrees.Any()) - { -
No worktrees available. Add a repository first.
- } - } -
- } }
}; @@ -999,17 +986,9 @@ else { isAddingGroup = false; isAddingMultiAgentGroup = true; - showPresetPicker = false; - selectedPreset = null; pendingMultiAgentWorktree = null; } - private void TogglePresetPicker() - { - showPresetPicker = !showPresetPicker; - selectedPreset = null; - } - private void CancelMultiAgentCreation() { isAddingMultiAgentGroup = false; @@ -1018,30 +997,18 @@ else private async Task SelectWorktreeForGroup(WorktreeInfo wt) { - // Worktree selected β€” now show name input + // Worktree selected β€” advance to step 2 (presets + custom name) pendingMultiAgentWorktree = wt; - isAddingMultiAgentGroup = false; // hide worktree picker - isAddingGroup = true; // show name input + isAddingMultiAgentGroup = false; + isAddingGroup = false; StateHasChanged(); } private WorktreeInfo? pendingMultiAgentWorktree; - private bool showPresetPicker; - private GroupPreset? selectedPreset; - - private async Task CreateFromPreset(GroupPreset preset) - { - // Legacy path β€” should not be reached since we now require worktree - showPresetPicker = false; - selectedPreset = null; - StateHasChanged(); - } - private async Task CreateFromPresetWithWorktree(GroupPreset preset, WorktreeInfo wt) { - showPresetPicker = false; - selectedPreset = null; + pendingMultiAgentWorktree = null; StateHasChanged(); try { diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor.css b/PolyPilot/Components/Layout/SessionSidebar.razor.css index af6e24087a..91f8272b08 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor.css +++ b/PolyPilot/Components/Layout/SessionSidebar.razor.css @@ -1485,6 +1485,15 @@ text-overflow: ellipsis; } +.preset-divider { + font-size: var(--type-caption2); + color: var(--text-muted); + text-align: center; + padding: 0.2rem 0; + border-top: 1px solid var(--control-border); + margin-top: 0.1rem; +} + .preset-models { font-size: var(--type-caption1); color: var(--text-dim); From c61673cfb83f5cca870b09639362b51344851659 Mon Sep 17 00:00:00 2001 From: 
Shane Date: Wed, 18 Feb 2026 14:51:41 -0600 Subject: [PATCH 25/48] Add 'Delete Team' option to multi-agent group context menu Multi-agent groups with a repo now show both 'Delete Team' (removes orchestration group, moves sessions to default) and 'Remove Repo'. Non-repo multi-agent groups show 'Delete Team' instead of 'Delete Group'. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot/Components/Layout/SessionSidebar.razor | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index cf165a3dd5..ef4ddd613d 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -374,6 +374,12 @@ else βž• New Session
+ @if (group.IsMultiAgent) + { + + } @@ -388,7 +394,7 @@ else
} }
From 02ceef19cf1c8788a57c57eb7e4e89ebcda6aede Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 15:07:36 -0600 Subject: [PATCH 26/48] Fix mode selector to reflect actual group mode The - - - - +
- private void ReconcileOrganization() + internal void ReconcileOrganization() { var activeNames = _sessions.Where(kv => !kv.Value.Info.IsHidden).Select(kv => kv.Key).ToHashSet(); bool changed = false; + // Build lookup of multi-agent group IDs so we can protect their sessions + var multiAgentGroupIds = Organization.Groups.Where(g => g.IsMultiAgent).Select(g => g.Id).ToHashSet(); + // Add missing sessions to default group and link to worktrees foreach (var name in activeNames) { @@ -148,6 +151,10 @@ private void ReconcileOrganization() Organization.Sessions.Add(meta); changed = true; } + + // Don't auto-reassign sessions that belong to a multi-agent group + if (multiAgentGroupIds.Contains(meta.GroupId)) + continue; // Auto-link session to worktree if working directory matches if (meta.WorktreeId == null && _sessions.TryGetValue(name, out var sessionState)) @@ -174,8 +181,11 @@ private void ReconcileOrganization() } } - // Ensure sessions with worktrees are in the correct repo group - if (meta.WorktreeId != null && meta.GroupId == SessionGroup.DefaultId) + // Ensure sessions with worktrees are in the correct repo group. + // Skip sessions that were part of a multi-agent team (identifiable by having + // an Orchestrator role or a PreferredModel set β€” regular sessions never have these). 
+ bool wasMultiAgent = meta.Role == MultiAgentRole.Orchestrator || meta.PreferredModel != null; + if (meta.WorktreeId != null && meta.GroupId == SessionGroup.DefaultId && !wasMultiAgent) { var worktree = _repoManager.Worktrees.FirstOrDefault(w => w.Id == meta.WorktreeId); if (worktree != null) From aa4ea4fb2851e7c2300574936c59500bf228163a Mon Sep 17 00:00:00 2001 From: Shane Date: Wed, 18 Feb 2026 17:34:47 -0600 Subject: [PATCH 33/48] Add diagnostic logging for reconciliation orphaning and pruning Keeps lightweight Debug logs for: - LoadOrganization: group/session count on load - ReconcileOrganization: orphaned sessions moved to _default - ReconcileOrganization: sessions pruned (no longer known) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot/Services/CopilotService.Organization.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 921bfcb14b..84f780645b 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -84,6 +84,7 @@ public void LoadOrganization() { var json = File.ReadAllText(OrganizationFile); Organization = JsonSerializer.Deserialize(json) ?? 
new OrganizationState(); + Debug($"LoadOrganization: loaded {Organization.Groups.Count} groups, {Organization.Sessions.Count} sessions"); } else { @@ -207,6 +208,7 @@ internal void ReconcileOrganization() { if (!groupIds.Contains(meta.GroupId)) { + Debug($"ReconcileOrganization: orphaned session '{meta.SessionName}' (GroupId={meta.GroupId}) β†’ _default"); meta.GroupId = SessionGroup.DefaultId; changed = true; } @@ -251,6 +253,9 @@ internal void ReconcileOrganization() } // Remove metadata only for sessions that are truly gone (not in any known set) + var toRemove = Organization.Sessions.Where(m => !knownNames.Contains(m.SessionName)).ToList(); + if (toRemove.Count > 0) + Debug($"ReconcileOrganization: pruning {toRemove.Count} sessions: {string.Join(", ", toRemove.Select(m => m.SessionName))}"); Organization.Sessions.RemoveAll(m => !knownNames.Contains(m.SessionName)); if (changed) SaveOrganization(); From 7a647836fd56a5e1325a0d45254d9d1c20005d27 Mon Sep 17 00:00:00 2001 From: Shane Date: Thu, 19 Feb 2026 08:50:16 -0600 Subject: [PATCH 34/48] Add 30 multi-agent regression tests for all session bugs Tests cover: - Organization JSON corruption resilience (missing fields, extra fields, complex round-trips) - Reconciliation scattering protection (multi-agent sessions, orphaned workers/orchestrators) - Preset creation Role/PreferredModel markers (round-trip preservation) - Mode enum completeness (all values, string serialization) - Reflection loop error resilience (retry logic, sentinel detection, stall handling) - TCS ordering invariant (IsProcessing before TrySetResult) - Full lifecycle delete-recreate scenarios (no contamination) - App restart simulation (serialize-reconcile-verify) - wasMultiAgent heuristic Theory tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/MultiAgentRegressionTests.cs | 922 +++++++++++++++++++ 1 file changed, 922 insertions(+) create mode 100644 PolyPilot.Tests/MultiAgentRegressionTests.cs diff 
--git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs new file mode 100644 index 0000000000..07ab12e3f8 --- /dev/null +++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs @@ -0,0 +1,922 @@ +using System.Text.Json; +using Microsoft.Extensions.DependencyInjection; +using PolyPilot.Models; +using PolyPilot.Services; + +namespace PolyPilot.Tests; + +/// +/// Regression tests covering bugs found during PR #104 multi-agent development. +/// Each test documents a specific bug that was found and fixed, to prevent recurrence. +/// +/// Key bugs covered: +/// 1. TCS ordering: TrySetResult called before IsProcessing=false broke reflection loops +/// 2. Reconciliation scattering: multi-agent sessions moved to repo groups on restart +/// 3. Organization.json corruption: missing fields, wrong enums, partial data +/// 4. Preset creation: Role/PreferredModel not set, breaking reconciliation heuristic +/// 5. Mode enum gaps: OrchestratorReflect missing from dropdowns and serialization +/// 6. 
Reflection loop error handling: unhandled exceptions kill the async task silently +/// +public class MultiAgentRegressionTests +{ + private readonly StubChatDatabase _chatDb = new(); + private readonly StubServerManager _serverManager = new(); + private readonly StubWsBridgeClient _bridgeClient = new(); + private readonly StubDemoService _demoService = new(); + private readonly IServiceProvider _serviceProvider; + + public MultiAgentRegressionTests() + { + var services = new ServiceCollection(); + _serviceProvider = services.BuildServiceProvider(); + } + + private static RepoManager CreateRepoManagerWithState(List repos, List worktrees) + { + var rm = new RepoManager(); + var stateField = typeof(RepoManager).GetField("_state", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + var loadedField = typeof(RepoManager).GetField("_loaded", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + stateField.SetValue(rm, new RepositoryState { Repositories = repos, Worktrees = worktrees }); + loadedField.SetValue(rm, true); + return rm; + } + + private CopilotService CreateService(RepoManager? repoManager = null) => + new CopilotService(_chatDb, _serverManager, _bridgeClient, repoManager ?? new RepoManager(), _serviceProvider, _demoService); + + /// + /// Inject session names into the alias cache so ReconcileOrganization doesn't prune them. + /// + private static void RegisterKnownSessions(CopilotService svc, params string[] sessionNames) + { + var field = typeof(CopilotService).GetField("_aliasCache", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!; + var cache = (Dictionary?)field.GetValue(svc) ?? 
new(); + foreach (var name in sessionNames) + cache[name] = name; + field.SetValue(svc, cache); + } + + #region Bug #1: Organization JSON Corruption Resilience + + /// + /// Bug: PowerShell ConvertTo-Json reformatted organization.json, dropping multi-agent + /// groups on app re-save. Deserialization must handle missing/extra fields gracefully. + /// + [Fact] + public void OrgJson_MissingOptionalFields_DeserializesGracefully() + { + // Simulate organization.json with only required fields + var json = """ + { + "Groups": [ + {"Id": "_default", "Name": "Sessions", "SortOrder": 0}, + {"Id": "ma-1", "Name": "Team", "IsMultiAgent": true} + ], + "Sessions": [ + {"SessionName": "worker-1", "GroupId": "ma-1"} + ] + } + """; + + var state = JsonSerializer.Deserialize(json)!; + + Assert.Equal(2, state.Groups.Count); + var maGroup = state.Groups.First(g => g.Id == "ma-1"); + Assert.True(maGroup.IsMultiAgent); + Assert.Null(maGroup.WorktreeId); + Assert.Null(maGroup.ReflectionState); + Assert.Equal(MultiAgentMode.Broadcast, maGroup.OrchestratorMode); // default + Assert.Single(state.Sessions); + Assert.Equal("ma-1", state.Sessions[0].GroupId); + } + + [Fact] + public void OrgJson_ExtraUnknownFields_DeserializesGracefully() + { + var json = """ + { + "Groups": [ + {"Id": "_default", "Name": "Sessions", "SortOrder": 0, "FutureField": true, "AnotherNew": "value"} + ], + "Sessions": [], + "FutureTopLevel": 42 + } + """; + + // Should not throw β€” unknown properties are ignored by default + var state = JsonSerializer.Deserialize(json)!; + Assert.Single(state.Groups); + } + + [Fact] + public void OrgJson_ReflectionState_ComplexRoundTrip() + { + var cycle = ReflectionCycle.Create("Fix all bugs", 10); + cycle.CurrentIteration = 3; + cycle.LastEvaluation = "Needs more work on error handling"; + cycle.EvaluatorSessionName = "eval-session"; + cycle.RecordEvaluation(1, 0.4, "Initial attempt", "claude-opus-4.6"); + cycle.RecordEvaluation(2, 0.6, "Better but incomplete", 
"claude-opus-4.6"); + cycle.RecordEvaluation(3, 0.75, "Good progress", "claude-opus-4.6"); + + var state = new OrganizationState(); + state.Groups.Add(new SessionGroup + { + Id = "reflect-team", + Name = "Bug Fix Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + ReflectionState = cycle, + WorktreeId = "wt-1", + RepoId = "repo-1" + }); + + var json = JsonSerializer.Serialize(state, new JsonSerializerOptions { WriteIndented = true }); + var restored = JsonSerializer.Deserialize(json)!; + + var group = restored.Groups.First(g => g.Id == "reflect-team"); + Assert.NotNull(group.ReflectionState); + Assert.Equal("Fix all bugs", group.ReflectionState!.Goal); + Assert.Equal(3, group.ReflectionState.CurrentIteration); + Assert.Equal(10, group.ReflectionState.MaxIterations); + Assert.True(group.ReflectionState.IsActive); + Assert.Equal("Needs more work on error handling", group.ReflectionState.LastEvaluation); + Assert.Equal("eval-session", group.ReflectionState.EvaluatorSessionName); + Assert.Equal(3, group.ReflectionState.EvaluationHistory.Count); + Assert.Equal(0.75, group.ReflectionState.EvaluationHistory[2].Score); + } + + [Fact] + public void OrgJson_AllModes_RoundTrip() + { + foreach (var mode in Enum.GetValues()) + { + var group = new SessionGroup + { + Id = $"test-{mode}", + Name = $"Test {mode}", + IsMultiAgent = true, + OrchestratorMode = mode + }; + + var json = JsonSerializer.Serialize(group); + var restored = JsonSerializer.Deserialize(json)!; + + Assert.Equal(mode, restored.OrchestratorMode); + } + } + + [Fact] + public void OrgJson_AllRoles_RoundTrip() + { + foreach (var role in Enum.GetValues()) + { + var meta = new SessionMeta + { + SessionName = $"test-{role}", + Role = role + }; + + var json = JsonSerializer.Serialize(meta); + var restored = JsonSerializer.Deserialize(json)!; + + Assert.Equal(role, restored.Role); + } + } + + #endregion + + #region Bug #2: Reconciliation Scattering Multi-Agent Sessions + + /// + /// 
Bug: ReconcileOrganization auto-moved sessions from _default to repo groups + /// based on WorktreeId, even for orphaned multi-agent sessions. This scattered + /// team members across repo groups after group deletion or restart. + /// + [Fact] + public void Reconcile_SessionInMultiAgentGroup_NeverAutoMoved() + { + var repos = new List + { + new() { Id = "repo-1", Name = "Repo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + svc.GetOrCreateRepoGroup("repo-1", "Repo"); + + var maGroup = svc.CreateMultiAgentGroup("Team", + mode: MultiAgentMode.OrchestratorReflect, + worktreeId: "wt-1", repoId: "repo-1"); + + // Add sessions with worktree IDs (which would normally trigger auto-move) + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-orch", + GroupId = maGroup.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "team-w1", + GroupId = maGroup.Id, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "team-orch", "team-w1"); + + // Run reconciliation multiple times (simulates multiple restarts) + for (int i = 0; i < 5; i++) + svc.ReconcileOrganization(); + + // Sessions must remain in multi-agent group + Assert.All(svc.Organization.Sessions.Where(s => s.SessionName.StartsWith("team-")), + m => Assert.Equal(maGroup.Id, m.GroupId)); + } + + /// + /// Bug: After deleting a multi-agent group, orphaned sessions in _default + /// with WorktreeId were auto-moved to repo group by reconciliation. + /// The wasMultiAgent heuristic (Orchestrator role or PreferredModel set) + /// must prevent this. 
+ /// + [Fact] + public void Reconcile_OrphanedMultiAgentWorker_WithPreferredModel_NotMovedToRepoGroup() + { + var repos = new List + { + new() { Id = "repo-1", Name = "Repo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + svc.GetOrCreateRepoGroup("repo-1", "Repo"); + + // Session with PreferredModel = was a multi-agent worker + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "orphan-worker", + GroupId = SessionGroup.DefaultId, + PreferredModel = "gpt-5.1-codex", + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "orphan-worker"); + svc.ReconcileOrganization(); + + Assert.Equal(SessionGroup.DefaultId, + svc.Organization.Sessions.First(s => s.SessionName == "orphan-worker").GroupId); + } + + [Fact] + public void Reconcile_OrphanedOrchestrator_NotMovedToRepoGroup() + { + var repos = new List + { + new() { Id = "repo-1", Name = "Repo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + svc.GetOrCreateRepoGroup("repo-1", "Repo"); + + // Session with Orchestrator role = was a multi-agent orchestrator + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "orphan-orch", + GroupId = SessionGroup.DefaultId, + Role = MultiAgentRole.Orchestrator, + WorktreeId = "wt-1" + }); + + RegisterKnownSessions(svc, "orphan-orch"); + svc.ReconcileOrganization(); + + Assert.Equal(SessionGroup.DefaultId, + svc.Organization.Sessions.First(s => s.SessionName == "orphan-orch").GroupId); + } + + [Fact] + public void Reconcile_RegularWorker_NoPreferredModel_CanBeAutoMoved() + { + // Verify we didn't break regular session grouping + var meta = new SessionMeta + 
{ + SessionName = "regular", + GroupId = SessionGroup.DefaultId, + Role = MultiAgentRole.Worker, + PreferredModel = null, + WorktreeId = "wt-1" + }; + + // wasMultiAgent check + bool wasMultiAgent = meta.Role == MultiAgentRole.Orchestrator || meta.PreferredModel != null; + Assert.False(wasMultiAgent); + } + + #endregion + + #region Bug #3: Preset Creation Must Set Role/PreferredModel Markers + + /// + /// Bug: Sessions created via CreateGroupFromPresetAsync didn't always have + /// Role and PreferredModel set. Without these markers, reconciliation can't + /// distinguish multi-agent sessions from regular ones. + /// + /// + /// Simulates what CreateGroupFromPresetAsync does: creates a group, then sets + /// Role and PreferredModel on sessions. Verifies the metadata survives a round-trip. + /// + [Fact] + public void PresetGroup_OrchestratorRole_SurvivesRoundTrip() + { + var groupId = Guid.NewGuid().ToString(); + var org = new OrganizationState(); + org.Groups.Add(new SessionGroup { Id = groupId, Name = "Test Preset", IsMultiAgent = true, OrchestratorMode = MultiAgentMode.OrchestratorReflect }); + org.Sessions.Add(new SessionMeta { SessionName = "orch-1", GroupId = groupId, Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6" }); + org.Sessions.Add(new SessionMeta { SessionName = "worker-1", GroupId = groupId, Role = MultiAgentRole.Worker, PreferredModel = "gpt-5.1-codex" }); + + var json = JsonSerializer.Serialize(org); + var restored = JsonSerializer.Deserialize(json)!; + + var orchMeta = restored.Sessions.First(s => s.Role == MultiAgentRole.Orchestrator); + Assert.Equal("claude-opus-4.6", orchMeta.PreferredModel); + Assert.Equal(groupId, orchMeta.GroupId); + } + + [Fact] + public void PresetGroup_AllWorkers_HavePreferredModel() + { + var groupId = Guid.NewGuid().ToString(); + var org = new OrganizationState(); + org.Groups.Add(new SessionGroup { Id = groupId, Name = "Test Preset", IsMultiAgent = true, OrchestratorMode = MultiAgentMode.Broadcast 
}); + org.Sessions.Add(new SessionMeta { SessionName = "orch-1", GroupId = groupId, Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6" }); + org.Sessions.Add(new SessionMeta { SessionName = "worker-1", GroupId = groupId, Role = MultiAgentRole.Worker, PreferredModel = "gpt-5.1-codex" }); + org.Sessions.Add(new SessionMeta { SessionName = "worker-2", GroupId = groupId, Role = MultiAgentRole.Worker, PreferredModel = "gpt-4.1" }); + + var json = JsonSerializer.Serialize(org); + var restored = JsonSerializer.Deserialize(json)!; + + var workers = restored.Sessions.Where(s => s.GroupId == groupId && s.Role != MultiAgentRole.Orchestrator).ToList(); + Assert.Equal(2, workers.Count); + Assert.All(workers, w => Assert.NotNull(w.PreferredModel)); + } + + [Fact] + public void CreateMultiAgentGroup_ManualSessions_PreservesExistingMetadata() + { + var svc = CreateService(); + + // Pre-create sessions with specific metadata + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "session-a", + PreferredModel = "gpt-5.1-codex" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "session-b", + PreferredModel = "claude-sonnet-4.5" + }); + + var group = svc.CreateMultiAgentGroup("Team", + sessionNames: new List { "session-a", "session-b" }); + + var a = svc.Organization.Sessions.First(s => s.SessionName == "session-a"); + var b = svc.Organization.Sessions.First(s => s.SessionName == "session-b"); + + // Sessions should be in the group + Assert.Equal(group.Id, a.GroupId); + Assert.Equal(group.Id, b.GroupId); + // PreferredModel should be preserved + Assert.Equal("gpt-5.1-codex", a.PreferredModel); + Assert.Equal("claude-sonnet-4.5", b.PreferredModel); + } + + #endregion + + #region Bug #4: Mode Enum Completeness + + /// + /// Bug: Dashboard mode dropdowns were missing OrchestratorReflect entirely. + /// Ensure all enum values are present and serializable. 
+ /// + [Fact] + public void MultiAgentMode_HasAllExpectedValues() + { + var values = Enum.GetValues(); + Assert.Contains(MultiAgentMode.Broadcast, values); + Assert.Contains(MultiAgentMode.Sequential, values); + Assert.Contains(MultiAgentMode.Orchestrator, values); + Assert.Contains(MultiAgentMode.OrchestratorReflect, values); + Assert.Equal(4, values.Length); + } + + [Fact] + public void MultiAgentMode_StringSerialization_AllValues() + { + // Important: modes serialize as strings (JsonStringEnumConverter), not ints + foreach (var mode in Enum.GetValues()) + { + var json = JsonSerializer.Serialize(mode); + var restored = JsonSerializer.Deserialize(json); + Assert.Equal(mode, restored); + // Verify it's a string, not a number + Assert.StartsWith("\"", json); + } + } + + [Fact] + public void MultiAgentRole_HasAllExpectedValues() + { + var values = Enum.GetValues(); + Assert.Contains(MultiAgentRole.Worker, values); + Assert.Contains(MultiAgentRole.Orchestrator, values); + Assert.Equal(2, values.Length); + } + + #endregion + + #region Bug #5: Reflection Loop Error Resilience + + /// + /// Bug: No try-catch around the reflection while loop body meant any exception + /// (e.g., from SendPromptAndWaitAsync) silently killed the entire async task. 
+ /// + [Fact] + public void ReflectionCycle_ErrorRetry_DecrementsThenStalls() + { + // Simulates the error handling logic in SendViaOrchestratorReflectAsync catch block + var state = ReflectionCycle.Create("test", 10); + state.IsActive = true; + state.CurrentIteration = 3; + + // Simulate error: decrement iteration, increment stalls + state.CurrentIteration--; // retry same iteration + state.ConsecutiveStalls++; + Assert.Equal(2, state.CurrentIteration); + Assert.Equal(1, state.ConsecutiveStalls); + + // Second error + state.CurrentIteration--; + state.ConsecutiveStalls++; + Assert.Equal(1, state.CurrentIteration); + Assert.Equal(2, state.ConsecutiveStalls); + + // Third error — should trigger stall + state.ConsecutiveStalls++; + Assert.True(state.ConsecutiveStalls >= 3); + state.IsStalled = true; + Assert.True(state.IsStalled); + } + + [Fact] + public void ReflectionCycle_LoopConditions_AllChecked() + { + var state = ReflectionCycle.Create("test", 5); + + // Active + not paused + under max → should continue + Assert.True(state.IsActive && !state.IsPaused && state.CurrentIteration < state.MaxIterations); + + // Paused → should stop + state.IsPaused = true; + Assert.False(state.IsActive && !state.IsPaused && state.CurrentIteration < state.MaxIterations); + state.IsPaused = false; + + // At max iterations → should stop + state.CurrentIteration = 5; + Assert.False(state.IsActive && !state.IsPaused && state.CurrentIteration < state.MaxIterations); + state.CurrentIteration = 0; + + // Not active → should stop + state.IsActive = false; + Assert.False(state.IsActive && !state.IsPaused && state.CurrentIteration < state.MaxIterations); + } + + [Fact] + public void ReflectionCycle_CompletionSentinels_Detected() + { + // [[GROUP_REFLECT_COMPLETE]] sentinel + var response1 = "Analysis complete.
[[GROUP_REFLECT_COMPLETE]] All tasks finished."; + Assert.Contains("[[GROUP_REFLECT_COMPLETE]]", response1, StringComparison.OrdinalIgnoreCase); + + // [[NEEDS_ITERATION]] sentinel → score 0.4 + var response2 = "Progress made but [[NEEDS_ITERATION]] more work needed."; + var score = response2.Contains("[[NEEDS_ITERATION]]", StringComparison.OrdinalIgnoreCase) ? 0.4 : 0.7; + Assert.Equal(0.4, score); + + // No sentinel → score 0.7 + var response3 = "Good progress on all fronts."; + score = response3.Contains("[[NEEDS_ITERATION]]", StringComparison.OrdinalIgnoreCase) ? 0.4 : 0.7; + Assert.Equal(0.7, score); + } + + #endregion + + #region Bug #6: TCS Ordering Invariant + + /// + /// Bug: TrySetResult was called BEFORE IsProcessing=false in CompleteResponse. + /// When the TCS continuation runs synchronously (reflection loop), the next + /// SendPromptAsync sees IsProcessing=true and throws. + /// + /// This test verifies the invariant at the model level: IsProcessing must be + /// the first thing cleared so any synchronous continuation sees clean state. + /// + [Fact] + public void IsProcessing_MustBeFalse_BeforeTCSCompletion() + { + // Simulate what CompleteResponse does: state transitions must be ordered + var isProcessing = true; + var tcs = new TaskCompletionSource(); + string? observedFromContinuation = null; + + // Add a synchronous continuation that checks IsProcessing + tcs.Task.ContinueWith(t => + { + observedFromContinuation = isProcessing ?
"BUG: still processing" : "OK: not processing"; + }, TaskContinuationOptions.ExecuteSynchronously); + + // Correct order: clear IsProcessing FIRST, then complete TCS + isProcessing = false; + tcs.TrySetResult("response"); + + // Give continuation a chance to run + tcs.Task.Wait(TimeSpan.FromSeconds(1)); + + Assert.Equal("OK: not processing", observedFromContinuation); + } + + [Fact] + public void IsProcessing_BugReproduction_WrongOrder() + { + // Demonstrate that wrong order causes the bug + var isProcessing = true; + var tcs = new TaskCompletionSource(); + string? observedFromContinuation = null; + + tcs.Task.ContinueWith(t => + { + observedFromContinuation = isProcessing ? "BUG: still processing" : "OK: not processing"; + }, TaskContinuationOptions.ExecuteSynchronously); + + // WRONG order (the old bug): complete TCS while IsProcessing is still true + tcs.TrySetResult("response"); + isProcessing = false; + + tcs.Task.Wait(TimeSpan.FromSeconds(1)); + + // This would have been the bug — continuation sees stale state + Assert.Equal("BUG: still processing", observedFromContinuation); + } + + [Fact] + public void IsProcessing_ErrorPath_MustAlsoClearFirst() + { + // Same invariant for the error path (SessionErrorEvent handler) + var isProcessing = true; + var tcs = new TaskCompletionSource(); + bool?
sawProcessing = null; + + tcs.Task.ContinueWith(t => + { + sawProcessing = isProcessing; + }, TaskContinuationOptions.ExecuteSynchronously); + + // Correct error path: clear IsProcessing, then set exception + isProcessing = false; + tcs.TrySetException(new Exception("test error")); + + try { tcs.Task.Wait(TimeSpan.FromSeconds(1)); } catch { } + + Assert.False(sawProcessing); + } + + #endregion + + #region Bug #7: Full Lifecycle - Delete and Recreate + + [Fact] + public void Lifecycle_DeleteGroup_ThenCreateNewGroup_NoContamination() + { + var svc = CreateService(); + + // Create first team + var group1 = svc.CreateMultiAgentGroup("Team Alpha", + mode: MultiAgentMode.OrchestratorReflect); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "alpha-orch", + GroupId = group1.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "claude-opus-4.6" + }); + + // Delete it + svc.DeleteGroup(group1.Id); + + // Create second team + var group2 = svc.CreateMultiAgentGroup("Team Beta", + mode: MultiAgentMode.Orchestrator); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "beta-orch", + GroupId = group2.Id, + Role = MultiAgentRole.Orchestrator, + PreferredModel = "gpt-5" + }); + + // Verify no cross-contamination + Assert.NotEqual(group1.Id, group2.Id); + var alpha = svc.Organization.Sessions.First(s => s.SessionName == "alpha-orch"); + var beta = svc.Organization.Sessions.First(s => s.SessionName == "beta-orch"); + Assert.Equal(SessionGroup.DefaultId, alpha.GroupId); // moved to default + Assert.Equal(group2.Id, beta.GroupId); // in new group + } + + [Fact] + public void Lifecycle_CreateTeam_SerializeDeserialize_DeleteTeam_Serialize() + { + var svc = CreateService(); + var group = svc.CreateMultiAgentGroup("QRC", + mode: MultiAgentMode.OrchestratorReflect, + worktreeId: "wt-1", repoId: "repo-1"); + + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "qrc-orch", GroupId = group.Id, + Role = MultiAgentRole.Orchestrator, 
PreferredModel = "claude-opus-4.6", WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "qrc-w1", GroupId = group.Id, + PreferredModel = "gpt-4.1", WorktreeId = "wt-1" + }); + + // Serialize (app save) + var json1 = JsonSerializer.Serialize(svc.Organization, new JsonSerializerOptions { WriteIndented = true }); + + // Deserialize (app reload) + var restored = JsonSerializer.Deserialize(json1)!; + Assert.Contains(restored.Groups, g => g.Id == group.Id && g.IsMultiAgent); + Assert.Equal(2, restored.Sessions.Count(s => s.GroupId == group.Id)); + + // Delete the group + restored.Groups.RemoveAll(g => g.Id == group.Id); + foreach (var s in restored.Sessions.Where(s => s.GroupId == group.Id)) + s.GroupId = SessionGroup.DefaultId; + + // Serialize again + var json2 = JsonSerializer.Serialize(restored, new JsonSerializerOptions { WriteIndented = true }); + var final = JsonSerializer.Deserialize(json2)!; + + // Group should be gone, sessions in default with preserved metadata + Assert.DoesNotContain(final.Groups, g => g.Id == group.Id); + var orch = final.Sessions.First(s => s.SessionName == "qrc-orch"); + Assert.Equal(SessionGroup.DefaultId, orch.GroupId); + Assert.Equal(MultiAgentRole.Orchestrator, orch.Role); + Assert.Equal("claude-opus-4.6", orch.PreferredModel); + } + + #endregion + + #region Scenario: Full App Restart Simulation + + /// + /// Simulates what happens when the app restarts: + /// 1. Organization loaded from disk + /// 2. ReconcileOrganization runs with no active sessions + /// 3. Sessions restored + /// 4. ReconcileOrganization runs again + /// + /// Multi-agent groups must survive this entire sequence. 
+ /// + [Fact] + public void Scenario_AppRestart_MultiAgentGroupSurvives() + { + // Phase 1: Create state that would exist on disk + var orgState = new OrganizationState(); + orgState.Groups.Add(new SessionGroup + { + Id = "ma-team", + Name = "Reflect Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + WorktreeId = "wt-1", + RepoId = "repo-1", + SortOrder = 2 + }); + orgState.Sessions.Add(new SessionMeta + { + SessionName = "team-orch", GroupId = "ma-team", + Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6", + WorktreeId = "wt-1" + }); + orgState.Sessions.Add(new SessionMeta + { + SessionName = "team-w1", GroupId = "ma-team", + PreferredModel = "gpt-5.1-codex", WorktreeId = "wt-1" + }); + orgState.Sessions.Add(new SessionMeta + { + SessionName = "team-w2", GroupId = "ma-team", + PreferredModel = "gpt-4.1", WorktreeId = "wt-1" + }); + orgState.Sessions.Add(new SessionMeta + { + SessionName = "regular-session", GroupId = SessionGroup.DefaultId + }); + + // Serialize to simulate disk + var json = JsonSerializer.Serialize(orgState, new JsonSerializerOptions { WriteIndented = true }); + + // Phase 2: Deserialize (LoadOrganization) + var restored = JsonSerializer.Deserialize(json)!; + + // Verify the multi-agent group survived deserialization + var maGroup = restored.Groups.FirstOrDefault(g => g.Id == "ma-team"); + Assert.NotNull(maGroup); + Assert.True(maGroup!.IsMultiAgent); + Assert.Equal(MultiAgentMode.OrchestratorReflect, maGroup.OrchestratorMode); + + // Phase 3: Simulate ReconcileOrganization with sessions from aliases + var repos = new List + { + new() { Id = "repo-1", Name = "Repo", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + + // Load the state by manipulating the groups/sessions directly (simulates 
LoadOrganization) + foreach (var g in restored.Groups) + { + if (!svc.Organization.Groups.Any(og => og.Id == g.Id)) + svc.Organization.Groups.Add(g); + } + foreach (var s in restored.Sessions) + svc.Organization.Sessions.Add(s); + + // Register sessions as known (simulates alias file) + RegisterKnownSessions(svc, "team-orch", "team-w1", "team-w2", "regular-session"); + + // First reconciliation (called inside LoadOrganization, no active sessions yet) + svc.ReconcileOrganization(); + + // Verify multi-agent sessions survived + Assert.All( + svc.Organization.Sessions.Where(s => s.SessionName.StartsWith("team-")), + m => Assert.Equal("ma-team", m.GroupId)); + + // Second reconciliation (after sessions restored) + svc.ReconcileOrganization(); + + // Still intact + Assert.All( + svc.Organization.Sessions.Where(s => s.SessionName.StartsWith("team-")), + m => Assert.Equal("ma-team", m.GroupId)); + + // Multi-agent group still exists + Assert.Contains(svc.Organization.Groups, g => g.Id == "ma-team" && g.IsMultiAgent); + } + + /// + /// Verify that reconciliation handles a mix of multi-agent and regular sessions + /// without moving any multi-agent session to a repo group. 
+ /// + [Fact] + public void Scenario_MixedSessions_ReconcileDoesNotScatter() + { + var repos = new List + { + new() { Id = "repo-1", Name = "PolyPilot", Url = "https://github.com/test/repo" } + }; + var worktrees = new List + { + new() { Id = "wt-1", RepoId = "repo-1", Branch = "main", Path = "/tmp/wt-1" }, + new() { Id = "wt-2", RepoId = "repo-1", Branch = "feature", Path = "/tmp/wt-2" } + }; + var rm = CreateRepoManagerWithState(repos, worktrees); + var svc = CreateService(rm); + var repoGroup = svc.GetOrCreateRepoGroup("repo-1", "PolyPilot"); + + // Multi-agent group for wt-1 + var maGroup = svc.CreateMultiAgentGroup("Team", worktreeId: "wt-1", repoId: "repo-1"); + + // Multi-agent sessions + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "ma-orch", GroupId = maGroup.Id, + Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6", WorktreeId = "wt-1" + }); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "ma-w1", GroupId = maGroup.Id, + PreferredModel = "gpt-5.1-codex", WorktreeId = "wt-1" + }); + + // Regular session on same worktree in repo group + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "regular-1", GroupId = repoGroup.Id, WorktreeId = "wt-1" + }); + + // Regular session in default + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "regular-default", GroupId = SessionGroup.DefaultId + }); + + RegisterKnownSessions(svc, "ma-orch", "ma-w1", "regular-1", "regular-default"); + svc.ReconcileOrganization(); + + // Multi-agent sessions: still in multi-agent group + Assert.Equal(maGroup.Id, svc.Organization.Sessions.First(s => s.SessionName == "ma-orch").GroupId); + Assert.Equal(maGroup.Id, svc.Organization.Sessions.First(s => s.SessionName == "ma-w1").GroupId); + + // Regular sessions: unchanged + Assert.Equal(repoGroup.Id, svc.Organization.Sessions.First(s => s.SessionName == "regular-1").GroupId); + Assert.Equal(SessionGroup.DefaultId, svc.Organization.Sessions.First(s
=> s.SessionName == "regular-default").GroupId); + } + + #endregion + + #region Scenario: wasMultiAgent Heuristic Correctness + + [Theory] + [InlineData(MultiAgentRole.Orchestrator, null, true)] // Orchestrator role → multi-agent + [InlineData(MultiAgentRole.Worker, "gpt-5.1-codex", true)] // Worker with PreferredModel → multi-agent + [InlineData(MultiAgentRole.Worker, null, false)] // Plain worker → not multi-agent + public void WasMultiAgent_Heuristic_CorrectForAllCombinations( + MultiAgentRole role, string? preferredModel, bool expectedWasMultiAgent) + { + var meta = new SessionMeta + { + SessionName = "test", + Role = role, + PreferredModel = preferredModel + }; + + bool wasMultiAgent = meta.Role == MultiAgentRole.Orchestrator || meta.PreferredModel != null; + Assert.Equal(expectedWasMultiAgent, wasMultiAgent); + } + + #endregion + + #region Scenario: Stall Detection Alignment + + /// + /// Both single-agent and multi-agent stall detection must use + /// 2-consecutive-stalls tolerance (not break on first).
+ /// + [Fact] + public void StallDetection_ConsecutiveToleranceIs2() + { + var cycle = ReflectionCycle.Create("test"); + cycle.IsActive = true; + + // 1st stall — warning only + cycle.Advance("same response"); + cycle.Advance("same response"); + Assert.Equal(1, cycle.ConsecutiveStalls); + Assert.False(cycle.IsStalled); + + // 2nd stall — stops + cycle.Advance("same response"); + Assert.Equal(2, cycle.ConsecutiveStalls); + Assert.True(cycle.IsStalled); + } + + [Fact] + public void StallDetection_ResetOnDifferentContent() + { + var cycle = ReflectionCycle.Create("test"); + cycle.IsActive = true; + + cycle.Advance("response A"); + cycle.Advance("response A"); // 1st stall + Assert.Equal(1, cycle.ConsecutiveStalls); + + cycle.Advance("completely different response B"); // resets + Assert.Equal(0, cycle.ConsecutiveStalls); + Assert.False(cycle.IsStalled); + } + + #endregion +} From 69d8996bcb1ea31f46c04d769633025a46236279 Mon Sep 17 00:00:00 2001 From: Shane Date: Thu, 19 Feb 2026 12:56:52 -0600 Subject: [PATCH 35/48] Add multi-agent architecture spec and executable test scenarios - docs/multi-agent-orchestration.md: Full spec covering OrchestratorReflect loop, all 4 modes, sentinel protocol, stall detection, TCS ordering invariant, reconciliation rules, error handling, and testing guide - PolyPilot.Tests/Scenarios/multi-agent-scenarios.json: 10 executable CDP scenarios covering reflection loop, stall detection, reconciliation, broadcast, preset creation, and TCS ordering regression - .github/copilot-instructions.md: Updated pointer to docs and scenarios Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 2 + .../Scenarios/multi-agent-scenarios.json | 174 ++++++++++ docs/multi-agent-orchestration.md | 301 ++++++++++++++++++ 3 files changed, 477 insertions(+) create mode 100644 PolyPilot.Tests/Scenarios/multi-agent-scenarios.json create mode 100644 docs/multi-agent-orchestration.md diff --git
a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 3c545f4153..d4725ac9dc 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -56,6 +56,8 @@ For Android, always run `adb reverse tcp:9223 tcp:9223` after deploy. ## Architecture +**See `docs/multi-agent-orchestration.md` for the multi-agent architecture spec** (orchestration modes, reflection loop, sentinel protocol, invariants). Test scenarios in `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`. Read these before modifying orchestration, reconciliation, or TCS completion logic. + This is a .NET MAUI Blazor Hybrid app targeting Mac Catalyst, Android, and iOS. It manages multiple GitHub Copilot CLI sessions through a native GUI. ### Three-Layer Stack diff --git a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json new file mode 100644 index 0000000000..0d017aae36 --- /dev/null +++ b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json @@ -0,0 +1,174 @@ +{ + "description": "Multi-agent orchestration scenarios for PolyPilot. Tests cover the OrchestratorReflect loop, stall detection, reconciliation stability, and group lifecycle. Each scenario can be executed against a running app using MauiDevFlow CDP commands. See docs/multi-agent-orchestration.md for the architecture spec.", + "prerequisites": { + "build": "cd PolyPilot && .\\relaunch.ps1", + "waitForAgent": "maui-devflow MAUI status", + "initialMode": "Persistent", + "notes": "App must be in Persistent or Demo mode. Multi-agent features require at least one worktree configured." 
+ }, + "scenarios": [ + { + "id": "reflect-loop-completes-goal-met", + "name": "OrchestratorReflect loop runs to goal completion", + "description": "Verifies the full plan-dispatch-collect-evaluate loop runs and exits when the evaluator signals [[GROUP_REFLECT_COMPLETE]] or scores >= 0.9.", + "invariants": [ + "ReflectionState.GoalMet == true on exit", + "ReflectionState.IsActive == false on exit", + "ReflectionState.CurrentIteration >= 1", + "All workers received prompts containing the original user request" + ], + "steps": [ + { "action": "navigate", "route": "/multi-agent" }, + { "action": "createGroup", "mode": "OrchestratorReflect", "workers": 2, "maxIterations": 3 }, + { "action": "sendPrompt", "text": "Analyze the project structure and suggest improvements" }, + { "action": "waitForPhase", "phase": "Planning", "timeout": 30 }, + { "action": "waitForPhase", "phase": "Dispatching", "timeout": 60 }, + { "action": "waitForPhase", "phase": "WaitingForWorkers", "timeout": 120 }, + { "action": "waitForPhase", "phase": "Synthesizing", "timeout": 60 }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 600 }, + { "action": "assertReflectionState", "field": "IsActive", "expected": false }, + { "action": "assertReflectionState", "field": "CurrentIteration", "operator": ">=", "value": 1 } + ] + }, + { + "id": "reflect-loop-max-iterations", + "name": "OrchestratorReflect stops at MaxIterations", + "description": "Verifies the loop exits when MaxIterations is reached without the goal being met.", + "invariants": [ + "ReflectionState.CurrentIteration == MaxIterations on exit", + "ReflectionState.GoalMet == false", + "ReflectionState.IsActive == false" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "workers": 1, "maxIterations": 2 }, + { "action": "sendPrompt", "text": "Write a perfect novel (intentionally impossible in 2 iterations)" }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 600 }, + { "action": 
"assertReflectionState", "field": "CurrentIteration", "expected": 2 }, + { "action": "assertReflectionState", "field": "GoalMet", "expected": false } + ] + }, + { + "id": "stall-detection-triggers", + "name": "Stall detection fires after 2 consecutive similar responses", + "description": "Verifies that if the orchestrator synthesis is >90% similar (Jaccard) for 2 consecutive iterations, the loop stops with IsStalled=true.", + "invariants": [ + "ReflectionState.IsStalled == true on exit", + "ReflectionState.ConsecutiveStalls >= 2", + "ReflectionState.LastSimilarity > 0.9" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "workers": 1, "maxIterations": 10 }, + { "action": "sendPrompt", "text": "Repeat the same analysis over and over" }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 600 }, + { "action": "assertReflectionState", "field": "IsStalled", "expected": true } + ] + }, + { + "id": "group-survives-restart", + "name": "Multi-agent group persists across app restart", + "description": "Verifies that after creating a multi-agent group, killing the app, and relaunching, the group still exists with all sessions correctly assigned.", + "invariants": [ + "Group.IsMultiAgent == true after restart", + "All sessions retain their GroupId", + "Orchestrator session retains Role == Orchestrator", + "Worker sessions retain PreferredModel values", + "No sessions scattered to repo groups" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "name": "Restart Test", "workers": 2 }, + { "action": "captureGroupState", "capture": "beforeRestart" }, + { "action": "restartApp" }, + { "action": "waitForAgent", "timeout": 120 }, + { "action": "captureGroupState", "capture": "afterRestart" }, + { "action": "assertEqual", "left": "beforeRestart.groupCount", "right": "afterRestart.groupCount" }, + { "action": "assertEqual", "left": "beforeRestart.sessionNames", "right": "afterRestart.sessionNames" }, + { "action": 
"assertOrgJson", "check": "noSessionsInDefaultWithMultiAgentMarkers" } + ] + }, + { + "id": "reconciliation-protects-multi-agent", + "name": "Reconciliation does not scatter multi-agent sessions", + "description": "Verifies that ReconcileOrganization() does not move sessions with Role=Orchestrator or PreferredModel!=null out of their multi-agent group into repo groups.", + "invariants": [ + "Sessions with IsMultiAgent group membership are never auto-moved", + "Orphaned sessions with Role==Orchestrator or PreferredModel!=null stay in _default", + "Regular sessions (no markers) ARE auto-moved to repo groups normally" + ], + "steps": [ + { "action": "createGroup", "mode": "Orchestrator", "workers": 2 }, + { "action": "readOrgJson", "capture": "orgBefore" }, + { "action": "restartApp" }, + { "action": "readOrgJson", "capture": "orgAfter" }, + { "action": "assertGroupMembership", "unchanged": true } + ] + }, + { + "id": "delete-group-no-contamination", + "name": "Deleted group sessions don't contaminate new groups", + "description": "Verifies that deleting a multi-agent group and creating a new one produces a clean group with no leftover sessions from the old one.", + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "name": "Group A", "workers": 2 }, + { "action": "captureGroupState", "capture": "groupA" }, + { "action": "deleteGroup", "name": "Group A" }, + { "action": "createGroup", "mode": "OrchestratorReflect", "name": "Group B", "workers": 2 }, + { "action": "captureGroupState", "capture": "groupB" }, + { "action": "assertNoOverlap", "left": "groupA.sessionNames", "right": "groupB.sessionNames" } + ] + }, + { + "id": "broadcast-mode-all-receive", + "name": "Broadcast mode sends to all sessions", + "description": "Verifies that in Broadcast mode, the same prompt is sent to all sessions simultaneously.", + "steps": [ + { "action": "createGroup", "mode": "Broadcast", "workers": 3 }, + { "action": "sendPrompt", "text": "Hello from broadcast" }, + 
{ "action": "waitForAllSessions", "state": "idle", "timeout": 120 }, + { "action": "assertAllSessionsReceived", "text": "Hello from broadcast" } + ] + }, + { + "id": "orchestrator-single-pass", + "name": "Orchestrator mode runs one pass without iteration", + "description": "Verifies that Orchestrator (non-reflect) mode plans, dispatches, collects, and synthesizes exactly once.", + "steps": [ + { "action": "createGroup", "mode": "Orchestrator", "workers": 2 }, + { "action": "sendPrompt", "text": "Review this code" }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 300 }, + { "action": "assertOrchestratorSynthesized" }, + { "action": "assertNoReflectionLoop", "note": "Should not have iterated" } + ] + }, + { + "id": "tcs-ordering-reflection-continues", + "name": "Reflection loop continues past iteration 1 (TCS ordering invariant)", + "description": "Regression test for the bug where IsProcessing was set to false AFTER TrySetResult, causing the next SendPromptAsync to throw. The loop must reach at least iteration 2.", + "invariants": [ + "IsProcessing = false BEFORE TrySetResult in CompleteResponse", + "ReflectionState.CurrentIteration >= 2 (proves loop continued)" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "workers": 1, "maxIterations": 3 }, + { "action": "sendPrompt", "text": "Iterate on this multiple times" }, + { "action": "waitForPhase", "phase": "Complete", "timeout": 600 }, + { "action": "assertReflectionState", "field": "CurrentIteration", "operator": ">=", "value": 2 } + ] + }, + { + "id": "preset-creates-correct-markers", + "name": "Group preset sets Role and PreferredModel on all sessions", + "description": "Verifies that CreateGroupFromPresetAsync correctly sets Role=Orchestrator on the orchestrator session and PreferredModel on all sessions.", + "invariants": [ + "Orchestrator session has Role == Orchestrator", + "Orchestrator session has PreferredModel == preset.OrchestratorModel", + "Worker sessions have 
PreferredModel == preset.WorkerModels[i]", + "All sessions have GroupId matching the new group" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Quick Reflection Cycle" }, + { "action": "readOrgJson", "capture": "org" }, + { "action": "assertSessionMeta", "role": "Orchestrator", "hasPreferredModel": true }, + { "action": "assertAllWorkers", "havePreferredModel": true } + ] + } + ] +} diff --git a/docs/multi-agent-orchestration.md b/docs/multi-agent-orchestration.md new file mode 100644 index 0000000000..62a1a21a4f --- /dev/null +++ b/docs/multi-agent-orchestration.md @@ -0,0 +1,301 @@ +# Multi-Agent Orchestration — Architecture Spec + +> **Read this before modifying orchestration, sentinel protocol, session reconciliation, or reflection loops.** + +## Overview + +PolyPilot's multi-agent system lets you create a **team of AI sessions** that work together. Each session can use a different AI model. An orchestrator coordinates work dispatch, response collection, and quality evaluation. + +### Key Files + +| File | Purpose | +|------|---------| +| `PolyPilot/Services/CopilotService.Organization.cs` | Orchestration engine (dispatch, reflection loop, reconciliation) | +| `PolyPilot/Models/SessionOrganization.cs` | `SessionGroup`, `SessionMeta`, `MultiAgentMode`, `MultiAgentRole` | +| `PolyPilot/Models/ReflectionCycle.cs` | Reflection state, stall detection, sentinel parsing, evaluator prompts | +| `PolyPilot/Services/CopilotService.Events.cs` | TCS completion (IsProcessing → TrySetResult ordering) | +| `PolyPilot.Tests/MultiAgentRegressionTests.cs` | 30 regression tests covering all known bugs | +| `PolyPilot.Tests/SessionOrganizationTests.cs` | 14 grouping stability tests | +| `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json` | Executable CDP test scenarios | + +--- + +## Orchestration Modes + +### Broadcast +Same prompt sent to **all sessions simultaneously**. No orchestrator. Each session responds independently. 
Use for: comparing model outputs, getting diverse perspectives. + +### Sequential +Prompt sent to sessions **one at a time**. Each session sees previous responses. Use for: chain-of-thought across models, iterative refinement. + +### Orchestrator (Single-Pass) +One orchestrator session plans and delegates: +1. **Plan** — Orchestrator receives user prompt + list of available workers with their models +2. **Dispatch** — Orchestrator emits `@worker:name task` assignments, parsed by `ParseTaskAssignments` +3. **Collect** — Workers execute in parallel (`Task.WhenAll`), each with 10-min timeout +4. **Synthesize** — Worker results sent back to orchestrator for final synthesis + +No iteration. One pass through the loop. + +### OrchestratorReflect (Iterative — The Main Mode) +Same as Orchestrator but **loops** until the goal is met, quality stalls, or max iterations reached. This is the primary mode for serious multi-agent work. + +--- + +## OrchestratorReflect — Detailed Loop + +### Participants +- **1 Orchestrator** — Plans, delegates, synthesizes. Set via `SessionMeta.Role = Orchestrator` +- **N Workers** — Execute assigned tasks in parallel. 
Each can use a different model (`SessionMeta.PreferredModel`) +- **1 Evaluator** (optional) — Independent quality judge on a separate model (`ReflectionCycle.EvaluatorSessionName`) + +### The Loop (runs in `SendViaOrchestratorReflectAsync`) + +``` +while (IsActive && !IsPaused && CurrentIteration < MaxIterations): + CurrentIteration++ + + Phase 1: PLAN + ├── Iteration 1: BuildOrchestratorPlanningPrompt(userPrompt, workerNames) + └── Iteration 2+: BuildReplanPrompt(lastEvaluation, workerNames, userPrompt) + + Orchestrator responds with task assignments: + @worker:worker-1 Implement the auth module + @worker:worker-2 Write tests for the auth module + + ParseTaskAssignments extracts these → List + If no assignments parsed → orchestrator decided goal is met → break + + Phase 2: DISPATCH + └── Send each assignment to its worker in parallel (Task.WhenAll) + Each worker gets: "You are a worker agent..." + original prompt + assigned task + + Phase 3: COLLECT + └── Wait for all workers (SendPromptAndWaitAsync, 10-min timeout per worker) + Returns List (response, success, duration) + + Phase 4: EVALUATE (two paths) + ├── WITH dedicated evaluator: + │ ├── Orchestrator synthesizes worker results + │ ├── Evaluator scores quality (0.0–1.0) with rationale + │ ├── Score ≥ 0.9 or [[GROUP_REFLECT_COMPLETE]] → goal met → break + │ └── RecordEvaluation tracks trend (Improving/Stable/Degrading) + │ + └── SELF-evaluation (no evaluator): + ├── Orchestrator gets combined synthesis + eval prompt + ├── [[GROUP_REFLECT_COMPLETE]] sentinel → goal met → break + └── [[NEEDS_ITERATION]] sentinel → scored as 0.4, continue + + Phase 5: STALL DETECTION + ├── CheckStall() compares synthesis response to previous + ├── Jaccard token similarity > 0.9 → stall detected + ├── 1st consecutive stall: warn but continue + └── 2nd consecutive stall: IsStalled = true → break + + Phase 6: AUTO-ADJUST + └── 
AutoAdjustFromFeedback analyzes worker results, may suggest model changes + + SaveOrganization() after each iteration +``` + +### Exit Conditions (whichever hits first) + +| Condition | How Detected | State | +|-----------|-------------|-------| +| ✅ Goal met | Evaluator score ≥ 0.9 or `[[GROUP_REFLECT_COMPLETE]]` sentinel | `GoalMet = true` | +| ⏱️ Max iterations | `CurrentIteration >= MaxIterations` | `IsActive = false` | +| ⚠️ Stalled | 2 consecutive responses with >90% Jaccard similarity | `IsStalled = true` | +| ⚠️ Error budget | 3 consecutive errors within a single iteration | `IsStalled = true` | +| 🛑 Cancelled | CancellationToken triggered | `OperationCanceledException` | +| ⏸️ Paused | User set `IsPaused = true` | Loop condition fails | + +--- + +## Invariants — What Breaks If You Violate These + +### 1. TCS Ordering: `IsProcessing = false` BEFORE `TrySetResult` + +**Where:** `CopilotService.Events.cs` → `CompleteResponse()` and `SessionErrorEvent` handler + +**The rule:** When completing a response via the TaskCompletionSource (TCS), you MUST set `IsProcessing = false` BEFORE calling `TrySetResult()` or `TrySetException()`. + +**Why:** In reflection loops, the TCS continuation runs **synchronously**. The next `SendPromptAsync` in the loop checks `IsProcessing` — if it's still `true`, it throws "already processing". This killed reflection loops after 1 iteration. + +```csharp +// ✅ CORRECT ORDER +state.IsProcessing = false; // 1. Clear flag first +state.ResponseCompletion?.TrySetResult(response); // 2. Then signal completion + +// ❌ WRONG — breaks reflection loops +state.ResponseCompletion?.TrySetResult(response); // Continuation runs NOW +state.IsProcessing = false; // Too late — next SendPromptAsync already threw +``` + +**Same rule applies to error paths** (`TrySetException`). + +### 2. 
Reconciliation Must Not Scatter Multi-Agent Sessions + +**Where:** `CopilotService.Organization.cs` → `ReconcileOrganization()` + +**The rule:** Sessions that belong to multi-agent groups must NOT be auto-moved to repo groups during reconciliation. Two protections: + +1. **Active group members**: If a session's `GroupId` matches any `IsMultiAgent` group, skip it +2. **Orphaned multi-agent sessions** (group was deleted): If `Role == Orchestrator` or `PreferredModel != null`, don't auto-move to repo groups — these markers indicate the session was part of a multi-agent group + +**Why:** Reconciliation runs twice on startup (once in `LoadOrganization`, once after `RestorePreviousSessionsAsync`). Without protection, it redistributes multi-agent sessions across repo-based groups, destroying the team. + +### 3. Never Edit `organization.json` While the App Is Running + +**Why:** The app calls `SaveOrganization()` from ~30 places, constantly overwriting the file with its in-memory state. Any external edits are lost within seconds. To fix organization state: kill app → edit file → relaunch. + +### 4. Sentinel Protocol Is Case-Insensitive But Must Be on Its Own Line + +**Sentinels:** +- `[[GROUP_REFLECT_COMPLETE]]` — Goal achieved, stop iterating +- `[[NEEDS_ITERATION]]` — More work needed, continue +- `[[REFLECTION_COMPLETE]]` — Single-agent reflection goal met + +**Detection:** `StringComparison.OrdinalIgnoreCase` for multi-agent; strict regex `^\s*\[\[REFLECTION_COMPLETE\]\]\s*$` (multiline) for single-agent. + +### 5. Worker Prompt Must Include Original User Request + +**Where:** `ExecuteWorkerAsync` (line ~772) + +**Why:** Workers receive only their assigned subtask from the orchestrator. Without the original user request as context, they can't understand the broader goal. The prompt format is: + +``` +You are a worker agent. Complete the following task thoroughly. 
+ +## Original User Request (context) +{originalPrompt} + +## Your Assigned Task +{task} +``` + +--- + +## Stall Detection + +Two mechanisms, both in `ReflectionCycle.CheckStall()`: + +1. **Exact hash match** — Sliding window of last 5 response hashes. If current hash matches any → stall. +2. **Jaccard token similarity** — Tokenize current and previous response by whitespace. If intersection/union > 0.9 → stall. + +**Tolerance:** 2 consecutive stalls required before stopping. First stall generates a warning. This prevents false positives from models that happen to produce similar phrasing once. + +**Reset:** `ResetStallDetection()` clears history. Called when resuming from pause. + +--- + +## Quality Trend Tracking + +`ReflectionCycle.EvaluationHistory` records per-iteration: +- `Score` (0.0–1.0) +- `Rationale` (string) +- `EvaluatorModel` (which model evaluated) +- `Timestamp` + +`RecordEvaluation()` returns a `QualityTrend`: +- **Improving** — Latest score > previous + 0.1 +- **Stable** — Within ±0.1 +- **Degrading** — Latest score < previous - 0.1 + +Degrading trend triggers a `PendingAdjustments` warning suggesting model changes. + +--- + +## Session Organization & Persistence + +### Data Model + +``` +OrganizationState +├── Groups: List +│ ├── Id (GUID string) +│ ├── Name +│ ├── IsMultiAgent (bool) +│ ├── OrchestratorMode (Broadcast/Sequential/Orchestrator/OrchestratorReflect) +│ ├── OrchestratorPrompt (optional system prompt for orchestrator) +│ ├── ReflectionState: ReflectionCycle? 
(active cycle state) +│ ├── WorktreeId, RepoId (links to repo/worktree) +│ └── SortOrder +│ +└── Sessions: List + ├── SessionName + ├── GroupId (→ SessionGroup.Id) + ├── Role (Worker/Orchestrator) + ├── PreferredModel (e.g., "claude-opus-4.6") + ├── WorktreeId + └── IsPinned, ManualOrder +``` + +### Persistence Flow +- **File:** `~/.polypilot/organization.json` +- **Save:** `SaveOrganization()` called from ~30 places (group CRUD, session moves, reflection state updates) +- **Load:** `LoadOrganization()` on startup → deserialize → `ReconcileOrganization()` +- **Reconciliation:** Matches sessions to repo groups by `WorktreeId`/`RepoId`, prunes stale groups, protects multi-agent sessions + +### Group Presets +`CreateGroupFromPresetAsync(GroupPreset)` creates a full team: +1. Creates `SessionGroup` with mode and metadata +2. Creates orchestrator session with `Role = Orchestrator`, `PreferredModel` set +3. Creates N worker sessions with `PreferredModel` set per worker +4. All sessions get `WorktreeId` if provided + +**Critical:** Both `Role` and `PreferredModel` must be set on all sessions. These are the markers that `ReconcileOrganization` uses to identify multi-agent sessions. Without them, sessions get scattered on restart. + +--- + +## Error Handling in Reflection Loops + +``` +try { + // ... full iteration (plan → dispatch → collect → evaluate) +} +catch (OperationCanceledException) { throw; } // User cancellation propagates +catch (Exception ex) { + CurrentIteration--; // Retry same iteration, don't skip ahead + ConsecutiveStalls++; // Borrow stall counter as error counter + if (ConsecutiveStalls >= 3) { + IsStalled = true; // Give up after 3 retries + break; + } + await Task.Delay(2000); // Back off before retry +} +``` + +This prevents a single transient error (network hiccup, model timeout) from killing the entire reflection cycle. 
+ +--- + +## Task Assignment Protocol + +The orchestrator's planning prompt tells it to emit assignments in this format: + +``` +@worker:worker-name-1 Description of the task for this worker +@worker:worker-name-2 Description of the task for this worker +``` + +`ParseTaskAssignments` uses regex `@worker:(\S+)\s*([\s\S]*?)(?:@end|(?=@worker:)|$)` to extract these. Workers are matched against the `availableWorkers` list (case-insensitive, fuzzy-matched). + +If no `@worker:` assignments are found, the orchestrator handled the request directly and the loop exits. + +--- + +## Testing + +### Unit Tests +- **`MultiAgentRegressionTests.cs`** (30 tests) — JSON corruption, reconciliation scattering, preset markers, mode enums, reflection loop logic, TCS ordering, lifecycle scenarios +- **`SessionOrganizationTests.cs`** → `GroupingStabilityTests` (14 tests) — JSON round-trips, delete+reconcile, orphan handling + +### Executable Scenarios +- **`PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`** — CDP-based scenarios for MauiDevFlow testing against a running app + +### What to Test After Changes +1. **Changed orchestration logic?** → Run `MultiAgentRegressionTests` +2. **Changed reconciliation?** → Run `GroupingStabilityTests` +3. **Changed TCS/event handling?** → Run `ProcessingWatchdogTests` + verify reflection loop completes +4. **Changed sentinel parsing?** → Run `ReflectionCycleTests` +5. **Changed session persistence?** → Run full suite, verify `organization.json` survives restart From fc23388cefca50818ce887b09162244f020cf078 Mon Sep 17 00:00:00 2001 From: Shane Date: Thu, 19 Feb 2026 14:18:21 -0600 Subject: [PATCH 36/48] Fix session resume killing active turns after 10 seconds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 10-second hardcoded timeout in ResumeSessionAsync was prematurely clearing IsProcessing on sessions that were still actively working. Tool calls (dotnet build, git push, etc.) 
can easily go 30-60 seconds between events, causing the resume logic to declare the turn dead. Changes: - Remove the 10-second resume timeout entirely β€” the processing watchdog (120s inactivity / 600s tool execution) already handles stuck sessions properly - Move event handler subscription (copilotSession.On) BEFORE the watchdog setup to fix a race where events arriving immediately after SDK resume were missed because the handler wasn't wired yet Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot/Services/CopilotService.cs | 31 +++++++--------------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index 717b3e6e96..57dee21aa9 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -1161,8 +1161,13 @@ public async Task ResumeSessionAsync(string sessionId, string Info = info }; - // If still processing, set up ResponseCompletion so events flow properly - // but add a timeout β€” if no new events arrive, the old turn is gone + // Wire up event handler BEFORE starting watchdog/timeout so events + // arriving immediately after SDK resume are not missed. + copilotSession.On(evt => HandleSessionEvent(state, evt)); + + // If still processing, set up ResponseCompletion so events flow properly. + // The processing watchdog (120s inactivity / 600s tool timeout) handles + // stuck sessions β€” no separate short timeout needed. if (isStillProcessing) { state.ResponseCompletion = new TaskCompletionSource(); @@ -1171,30 +1176,8 @@ public async Task ResumeSessionAsync(string sessionId, string // Start the processing watchdog so the session doesn't get stuck // forever if the CLI goes silent after resume (same as SendPromptAsync). 
StartProcessingWatchdog(state, displayName); - - _ = Task.Run(async () => - { - await Task.Delay(TimeSpan.FromSeconds(10)); - // Marshal all state mutations to the UI thread to avoid racing with - // HandleSessionEvent / CompleteResponse (same pattern as the watchdog). - InvokeOnUI(() => - { - if (state.Info.IsProcessing && !Volatile.Read(ref state.HasReceivedEventsSinceResume)) - { - Debug($"Session '{displayName}' processing timeout β€” no new events after resume, clearing stale state"); - CancelProcessingWatchdog(state); - state.Info.IsProcessing = false; - Interlocked.Exchange(ref state.ActiveToolCallCount, 0); - state.ResponseCompletion?.TrySetResult("timeout"); - state.Info.History.Add(ChatMessage.SystemMessage("⏹ Previous turn appears to have ended. Ready for new input.")); - OnStateChanged?.Invoke(); - } - }); - }); } - copilotSession.On(evt => HandleSessionEvent(state, evt)); - if (!_sessions.TryAdd(displayName, state)) { try { await copilotSession.DisposeAsync(); } catch { } From 38a210172104b53c949f12a630181c25dd03c545 Mon Sep 17 00:00:00 2001 From: Shane Date: Thu, 19 Feb 2026 14:28:19 -0600 Subject: [PATCH 37/48] Fix watchdog using 120s timeout instead of 600s during tool-call loops The processing watchdog was incorrectly using the 120s inactivity timeout even when the session was actively running multi-turn tool calls. This happened because AssistantTurnStartEvent resets ActiveToolCallCount to 0 between tool rounds, making the model's 'thinking' gap between tools look like inactivity. Added HasUsedToolsThisTurn flag that stays true for the entire processing cycle once any tool executes. The watchdog now uses the 600s tool timeout when: a tool is actively running (hasActiveTool), the session was resumed mid-turn (IsResumed), or tools have been used this turn (HasUsedToolsThisTurn). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot/Services/CopilotService.Events.cs | 17 +++++++++++++++-- PolyPilot/Services/CopilotService.cs | 5 +++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs index 86076cec5c..45f9efeab0 100644 --- a/PolyPilot/Services/CopilotService.Events.cs +++ b/PolyPilot/Services/CopilotService.Events.cs @@ -275,6 +275,7 @@ void Invoke(Action action) case ToolExecutionStartEvent toolStart: if (toolStart.Data == null) break; Interlocked.Increment(ref state.ActiveToolCallCount); + state.HasUsedToolsThisTurn = true; var startToolName = toolStart.Data.ToolName ?? "unknown"; var startCallId = toolStart.Data.ToolCallId ?? ""; var toolInput = ExtractToolInput(toolStart.Data); @@ -643,6 +644,7 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul $"(responseLen={state.CurrentResponse.Length}, thread={Environment.CurrentManagedThreadId})"); CancelProcessingWatchdog(state); + state.HasUsedToolsThisTurn = false; var response = state.CurrentResponse.ToString(); if (!string.IsNullOrEmpty(response)) { @@ -1099,13 +1101,23 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session var lastEventTicks = Interlocked.Read(ref state.LastEventAtTicks); var elapsed = (DateTime.UtcNow - new DateTime(lastEventTicks)).TotalSeconds; var hasActiveTool = Interlocked.CompareExchange(ref state.ActiveToolCallCount, 0, 0) > 0; - var effectiveTimeout = hasActiveTool ? WatchdogToolExecutionTimeoutSeconds : WatchdogInactivityTimeoutSeconds; + // Use the longer tool-execution timeout if: + // 1. A tool call is actively running (hasActiveTool), OR + // 2. This is a resumed session that was mid-turn (agent sessions routinely + // have 2-3 min gaps between events while the model reasons), OR + // 3. 
Tools have been executed this turn (HasUsedToolsThisTurn) β€” even between + // tool rounds when ActiveToolCallCount is 0, the model may spend minutes + // thinking about what tool to call next. + var useToolTimeout = hasActiveTool || state.Info.IsResumed || state.HasUsedToolsThisTurn; + var effectiveTimeout = useToolTimeout + ? WatchdogToolExecutionTimeoutSeconds + : WatchdogInactivityTimeoutSeconds; if (elapsed >= effectiveTimeout) { var timeoutMinutes = effectiveTimeout / 60; Debug($"Session '{sessionName}' watchdog: no events for {elapsed:F0}s " + - $"(timeout={effectiveTimeout}s, hasActiveTool={hasActiveTool}), clearing stuck processing state"); + $"(timeout={effectiveTimeout}s, hasActiveTool={hasActiveTool}, isResumed={state.Info.IsResumed}, hasUsedTools={state.HasUsedToolsThisTurn}), clearing stuck processing state"); // Capture generation before posting β€” same guard pattern as CompleteResponse. // Prevents a stale watchdog callback from killing a new turn if the user // aborts + resends between the Post() and the callback execution. @@ -1124,6 +1136,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session } CancelProcessingWatchdog(state); Interlocked.Exchange(ref state.ActiveToolCallCount, 0); + state.HasUsedToolsThisTurn = false; state.Info.IsProcessing = false; state.Info.History.Add(ChatMessage.SystemMessage( "⚠️ Session appears stuck β€” no response received. You can try sending your message again.")); diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index 57dee21aa9..c2da33a665 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -207,6 +207,11 @@ private class SessionState public CancellationTokenSource? ProcessingWatchdog { get; set; } /// Number of tool calls started but not yet completed this turn. public int ActiveToolCallCount; + /// True if any tool call has started during the current processing cycle. 
+ /// Unlike ActiveToolCallCount which resets on AssistantTurnStartEvent, this stays + /// true until the response completes β€” so the watchdog uses the longer tool timeout + /// even between tool rounds when the model is thinking. + public bool HasUsedToolsThisTurn; /// /// Monotonically increasing counter incremented each time a new prompt is sent. /// Used by CompleteResponse to avoid completing a different turn than the one From b73631e97e825399667c5d7493981b9e203fce59 Mon Sep 17 00:00:00 2001 From: Shane Date: Thu, 19 Feb 2026 14:43:02 -0600 Subject: [PATCH 38/48] Add per-worker system prompts (agent personas) Workers can now have a SystemPrompt on SessionMeta that defines their specialization (e.g., 'security auditor', 'performance optimizer'). - SessionMeta.SystemPrompt: nullable, serializable to org.json - BuildOrchestratorPlanningPrompt: includes worker descriptions so the orchestrator routes tasks based on expertise - ExecuteWorkerAsync: prepends worker's system prompt instead of generic - GroupPreset.WorkerSystemPrompts: per-worker prompts indexed to models - CreateGroupFromPresetAsync: applies preset system prompts to workers - SetSessionSystemPrompt: public API for setting/clearing prompts - Built-in presets updated with meaningful personas: Code Review Team (correctness + security reviewers) Quick Reflection Cycle (implementation + testing + docs specialists) Deep Research (analyst + creative problem solver) - 7 new regression tests (JSON round-trip, null safety, prompt content) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/MultiAgentRegressionTests.cs | 156 ++++++++++++++++++ PolyPilot/Models/ModelCapabilities.cs | 36 +++- PolyPilot/Models/SessionOrganization.cs | 7 + .../Services/CopilotService.Organization.cs | 38 ++++- docs/multi-agent-orchestration.md | 12 +- 5 files changed, 237 insertions(+), 12 deletions(-) diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs 
b/PolyPilot.Tests/MultiAgentRegressionTests.cs index 07ab12e3f8..4f23317ff2 100644 --- a/PolyPilot.Tests/MultiAgentRegressionTests.cs +++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs @@ -919,4 +919,160 @@ public void StallDetection_ResetOnDifferentContent() } #endregion + + #region Feature: Per-Worker System Prompts (Agent Personas) + + /// + /// SystemPrompt on SessionMeta must survive JSON round-trip (serialization to org.json). + /// + [Fact] + public void SystemPrompt_SurvivesJsonRoundTrip() + { + var org = new OrganizationState(); + var groupId = Guid.NewGuid().ToString(); + org.Groups.Add(new SessionGroup { Id = groupId, Name = "Persona Team", IsMultiAgent = true }); + org.Sessions.Add(new SessionMeta + { + SessionName = "worker-security", + GroupId = groupId, + Role = MultiAgentRole.Worker, + PreferredModel = "gpt-5.1-codex", + SystemPrompt = "You are a security auditor. Focus on vulnerabilities." + }); + org.Sessions.Add(new SessionMeta + { + SessionName = "worker-perf", + GroupId = groupId, + Role = MultiAgentRole.Worker, + PreferredModel = "claude-sonnet-4.5", + SystemPrompt = "You are a performance optimizer. Focus on latency and memory." + }); + org.Sessions.Add(new SessionMeta + { + SessionName = "worker-plain", + GroupId = groupId, + Role = MultiAgentRole.Worker, + PreferredModel = "gpt-4.1" + // No SystemPrompt β€” should remain null + }); + + var json = JsonSerializer.Serialize(org); + var restored = JsonSerializer.Deserialize(json)!; + + var security = restored.Sessions.First(s => s.SessionName == "worker-security"); + var perf = restored.Sessions.First(s => s.SessionName == "worker-perf"); + var plain = restored.Sessions.First(s => s.SessionName == "worker-plain"); + + Assert.Equal("You are a security auditor. Focus on vulnerabilities.", security.SystemPrompt); + Assert.Equal("You are a performance optimizer. 
Focus on latency and memory.", perf.SystemPrompt); + Assert.Null(plain.SystemPrompt); + } + + /// + /// Null SystemPrompt in old org.json files must not cause deserialization failure. + /// + [Fact] + public void SystemPrompt_NullInOldJson_DeserializesCleanly() + { + // Simulate an org.json from before SystemPrompt was added + var json = """{"Groups":[],"Sessions":[{"SessionName":"old-session","GroupId":"_default","Role":0,"PreferredModel":null}]}"""; + var org = JsonSerializer.Deserialize(json)!; + + Assert.Single(org.Sessions); + Assert.Null(org.Sessions[0].SystemPrompt); + } + + /// + /// SetSessionSystemPrompt persists through Organization model. + /// + [Fact] + public void SetSessionSystemPrompt_PersistsOnMeta() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "w1" }); + + svc.SetSessionSystemPrompt("w1", "You are a code reviewer."); + + var meta = svc.Organization.Sessions.First(s => s.SessionName == "w1"); + Assert.Equal("You are a code reviewer.", meta.SystemPrompt); + } + + /// + /// SetSessionSystemPrompt with whitespace/null clears the prompt. + /// + [Fact] + public void SetSessionSystemPrompt_WhitespaceClears() + { + var svc = CreateService(); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "w1", SystemPrompt = "old" }); + + svc.SetSessionSystemPrompt("w1", " "); + Assert.Null(svc.Organization.Sessions.First(s => s.SessionName == "w1").SystemPrompt); + + svc.Organization.Sessions.First(s => s.SessionName == "w1").SystemPrompt = "restored"; + svc.SetSessionSystemPrompt("w1", null); + Assert.Null(svc.Organization.Sessions.First(s => s.SessionName == "w1").SystemPrompt); + } + + /// + /// BuildOrchestratorPlanningPrompt includes worker system prompts when present. 
+ /// + [Fact] + public void OrchestratorPlanningPrompt_IncludesWorkerPersonas() + { + var svc = CreateService(); + // Pre-create session metadata entries + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "orch" }); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "sec-worker" }); + svc.Organization.Sessions.Add(new SessionMeta { SessionName = "perf-worker" }); + + var group = svc.CreateMultiAgentGroup("Persona", + sessionNames: new List { "orch", "sec-worker", "perf-worker" }); + + svc.SetSessionRole("orch", MultiAgentRole.Orchestrator); + svc.SetSessionSystemPrompt("sec-worker", "You are a security auditor."); + svc.SetSessionSystemPrompt("perf-worker", "You are a performance optimizer."); + + // Use reflection to call private BuildOrchestratorPlanningPrompt + var method = typeof(CopilotService).GetMethod("BuildOrchestratorPlanningPrompt", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance); + Assert.NotNull(method); + + var workers = new List { "sec-worker", "perf-worker" }; + var result = (string)method!.Invoke(svc, new object?[] { "Review this code", workers, null })!; + + Assert.Contains("security auditor", result); + Assert.Contains("performance optimizer", result); + Assert.Contains("specialization", result); + } + + /// + /// Built-in presets with WorkerSystemPrompts have the right number of prompts. + /// + [Fact] + public void BuiltInPresets_WorkerSystemPrompts_MatchWorkerCount() + { + foreach (var preset in GroupPreset.BuiltIn) + { + if (preset.WorkerSystemPrompts == null) continue; + Assert.True(preset.WorkerSystemPrompts.Length <= preset.WorkerModels.Length, + $"Preset '{preset.Name}' has {preset.WorkerSystemPrompts.Length} system prompts but only {preset.WorkerModels.Length} workers"); + } + } + + /// + /// Code Review Team preset has distinct personas for each worker. 
+ /// + [Fact] + public void CodeReviewTeam_Preset_HasDistinctPersonas() + { + var preset = GroupPreset.BuiltIn.First(p => p.Name == "Code Review Team"); + Assert.NotNull(preset.WorkerSystemPrompts); + Assert.Equal(2, preset.WorkerSystemPrompts!.Length); + Assert.All(preset.WorkerSystemPrompts, p => Assert.False(string.IsNullOrWhiteSpace(p))); + // Each persona should be unique + Assert.NotEqual(preset.WorkerSystemPrompts[0], preset.WorkerSystemPrompts[1]); + } + + #endregion } diff --git a/PolyPilot/Models/ModelCapabilities.cs b/PolyPilot/Models/ModelCapabilities.cs index 732b94baec..4090bef6e2 100644 --- a/PolyPilot/Models/ModelCapabilities.cs +++ b/PolyPilot/Models/ModelCapabilities.cs @@ -152,12 +152,25 @@ public record GroupPreset(string Name, string Description, string Emoji, MultiAg /// Whether this is a user-created preset (vs built-in). public bool IsUserDefined { get; init; } + /// + /// Per-worker system prompts, indexed to match WorkerModels. + /// Null or shorter array = remaining workers get generic prompt. + /// + public string?[]? WorkerSystemPrompts { get; init; } + public static readonly GroupPreset[] BuiltIn = new[] { new GroupPreset( - "Code Review Team", "Opus orchestrates, fast workers execute", + "Code Review Team", "Opus orchestrates, specialized reviewers execute", "πŸ”", MultiAgentMode.Orchestrator, - "claude-opus-4.6", new[] { "gpt-5.1-codex", "claude-sonnet-4.5" }), + "claude-opus-4.6", new[] { "gpt-5.1-codex", "claude-sonnet-4.5" }) + { + WorkerSystemPrompts = new[] + { + "You are a code correctness reviewer. Focus on logic errors, edge cases, off-by-one bugs, null safety, and incorrect assumptions. Flag anything that could cause runtime failures or data corruption.", + "You are a security and architecture reviewer. Focus on vulnerabilities (injection, auth flaws, data exposure), architectural anti-patterns, and maintainability issues. Suggest concrete fixes." 
+ } + }, new GroupPreset( "Multi-Perspective Analysis", "Different models analyze the same problem", @@ -167,12 +180,27 @@ public record GroupPreset(string Name, string Description, string Emoji, MultiAg new GroupPreset( "Quick Reflection Cycle", "Fast workers + smart evaluator for iterative refinement", "πŸ”„", MultiAgentMode.OrchestratorReflect, - "claude-opus-4.6", new[] { "gpt-4.1", "gpt-4.1", "gpt-5.1-codex-mini" }), + "claude-opus-4.6", new[] { "gpt-4.1", "gpt-4.1", "gpt-5.1-codex-mini" }) + { + WorkerSystemPrompts = new[] + { + "You are an implementation specialist. Write clean, correct code. Focus on getting the logic right and handling edge cases.", + "You are a testing and validation specialist. Review solutions for correctness, write test cases, and identify gaps in coverage.", + "You are a documentation and UX specialist. Ensure code is well-documented, APIs are intuitive, and error messages are helpful." + } + }, new GroupPreset( "Deep Research", "Strong reasoning models collaborate on complex problems", "🧠", MultiAgentMode.Orchestrator, - "claude-opus-4.6", new[] { "gpt-5.1", "gemini-3-pro" }), + "claude-opus-4.6", new[] { "gpt-5.1", "gemini-3-pro" }) + { + WorkerSystemPrompts = new[] + { + "You are a deep reasoning analyst. Break down complex problems methodically. Provide thorough analysis with evidence and citations where possible.", + "You are a creative problem solver. Explore unconventional approaches, challenge assumptions, and propose alternative solutions that others might miss." + } + }, }; } diff --git a/PolyPilot/Models/SessionOrganization.cs b/PolyPilot/Models/SessionOrganization.cs index cbdabb8814..203669f163 100644 --- a/PolyPilot/Models/SessionOrganization.cs +++ b/PolyPilot/Models/SessionOrganization.cs @@ -57,6 +57,13 @@ public class SessionMeta /// When set, the model is switched before dispatch via EnsureSessionModelAsync. /// public string? 
PreferredModel { get; set; } + + /// + /// System prompt / charter that defines this worker's specialization. + /// Prepended to every task dispatched to this worker. Null = generic worker prompt. + /// Example: "You are a security auditor. Focus on vulnerabilities, input validation, and auth flaws." + /// + public string? SystemPrompt { get; set; } } [JsonConverter(typeof(JsonStringEnumConverter))] diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 84f780645b..b1d26dd417 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -712,7 +712,16 @@ private string BuildOrchestratorPlanningPrompt(string userPrompt, List w var sb = new System.Text.StringBuilder(); sb.AppendLine($"You are the orchestrator of a multi-agent group. You have {workerNames.Count} worker agent(s) available:"); foreach (var w in workerNames) - sb.AppendLine($" - '{w}' (model: {GetEffectiveModel(w)})"); + { + var meta = GetSessionMeta(w); + var model = GetEffectiveModel(w); + if (!string.IsNullOrEmpty(meta?.SystemPrompt)) + sb.AppendLine($" - '{w}' (model: {model}) β€” {meta.SystemPrompt}"); + else + sb.AppendLine($" - '{w}' (model: {model})"); + } + sb.AppendLine(); + sb.AppendLine("Route tasks to workers based on their specialization. If a worker has a described role, assign tasks that match their expertise."); sb.AppendLine(); sb.AppendLine("## User Request"); sb.AppendLine(userPrompt); @@ -769,7 +778,13 @@ private async Task ExecuteWorkerAsync(string workerName, string ta { var sw = System.Diagnostics.Stopwatch.StartNew(); await EnsureSessionModelAsync(workerName, cancellationToken); - var workerPrompt = $"You are a worker agent. Complete the following task thoroughly. 
Your response will be collected and synthesized with other workers' responses.\n\n## Original User Request (context)\n{originalPrompt}\n\n## Your Assigned Task\n{task}"; + + // Use per-worker system prompt if set, otherwise generic + var meta = GetSessionMeta(workerName); + var identity = !string.IsNullOrEmpty(meta?.SystemPrompt) + ? meta.SystemPrompt + : "You are a worker agent. Complete the following task thoroughly."; + var workerPrompt = $"{identity}\n\nYour response will be collected and synthesized with other workers' responses.\n\n## Original User Request (context)\n{originalPrompt}\n\n## Your Assigned Task\n{task}"; try { @@ -870,6 +885,15 @@ public void SetSessionPreferredModel(string sessionName, string? modelSlug) OnStateChanged?.Invoke(); } + public void SetSessionSystemPrompt(string sessionName, string? systemPrompt) + { + var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName); + if (meta == null) return; + meta.SystemPrompt = string.IsNullOrWhiteSpace(systemPrompt) ? null : systemPrompt.Trim(); + SaveOrganization(); + OnStateChanged?.Invoke(); + } + /// /// Returns the model a session will use: PreferredModel if set, else live AgentSessionInfo.Model. /// @@ -918,10 +942,14 @@ public string GetEffectiveModel(string sessionName) await CreateSessionAsync(workerName, workerModel, workingDirectory, ct); MoveSession(workerName, group.Id); SetSessionPreferredModel(workerName, workerModel); - if (worktreeId != null) + // Apply per-worker system prompt from preset if available + var systemPrompt = preset.WorkerSystemPrompts != null && i < preset.WorkerSystemPrompts.Length + ? 
preset.WorkerSystemPrompts[i] : null; + var meta = GetSessionMeta(workerName); + if (meta != null) { - var meta = GetSessionMeta(workerName); - if (meta != null) meta.WorktreeId = worktreeId; + if (worktreeId != null) meta.WorktreeId = worktreeId; + if (systemPrompt != null) meta.SystemPrompt = systemPrompt; } } catch (Exception ex) diff --git a/docs/multi-agent-orchestration.md b/docs/multi-agent-orchestration.md index 62a1a21a4f..3f17d1c291 100644 --- a/docs/multi-agent-orchestration.md +++ b/docs/multi-agent-orchestration.md @@ -14,7 +14,7 @@ PolyPilot's multi-agent system lets you create a **team of AI sessions** that wo | `PolyPilot/Models/SessionOrganization.cs` | `SessionGroup`, `SessionMeta`, `MultiAgentMode`, `MultiAgentRole` | | `PolyPilot/Models/ReflectionCycle.cs` | Reflection state, stall detection, sentinel parsing, evaluator prompts | | `PolyPilot/Services/CopilotService.Events.cs` | TCS completion (IsProcessing β†’ TrySetResult ordering) | -| `PolyPilot.Tests/MultiAgentRegressionTests.cs` | 30 regression tests covering all known bugs | +| `PolyPilot.Tests/MultiAgentRegressionTests.cs` | 37 regression tests covering all known bugs | | `PolyPilot.Tests/SessionOrganizationTests.cs` | 14 grouping stability tests | | `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json` | Executable CDP test scenarios | @@ -46,7 +46,7 @@ Same as Orchestrator but **loops** until the goal is met, quality stalls, or max ### Participants - **1 Orchestrator** β€” Plans, delegates, synthesizes. Set via `SessionMeta.Role = Orchestrator` -- **N Workers** β€” Execute assigned tasks in parallel. Each can use a different model (`SessionMeta.PreferredModel`) +- **N Workers** β€” Execute assigned tasks in parallel. 
Each can use a different model (`SessionMeta.PreferredModel`) and have a **system prompt** (`SessionMeta.SystemPrompt`) that defines their specialization - **1 Evaluator** (optional) β€” Independent quality judge on a separate model (`ReflectionCycle.EvaluatorSessionName`) ### The Loop (runs in `SendViaOrchestratorReflectAsync`) @@ -226,6 +226,7 @@ OrganizationState β”œβ”€β”€ GroupId (β†’ SessionGroup.Id) β”œβ”€β”€ Role (Worker/Orchestrator) β”œβ”€β”€ PreferredModel (e.g., "claude-opus-4.6") + β”œβ”€β”€ SystemPrompt (worker specialization, e.g., "You are a security auditor...") β”œβ”€β”€ WorktreeId └── IsPinned, ManualOrder ``` @@ -240,9 +241,14 @@ OrganizationState `CreateGroupFromPresetAsync(GroupPreset)` creates a full team: 1. Creates `SessionGroup` with mode and metadata 2. Creates orchestrator session with `Role = Orchestrator`, `PreferredModel` set -3. Creates N worker sessions with `PreferredModel` set per worker +3. Creates N worker sessions with `PreferredModel` and `SystemPrompt` set per worker 4. All sessions get `WorktreeId` if provided +**Worker System Prompts:** Each worker can have a `SystemPrompt` defining its specialization. This prompt is: +- Included in `BuildOrchestratorPlanningPrompt` so the orchestrator knows each worker's expertise and routes tasks accordingly +- Prepended to the worker's task in `ExecuteWorkerAsync` (replaces the generic "You are a worker agent" prompt) +- Set via `SetSessionSystemPrompt(sessionName, prompt)` or via `GroupPreset.WorkerSystemPrompts` + **Critical:** Both `Role` and `PreferredModel` must be set on all sessions. These are the markers that `ReconcileOrganization` uses to identify multi-agent sessions. Without them, sessions get scattered on restart. 
--- From 18b3df3daa29b6713b1e5569af7f8803d5065c80 Mon Sep 17 00:00:00 2001 From: Shane Date: Thu, 19 Feb 2026 15:56:19 -0600 Subject: [PATCH 39/48] Fix 12 issues from multi-agent code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Findings from OrchestratorReflect review with Sonnet 4.6 + GPT-5.3-Codex: Critical: - Carry ProcessingGeneration across SessionState replacement on reconnect to prevent stale callbacks from passing generation checks High: - Add atomic SendingFlag to prevent TOCTOU race in SendPromptAsync - Gate orphaned event handlers: skip SessionIdleEvent/SessionErrorEvent on !isCurrentState to prevent stale handlers clearing IsProcessing - Add lock around _queuedImagePaths inner List mutations - JsonIgnore ConsecutiveStalls — private stall state not recoverable from JSON, persisting counter creates inconsistent state on restart - Split ConsecutiveErrors from ConsecutiveStalls — different thresholds and recovery strategies for errors vs stalls Medium: - Reset IsResumed after first CompleteResponse so subsequent turns use normal 120s watchdog timeout instead of permanent 600s - Add RunContinuationsAsynchronously to all TaskCompletionSource to prevent inline continuation reentrancy - Empty worker assignment on first iteration treated as error not goal met Low: - Add IsCancelled flag to ReflectionCycle for StopGroupReflection - Replace GetHashCode() with full string equality in stall detection - Prune ghost __evaluator_* sessions on startup if not referenced by active reflection cycle Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot/Models/AgentSessionInfo.cs | 2 +- PolyPilot/Models/ReflectionCycle.cs | 33 ++++++++---- PolyPilot/Services/CopilotService.Events.cs | 16 ++++-- .../Services/CopilotService.Organization.cs | 23 ++++++-- .../Services/CopilotService.Persistence.cs | 14 +++++ PolyPilot/Services/CopilotService.cs | 53 +++++++++++++++---- 6 files changed, 112
insertions(+), 29 deletions(-) diff --git a/PolyPilot/Models/AgentSessionInfo.cs b/PolyPilot/Models/AgentSessionInfo.cs index cfc523e20b..66d98d4bb0 100644 --- a/PolyPilot/Models/AgentSessionInfo.cs +++ b/PolyPilot/Models/AgentSessionInfo.cs @@ -15,7 +15,7 @@ public class AgentSessionInfo // For resumed sessions public string? SessionId { get; set; } - public bool IsResumed { get; init; } + public bool IsResumed { get; set; } // Timestamp of last state change (message received, turn end, etc.) public DateTime LastUpdatedAt { get; set; } = DateTime.Now; diff --git a/PolyPilot/Models/ReflectionCycle.cs b/PolyPilot/Models/ReflectionCycle.cs index 34a45ed566..0d0485183c 100644 --- a/PolyPilot/Models/ReflectionCycle.cs +++ b/PolyPilot/Models/ReflectionCycle.cs @@ -54,11 +54,26 @@ public partial class ReflectionCycle /// public bool IsStalled { get; set; } + /// + /// Whether the cycle was manually cancelled by the user via StopGroupReflection. + /// + public bool IsCancelled { get; set; } + /// /// Number of consecutive stalls detected. Exposed for diagnostics and warning UI. + /// Not serialized β€” private stall state (_recentHashes, _lastResponse) is not recoverable + /// from JSON, so persisting this counter would create inconsistent state after restart. /// + [System.Text.Json.Serialization.JsonIgnore] public int ConsecutiveStalls { get; internal set; } + /// + /// Number of consecutive errors in the reflection loop. Separate from ConsecutiveStalls + /// because stalls and errors have different thresholds and recovery strategies. + /// + [System.Text.Json.Serialization.JsonIgnore] + public int ConsecutiveErrors { get; internal set; } + /// /// Optional instructions on how to evaluate whether the goal has been met. /// If empty, a default evaluation prompt is constructed from the Goal. 
@@ -118,7 +133,7 @@ public partial class ReflectionCycle public List PendingAdjustments { get; } = new(); // Stall detection state (not serialized) - private readonly List _recentHashes = new(); + private readonly List _recentResponses = new(); private string _lastResponse = ""; /// @@ -127,9 +142,10 @@ public partial class ReflectionCycle /// public void ResetStallDetection() { - _recentHashes.Clear(); + _recentResponses.Clear(); _lastResponse = ""; ConsecutiveStalls = 0; + ConsecutiveErrors = 0; ShouldWarnOnStall = false; } @@ -262,16 +278,15 @@ public bool CheckStall(string response) bool isStall = false; LastSimilarity = 0.0; - // Exact repetition check over last 5 responses - int currentHash = response.GetHashCode(); - if (_recentHashes.Contains(currentHash)) + // Exact repetition check over last 5 responses (full string equality, no hash collisions) + if (_recentResponses.Contains(response)) { isStall = true; LastSimilarity = 1.0; } - _recentHashes.Add(currentHash); - if (_recentHashes.Count > 5) _recentHashes.RemoveAt(0); + _recentResponses.Add(response); + if (_recentResponses.Count > 5) _recentResponses.RemoveAt(0); // Jaccard similarity with immediate predecessor if (!isStall && !string.IsNullOrEmpty(_lastResponse)) @@ -402,8 +417,8 @@ public bool AdvanceWithEvaluation(string response, bool evaluatorPassed, string? /// public string BuildCompletionSummary() { - var emoji = GoalMet ? "βœ…" : IsStalled ? "⚠️" : "⏱️"; - var reasonText = GoalMet ? "Goal met" : IsStalled ? $"Stalled ({LastSimilarity:P0} similarity)" : $"Max iterations reached ({MaxIterations})"; + var emoji = GoalMet ? "βœ…" : IsCancelled ? "⏹️" : IsStalled ? "⚠️" : "⏱️"; + var reasonText = GoalMet ? "Goal met" : IsCancelled ? "Cancelled by user" : IsStalled ? 
$"Stalled ({LastSimilarity:P0} similarity)" : $"Max iterations reached ({MaxIterations})"; var durationText = ""; if (StartedAt.HasValue && CompletedAt.HasValue) { diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs index 45f9efeab0..eb5f3ef4e7 100644 --- a/PolyPilot/Services/CopilotService.Events.cs +++ b/PolyPilot/Services/CopilotService.Events.cs @@ -210,13 +210,18 @@ private void HandleSessionEvent(SessionState state, SessionEvent evt) } // Warn if receiving events on an orphaned (replaced) state object. - // We don't early-return here: both old and new SessionState share the same Info object - // (reconnect copies Info to newState), so CompleteResponse on the orphaned state still - // correctly clears IsProcessing on the live session's shared Info. + // After the generation-carry fix, stale callbacks on orphaned state would have + // matching generations and could incorrectly complete the new turn. Gate all + // terminal/mutating events to only fire on the current (live) state. if (!isCurrentState) { Debug($"[EVT-WARN] '{sessionName}' event {evt.GetType().Name} delivered to ORPHANED state " + $"(not in _sessions). This handler should have been detached."); + // Allow non-mutating events (text deltas, tool output) to flow through + // since they only append to shared Info.History. But block terminal events + // that would clear IsProcessing or complete the TCS. + if (evt is SessionIdleEvent or SessionErrorEvent) + return; } void Invoke(Action action) @@ -664,6 +669,8 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul // call must see IsProcessing=false or it throws "already processing". 
state.CurrentResponse.Clear(); state.Info.IsProcessing = false; + state.Info.IsResumed = false; // After first successful completion, use normal watchdog timeouts + Interlocked.Exchange(ref state.SendingFlag, 0); // Release atomic send lock state.Info.LastUpdatedAt = DateTime.Now; state.ResponseCompletion?.TrySetResult(response); OnStateChanged?.Invoke(); @@ -841,7 +848,7 @@ private async Task EvaluateAndAdvanceAsync(string workerSessionName, string work while (evalState.Info.IsProcessing && !cts.Token.IsCancellationRequested) await Task.Delay(200, cts.Token); - evalState.ResponseCompletion = new TaskCompletionSource(); + evalState.ResponseCompletion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); await SendPromptAsync(evaluatorName, evalPrompt, cancellationToken: cts.Token, skipHistoryMessage: true); // Wait for the evaluator response @@ -1138,6 +1145,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session Interlocked.Exchange(ref state.ActiveToolCallCount, 0); state.HasUsedToolsThisTurn = false; state.Info.IsProcessing = false; + Interlocked.Exchange(ref state.SendingFlag, 0); state.Info.History.Add(ChatMessage.SystemMessage( "⚠️ Session appears stuck β€” no response received. 
You can try sending your message again.")); state.ResponseCompletion?.TrySetResult(""); diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index b1d26dd417..43ef137d29 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -1027,6 +1027,7 @@ public void StopGroupReflection(string groupId) if (group?.ReflectionState == null) return; group.ReflectionState.IsActive = false; + group.ReflectionState.IsCancelled = true; group.ReflectionState.CompletedAt = DateTime.Now; SaveOrganization(); OnStateChanged?.Invoke(); @@ -1095,7 +1096,21 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List if (assignments.Count == 0) { - // Orchestrator decided no more work needed + if (reflectState.CurrentIteration == 0) + { + // First iteration with no assignments = orchestrator failed to delegate. + // Treat as error, not goal met, so we can retry. + AddOrchestratorSystemMessage(orchestratorName, + "⚠️ No @worker assignments parsed from orchestrator response. 
Retrying..."); + reflectState.ConsecutiveErrors++; + if (reflectState.ConsecutiveErrors >= 3) + { + reflectState.IsStalled = true; + break; + } + continue; + } + // Later iterations: orchestrator decided no more work needed reflectState.GoalMet = true; AddOrchestratorSystemMessage(orchestratorName, $"βœ… Orchestrator completed without delegation (iteration {reflectState.CurrentIteration})."); break; @@ -1199,15 +1214,15 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List Debug($"Reflection iteration {reflectState.CurrentIteration} error: {ex.GetType().Name}: {ex.Message}"); // Decrement so we retry the same iteration, not skip ahead reflectState.CurrentIteration--; - // But limit retries per iteration to 3 - if (reflectState.ConsecutiveStalls >= 3) + // But limit retries per iteration to 3 (uses separate error counter) + if (reflectState.ConsecutiveErrors >= 3) { reflectState.IsStalled = true; AddOrchestratorSystemMessage(orchestratorName, $"⚠️ Iteration failed after retries: {ex.Message}"); break; } - reflectState.ConsecutiveStalls++; + reflectState.ConsecutiveErrors++; AddOrchestratorSystemMessage(orchestratorName, $"⚠️ Iteration {reflectState.CurrentIteration + 1} error: {ex.Message}. 
Retrying..."); InvokeOnUI(() => OnStateChanged?.Invoke()); diff --git a/PolyPilot/Services/CopilotService.Persistence.cs b/PolyPilot/Services/CopilotService.Persistence.cs index 41e891a77b..8995969128 100644 --- a/PolyPilot/Services/CopilotService.Persistence.cs +++ b/PolyPilot/Services/CopilotService.Persistence.cs @@ -101,10 +101,24 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok Debug($"Restoring {entries.Count} previous sessions..."); IsRestoring = true; + // Collect evaluator session names referenced by active reflection cycles + var activeEvaluators = new HashSet(StringComparer.OrdinalIgnoreCase); + foreach (var g in Organization.Groups) + { + if (g.ReflectionState?.IsActive == true && !string.IsNullOrEmpty(g.ReflectionState.EvaluatorSessionName)) + activeEvaluators.Add(g.ReflectionState.EvaluatorSessionName); + } + foreach (var entry in entries) { try { + // Prune ghost evaluator sessions from crashed cycles + if (entry.DisplayName.StartsWith("__evaluator_") && !activeEvaluators.Contains(entry.DisplayName)) + { + Debug($"Pruning ghost evaluator session '{entry.DisplayName}' β€” not referenced by active cycle"); + continue; + } // Skip if already active if (_sessions.ContainsKey(entry.DisplayName)) { diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index c2da33a665..bf623de4da 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -19,6 +19,7 @@ public partial class CopilotService : IAsyncDisposable private readonly ConcurrentDictionary _closedSessionIds = new(); // Image paths queued alongside messages when session is busy (keyed by session name, list per queued message) private readonly ConcurrentDictionary>> _queuedImagePaths = new(); + private readonly object _imageQueueLock = new(); private static readonly object _diagnosticLogLock = new(); private readonly IChatDatabase _chatDb; private readonly IServerManager _serverManager; @@ -218,6 
+219,12 @@ private class SessionState /// that produced the SessionIdleEvent (race between SEND and queued COMPLETE). /// public long ProcessingGeneration; + /// + /// Atomic flag for SendPromptAsync entry. Prevents TOCTOU race where two + /// concurrent callers both see IsProcessing=false and both enter. + /// 0 = idle, 1 = sending. Set via Interlocked.CompareExchange. + /// + public int SendingFlag; } private void Debug(string message) @@ -1175,7 +1182,7 @@ public async Task ResumeSessionAsync(string sessionId, string // stuck sessions β€” no separate short timeout needed. if (isStillProcessing) { - state.ResponseCompletion = new TaskCompletionSource(); + state.ResponseCompletion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); Debug($"Session '{displayName}' is still processing (was mid-turn when app restarted)"); // Start the processing watchdog so the session doesn't get stuck @@ -1443,11 +1450,18 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis if (state.Info.IsProcessing) throw new InvalidOperationException("Session is already processing a request."); + // Atomic check-and-set to prevent TOCTOU race: two callers could both see + // IsProcessing=false and both enter without this guard. 
+ if (Interlocked.CompareExchange(ref state.SendingFlag, 1, 0) != 0) + throw new InvalidOperationException("Session is already processing a request."); + + try + { state.Info.IsProcessing = true; Interlocked.Increment(ref state.ProcessingGeneration); Interlocked.Exchange(ref state.ActiveToolCallCount, 0); // Reset stale tool count from previous turn Debug($"[SEND] '{sessionName}' IsProcessing=true gen={Interlocked.Read(ref state.ProcessingGeneration)} (thread={Environment.CurrentManagedThreadId})"); - state.ResponseCompletion = new TaskCompletionSource(); + state.ResponseCompletion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); state.CurrentResponse.Clear(); StartProcessingWatchdog(state, sessionName); @@ -1521,6 +1535,11 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis Info = state.Info }; newState.ResponseCompletion = state.ResponseCompletion; + // Carry forward ProcessingGeneration so stale callbacks on the + // orphaned old state can't pass generation checks on the new state. + Interlocked.Exchange(ref newState.ProcessingGeneration, + Interlocked.Read(ref state.ProcessingGeneration)); + newState.HasUsedToolsThisTurn = state.HasUsedToolsThisTurn; newSession.On(evt => HandleSessionEvent(newState, evt)); _sessions[sessionName] = newState; state = newState; @@ -1559,6 +1578,13 @@ await state.Session.SendAsync(new MessageOptions if (state.ResponseCompletion == null) return ""; // Response already completed via events return await state.ResponseCompletion.Task; + } + catch + { + // Reset atomic send flag on any exception so the session isn't permanently locked + Interlocked.Exchange(ref state.SendingFlag, 0); + throw; + } } public async Task AbortSessionAsync(string sessionName) @@ -1621,11 +1647,13 @@ public void EnqueueMessage(string sessionName, string prompt, List? 
imag // Track image paths alongside the queued message if (imagePaths != null && imagePaths.Count > 0) { - var queue = _queuedImagePaths.GetOrAdd(sessionName, _ => new List>()); - // Pad with empty lists for any prior messages without images - while (queue.Count < state.Info.MessageQueue.Count - 1) - queue.Add(new List()); - queue.Add(imagePaths); + lock (_imageQueueLock) + { + var queue = _queuedImagePaths.GetOrAdd(sessionName, _ => new List>()); + while (queue.Count < state.Info.MessageQueue.Count - 1) + queue.Add(new List()); + queue.Add(imagePaths); + } } OnStateChanged?.Invoke(); @@ -1647,11 +1675,14 @@ public void RemoveQueuedMessage(string sessionName, int index) { state.Info.MessageQueue.RemoveAt(index); // Keep queued image paths in sync - if (_queuedImagePaths.TryGetValue(sessionName, out var imageQueue) && index < imageQueue.Count) + lock (_imageQueueLock) { - imageQueue.RemoveAt(index); - if (imageQueue.Count == 0) - _queuedImagePaths.TryRemove(sessionName, out _); + if (_queuedImagePaths.TryGetValue(sessionName, out var imageQueue) && index < imageQueue.Count) + { + imageQueue.RemoveAt(index); + if (imageQueue.Count == 0) + _queuedImagePaths.TryRemove(sessionName, out _); + } } OnStateChanged?.Invoke(); } From 81e45a049f85ab9cad682fd335c88016e3023362 Mon Sep 17 00:00:00 2001 From: Shane Date: Thu, 19 Feb 2026 16:39:42 -0600 Subject: [PATCH 40/48] Fix 5 issues from multi-agent re-review council - Fix #9 off-by-one: CurrentIteration==0 check unreachable (now ==1) - Fix #10 incomplete: set IsCancelled on OperationCanceledException - Fix ConsecutiveErrors: reset to 0 after successful iteration - Fix #11 stale comments: update hash references to string equality - Fix #12 incomplete: mark pruned ghost evaluators in _closedSessionIds Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 4 ++-- PolyPilot/Models/ReflectionCycle.cs | 4 ++-- PolyPilot/Services/CopilotService.Organization.cs | 9 
+++++++-- PolyPilot/Services/CopilotService.Persistence.cs | 1 + 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 7d939f9f45..e46125d020 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -1783,7 +1783,7 @@ public void Scenario_DedicatedEvaluatorScoring() /// User flow: /// 1. Reflect loop is running, iteration 3 /// 2. Workers keep producing similar output to iterations 1-2 - /// 3. Hash-based stall detector triggers after 2 consecutive matches + /// 3. String-based stall detector triggers after 2 consecutive matches /// 4. Sidebar shows: "⚠️ Stalled after 3 iteration(s)" /// 5. AutoAdjust banner: "⚠️ Output repetition detected..." /// @@ -1799,7 +1799,7 @@ public void Scenario_StallDetectionStopsLoop() state.CurrentIteration = 2; Assert.False(state.CheckStall("Second attempt: refactored joins to use CTEs")); - // Iteration 3: exact repeat of iteration 2 β€” CheckStall detects hash match immediately + // Iteration 3: exact repeat of iteration 2 β€” CheckStall detects string match immediately state.CurrentIteration = 3; Assert.True(state.CheckStall("Second attempt: refactored joins to use CTEs")); state.IsStalled = true; // In the real loop, Advance() sets this diff --git a/PolyPilot/Models/ReflectionCycle.cs b/PolyPilot/Models/ReflectionCycle.cs index 0d0485183c..276d40d976 100644 --- a/PolyPilot/Models/ReflectionCycle.cs +++ b/PolyPilot/Models/ReflectionCycle.cs @@ -61,7 +61,7 @@ public partial class ReflectionCycle /// /// Number of consecutive stalls detected. Exposed for diagnostics and warning UI. - /// Not serialized β€” private stall state (_recentHashes, _lastResponse) is not recoverable + /// Not serialized β€” private stall state (_recentResponses, _lastResponse) is not recoverable /// from JSON, so persisting this counter would create inconsistent state after restart. 
/// [System.Text.Json.Serialization.JsonIgnore] @@ -269,7 +269,7 @@ public bool IsGoalMet(string response) /// /// Checks if the response indicates a stall (repetitive or near-identical to previous). - /// Uses exact hash matching over a sliding window and Jaccard token similarity. + /// Uses exact string matching over a sliding window and Jaccard token similarity. /// public bool CheckStall(string response) { diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 43ef137d29..f37c5246b2 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -1096,7 +1096,7 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List if (assignments.Count == 0) { - if (reflectState.CurrentIteration == 0) + if (reflectState.CurrentIteration == 1) { // First iteration with no assignments = orchestrator failed to delegate. // Treat as error, not goal met, so we can retry. 
@@ -1202,13 +1202,18 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List else { reflectState.ConsecutiveStalls = 0; + reflectState.ConsecutiveErrors = 0; } SaveOrganization(); InvokeOnUI(() => OnStateChanged?.Invoke()); } // end try - catch (OperationCanceledException) { throw; } + catch (OperationCanceledException) + { + reflectState.IsCancelled = true; + throw; + } catch (Exception ex) { Debug($"Reflection iteration {reflectState.CurrentIteration} error: {ex.GetType().Name}: {ex.Message}"); diff --git a/PolyPilot/Services/CopilotService.Persistence.cs b/PolyPilot/Services/CopilotService.Persistence.cs index 8995969128..309fd47def 100644 --- a/PolyPilot/Services/CopilotService.Persistence.cs +++ b/PolyPilot/Services/CopilotService.Persistence.cs @@ -117,6 +117,7 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok if (entry.DisplayName.StartsWith("__evaluator_") && !activeEvaluators.Contains(entry.DisplayName)) { Debug($"Pruning ghost evaluator session '{entry.DisplayName}' β€” not referenced by active cycle"); + _closedSessionIds[entry.SessionId] = 0; // prevent merge from re-adding continue; } // Skip if already active From ab5c334b7e67264977ba32643d7d633f37a9723c Mon Sep 17 00:00:00 2001 From: Shane Date: Fri, 20 Feb 2026 07:45:40 -0600 Subject: [PATCH 41/48] Fix 12 multi-agent orchestration issues + 3 found in verification Concurrency fixes: - Swap _sessions before wiring event handler on reconnect (#2) - Block ALL events from orphaned handlers, not just terminal (#3) - Add lock(_imageQueueLock) to all image queue mutations (#4) including dequeue, reinsert, ClearQueue, rename, close, dispose - Clear IsResumed on error and watchdog paths (#5) - Add RunContinuationsAsynchronously to remaining TCS (#6) Architecture/contract fixes: - Add [JsonIgnore] to ShouldWarnOnStall, LastSimilarity (#7) - Fix ConsecutiveErrors increment-before-check ordering (#8) - Set IsCancelled on all non-success termination paths (#10) 
including stall, error-stall, max-iteration, OperationCanceled, empty-assignment error stall, and single-agent StopReflectionCycle - Add session dir deletion for ghost evaluator pruning (#12) - Add CompletedAt to StopReflectionCycle (#12 related) Already correct (no changes needed): - #9: CurrentIteration == 1 check was already fixed - #11: Comments already reference string-based stall detection Documentation: - Update stall detection from 'hash match' to 'string equality' - Update error handling to show ConsecutiveErrors (not ConsecutiveStalls) - Add IsCancelled invariant to exit conditions table - Add 5 new invariants: orphan gate, reconnect ordering, image queue locking, IsResumed clearing, TCS creation - Document empty-assignment retry behavior 817/817 tests pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot/Models/ReflectionCycle.cs | 2 + PolyPilot/Services/CopilotService.Events.cs | 35 ++++++----- .../Services/CopilotService.Organization.cs | 8 ++- .../Services/CopilotService.Persistence.cs | 7 +++ PolyPilot/Services/CopilotService.cs | 26 ++++++-- docs/multi-agent-orchestration.md | 59 +++++++++++++++---- recommendation.md | 41 +++++++++++++ 7 files changed, 147 insertions(+), 31 deletions(-) create mode 100644 recommendation.md diff --git a/PolyPilot/Models/ReflectionCycle.cs b/PolyPilot/Models/ReflectionCycle.cs index 276d40d976..616f6f0cf3 100644 --- a/PolyPilot/Models/ReflectionCycle.cs +++ b/PolyPilot/Models/ReflectionCycle.cs @@ -83,12 +83,14 @@ public partial class ReflectionCycle /// /// True only on the advance where the first stall is detected. /// + [System.Text.Json.Serialization.JsonIgnore] public bool ShouldWarnOnStall { get; private set; } /// /// The Jaccard similarity score from the last stall check (0.0–1.0). /// Exposed so the UI can show "91% similar to previous response". 
/// + [System.Text.Json.Serialization.JsonIgnore] public double LastSimilarity { get; private set; } /// diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs index eb5f3ef4e7..50989cf402 100644 --- a/PolyPilot/Services/CopilotService.Events.cs +++ b/PolyPilot/Services/CopilotService.Events.cs @@ -217,11 +217,9 @@ private void HandleSessionEvent(SessionState state, SessionEvent evt) { Debug($"[EVT-WARN] '{sessionName}' event {evt.GetType().Name} delivered to ORPHANED state " + $"(not in _sessions). This handler should have been detached."); - // Allow non-mutating events (text deltas, tool output) to flow through - // since they only append to shared Info.History. But block terminal events - // that would clear IsProcessing or complete the TCS. - if (evt is SessionIdleEvent or SessionErrorEvent) - return; + // Block ALL events from orphaned state β€” stale deltas, tool events, and + // terminal events can all produce ghost mutations on shared Info.History. + return; } void Invoke(Action action) @@ -502,6 +500,8 @@ await notifService.SendNotificationAsync( CancelProcessingWatchdog(state); Invoke(() => OnError?.Invoke(sessionName, errMsg)); state.Info.IsProcessing = false; + state.Info.IsResumed = false; + Interlocked.Exchange(ref state.SendingFlag, 0); state.ResponseCompletion?.TrySetException(new Exception(errMsg)); Invoke(() => OnStateChanged?.Invoke()); break; @@ -726,12 +726,15 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul state.Info.MessageQueue.RemoveAt(0); // Retrieve any queued image paths for this message List? 
nextImagePaths = null; - if (_queuedImagePaths.TryGetValue(state.Info.Name, out var imageQueue) && imageQueue.Count > 0) + lock (_imageQueueLock) { - nextImagePaths = imageQueue[0]; - imageQueue.RemoveAt(0); - if (imageQueue.Count == 0) - _queuedImagePaths.TryRemove(state.Info.Name, out _); + if (_queuedImagePaths.TryGetValue(state.Info.Name, out var imageQueue) && imageQueue.Count > 0) + { + nextImagePaths = imageQueue[0]; + imageQueue.RemoveAt(0); + if (imageQueue.Count == 0) + _queuedImagePaths.TryRemove(state.Info.Name, out _); + } } var skipHistory = state.Info.ReflectionCycle is { IsActive: true } && @@ -747,7 +750,7 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul await Task.Delay(100); if (_syncContext != null) { - var tcs = new TaskCompletionSource(); + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); _syncContext.Post(async _ => { try @@ -775,8 +778,11 @@ private void CompleteResponse(SessionState state, long? 
expectedGeneration = nul state.Info.MessageQueue.Insert(0, nextPrompt); if (nextImagePaths != null) { - var images = _queuedImagePaths.GetOrAdd(state.Info.Name, _ => new List>()); - images.Insert(0, nextImagePaths); + lock (_imageQueueLock) + { + var images = _queuedImagePaths.GetOrAdd(state.Info.Name, _ => new List>()); + images.Insert(0, nextImagePaths); + } } }); } @@ -998,7 +1004,7 @@ private void HandleReflectionAdvanceResult(SessionState state, string response, await Task.Delay(100); if (_syncContext != null) { - var tcs = new TaskCompletionSource(); + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); _syncContext.Post(async _ => { try @@ -1145,6 +1151,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session Interlocked.Exchange(ref state.ActiveToolCallCount, 0); state.HasUsedToolsThisTurn = false; state.Info.IsProcessing = false; + state.Info.IsResumed = false; Interlocked.Exchange(ref state.SendingFlag, 0); state.Info.History.Add(ChatMessage.SystemMessage( "⚠️ Session appears stuck β€” no response received. 
You can try sending your message again.")); diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index f37c5246b2..29abc5f839 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -1106,6 +1106,7 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List if (reflectState.ConsecutiveErrors >= 3) { reflectState.IsStalled = true; + reflectState.IsCancelled = true; break; } continue; @@ -1193,6 +1194,7 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List if (reflectState.ConsecutiveStalls >= 2) { reflectState.IsStalled = true; + reflectState.IsCancelled = true; AddOrchestratorSystemMessage(orchestratorName, $"⚠️ {reflectState.BuildCompletionSummary()}"); break; } @@ -1220,14 +1222,15 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List // Decrement so we retry the same iteration, not skip ahead reflectState.CurrentIteration--; // But limit retries per iteration to 3 (uses separate error counter) + reflectState.ConsecutiveErrors++; if (reflectState.ConsecutiveErrors >= 3) { reflectState.IsStalled = true; + reflectState.IsCancelled = true; AddOrchestratorSystemMessage(orchestratorName, $"⚠️ Iteration failed after retries: {ex.Message}"); break; } - reflectState.ConsecutiveErrors++; AddOrchestratorSystemMessage(orchestratorName, $"⚠️ Iteration {reflectState.CurrentIteration + 1} error: {ex.Message}. Retrying..."); InvokeOnUI(() => OnStateChanged?.Invoke()); @@ -1237,6 +1240,9 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List if (!reflectState.GoalMet && !reflectState.IsStalled && !reflectState.IsPaused) { + // Max-iteration exit without goal met β€” mark as cancelled so callers + // can distinguish "ran out of iterations" from "succeeded". 
+ reflectState.IsCancelled = true; AddOrchestratorSystemMessage(orchestratorName, $"⏱️ {reflectState.BuildCompletionSummary()}"); } diff --git a/PolyPilot/Services/CopilotService.Persistence.cs b/PolyPilot/Services/CopilotService.Persistence.cs index 309fd47def..4e96288930 100644 --- a/PolyPilot/Services/CopilotService.Persistence.cs +++ b/PolyPilot/Services/CopilotService.Persistence.cs @@ -118,6 +118,13 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok { Debug($"Pruning ghost evaluator session '{entry.DisplayName}' β€” not referenced by active cycle"); _closedSessionIds[entry.SessionId] = 0; // prevent merge from re-adding + // Clean up persisted session directory + var ghostDir = Path.Combine(SessionStatePath, entry.SessionId); + if (Directory.Exists(ghostDir)) + { + try { Directory.Delete(ghostDir, recursive: true); } + catch (Exception delEx) { Debug($"Failed to delete ghost session dir: {delEx.Message}"); } + } continue; } // Skip if already active diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index bf623de4da..812f4c8c02 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -459,7 +459,10 @@ public async Task ReconnectAsync(ConnectionSettings settings, CancellationToken } _sessions.Clear(); _closedSessionIds.Clear(); - _queuedImagePaths.Clear(); + lock (_imageQueueLock) + { + _queuedImagePaths.Clear(); + } _activeSessionName = null; if (_client != null) @@ -1540,8 +1543,8 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis Interlocked.Exchange(ref newState.ProcessingGeneration, Interlocked.Read(ref state.ProcessingGeneration)); newState.HasUsedToolsThisTurn = state.HasUsedToolsThisTurn; - newSession.On(evt => HandleSessionEvent(newState, evt)); _sessions[sessionName] = newState; + newSession.On(evt => HandleSessionEvent(newState, evt)); state = newState; // Start fresh watchdog for the new connection @@ -1693,7 
+1696,10 @@ public void ClearQueue(string sessionName) if (_sessions.TryGetValue(sessionName, out var state)) { state.Info.MessageQueue.Clear(); - _queuedImagePaths.TryRemove(sessionName, out _); + lock (_imageQueueLock) + { + _queuedImagePaths.TryRemove(sessionName, out _); + } OnStateChanged?.Invoke(); } } @@ -1753,6 +1759,8 @@ public void StopReflectionCycle(string sessionName) { var evaluatorName = state.Info.ReflectionCycle.EvaluatorSessionName; state.Info.ReflectionCycle.IsActive = false; + state.Info.ReflectionCycle.IsCancelled = true; + state.Info.ReflectionCycle.CompletedAt = DateTime.Now; // Purge any queued reflection follow-up prompts to prevent zombie iterations state.Info.MessageQueue.RemoveAll(p => ReflectionCycle.IsReflectionFollowUpPrompt(p)); Debug($"Reflection cycle stopped for '{sessionName}'"); @@ -1811,8 +1819,11 @@ public bool RenameSession(string oldName, string newName) state.Info.Name = newName; // Move queued image paths to new name - if (_queuedImagePaths.TryRemove(oldName, out var imageQueue)) - _queuedImagePaths[newName] = imageQueue; + lock (_imageQueueLock) + { + if (_queuedImagePaths.TryRemove(oldName, out var imageQueue)) + _queuedImagePaths[newName] = imageQueue; + } if (!_sessions.TryAdd(newName, state)) { @@ -1879,7 +1890,10 @@ public async Task CloseSessionAsync(string name) return false; // Clean up any queued image paths for this session - _queuedImagePaths.TryRemove(name, out _); + lock (_imageQueueLock) + { + _queuedImagePaths.TryRemove(name, out _); + } // Track as explicitly closed so merge doesn't re-add from file if (state.Info.SessionId != null) diff --git a/docs/multi-agent-orchestration.md b/docs/multi-agent-orchestration.md index 3f17d1c291..7866cdb94d 100644 --- a/docs/multi-agent-orchestration.md +++ b/docs/multi-agent-orchestration.md @@ -64,7 +64,8 @@ while (IsActive && !IsPaused && CurrentIteration < MaxIterations): @worker:worker-2 Write tests for the auth module ParseTaskAssignments extracts these β†’ List - 
If no assignments parsed → orchestrator decided goal is met → break + If no assignments AND iteration == 1 → error (retry up to 3 times) + If no assignments AND iteration > 1 → orchestrator decided goal is met → break Phase 2: DISPATCH └── Send each assignment to its worker in parallel (Task.WhenAll) @@ -103,12 +104,14 @@ while (IsActive && !IsPaused && CurrentIteration < MaxIterations): | Condition | How Detected | State | |-----------|-------------|-------| | ✅ Goal met | Evaluator score ≥ 0.9 or `[[GROUP_REFLECT_COMPLETE]]` sentinel | `GoalMet = true` | -| ⏱️ Max iterations | `CurrentIteration >= MaxIterations` | `IsActive = false` | -| ⚠️ Stalled | 2 consecutive responses with >90% Jaccard similarity | `IsStalled = true` | -| ⚠️ Error budget | 3 consecutive errors within a single iteration | `IsStalled = true` | -| 🛑 Cancelled | CancellationToken triggered | `OperationCanceledException` | +| ⏱️ Max iterations | `CurrentIteration >= MaxIterations` | `IsCancelled = true` | +| ⚠️ Stalled | 2 consecutive responses with >90% Jaccard similarity | `IsStalled = true, IsCancelled = true` | +| ⚠️ Error budget | 3 consecutive errors within a single iteration | `IsStalled = true, IsCancelled = true` | +| 🛑 Cancelled | CancellationToken triggered or user `StopGroupReflection` | `IsCancelled = true` | | ⏸️ Paused | User set `IsPaused = true` | Loop condition fails | +**IsCancelled invariant:** Every non-success exit MUST set `IsCancelled = true`. This allows `BuildCompletionSummary()` to distinguish successful completion from abnormal termination. `GoalMet = true` paths must NOT set `IsCancelled`. + --- ## Invariants — What Breaks If You Violate These @@ -173,13 +176,45 @@ You are a worker agent. Complete the following task thoroughly. {task} ``` +### 6. 
Orphaned Event Handlers Must Not Mutate State + +**Where:** `CopilotService.Events.cs` → `HandleSessionEvent`, `isCurrentState` gate + +**The rule:** When a session is reconnected, the old session's event handler becomes orphaned. ALL events from orphaned handlers must be blocked (not just terminal events). The `isCurrentState` check compares the captured state object with `_sessions[sessionName]` — if they don't match, the handler is orphaned. + +**Why:** Orphaned handlers can produce ghost text deltas, phantom tool executions, and stale history entries that corrupt the current session's state. + +### 7. Session Reconnect: Swap `_sessions` Before Wiring Handler + +**Where:** `CopilotService.cs` → reconnect logic + +**The rule:** `_sessions[sessionName] = newState` MUST execute BEFORE `newSession.On(evt => HandleSessionEvent(newState, evt))`. If the handler is wired first, early events from the new session see `isCurrentState=false` (because `_sessions` still points to old state) and get incorrectly dropped. + +### 8. Image Queue: ALL Mutations Under `_imageQueueLock` + +**Where:** `CopilotService.cs` and `CopilotService.Events.cs` — all `_queuedImagePaths` access + +**The rule:** Every mutation of `_queuedImagePaths` (enqueue, dequeue, remove, clear, rename, close) must be inside `lock (_imageQueueLock)`. The inner lists (`List>`) are not thread-safe. + +### 9. `IsResumed` Must Be Cleared on ALL Terminal Paths + +**Where:** `CopilotService.Events.cs` → `CompleteResponse`, `SessionErrorEvent`, watchdog timeout + +**The rule:** `state.Info.IsResumed = false` must be set in every code path that sets `IsProcessing = false`. Otherwise, subsequent turns inherit the resumed session's 600s tool timeout. + +### 10. All TCS Must Use `RunContinuationsAsynchronously` + +**Where:** All `new TaskCompletionSource()` in `CopilotService.Events.cs` + +**The rule:** Always use `new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously)`. 
Without this, TCS continuations can run inline on the completing thread, causing reentrancy and stack overflows in reflection loops. + --- ## Stall Detection Two mechanisms, both in `ReflectionCycle.CheckStall()`: -1. **Exact hash match** — Sliding window of last 5 response hashes. If current hash matches any → stall. +1. **Exact string match** — Sliding window of last 5 full response strings. If current response matches any (full string equality, no hash) → stall. 2. **Jaccard token similarity** — Tokenize current and previous response by whitespace. If intersection/union > 0.9 → stall. **Tolerance:** 2 consecutive stalls required before stopping. First stall generates a warning. This prevents false positives from models that happen to produce similar phrasing once. @@ -259,19 +294,23 @@ OrganizationState try { // ... full iteration (plan → dispatch → collect → evaluate) } -catch (OperationCanceledException) { throw; } // User cancellation propagates +catch (OperationCanceledException) { + IsCancelled = true; // Mark as cancelled for BuildCompletionSummary + throw; // User cancellation propagates +} catch (Exception ex) { CurrentIteration--; // Retry same iteration, don't skip ahead - ConsecutiveStalls++; // Borrow stall counter as error counter - if (ConsecutiveStalls >= 3) { + ConsecutiveErrors++; // Separate error counter (ConsecutiveStalls tracks repetition) + if (ConsecutiveErrors >= 3) { IsStalled = true; // Give up after 3 retries + IsCancelled = true; // Non-success termination break; } await Task.Delay(2000); // Back off before retry } ``` -This prevents a single transient error (network hiccup, model timeout) from killing the entire reflection cycle. +This prevents a single transient error (network hiccup, model timeout) from killing the entire reflection cycle. `ConsecutiveErrors` resets to 0 on successful iterations (alongside `ConsecutiveStalls`), so errors must be truly consecutive. 
--- diff --git a/recommendation.md b/recommendation.md new file mode 100644 index 0000000000..df27853302 --- /dev/null +++ b/recommendation.md @@ -0,0 +1,41 @@ +# Recommendation: Hybrid Architecture (Option C) + +I recommend adopting **Option C (Hybrid)** as the architectural target, implemented in two phases. + +## Phase 1 (Immediate PR): "Team Context" +Implement **Option A** behavior using the **Option C** data model. +* **Mechanism:** When a user assigns a Repository/Worktree to a `SessionGroup`, propagate that `WorktreeId` to the `SessionMeta` of **every agent** in that group. +* **Result:** All agents share the same directory and branch. +* **User Experience:** "I assign this team to feature-branch-x." + +## Phase 2 (Future): "Agent Independence" +Expose the existing per-agent `WorktreeId` in the UI for advanced scenarios. +* **Mechanism:** Allow power users to override the `WorktreeId` for specific agents (e.g., "Reviewer Agent" checks out `main` while "Coder Agent" is on `feature-branch`). +* **User Experience:** "I want this specific agent to look at a different version of the code." + +## Reasoning & Tradeoffs + +1. **Future-Proofing (Why not A):** `SessionMeta` already has `WorktreeId`. Hardcoding a single `WorktreeId` on `SessionGroup` would restrict us later. By using the per-session field (even if they all point to the same ID initially), we keep the architecture flexible for free. +2. **Complexity Management (Why not B):** Forcing per-agent worktrees now creates massive complexity (merging, disk space, synchronization). Shared worktrees are sufficient for 90% of current use cases (collaborative coding, pair programming). +3. **Correct Abstraction:** A "Team" usually works on a "Project" (Repo/Branch). It is the natural default. Divergence is an exception. + +## Implementation Plan + +1. **Update `CreateMultiAgentGroupAsync`:** + * Accept an optional `repoId` and `worktreeId`. 
+ * If provided, assign `WorktreeId` to the `SessionMeta` of the Orchestrator and all Workers. + * Ensure the `SessionGroup` also stores the `RepoId` for context. + +2. **Update `RepoManager.LinkSessionToWorktree`:** + * Ensure it can handle multiple sessions linking to the same worktree (currently it has a single `SessionName` field, which might be a limitation if strict 1:1 mapping is enforced). **Crucial Check:** `WorktreeInfo.SessionName` is a single string. This needs to change to support multiple sessions (or be ignored for multi-agent groups). + +## Interaction with Reflection/Orchestration + +* **Orchestrator Mode:** The Orchestrator agent typically plans and delegates. Sharing a worktree means the Orchestrator sees the *exact state* the workers are producing in real-time. This is generally beneficial for immediate feedback loops. +* **OrchestratorReflect Mode:** In this mode, the system might benefit from an "isolation sandbox" where a worker tries a change in a separate worktree, runs tests, and only merges if successful. This is a strong argument for **Option C (Hybrid)** in the long term. A shared worktree (Option A) risks breaking the build for the whole team during experimental changes. +* **Recommendation:** Start with shared worktrees for simplicity. For advanced reflection cycles that require safe experimentation, leverage the **Option C** capability later to spawn ephemeral worktrees for specific worker tasks. + +## Critical Code Change Required +`WorktreeInfo` currently has `public string? SessionName { get; set; }`. +* **Issue:** This implies 1 worktree = 1 session. +* **Fix:** For Phase 1, treat `SessionName` as the "Primary/Owner" session (e.g., the Orchestrator). The UI should rely on `SessionMeta.WorktreeId` to find *all* sessions associated with a worktree, rather than relying on the back-pointer in `WorktreeInfo`. 
From 8554fbcfcccd3f6bf78031678d3a73b522f8b83f Mon Sep 17 00:00:00 2001 From: Shane Date: Fri, 20 Feb 2026 09:05:22 -0600 Subject: [PATCH 42/48] Add Squad integration design to docs, scenarios, and tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Architecture spec: new 'Squad Integration' section with mapping table, discovery flow, preset priority, security constraints, GroupPreset extensions - Copilot instructions: document Squad discovery from .squad/ directories - Scenarios: 7 new CDP scenarios for Squad discovery, charterβ†’system prompt, decisions.md injection, legacy .ai-team/ compat, preset priority, graceful handling of missing files, worker descriptions in orchestrator planning - Tests: 2 new ScenarioReferenceTests validating multi-agent scenario IDs and verifying Squad integration scenarios are present (819 tests passing) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 5 +- PolyPilot.Tests/ScenarioReferenceTests.cs | 29 +++++ .../Scenarios/multi-agent-scenarios.json | 103 +++++++++++++++++- docs/multi-agent-orchestration.md | 66 ++++++++++- 4 files changed, 199 insertions(+), 4 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index d4725ac9dc..39c7f61b36 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -56,7 +56,10 @@ For Android, always run `adb reverse tcp:9223 tcp:9223` after deploy. ## Architecture -**See `docs/multi-agent-orchestration.md` for the multi-agent architecture spec** (orchestration modes, reflection loop, sentinel protocol, invariants). Test scenarios in `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`. Read these before modifying orchestration, reconciliation, or TCS completion logic. 
+**See `docs/multi-agent-orchestration.md` for the multi-agent architecture spec** (orchestration modes, reflection loop, sentinel protocol, invariants, Squad integration). Test scenarios in `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`. Read these before modifying orchestration, reconciliation, or TCS completion logic. + +### Squad Integration +PolyPilot discovers [bradygaster/squad](https://github.com/bradygaster/squad) team definitions from `.squad/` (or legacy `.ai-team/`) directories in the worktree root. Each agent's `charter.md` becomes a worker system prompt, `team.md` defines the roster, and `decisions.md` provides shared context. Repo-level teams appear as presets in the multi-agent group creation flow. PolyPilot never writes to `.squad/` — it's read-only. This is a .NET MAUI Blazor Hybrid app targeting Mac Catalyst, Android, and iOS. It manages multiple GitHub Copilot CLI sessions through a native GUI. diff --git a/PolyPilot.Tests/ScenarioReferenceTests.cs b/PolyPilot.Tests/ScenarioReferenceTests.cs index 6fff147301..e945a3b694 100644 --- a/PolyPilot.Tests/ScenarioReferenceTests.cs +++ b/PolyPilot.Tests/ScenarioReferenceTests.cs @@ -189,4 +189,33 @@ public void AllScenarios_HaveUniqueIds() Assert.Equal(ids.Count, ids.Distinct().Count()); } + + [Fact] + public void MultiAgentScenarios_HaveUniqueIds() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + .ToList(); + + Assert.Equal(ids.Count, ids.Distinct().Count()); + } + + [Fact] + public void MultiAgentScenarios_IncludeSquadIntegration() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + 
.ToHashSet(); + + Assert.Contains("squad-discovery-creates-preset", ids); + Assert.Contains("squad-charter-becomes-system-prompt", ids); + Assert.Contains("squad-decisions-shared-context", ids); + Assert.Contains("squad-legacy-ai-team-compat", ids); + } } diff --git a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json index 0d017aae36..ee9f6ce2df 100644 --- a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json +++ b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json @@ -1,10 +1,10 @@ { - "description": "Multi-agent orchestration scenarios for PolyPilot. Tests cover the OrchestratorReflect loop, stall detection, reconciliation stability, and group lifecycle. Each scenario can be executed against a running app using MauiDevFlow CDP commands. See docs/multi-agent-orchestration.md for the architecture spec.", + "description": "Multi-agent orchestration scenarios for PolyPilot. Tests cover the OrchestratorReflect loop, stall detection, reconciliation stability, group lifecycle, and Squad integration. Each scenario can be executed against a running app using MauiDevFlow CDP commands. See docs/multi-agent-orchestration.md for the architecture spec.", "prerequisites": { "build": "cd PolyPilot && .\\relaunch.ps1", "waitForAgent": "maui-devflow MAUI status", "initialMode": "Persistent", - "notes": "App must be in Persistent or Demo mode. Multi-agent features require at least one worktree configured." + "notes": "App must be in Persistent or Demo mode. Multi-agent features require at least one worktree configured. Squad scenarios require a worktree with a .squad/ directory." 
}, "scenarios": [ { @@ -169,6 +169,105 @@ { "action": "assertSessionMeta", "role": "Orchestrator", "hasPreferredModel": true }, { "action": "assertAllWorkers", "havePreferredModel": true } ] + }, + { + "id": "squad-discovery-creates-preset", + "name": "Squad directory discovered as repo-level preset", + "description": "Verifies that when a worktree contains a .squad/ directory with team.md and agent charters, PolyPilot discovers it and presents it as a selectable preset in the multi-agent group creation flow.", + "invariants": [ + "Squad preset appears in 'From Repo' section of preset picker", + "Preset has IsRepoLevel == true", + "Preset worker count matches number of non-scribe agents in .squad/agents/" + ], + "steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/reviewer && echo '# Team\n| Member | Role |\n|--------|------|\n| reviewer | Code Reviewer |' > .squad/team.md && echo 'You are a code reviewer.' > .squad/agents/reviewer/charter.md" }, + { "action": "navigate", "route": "/multi-agent" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertPresetVisible", "section": "From Repo", "name": "Squad Team" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-charter-becomes-system-prompt", + "name": "Squad agent charter.md becomes worker system prompt", + "description": "Verifies that when creating a group from a Squad-discovered preset, each agent's charter.md content is set as the worker's SessionMeta.SystemPrompt.", + "invariants": [ + "Worker SystemPrompt contains charter.md content", + "Orchestrator planning prompt includes worker specializations" + ], + "steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/security .squad/agents/perf && echo '# Team\n| Member | Role |\n|--------|------|\n| security | Security Auditor |\n| perf | Performance Analyst |' > .squad/team.md && echo 'You are a security auditor. Focus on OWASP Top 10.' 
> .squad/agents/security/charter.md && echo 'You are a performance analyst. Focus on latency and throughput.' > .squad/agents/perf/charter.md" }, + { "action": "createGroupFromPreset", "preset": "Squad Team", "source": "repo" }, + { "action": "readOrgJson", "capture": "org" }, + { "action": "assertSessionMeta", "sessionNameContains": "security", "systemPromptContains": "OWASP Top 10" }, + { "action": "assertSessionMeta", "sessionNameContains": "perf", "systemPromptContains": "latency and throughput" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-decisions-shared-context", + "name": "Squad decisions.md injected as shared context", + "description": "Verifies that .squad/decisions.md content is prepended to all worker prompts as shared team knowledge.", + "invariants": [ + "All workers receive decisions.md content in their prompt context", + "Decisions content appears before the worker's assigned task" + ], + "steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/worker1 && echo '# Team\n| Member | Role |\n|--------|------|\n| worker1 | Developer |' > .squad/team.md && echo 'Always use TypeScript. Never use any.' > .squad/decisions.md && echo 'You are a developer.' 
> .squad/agents/worker1/charter.md" }, + { "action": "createGroupFromPreset", "preset": "Squad Team", "source": "repo" }, + { "action": "sendPrompt", "text": "Write a hello world" }, + { "action": "waitForAllSessions", "state": "idle", "timeout": 120 }, + { "action": "assertWorkerPromptContains", "text": "Always use TypeScript" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-legacy-ai-team-compat", + "name": "Legacy .ai-team/ directory also discovered", + "description": "Verifies backward compatibility: .ai-team/ is discovered if .squad/ doesn't exist (Squad v0.4.x compat).", + "steps": [ + { "action": "shell", "command": "mkdir -p .ai-team/agents/dev && echo '# Team\n| Member | Role |\n|--------|------|\n| dev | Developer |' > .ai-team/team.md && echo 'You are a developer.' > .ai-team/agents/dev/charter.md" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertPresetVisible", "section": "From Repo", "name": "Squad Team" }, + { "action": "shell", "command": "rm -rf .ai-team" } + ] + }, + { + "id": "squad-preset-priority-over-builtin", + "name": "Squad preset shadows built-in with same name", + "description": "Verifies that if a Squad team has the same name as a built-in preset, the Squad version takes priority within that worktree.", + "steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/reviewer && echo '# Team\nCode Review Team\n| Member | Role |\n|--------|------|\n| reviewer | Reviewer |' > .squad/team.md && echo 'Custom repo reviewer.' 
> .squad/agents/reviewer/charter.md" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertPresetInSection", "name": "Code Review Team", "section": "From Repo", "note": "Repo version should shadow built-in" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-missing-files-graceful", + "name": "Missing Squad files handled gracefully", + "description": "Verifies that partial .squad/ directories (missing team.md, missing charter.md) are handled without errors.", + "steps": [ + { "action": "shell", "command": "mkdir -p .squad/agents/orphan" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertNoPresetInSection", "section": "From Repo", "note": "No team.md = no preset" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "worker-system-prompt-in-orchestrator-plan", + "name": "Worker system prompts visible in orchestrator planning", + "description": "Verifies that BuildOrchestratorPlanningPrompt includes each worker's SystemPrompt description so the orchestrator can route tasks based on expertise.", + "invariants": [ + "Orchestrator planning prompt lists worker names with their specializations", + "Workers with no SystemPrompt are listed as generic workers" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "sendPrompt", "text": "Review the authentication module" }, + { "action": "waitForPhase", "phase": "Planning", "timeout": 30 }, + { "action": "assertOrchestratorReceivedWorkerDescriptions" } + ] } ] } diff --git a/docs/multi-agent-orchestration.md b/docs/multi-agent-orchestration.md index 7866cdb94d..85909b090a 100644 --- a/docs/multi-agent-orchestration.md +++ b/docs/multi-agent-orchestration.md @@ -332,8 +332,9 @@ If no `@worker:` assignments are found, the orchestrator handled the request dir ## Testing ### Unit Tests -- **`MultiAgentRegressionTests.cs`** (30 tests) β€” JSON corruption, reconciliation 
scattering, preset markers, mode enums, reflection loop logic, TCS ordering, lifecycle scenarios +- **`MultiAgentRegressionTests.cs`** (37 tests) — JSON corruption, reconciliation scattering, preset markers, mode enums, reflection loop logic, TCS ordering, lifecycle scenarios, persona tests - **`SessionOrganizationTests.cs`** → `GroupingStabilityTests` (14 tests) — JSON round-trips, delete+reconcile, orphan handling +- **`ScenarioReferenceTests.cs`** — Validates scenario JSON structure, unique IDs, Squad integration scenario presence ### Executable Scenarios - **`PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`** — CDP-based scenarios for MauiDevFlow testing against a running app @@ -344,3 +345,66 @@ If no `@worker:` assignments are found, the orchestrator handled the request dir 3. **Changed TCS/event handling?** → Run `ProcessingWatchdogTests` + verify reflection loop completes 4. **Changed sentinel parsing?** → Run `ReflectionCycleTests` 5. **Changed session persistence?** → Run full suite, verify `organization.json` survives restart + +--- + +## Squad Integration — Repo-Level Team Discovery + +### Overview + +PolyPilot can discover and load team definitions from [bradygaster/squad](https://github.com/bradygaster/squad) format directories (`.squad/` or the legacy `.ai-team/`). Any repository that has been "squadified" automatically gets its teams available as presets in PolyPilot's multi-agent group creation flow. 
+ +### How Squad Maps to PolyPilot + +| Squad File | PolyPilot Concept | How It's Used | +|------------|-------------------|---------------| +| `.squad/team.md` | `SessionGroup` + workers | Roster parsed for agent names and roles | +| `.squad/agents/{name}/charter.md` | `SessionMeta.SystemPrompt` | Charter content becomes worker system prompt | +| `.squad/routing.md` | Orchestrator planning context | Injected into `BuildOrchestratorPlanningPrompt` | +| `.squad/decisions.md` | Shared worker context | Prepended to all worker prompts as shared team knowledge | +| Squad coordinator | `MultiAgentMode.OrchestratorReflect` | Squad's iterative coordinator maps to PolyPilot's reflect loop | + +### Discovery Flow + +1. User clicks **🤖 Multi** → selects a worktree +2. `SquadDiscovery.Discover(worktreePath)` scans for `.squad/` or `.ai-team/` +3. If found, parses `team.md` + agent charters → builds a `GroupPreset` +4. Preset appears in the picker under **"📂 From Repo (Squad)"** section, above built-in presets +5. User clicks the Squad preset → `CreateGroupFromPresetAsync` creates the group with all agents and their charters as system prompts + +### Preset Priority (Three-Tier Cascade) + +``` +Built-in presets < User presets (~/.polypilot/presets.json) < Repo teams (.squad/) +``` + +Repo teams shadow built-in/user presets with the same name when working in that repo's worktree. 
+ +### What PolyPilot Does NOT Do with Squad + +- **Never writes to `.squad/`** — PolyPilot is read-only; the repo files are the source of truth +- **No `history.md` persistence** — Squad agents accumulate learnings; PolyPilot sessions are stateless across restarts +- **No Scribe agent** — Squad's silent decision-logger is not replicated +- **No GitHub Actions integration** — Squad's label triage workflows are out of scope +- **No casting system** — Squad's thematic name universes; PolyPilot uses agent names as-is + +### Security + +- Agent charters (system prompts) are capped at 4,000 characters +- Model slugs are validated against `ModelCapabilities.AllModels`; unknown slugs fall back to app default +- Repo presets show a **📂** source badge so users know the definition came from the repo +- No file-read directives or code execution from parsed files + +### GroupPreset Extensions for Squad Support + +```csharp +public record GroupPreset(...) +{ + public bool IsUserDefined { get; init; } + public bool IsRepoLevel { get; init; } // NEW: loaded from .squad/ + public string? SourcePath { get; init; } // NEW: path to .squad/ dir + public string?[]? WorkerSystemPrompts { get; init; } + public string?[]? WorkerSystemPromptFiles { get; init; } // NEW: file refs + public string? 
SharedContext { get; init; } // NEW: from decisions.md +} +``` From d09cf8798567979e681ae29b632105e0360b49ae Mon Sep 17 00:00:00 2001 From: Shane Date: Fri, 20 Feb 2026 09:26:13 -0600 Subject: [PATCH 43/48] Implement Squad integration: discovery, presets, UI, and tests - Add SquadDiscovery.cs: parses .squad/ and .ai-team/ directories into GroupPresets (team.md roster, agent charters as system prompts, decisions.md shared context, routing.md orchestrator context) - Extend GroupPreset with IsRepoLevel, SourcePath, SharedContext, RoutingContext - Extend SessionGroup with SharedContext and RoutingContext for orchestration - Add three-tier preset merge in UserPresets.GetAll(baseDir, repoWorkingDirectory) - Update SessionSidebar.razor: sectioned preset picker with From Repo / Built-in / My Presets sections and repo source badge - Inject shared context (decisions.md) into worker prompts via ExecuteWorkerAsync - Inject routing context into orchestrator planning prompt - Store Squad context on group during CreateGroupFromPresetAsync - Add 20 SquadDiscoveryTests covering discovery, parsing, merge, edge cases - Add test data fixtures for .squad/ and .ai-team/ formats - Document copilot-instructions.md auto-inheritance via SDK Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/MultiAgentRegressionTests.cs | 2 +- PolyPilot.Tests/PolyPilot.Tests.csproj | 1 + PolyPilot.Tests/SquadDiscoveryTests.cs | 260 ++++++++++++++++++ .../.ai-team/agents/dev/charter.md | 1 + .../TestData/legacy-ai-team/.ai-team/team.md | 4 + .../.squad/agents/perf-analyst/charter.md | 9 + .../.squad/agents/scribe/charter.md | 1 + .../agents/security-reviewer/charter.md | 9 + .../TestData/squad-sample/.squad/decisions.md | 5 + .../TestData/squad-sample/.squad/routing.md | 6 + .../TestData/squad-sample/.squad/team.md | 7 + .../Components/Layout/SessionSidebar.razor | 50 +++- .../Layout/SessionSidebar.razor.css | 19 ++ PolyPilot/Models/ModelCapabilities.cs | 31 ++- 
PolyPilot/Models/SessionOrganization.cs | 10 + PolyPilot/Models/SquadDiscovery.cs | 174 ++++++++++++ .../Services/CopilotService.Organization.cs | 29 +- 17 files changed, 602 insertions(+), 16 deletions(-) create mode 100644 PolyPilot.Tests/SquadDiscoveryTests.cs create mode 100644 PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/agents/dev/charter.md create mode 100644 PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/team.md create mode 100644 PolyPilot.Tests/TestData/squad-sample/.squad/agents/perf-analyst/charter.md create mode 100644 PolyPilot.Tests/TestData/squad-sample/.squad/agents/scribe/charter.md create mode 100644 PolyPilot.Tests/TestData/squad-sample/.squad/agents/security-reviewer/charter.md create mode 100644 PolyPilot.Tests/TestData/squad-sample/.squad/decisions.md create mode 100644 PolyPilot.Tests/TestData/squad-sample/.squad/routing.md create mode 100644 PolyPilot.Tests/TestData/squad-sample/.squad/team.md create mode 100644 PolyPilot/Models/SquadDiscovery.cs diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs index 4f23317ff2..f39469b805 100644 --- a/PolyPilot.Tests/MultiAgentRegressionTests.cs +++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs @@ -1039,7 +1039,7 @@ public void OrchestratorPlanningPrompt_IncludesWorkerPersonas() Assert.NotNull(method); var workers = new List { "sec-worker", "perf-worker" }; - var result = (string)method!.Invoke(svc, new object?[] { "Review this code", workers, null })!; + var result = (string)method!.Invoke(svc, new object?[] { "Review this code", workers, null, null })!; Assert.Contains("security auditor", result); Assert.Contains("performance optimizer", result); diff --git a/PolyPilot.Tests/PolyPilot.Tests.csproj b/PolyPilot.Tests/PolyPilot.Tests.csproj index 7e3a2a0c5f..1d1eb5a264 100644 --- a/PolyPilot.Tests/PolyPilot.Tests.csproj +++ b/PolyPilot.Tests/PolyPilot.Tests.csproj @@ -52,6 +52,7 @@ + diff --git a/PolyPilot.Tests/SquadDiscoveryTests.cs 
b/PolyPilot.Tests/SquadDiscoveryTests.cs new file mode 100644 index 0000000000..955b0f3f69 --- /dev/null +++ b/PolyPilot.Tests/SquadDiscoveryTests.cs @@ -0,0 +1,260 @@ +using PolyPilot.Models; + +namespace PolyPilot.Tests; + +public class SquadDiscoveryTests +{ + private static string TestDataDir => Path.Combine( + AppContext.BaseDirectory, "..", "..", "..", "TestData"); + + private static string SquadSampleDir => Path.Combine(TestDataDir, "squad-sample"); + private static string LegacyAiTeamDir => Path.Combine(TestDataDir, "legacy-ai-team"); + + // --- FindSquadDirectory --- + + [Fact] + public void FindSquadDirectory_PrefersDotSquad() + { + var result = SquadDiscovery.FindSquadDirectory(SquadSampleDir); + Assert.NotNull(result); + Assert.EndsWith(".squad", result); + } + + [Fact] + public void FindSquadDirectory_FallsBackToAiTeam() + { + var result = SquadDiscovery.FindSquadDirectory(LegacyAiTeamDir); + Assert.NotNull(result); + Assert.EndsWith(".ai-team", result); + } + + [Fact] + public void FindSquadDirectory_ReturnsNull_WhenNeitherExists() + { + var result = SquadDiscovery.FindSquadDirectory(Path.GetTempPath()); + Assert.Null(result); + } + + // --- ParseTeamName --- + + [Fact] + public void ParseTeamName_ExtractsH1Heading() + { + var content = "# The Review Squad\n\nSome description\n"; + Assert.Equal("The Review Squad", SquadDiscovery.ParseTeamName(content)); + } + + [Fact] + public void ParseTeamName_ReturnsNull_WhenNoHeading() + { + var content = "Just a table\n| Member | Role |\n"; + Assert.Null(SquadDiscovery.ParseTeamName(content)); + } + + // --- ParseRosterNames --- + + [Fact] + public void ParseRosterNames_ExtractsAgentNames() + { + var content = "# Team\n| Member | Role |\n|--------|------|\n| security-reviewer | Auditor |\n| perf-analyst | Analyst |"; + var names = SquadDiscovery.ParseRosterNames(content); + Assert.Contains("security-reviewer", names); + Assert.Contains("perf-analyst", names); + Assert.DoesNotContain("Member", names); + 
Assert.DoesNotContain("---", names); + } + + // --- DiscoverAgents --- + + [Fact] + public void DiscoverAgents_SkipsScribe() + { + var squadDir = Path.Combine(SquadSampleDir, ".squad"); + var agents = SquadDiscovery.DiscoverAgents(squadDir); + Assert.DoesNotContain(agents, a => a.Name.Equals("scribe", StringComparison.OrdinalIgnoreCase)); + } + + [Fact] + public void DiscoverAgents_FindsRealAgents() + { + var squadDir = Path.Combine(SquadSampleDir, ".squad"); + var agents = SquadDiscovery.DiscoverAgents(squadDir); + Assert.Equal(2, agents.Count); // security-reviewer + perf-analyst (not scribe) + Assert.Contains(agents, a => a.Name == "security-reviewer"); + Assert.Contains(agents, a => a.Name == "perf-analyst"); + } + + [Fact] + public void DiscoverAgents_ReadsCharterContent() + { + var squadDir = Path.Combine(SquadSampleDir, ".squad"); + var agents = SquadDiscovery.DiscoverAgents(squadDir); + var security = agents.First(a => a.Name == "security-reviewer"); + Assert.NotNull(security.Charter); + Assert.Contains("OWASP Top 10", security.Charter); + } + + // --- Discover (full integration) --- + + [Fact] + public void Discover_ReturnsPreset_FromSquadDir() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + Assert.Single(presets); + var preset = presets[0]; + Assert.Equal("The Review Squad", preset.Name); + Assert.True(preset.IsRepoLevel); + Assert.Equal(MultiAgentMode.OrchestratorReflect, preset.Mode); + Assert.Equal(2, preset.WorkerModels.Length); + } + + [Fact] + public void Discover_SetsSystemPrompts_FromCharters() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + var preset = presets[0]; + Assert.NotNull(preset.WorkerSystemPrompts); + Assert.Equal(2, preset.WorkerSystemPrompts.Length); + + // At least one should contain OWASP (security-reviewer's charter) + Assert.True(preset.WorkerSystemPrompts.Any(p => p != null && p.Contains("OWASP")), + "Expected a worker system prompt containing 'OWASP'"); + // At least one should contain latency 
(perf-analyst's charter) + Assert.True(preset.WorkerSystemPrompts.Any(p => p != null && p.Contains("Latency")), + "Expected a worker system prompt containing 'Latency'"); + } + + [Fact] + public void Discover_ReadsDecisions_AsSharedContext() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + var preset = presets[0]; + Assert.NotNull(preset.SharedContext); + Assert.Contains("structured logging", preset.SharedContext); + Assert.Contains("async/await", preset.SharedContext); + } + + [Fact] + public void Discover_ReadsRouting_AsRoutingContext() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + var preset = presets[0]; + Assert.NotNull(preset.RoutingContext); + Assert.Contains("security-reviewer", preset.RoutingContext); + } + + [Fact] + public void Discover_LegacyAiTeam_Works() + { + var presets = SquadDiscovery.Discover(LegacyAiTeamDir); + Assert.Single(presets); + var preset = presets[0]; + Assert.Equal("Legacy Team", preset.Name); + Assert.True(preset.IsRepoLevel); + Assert.Single(preset.WorkerModels); + } + + [Fact] + public void Discover_ReturnsEmpty_WhenNoSquadDir() + { + var presets = SquadDiscovery.Discover(Path.GetTempPath()); + Assert.Empty(presets); + } + + [Fact] + public void Discover_ReturnsEmpty_WhenNoTeamMd() + { + // Create temp dir with .squad/ but no team.md + var tempDir = Path.Combine(Path.GetTempPath(), $"squad-test-{Guid.NewGuid():N}"); + try + { + Directory.CreateDirectory(Path.Combine(tempDir, ".squad", "agents", "test")); + File.WriteAllText(Path.Combine(tempDir, ".squad", "agents", "test", "charter.md"), "test charter"); + + var presets = SquadDiscovery.Discover(tempDir); + Assert.Empty(presets); + } + finally + { + Directory.Delete(tempDir, true); + } + } + + [Fact] + public void Discover_TruncatesLongCharters() + { + var tempDir = Path.Combine(Path.GetTempPath(), $"squad-test-{Guid.NewGuid():N}"); + try + { + Directory.CreateDirectory(Path.Combine(tempDir, ".squad", "agents", "verbose")); + 
File.WriteAllText(Path.Combine(tempDir, ".squad", "team.md"), "# Long Charter Test\n| Member | Role |\n|---|---|\n| verbose | Talker |"); + File.WriteAllText(Path.Combine(tempDir, ".squad", "agents", "verbose", "charter.md"), + new string('x', 5000)); // Over 4000 char limit + + var presets = SquadDiscovery.Discover(tempDir); + Assert.Single(presets); + Assert.True(presets[0].WorkerSystemPrompts![0]!.Length <= 4000); + } + finally + { + Directory.Delete(tempDir, true); + } + } + + // --- Three-tier merge --- + + [Fact] + public void GetAll_WithRepoPath_IncludesSquadPresets() + { + var all = UserPresets.GetAll(Path.GetTempPath(), SquadSampleDir); + Assert.Contains(all, p => p.Name == "The Review Squad" && p.IsRepoLevel); + // Built-in should also be present + Assert.Contains(all, p => p.Name == "Code Review Team"); + } + + [Fact] + public void GetAll_WithoutRepoPath_NoSquadPresets() + { + var all = UserPresets.GetAll(Path.GetTempPath()); + Assert.DoesNotContain(all, p => p.IsRepoLevel); + } + + [Fact] + public void GetAll_RepoOverrides_BuiltInByName() + { + // Create a temp Squad dir with a preset named "Code Review Team" + var tempDir = Path.Combine(Path.GetTempPath(), $"squad-test-{Guid.NewGuid():N}"); + try + { + Directory.CreateDirectory(Path.Combine(tempDir, ".squad", "agents", "reviewer")); + File.WriteAllText(Path.Combine(tempDir, ".squad", "team.md"), + "# Code Review Team\n| Member | Role |\n|---|---|\n| reviewer | Reviewer |"); + File.WriteAllText(Path.Combine(tempDir, ".squad", "agents", "reviewer", "charter.md"), + "Custom repo reviewer."); + + var all = UserPresets.GetAll(Path.GetTempPath(), tempDir); + var crt = all.Single(p => p.Name == "Code Review Team"); + Assert.True(crt.IsRepoLevel, "Repo version should shadow built-in"); + } + finally + { + Directory.Delete(tempDir, true); + } + } + + [Fact] + public void Discover_SetsSourcePath() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + Assert.Single(presets); + 
Assert.NotNull(presets[0].SourcePath); + Assert.True(presets[0].SourcePath!.EndsWith(".squad")); + } + + [Fact] + public void Discover_HasEmoji() + { + var presets = SquadDiscovery.Discover(SquadSampleDir); + Assert.Equal("🫑", presets[0].Emoji); + } +} diff --git a/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/agents/dev/charter.md b/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/agents/dev/charter.md new file mode 100644 index 0000000000..9753f3475a --- /dev/null +++ b/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/agents/dev/charter.md @@ -0,0 +1 @@ +You are a full-stack developer. diff --git a/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/team.md b/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/team.md new file mode 100644 index 0000000000..5111c37631 --- /dev/null +++ b/PolyPilot.Tests/TestData/legacy-ai-team/.ai-team/team.md @@ -0,0 +1,4 @@ +# Legacy Team +| Member | Role | +|--------|------| +| dev | Developer | diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/agents/perf-analyst/charter.md b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/perf-analyst/charter.md new file mode 100644 index 0000000000..d4420ee536 --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/perf-analyst/charter.md @@ -0,0 +1,9 @@ +You are a performance analyst focused on runtime efficiency. + +Focus on: +- Latency bottlenecks and hot paths +- Memory allocation patterns and GC pressure +- Database query optimization +- Caching opportunities + +Provide concrete metrics and benchmarks where possible. diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/agents/scribe/charter.md b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/scribe/charter.md new file mode 100644 index 0000000000..61d89ce050 --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/scribe/charter.md @@ -0,0 +1 @@ +You are a scribe. Log all decisions and session activity. 
diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/agents/security-reviewer/charter.md b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/security-reviewer/charter.md new file mode 100644 index 0000000000..0ffd2c207d --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/agents/security-reviewer/charter.md @@ -0,0 +1,9 @@ +You are a security auditor specializing in application security. + +Focus on: +- OWASP Top 10 vulnerabilities +- Authentication and authorization flaws +- Input validation and injection prevention +- Secrets management + +Rate each finding as Critical, High, Medium, or Low severity. diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/decisions.md b/PolyPilot.Tests/TestData/squad-sample/.squad/decisions.md new file mode 100644 index 0000000000..ea4cba81ca --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/decisions.md @@ -0,0 +1,5 @@ +# Team Decisions + +- Always use structured logging with ILogger +- Prefer async/await over blocking calls +- All public APIs must have XML doc comments diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/routing.md b/PolyPilot.Tests/TestData/squad-sample/.squad/routing.md new file mode 100644 index 0000000000..8cba938281 --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/routing.md @@ -0,0 +1,6 @@ +# Work Routing + +| Pattern | Owner | Reason | +|---------|-------|--------| +| `src/auth/**` | security-reviewer | Authentication code | +| `*.perf.cs` | perf-analyst | Performance-related files | diff --git a/PolyPilot.Tests/TestData/squad-sample/.squad/team.md b/PolyPilot.Tests/TestData/squad-sample/.squad/team.md new file mode 100644 index 0000000000..1e09a03187 --- /dev/null +++ b/PolyPilot.Tests/TestData/squad-sample/.squad/team.md @@ -0,0 +1,7 @@ +# The Review Squad + +| Member | Role | Expertise | +|--------|------|-----------| +| security-reviewer | Security Auditor | OWASP, CVE analysis, auth flaws | +| perf-analyst | Performance Analyst | Latency, 
throughput, memory profiling | +| scribe | Scribe | Session logging and decision capture | diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index 4ff0c68717..2c8e3eaf29 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -241,13 +241,51 @@ else 🌿 @pendingMultiAgentWorktree.Branch - @foreach (var preset in UserPresets.GetAll(CopilotService.BaseDir)) + @{ + var allPresets = UserPresets.GetAll(CopilotService.BaseDir, pendingMultiAgentWorktree?.Path); + var repoPresets = allPresets.Where(p => p.IsRepoLevel).ToArray(); + var builtInPresets = allPresets.Where(p => !p.IsRepoLevel && !p.IsUserDefined).ToArray(); + var userPresets = allPresets.Where(p => p.IsUserDefined).ToArray(); + } + @if (repoPresets.Any()) { - var p = preset; - +
📂 From Repo
+ @foreach (var preset in repoPresets) + { + var p = preset; + + } + } + @if (builtInPresets.Any()) + { + @if (repoPresets.Any()) + { +
βš™οΈ Built-in
+ } + @foreach (var preset in builtInPresets) + { + var p = preset; + + } + } + @if (userPresets.Any()) + { +
👤 My Presets
+ @foreach (var preset in userPresets) + { + var p = preset; + + } }
or create empty team:
Whether this is a user-created preset (vs built-in).
public bool IsUserDefined { get; init; } + /// Whether this preset was loaded from a repo-level team definition (.squad/). + public bool IsRepoLevel { get; init; } + + /// Path to the source directory (e.g., ".squad/") for repo-level presets. + public string? SourcePath { get; init; } + /// /// Per-worker system prompts, indexed to match WorkerModels. /// Null or shorter array = remaining workers get generic prompt. /// public string?[]? WorkerSystemPrompts { get; init; } + /// + /// Shared context from decisions.md or similar, prepended to all worker prompts. + /// + public string? SharedContext { get; init; } + + /// + /// Routing rules from routing.md, injected into orchestrator planning prompt. + /// + public string? RoutingContext { get; init; } + public static readonly GroupPreset[] BuiltIn = new[] { new GroupPreset( @@ -235,11 +251,18 @@ public static void Save(string baseDir, List presets) catch { /* best-effort persistence */ } } - /// Get all presets: built-in + user-defined. - public static GroupPreset[] GetAll(string baseDir) + /// Get all presets: built-in + user-defined + repo-level (Squad). Repo overrides by name. + public static GroupPreset[] GetAll(string baseDir, string? repoWorkingDirectory = null) { - var user = Load(baseDir); - return GroupPreset.BuiltIn.Concat(user).ToArray(); + var merged = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var p in GroupPreset.BuiltIn) merged[p.Name] = p; + foreach (var p in Load(baseDir)) merged[p.Name] = p; + if (repoWorkingDirectory != null) + { + foreach (var p in SquadDiscovery.Discover(repoWorkingDirectory)) + merged[p.Name] = p; + } + return merged.Values.ToArray(); } /// Save the current multi-agent group as a reusable preset. 
diff --git a/PolyPilot/Models/SessionOrganization.cs b/PolyPilot/Models/SessionOrganization.cs index 203669f163..9041a32236 100644 --- a/PolyPilot/Models/SessionOrganization.cs +++ b/PolyPilot/Models/SessionOrganization.cs @@ -37,6 +37,16 @@ public class SessionGroup /// Active reflection state for OrchestratorReflect mode. Null when not in a reflect loop. public ReflectionCycle? ReflectionState { get; set; } + + /// + /// Shared context from Squad decisions.md or similar, prepended to all worker prompts. + /// + public string? SharedContext { get; set; } + + /// + /// Routing context from Squad routing.md, injected into orchestrator planning prompt. + /// + public string? RoutingContext { get; set; } } public class SessionMeta diff --git a/PolyPilot/Models/SquadDiscovery.cs b/PolyPilot/Models/SquadDiscovery.cs new file mode 100644 index 0000000000..5eabf9f446 --- /dev/null +++ b/PolyPilot/Models/SquadDiscovery.cs @@ -0,0 +1,174 @@ +using System.Text.RegularExpressions; + +namespace PolyPilot.Models; + +/// +/// Discovers bradygaster/squad team definitions from .squad/ or .ai-team/ directories. +/// Parses team.md, agent charters, routing.md, and decisions.md into GroupPreset(s). +/// Read-only: never writes to the .squad/ directory. +/// +public static class SquadDiscovery +{ + private const int MaxCharterLength = 4000; + private const int MaxDecisionsLength = 8000; + + /// Names of agents that are infrastructure, not workers. + private static readonly HashSet InfraAgents = new(StringComparer.OrdinalIgnoreCase) + { + "scribe", "_scribe", "coordinator", "_coordinator", "_alumni" + }; + + /// + /// Discover Squad team definitions from a worktree root. + /// Returns empty list if no .squad/ or .ai-team/ directory found. 
+ /// + public static List Discover(string worktreeRoot) + { + try + { + var squadDir = FindSquadDirectory(worktreeRoot); + if (squadDir == null) return new(); + + var teamFile = Path.Combine(squadDir, "team.md"); + if (!File.Exists(teamFile)) return new(); + + var teamContent = File.ReadAllText(teamFile); + var agents = DiscoverAgents(squadDir); + + if (agents.Count == 0) return new(); + + var teamName = ParseTeamName(teamContent) ?? "Squad Team"; + var decisions = ReadOptionalFile(Path.Combine(squadDir, "decisions.md"), MaxDecisionsLength); + var routing = ReadOptionalFile(Path.Combine(squadDir, "routing.md"), MaxDecisionsLength); + + var preset = BuildPreset(teamName, agents, decisions, routing, squadDir); + return new List { preset }; + } + catch + { + return new(); + } + } + + /// + /// Find .squad/ or .ai-team/ directory. Prefers .squad/ if both exist. + /// + internal static string? FindSquadDirectory(string worktreeRoot) + { + var squadPath = Path.Combine(worktreeRoot, ".squad"); + if (Directory.Exists(squadPath)) return squadPath; + + var aiTeamPath = Path.Combine(worktreeRoot, ".ai-team"); + if (Directory.Exists(aiTeamPath)) return aiTeamPath; + + return null; + } + + /// + /// Discover agents from the agents/ subdirectory. + /// Each agent has a directory with charter.md inside. + /// Skips infrastructure agents (scribe, coordinator, _alumni). + /// + internal static List DiscoverAgents(string squadDir) + { + var agentsDir = Path.Combine(squadDir, "agents"); + if (!Directory.Exists(agentsDir)) return new(); + + var agents = new List(); + foreach (var dir in Directory.GetDirectories(agentsDir)) + { + var name = Path.GetFileName(dir); + if (InfraAgents.Contains(name)) continue; + + var charterPath = Path.Combine(dir, "charter.md"); + string? 
charter = null; + if (File.Exists(charterPath)) + { + charter = File.ReadAllText(charterPath); + if (charter.Length > MaxCharterLength) + charter = charter[..MaxCharterLength]; + } + + agents.Add(new SquadAgent(name, charter)); + } + + return agents; + } + + /// + /// Parse team name from team.md content. + /// Looks for: first H1 heading, or first line that looks like a title. + /// + internal static string? ParseTeamName(string teamContent) + { + foreach (var line in teamContent.Split('\n')) + { + var trimmed = line.Trim(); + if (trimmed.StartsWith("# ")) + return trimmed[2..].Trim(); + } + return null; + } + + /// + /// Parse agent roster from team.md table rows. + /// Returns member names from the first column of markdown tables. + /// + internal static List ParseRosterNames(string teamContent) + { + var names = new List(); + var tableRegex = new Regex(@"^\s*\|\s*([^\|\s]+)\s*\|", RegexOptions.Multiline); + foreach (Match m in tableRegex.Matches(teamContent)) + { + var name = m.Groups[1].Value.Trim(); + // Skip header row markers and header labels + if (name == "---" || name.All(c => c == '-') + || name.Equals("Member", StringComparison.OrdinalIgnoreCase) + || name.Equals("Name", StringComparison.OrdinalIgnoreCase)) + continue; + names.Add(name); + } + return names; + } + + private static string? ReadOptionalFile(string path, int maxLength) + { + if (!File.Exists(path)) return null; + try + { + var content = File.ReadAllText(path); + if (string.IsNullOrWhiteSpace(content)) return null; + return content.Length > maxLength ? content[..maxLength] : content; + } + catch { return null; } + } + + private static GroupPreset BuildPreset(string teamName, List agents, + string? decisions, string? 
routing, string squadDir) + { + // Use a sensible default model for all agents (user can override after creation) + var defaultModel = "claude-sonnet-4.6"; + var orchestratorModel = "claude-opus-4.6"; + + var workerModels = agents.Select(_ => defaultModel).ToArray(); + var systemPrompts = agents.Select(a => a.Charter).ToArray(); + + return new GroupPreset( + teamName, + $"Squad team from {Path.GetFileName(Path.GetDirectoryName(squadDir) ?? squadDir)}", + "🫑", + MultiAgentMode.OrchestratorReflect, + orchestratorModel, + workerModels) + { + IsRepoLevel = true, + SourcePath = squadDir, + WorkerSystemPrompts = systemPrompts, + SharedContext = decisions, + RoutingContext = routing, + }; + } + + /// Represents a discovered Squad agent with name and charter content. + internal record SquadAgent(string Name, string? Charter); +} diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 29abc5f839..dbac9207a6 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -675,7 +675,7 @@ private async Task SendViaOrchestratorAsync(string groupId, List members // Phase 1: Planning β€” ask orchestrator to analyze and assign tasks InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Planning, null)); - var planningPrompt = BuildOrchestratorPlanningPrompt(prompt, workerNames, group?.OrchestratorPrompt); + var planningPrompt = BuildOrchestratorPlanningPrompt(prompt, workerNames, group?.OrchestratorPrompt, group?.RoutingContext); var planResponse = await SendPromptAndWaitAsync(orchestratorName, planningPrompt, cancellationToken); // Phase 2: Parse task assignments from orchestrator response @@ -707,7 +707,7 @@ private async Task SendViaOrchestratorAsync(string groupId, List members InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Complete, null)); } - private string BuildOrchestratorPlanningPrompt(string 
userPrompt, List workerNames, string? additionalInstructions) + private string BuildOrchestratorPlanningPrompt(string userPrompt, List workerNames, string? additionalInstructions, string? routingContext = null) { var sb = new System.Text.StringBuilder(); sb.AppendLine($"You are the orchestrator of a multi-agent group. You have {workerNames.Count} worker agent(s) available:"); @@ -731,6 +731,12 @@ private string BuildOrchestratorPlanningPrompt(string userPrompt, List w sb.AppendLine("## Additional Orchestration Instructions"); sb.AppendLine(additionalInstructions); } + if (!string.IsNullOrEmpty(routingContext)) + { + sb.AppendLine(); + sb.AppendLine("## Work Routing (from team definition)"); + sb.AppendLine(routingContext); + } sb.AppendLine(); sb.AppendLine("## Your Task"); sb.AppendLine("Analyze the request and assign specific tasks to your workers. Use this exact format for each assignment:"); @@ -779,12 +785,21 @@ private async Task ExecuteWorkerAsync(string workerName, string ta var sw = System.Diagnostics.Stopwatch.StartNew(); await EnsureSessionModelAsync(workerName, cancellationToken); - // Use per-worker system prompt if set, otherwise generic + // Use per-worker system prompt if set, otherwise generic. + // Note: .github/copilot-instructions.md is auto-loaded by the SDK for each session's working directory, + // so workers already inherit repo-level copilot instructions without explicit injection here. var meta = GetSessionMeta(workerName); var identity = !string.IsNullOrEmpty(meta?.SystemPrompt) ? meta.SystemPrompt : "You are a worker agent. Complete the following task thoroughly."; - var workerPrompt = $"{identity}\n\nYour response will be collected and synthesized with other workers' responses.\n\n## Original User Request (context)\n{originalPrompt}\n\n## Your Assigned Task\n{task}"; + + // Inject shared context (e.g., Squad decisions.md) if the group has it + var group = meta != null ? 
Organization.Groups.FirstOrDefault(g => g.Id == meta.GroupId) : null; + var sharedPrefix = !string.IsNullOrEmpty(group?.SharedContext) + ? $"## Team Context (shared knowledge)\n{group.SharedContext}\n\n" + : ""; + + var workerPrompt = $"{identity}\n\nYour response will be collected and synthesized with other workers' responses.\n\n{sharedPrefix}## Original User Request (context)\n{originalPrompt}\n\n## Your Assigned Task\n{task}"; try { @@ -913,6 +928,10 @@ public string GetEffectiveModel(string sessionName) var group = CreateMultiAgentGroup(preset.Name, preset.Mode, worktreeId: worktreeId, repoId: repoId); if (group == null) return null; + // Store Squad context (routing, decisions) on the group for use during orchestration + group.SharedContext = preset.SharedContext; + group.RoutingContext = preset.RoutingContext; + // Create orchestrator session var orchName = $"{preset.Name}-orchestrator"; try @@ -1084,7 +1103,7 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List string planPrompt; if (reflectState.CurrentIteration == 1) { - planPrompt = BuildOrchestratorPlanningPrompt(prompt, workerNames, group.OrchestratorPrompt); + planPrompt = BuildOrchestratorPlanningPrompt(prompt, workerNames, group.OrchestratorPrompt, group.RoutingContext); } else { From f0a6c1fd787ceb8e47f92c448ccff2c0353c0ba3 Mon Sep 17 00:00:00 2001 From: Shane Date: Fri, 20 Feb 2026 10:49:11 -0600 Subject: [PATCH 44/48] Fix DeleteGroup: close multi-agent sessions instead of orphaning them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When deleting a multi-agent group, sessions were moved to the default 'Sessions' group but kept their Role, PreferredModel, and WorktreeId markers β€” appearing as orphaned multi-agent sessions in the sidebar. Now DeleteGroup checks IsMultiAgent: if true, sessions are removed from organization and closed asynchronously. Non-multi-agent groups retain the old behavior (move sessions to default). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/MultiAgentRegressionTests.cs | 3 +- PolyPilot.Tests/SessionOrganizationTests.cs | 53 ++++++++++--------- .../Services/CopilotService.Organization.cs | 28 ++++++++-- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs index f39469b805..e61eeeafd8 100644 --- a/PolyPilot.Tests/MultiAgentRegressionTests.cs +++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs @@ -641,9 +641,8 @@ public void Lifecycle_DeleteGroup_ThenCreateNewGroup_NoContamination() // Verify no cross-contamination Assert.NotEqual(group1.Id, group2.Id); - var alpha = svc.Organization.Sessions.First(s => s.SessionName == "alpha-orch"); + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "alpha-orch"); // removed with group var beta = svc.Organization.Sessions.First(s => s.SessionName == "beta-orch"); - Assert.Equal(SessionGroup.DefaultId, alpha.GroupId); // moved to default Assert.Equal(group2.Id, beta.GroupId); // in new group } diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index e46125d020..c1916dd455 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -2354,7 +2354,7 @@ public void MultipleGroups_IncludingMultiAgent_AllSurviveRoundTrip() // --- DeleteGroup tests --- [Fact] - public void DeleteGroup_MultiAgent_MovesSessionsToDefault() + public void DeleteGroup_MultiAgent_RemovesSessions() { var svc = CreateService(); @@ -2379,17 +2379,15 @@ public void DeleteGroup_MultiAgent_MovesSessionsToDefault() svc.DeleteGroup(group.Id); - // Sessions should be in default group - var orch = svc.Organization.Sessions.First(s => s.SessionName == "orch"); - var worker = svc.Organization.Sessions.First(s => s.SessionName == "worker-1"); - Assert.Equal(SessionGroup.DefaultId, orch.GroupId); - 
Assert.Equal(SessionGroup.DefaultId, worker.GroupId); + // Multi-agent sessions should be removed, not orphaned + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "orch"); + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "worker-1"); // Group should be removed Assert.DoesNotContain(svc.Organization.Groups, g => g.Id == group.Id); } [Fact] - public void DeleteGroup_PreservesSessionMetadata() + public void DeleteGroup_MultiAgent_RemovesSessionMetadata() { var svc = CreateService(); var group = svc.CreateMultiAgentGroup("Team"); @@ -2405,11 +2403,8 @@ public void DeleteGroup_PreservesSessionMetadata() svc.DeleteGroup(group.Id); - var meta = svc.Organization.Sessions.First(s => s.SessionName == "orch"); - // Role and PreferredModel should be preserved even after group deletion - Assert.Equal(MultiAgentRole.Orchestrator, meta.Role); - Assert.Equal("claude-opus-4.6", meta.PreferredModel); - Assert.Equal("wt-1", meta.WorktreeId); + // Multi-agent sessions should be removed entirely, not orphaned + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "orch"); } // --- Reconciliation protection tests --- @@ -2680,21 +2675,31 @@ public void FullLifecycle_DeleteTeam_ThenReconcile_SessionsStayInDefault() // Delete the team svc.DeleteGroup(group.Id); - // Sessions should be in default - Assert.All(svc.Organization.Sessions.Where(s => s.SessionName.StartsWith("team-")), - m => Assert.Equal(SessionGroup.DefaultId, m.GroupId)); + // Multi-agent sessions should be removed entirely + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "team-orch"); + Assert.DoesNotContain(svc.Organization.Sessions, s => s.SessionName == "team-w1"); - RegisterKnownSessions(svc, "team-orch", "team-w1"); + // Group should be gone + Assert.DoesNotContain(svc.Organization.Groups, g => g.Id == group.Id); + } - // Run reconciliation β€” should NOT move them to repo group - svc.ReconcileOrganization(); + [Fact] + public void 
DeleteGroup_NonMultiAgent_MovesSessionsToDefault() + { + var svc = CreateService(); + var group = svc.GetOrCreateRepoGroup("repo-1", "MyRepo"); + svc.Organization.Sessions.Add(new SessionMeta + { + SessionName = "s1", + GroupId = group.Id, + WorktreeId = "wt-1" + }); - var orch = svc.Organization.Sessions.First(s => s.SessionName == "team-orch"); - var worker = svc.Organization.Sessions.First(s => s.SessionName == "team-w1"); - // Orchestrator role prevents auto-move - Assert.Equal(SessionGroup.DefaultId, orch.GroupId); - // PreferredModel prevents auto-move - Assert.Equal(SessionGroup.DefaultId, worker.GroupId); + svc.DeleteGroup(group.Id); + + // Non-multi-agent: sessions move to default + var s = svc.Organization.Sessions.First(s => s.SessionName == "s1"); + Assert.Equal(SessionGroup.DefaultId, s.GroupId); } [Fact] diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index dbac9207a6..d9b53ddd76 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -322,10 +322,32 @@ public void DeleteGroup(string groupId) { if (groupId == SessionGroup.DefaultId) return; - // Move all sessions in this group to default - foreach (var meta in Organization.Sessions.Where(m => m.GroupId == groupId)) + var group = Organization.Groups.FirstOrDefault(g => g.Id == groupId); + var isMultiAgent = group?.IsMultiAgent ?? 
false; + + if (isMultiAgent) { - meta.GroupId = SessionGroup.DefaultId; + // Multi-agent sessions are meaningless without their group β€” close them + var sessionNames = Organization.Sessions + .Where(m => m.GroupId == groupId) + .Select(m => m.SessionName) + .ToList(); + // Fire-and-forget: close sessions asynchronously + _ = Task.Run(async () => + { + foreach (var name in sessionNames) + await CloseSessionAsync(name); + }); + // Remove from organization immediately so UI updates + Organization.Sessions.RemoveAll(m => sessionNames.Contains(m.SessionName)); + } + else + { + // Non-multi-agent: move sessions to default group + foreach (var meta in Organization.Sessions.Where(m => m.GroupId == groupId)) + { + meta.GroupId = SessionGroup.DefaultId; + } } Organization.Groups.RemoveAll(g => g.Id == groupId); From 0a1477d15859b1f62a3f706bbe023b5f3c01b09f Mon Sep 17 00:00:00 2001 From: Shane Date: Fri, 20 Feb 2026 11:00:50 -0600 Subject: [PATCH 45/48] Update docs, scenarios, and tests for full feature comprehension - Architecture spec: add Group Deletion section documenting multi-agent vs regular group behavior, add SharedContext/RoutingContext to data model, fix stale GroupPreset code block, update key files table with SquadDiscovery and ModelCapabilities, update test counts - Copilot instructions: expand Squad section with three-tier merge, preset picker sections, routing.md injection, deletion behavior - Scenarios: add delete-multi-agent-group-closes-sessions scenario verifying sessions are removed not orphaned - ScenarioReferenceTests: add group deletion scenario presence check Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 6 +++- PolyPilot.Tests/ScenarioReferenceTests.cs | 14 +++++++++ .../Scenarios/multi-agent-scenarios.json | 18 +++++++++++ docs/multi-agent-orchestration.md | 31 ++++++++++++++----- 4 files changed, 61 insertions(+), 8 deletions(-) diff --git a/.github/copilot-instructions.md 
b/.github/copilot-instructions.md index 39c7f61b36..0f01b353a7 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -59,7 +59,11 @@ For Android, always run `adb reverse tcp:9223 tcp:9223` after deploy. **See `docs/multi-agent-orchestration.md` for the multi-agent architecture spec** (orchestration modes, reflection loop, sentinel protocol, invariants, Squad integration). Test scenarios in `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`. Read these before modifying orchestration, reconciliation, or TCS completion logic. ### Squad Integration -PolyPilot discovers [bradygaster/squad](https://github.com/bradygaster/squad) team definitions from `.squad/` (or legacy `.ai-team/`) directories in the worktree root. Each agent's `charter.md` becomes a worker system prompt, `team.md` defines the roster, and `decisions.md` provides shared context. Repo-level teams appear as presets in the multi-agent group creation flow. PolyPilot never writes to `.squad/` — it's read-only. +PolyPilot discovers [bradygaster/squad](https://github.com/bradygaster/squad) team definitions from `.squad/` (or legacy `.ai-team/`) directories in the worktree root. Each agent's `charter.md` becomes a worker system prompt, `team.md` defines the roster, `decisions.md` provides shared context injected into all worker prompts, and `routing.md` is injected into the orchestrator's planning prompt. Repo-level teams appear in a **"📂 From Repo"** section in the preset picker, above built-in presets. PolyPilot never writes to `.squad/` — it's read-only. + +**Preset priority (three-tier merge):** Built-in presets < User presets (`~/.polypilot/presets.json`) < Repo teams (`.squad/`). Repo teams shadow presets with the same name. The preset picker shows three sections: "📂 From Repo", "⚙️ Built-in", and "👀 My Presets". + +**Group deletion:** Deleting a multi-agent team closes and removes all its sessions (they're meaningless without the team).
Deleting a regular group moves sessions to the default group. This is a .NET MAUI Blazor Hybrid app targeting Mac Catalyst, Android, and iOS. It manages multiple GitHub Copilot CLI sessions through a native GUI. diff --git a/PolyPilot.Tests/ScenarioReferenceTests.cs b/PolyPilot.Tests/ScenarioReferenceTests.cs index e945a3b694..58fb818fcb 100644 --- a/PolyPilot.Tests/ScenarioReferenceTests.cs +++ b/PolyPilot.Tests/ScenarioReferenceTests.cs @@ -218,4 +218,18 @@ public void MultiAgentScenarios_IncludeSquadIntegration() Assert.Contains("squad-decisions-shared-context", ids); Assert.Contains("squad-legacy-ai-team-compat", ids); } + + [Fact] + public void MultiAgentScenarios_IncludeGroupDeletion() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + .ToHashSet(); + + Assert.Contains("delete-group-no-contamination", ids); + Assert.Contains("delete-multi-agent-group-closes-sessions", ids); + } } diff --git a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json index ee9f6ce2df..f74e42cbd8 100644 --- a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json +++ b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json @@ -115,6 +115,24 @@ { "action": "assertNoOverlap", "left": "groupA.sessionNames", "right": "groupB.sessionNames" } ] }, + { + "id": "delete-multi-agent-group-closes-sessions", + "name": "Deleting multi-agent group removes sessions entirely", + "description": "Verifies that deleting a multi-agent group closes all its sessions and removes them from the organization, rather than orphaning them in the default Sessions group.", + "invariants": [ + "No sessions with the deleted group's ID remain in organization.json", + "No orphaned orchestrator/worker sessions appear in the default Sessions group", + "Non-multi-agent 
group deletion still moves sessions to default (different behavior)" + ], + "steps": [ + { "action": "createGroup", "mode": "OrchestratorReflect", "name": "Temp Team", "workers": 2 }, + { "action": "captureGroupState", "capture": "beforeDelete" }, + { "action": "deleteGroup", "name": "Temp Team" }, + { "action": "readOrgJson", "capture": "orgAfter" }, + { "action": "assertNoSessionsWithGroupId", "groupId": "beforeDelete.groupId" }, + { "action": "assertNoSessionsInDefault", "nameContains": "Temp Team", "note": "Sessions should be gone, not orphaned" } + ] + }, { "id": "broadcast-mode-all-receive", "name": "Broadcast mode sends to all sessions", diff --git a/docs/multi-agent-orchestration.md b/docs/multi-agent-orchestration.md index 85909b090a..dd5daebc56 100644 --- a/docs/multi-agent-orchestration.md +++ b/docs/multi-agent-orchestration.md @@ -10,12 +10,16 @@ PolyPilot's multi-agent system lets you create a **team of AI sessions** that wo | File | Purpose | |------|---------| -| `PolyPilot/Services/CopilotService.Organization.cs` | Orchestration engine (dispatch, reflection loop, reconciliation) | +| `PolyPilot/Services/CopilotService.Organization.cs` | Orchestration engine (dispatch, reflection loop, reconciliation, group deletion) | | `PolyPilot/Models/SessionOrganization.cs` | `SessionGroup`, `SessionMeta`, `MultiAgentMode`, `MultiAgentRole` | | `PolyPilot/Models/ReflectionCycle.cs` | Reflection state, stall detection, sentinel parsing, evaluator prompts | +| `PolyPilot/Models/ModelCapabilities.cs` | `GroupPreset`, `UserPresets` (three-tier merge), built-in presets | +| `PolyPilot/Models/SquadDiscovery.cs` | Squad directory parser (`.squad/` → `GroupPreset`) | | `PolyPilot/Services/CopilotService.Events.cs` | TCS completion (IsProcessing → TrySetResult ordering) | +| `PolyPilot/Components/Layout/SessionSidebar.razor` | Preset picker UI (sectioned: From Repo / Built-in / My Presets) | | `PolyPilot.Tests/MultiAgentRegressionTests.cs` | 37 regression tests
covering all known bugs | -| `PolyPilot.Tests/SessionOrganizationTests.cs` | 14 grouping stability tests | +| `PolyPilot.Tests/SessionOrganizationTests.cs` | 15 grouping stability tests | +| `PolyPilot.Tests/SquadDiscoveryTests.cs` | 22 Squad discovery tests | | `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json` | Executable CDP test scenarios | --- @@ -253,6 +257,8 @@ OrganizationState │ ├── OrchestratorMode (Broadcast/Sequential/Orchestrator/OrchestratorReflect) │ ├── OrchestratorPrompt (optional system prompt for orchestrator) │ ├── ReflectionState: ReflectionCycle? (active cycle state) +│ ├── SharedContext (from decisions.md — prepended to worker prompts) +│ ├── RoutingContext (from routing.md — injected into orchestrator planning) │ ├── WorktreeId, RepoId (links to repo/worktree) │ └── SortOrder │ @@ -286,6 +292,16 @@ OrganizationState **Critical:** Both `Role` and `PreferredModel` must be set on all sessions. These are the markers that `ReconcileOrganization` uses to identify multi-agent sessions. Without them, sessions get scattered on restart. +### Group Deletion + +Deleting a group via `DeleteGroup(groupId)` behaves differently based on group type: + +- **Multi-agent groups (`IsMultiAgent == true`):** All sessions in the group are **removed from the organization and closed asynchronously**. Multi-agent sessions are meaningless without their group — they have orchestrator/worker roles, preferred models, and system prompts that only make sense within the team context. Leaving them orphaned in the default group (the old behavior) caused confusion in the sidebar. + +- **Regular groups (repo groups, etc.):** Sessions are **moved to the default group**. These are standalone sessions that the user may still want to access. + +**Invariant:** After `DeleteGroup` on a multi-agent group, `Organization.Sessions` must contain zero entries with the deleted group's ID.
The async close fires `CloseSessionAsync` on each session (disposing the SDK session, cleaning up image queues, and tracking closed session IDs to prevent merge re-addition). + --- ## Error Handling in Reflection Loops @@ -333,7 +349,8 @@ If no `@worker:` assignments are found, the orchestrator handled the request dir ### Unit Tests - **`MultiAgentRegressionTests.cs`** (37 tests) — JSON corruption, reconciliation scattering, preset markers, mode enums, reflection loop logic, TCS ordering, lifecycle scenarios, persona tests -- **`SessionOrganizationTests.cs`** → `GroupingStabilityTests` (14 tests) — JSON round-trips, delete+reconcile, orphan handling +- **`SessionOrganizationTests.cs`** → `GroupingStabilityTests` (15 tests) — JSON round-trips, delete+cleanup, orphan handling, multi-agent vs regular group deletion +- **`SquadDiscoveryTests.cs`** (22 tests) — Squad directory discovery, team.md parsing, charter→system-prompt, decisions/routing context, three-tier merge, legacy `.ai-team/` compat - **`ScenarioReferenceTests.cs`** — Validates scenario JSON structure, unique IDs, Squad integration scenario presence ### Executable Scenarios @@ -401,10 +418,10 @@ Repo teams shadow built-in/user presets with the same name when working in that public record GroupPreset(...) { public bool IsUserDefined { get; init; } - public bool IsRepoLevel { get; init; } // NEW: loaded from .squad/ - public string? SourcePath { get; init; } // NEW: path to .squad/ dir + public bool IsRepoLevel { get; init; } // Loaded from .squad/ + public string? SourcePath { get; init; } // Path to .squad/ dir public string?[]? WorkerSystemPrompts { get; init; } - public string?[]? WorkerSystemPromptFiles { get; init; } // NEW: file refs - public string? SharedContext { get; init; } // NEW: from decisions.md + public string? SharedContext { get; init; } // From decisions.md + public string?
RoutingContext { get; init; } // From routing.md } ``` From f67912180545c5b88818357c1c6dc40a102cb476 Mon Sep 17 00:00:00 2001 From: Shane Date: Fri, 20 Feb 2026 12:45:37 -0600 Subject: [PATCH 46/48] Implement Squad write-back: save presets as .squad/ directories - Add SquadWriter.cs: writes GroupPreset as .squad/ directory structure (team.md, agents/{name}/charter.md, decisions.md, routing.md) - Update SaveGroupAsPreset to write .squad/ when worktree is available, with presets.json as personal backup - 15 SquadWriter tests (write, round-trip, sanitize, edge cases) - Add 3 CDP scenarios: save-preset-creates-squad-dir, round-trip-squad-write-read, squad-write-sanitizes-names - Update docs and copilot instructions for write-back behavior - 912 tests passing Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 4 +- PolyPilot.Tests/PolyPilot.Tests.csproj | 1 + PolyPilot.Tests/ScenarioReferenceTests.cs | 15 + .../Scenarios/multi-agent-scenarios.json | 53 ++++ PolyPilot.Tests/SquadWriterTests.cs | 256 ++++++++++++++++++ PolyPilot/Models/ModelCapabilities.cs | 24 +- PolyPilot/Models/SquadWriter.cs | 128 +++++++++ .../Services/CopilotService.Organization.cs | 10 +- docs/multi-agent-orchestration.md | 19 +- 9 files changed, 504 insertions(+), 6 deletions(-) create mode 100644 PolyPilot.Tests/SquadWriterTests.cs create mode 100644 PolyPilot/Models/SquadWriter.cs diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 4843f244f5..308ebce0df 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -59,7 +59,9 @@ For Android, always run `adb reverse tcp:9223 tcp:9223` after deploy. **See `docs/multi-agent-orchestration.md` for the multi-agent architecture spec** (orchestration modes, reflection loop, sentinel protocol, invariants, Squad integration). Test scenarios in `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`. 
Read these before modifying orchestration, reconciliation, or TCS completion logic. ### Squad Integration -PolyPilot discovers [bradygaster/squad](https://github.com/bradygaster/squad) team definitions from `.squad/` (or legacy `.ai-team/`) directories in the worktree root. Each agent's `charter.md` becomes a worker system prompt, `team.md` defines the roster, `decisions.md` provides shared context injected into all worker prompts, and `routing.md` is injected into the orchestrator's planning prompt. Repo-level teams appear in a **"📂 From Repo"** section in the preset picker, above built-in presets. PolyPilot never writes to `.squad/` — it's read-only. +PolyPilot discovers [bradygaster/squad](https://github.com/bradygaster/squad) team definitions from `.squad/` (or legacy `.ai-team/`) directories in the worktree root. Each agent's `charter.md` becomes a worker system prompt, `team.md` defines the roster, `decisions.md` provides shared context injected into all worker prompts, and `routing.md` is injected into the orchestrator's planning prompt. Repo-level teams appear in a **"📂 From Repo"** section in the preset picker, above built-in presets. + +**Squad write-back:** When saving a multi-agent group as a preset, PolyPilot writes the team definition back to `.squad/` format in the worktree root via `SquadWriter`. This creates `team.md`, `agents/{name}/charter.md`, and optional `decisions.md`/`routing.md`. The preset is also saved to `presets.json` as a personal backup. This enables round-tripping: discover → modify → save back → share via repo. **Preset priority (three-tier merge):** Built-in presets < User presets (`~/.polypilot/presets.json`) < Repo teams (`.squad/`). Repo teams shadow presets with the same name. The preset picker shows three sections: "📂 From Repo", "⚙️ Built-in", and "👀 My Presets".
diff --git a/PolyPilot.Tests/PolyPilot.Tests.csproj b/PolyPilot.Tests/PolyPilot.Tests.csproj index 1562a98b76..3308b8155e 100644 --- a/PolyPilot.Tests/PolyPilot.Tests.csproj +++ b/PolyPilot.Tests/PolyPilot.Tests.csproj @@ -53,6 +53,7 @@ + diff --git a/PolyPilot.Tests/ScenarioReferenceTests.cs b/PolyPilot.Tests/ScenarioReferenceTests.cs index 58fb818fcb..a8e497dd3f 100644 --- a/PolyPilot.Tests/ScenarioReferenceTests.cs +++ b/PolyPilot.Tests/ScenarioReferenceTests.cs @@ -232,4 +232,19 @@ public void MultiAgentScenarios_IncludeGroupDeletion() Assert.Contains("delete-group-no-contamination", ids); Assert.Contains("delete-multi-agent-group-closes-sessions", ids); } + + [Fact] + public void MultiAgentScenarios_IncludeSquadWriteBack() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + .ToHashSet(); + + Assert.Contains("save-preset-creates-squad-dir", ids); + Assert.Contains("round-trip-squad-write-read", ids); + Assert.Contains("squad-write-sanitizes-names", ids); + } } diff --git a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json index f74e42cbd8..4b49161290 100644 --- a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json +++ b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json @@ -286,6 +286,59 @@ { "action": "waitForPhase", "phase": "Planning", "timeout": 30 }, { "action": "assertOrchestratorReceivedWorkerDescriptions" } ] + }, + { + "id": "save-preset-creates-squad-dir", + "name": "Saving preset writes .squad/ directory", + "description": "Verifies that SaveGroupAsPreset creates a .squad/ directory with team.md, agent charters, and optional decisions.md/routing.md in the worktree root.", + "invariants": [ + ".squad/team.md is created with team name and roster table", + ".squad/agents/{name}/charter.md is 
created for each worker", + "Agent names are sanitized (lowercase, hyphens)", + "Preset is also saved to presets.json as backup" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "saveGroupAsPreset", "name": "My Code Review" }, + { "action": "assertFileExists", "path": ".squad/team.md" }, + { "action": "assertFileContains", "path": ".squad/team.md", "text": "My Code Review" }, + { "action": "assertDirectoryExists", "path": ".squad/agents" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "round-trip-squad-write-read", + "name": "Round-trip: write then discover Squad team", + "description": "Verifies that a team saved via SquadWriter can be discovered back via SquadDiscovery with matching data.", + "invariants": [ + "Written team.md can be parsed back by SquadDiscovery", + "Written charter.md content matches original system prompts", + "decisions.md and routing.md survive the round-trip" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "saveGroupAsPreset", "name": "Round Trip Test" }, + { "action": "assertFileExists", "path": ".squad/team.md" }, + { "action": "selectWorktree", "worktree": "current" }, + { "action": "assertPresetInSection", "section": "From Repo", "name": "Round Trip Test" }, + { "action": "shell", "command": "rm -rf .squad" } + ] + }, + { + "id": "squad-write-sanitizes-names", + "name": "Squad writer sanitizes agent names", + "description": "Verifies that SquadWriter strips team-name prefixes and sanitizes agent directory names (lowercase, hyphens instead of special chars).", + "invariants": [ + "Team-name prefix stripped from session names", + "Directory names are lowercase with hyphens", + "No spaces or special characters in directory names" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "saveGroupAsPreset", "name": "Test Team" }, + { "action": 
"assertNoDirectoryContains", "path": ".squad/agents", "pattern": " " }, + { "action": "shell", "command": "rm -rf .squad" } + ] } ] } diff --git a/PolyPilot.Tests/SquadWriterTests.cs b/PolyPilot.Tests/SquadWriterTests.cs new file mode 100644 index 0000000000..86221857b9 --- /dev/null +++ b/PolyPilot.Tests/SquadWriterTests.cs @@ -0,0 +1,256 @@ +using PolyPilot.Models; + +namespace PolyPilot.Tests; + +public class SquadWriterTests : IDisposable +{ + private readonly string _tempDir; + + public SquadWriterTests() + { + _tempDir = Path.Combine(Path.GetTempPath(), "squad-writer-" + Guid.NewGuid().ToString("N")[..8]); + Directory.CreateDirectory(_tempDir); + } + + public void Dispose() + { + if (Directory.Exists(_tempDir)) + Directory.Delete(_tempDir, recursive: true); + } + + [Fact] + public void WritePreset_CreatesSquadDirectory() + { + var preset = MakePreset("My Team"); + var workers = new List<(string Name, string? SystemPrompt)> + { + ("reviewer", "You are a code reviewer. Focus on correctness."), + ("analyst", "You are a performance analyst.") + }; + + var squadDir = SquadWriter.WritePreset(_tempDir, preset, workers); + + Assert.True(Directory.Exists(squadDir)); + Assert.True(File.Exists(Path.Combine(squadDir, "team.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "agents", "reviewer", "charter.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "agents", "analyst", "charter.md"))); + } + + [Fact] + public void WritePreset_TeamMdHasCorrectFormat() + { + var preset = MakePreset("Review Squad"); + var workers = new List<(string Name, string? 
SystemPrompt)> + { + ("security", "You are a security auditor."), + ("perf", null) + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var content = File.ReadAllText(Path.Combine(_tempDir, ".squad", "team.md")); + Assert.Contains("# Review Squad", content); + Assert.Contains("| security |", content); + Assert.Contains("| perf |", content); + Assert.Contains("| Member | Role |", content); + } + + [Fact] + public void WritePreset_CharterContainsSystemPrompt() + { + var preset = MakePreset("Team"); + var workers = new List<(string Name, string? SystemPrompt)> + { + ("dev", "You are a full-stack developer. Write clean code.") + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var charter = File.ReadAllText(Path.Combine(_tempDir, ".squad", "agents", "dev", "charter.md")); + Assert.Equal("You are a full-stack developer. Write clean code.", charter); + } + + [Fact] + public void WritePreset_NullPromptGetsDefaultCharter() + { + var preset = MakePreset("Team"); + var workers = new List<(string Name, string? SystemPrompt)> + { + ("helper", null) + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var charter = File.ReadAllText(Path.Combine(_tempDir, ".squad", "agents", "helper", "charter.md")); + Assert.Contains("helper", charter); + } + + [Fact] + public void WritePreset_WritesDecisionsMd() + { + var preset = MakePreset("Team") with { SharedContext = "Always use async/await." }; + var workers = new List<(string Name, string? SystemPrompt)> { ("w1", null) }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var decisions = File.ReadAllText(Path.Combine(_tempDir, ".squad", "decisions.md")); + Assert.Equal("Always use async/await.", decisions); + } + + [Fact] + public void WritePreset_WritesRoutingMd() + { + var preset = MakePreset("Team") with { RoutingContext = "| *.cs | dev | C# code |" }; + var workers = new List<(string Name, string? 
SystemPrompt)> { ("dev", null) }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var routing = File.ReadAllText(Path.Combine(_tempDir, ".squad", "routing.md")); + Assert.Equal("| *.cs | dev | C# code |", routing); + } + + [Fact] + public void WritePreset_NoSharedContext_NoDecisionsFile() + { + var preset = MakePreset("Team"); + var workers = new List<(string Name, string? SystemPrompt)> { ("w1", null) }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + Assert.False(File.Exists(Path.Combine(_tempDir, ".squad", "decisions.md"))); + } + + [Fact] + public void RoundTrip_WriteAndReadBack() + { + var preset = MakePreset("Round Trip Team") with + { + SharedContext = "Use TypeScript only.", + RoutingContext = "| *.ts | dev | TypeScript |" + }; + var workers = new List<(string Name, string? SystemPrompt)> + { + ("security", "You are a security auditor. Focus on OWASP."), + ("dev", "You are a developer. Write clean code.") + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + // Read back via SquadDiscovery + var discovered = SquadDiscovery.Discover(_tempDir); + Assert.Single(discovered); + var result = discovered[0]; + Assert.Equal("Round Trip Team", result.Name); + Assert.True(result.IsRepoLevel); + Assert.Equal(2, result.WorkerModels.Length); + // Order may vary by directory enumeration β€” check both prompts are present + var allPrompts = string.Join(" | ", result.WorkerSystemPrompts!); + Assert.Contains("OWASP", allPrompts); + Assert.Contains("clean code", allPrompts); + Assert.Contains("TypeScript", result.SharedContext); + Assert.Contains("TypeScript", result.RoutingContext); + } + + [Fact] + public void RoundTrip_PreservesTeamName() + { + var preset = MakePreset("Special Characters & Stuff"); + var workers = new List<(string Name, string? 
SystemPrompt)> { ("w1", "Test prompt.") }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var discovered = SquadDiscovery.Discover(_tempDir); + Assert.Single(discovered); + Assert.Equal("Special Characters & Stuff", discovered[0].Name); + } + + [Fact] + public void SanitizeAgentName_StripsTeamPrefix() + { + var name = SquadWriter.SanitizeAgentName("Code Review Team-worker-1", "Code Review Team"); + Assert.Equal("worker-1", name); + } + + [Fact] + public void SanitizeAgentName_LowercasesResult() + { + var name = SquadWriter.SanitizeAgentName("MyTeam-SecurityAuditor", "MyTeam"); + Assert.Equal("securityauditor", name); + } + + [Fact] + public void SanitizeAgentName_NoPrefix_ReturnsLowerName() + { + var name = SquadWriter.SanitizeAgentName("standalone-agent", "Different Team"); + Assert.Equal("standalone-agent", name); + } + + [Fact] + public void DeriveRole_ExtractsFromPrompt() + { + var preset = MakePreset("Team"); + var workers = new List<(string Name, string? SystemPrompt)> + { + ("sec", "You are a security auditor. Focus on OWASP Top 10.") + }; + + SquadWriter.WritePreset(_tempDir, preset, workers); + + var content = File.ReadAllText(Path.Combine(_tempDir, ".squad", "team.md")); + Assert.Contains("| sec | security auditor |", content); + } + + [Fact] + public void WritePreset_OverwritesExisting() + { + var preset1 = MakePreset("Team"); + var workers1 = new List<(string Name, string? SystemPrompt)> { ("old-agent", "Old charter.") }; + SquadWriter.WritePreset(_tempDir, preset1, workers1); + + var preset2 = MakePreset("Team v2"); + var workers2 = new List<(string Name, string? 
SystemPrompt)> { ("new-agent", "New charter.") }; + SquadWriter.WritePreset(_tempDir, preset2, workers2); + + var content = File.ReadAllText(Path.Combine(_tempDir, ".squad", "team.md")); + Assert.Contains("# Team v2", content); + Assert.Contains("| new-agent |", content); + // Old agent dir may still exist (we don't delete, just overwrite) + Assert.True(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "new-agent"))); + } + + [Fact] + public void WriteFromGroup_CreatesSquadFromSessionData() + { + var group = new SessionGroup + { + Name = "Live Team", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect, + SharedContext = "Be concise.", + }; + var members = new List + { + new() { SessionName = "Live Team-orchestrator", Role = MultiAgentRole.Orchestrator, PreferredModel = "claude-opus-4.6" }, + new() { SessionName = "Live Team-worker-1", Role = MultiAgentRole.Worker, PreferredModel = "gpt-5", SystemPrompt = "You are a code reviewer." }, + new() { SessionName = "Live Team-worker-2", Role = MultiAgentRole.Worker, PreferredModel = "claude-sonnet-4.5", SystemPrompt = "You are a test writer." }, + }; + string GetModel(string name) => members.First(m => m.SessionName == name).PreferredModel ?? 
"default"; + + var squadDir = SquadWriter.WriteFromGroup(_tempDir, "Live Team", group, members, GetModel); + + Assert.True(File.Exists(Path.Combine(squadDir, "team.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "decisions.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "agents", "worker-1", "charter.md"))); + Assert.True(File.Exists(Path.Combine(squadDir, "agents", "worker-2", "charter.md"))); + + // Verify round-trip + var discovered = SquadDiscovery.Discover(_tempDir); + Assert.Single(discovered); + Assert.Equal("Live Team", discovered[0].Name); + Assert.Equal(2, discovered[0].WorkerModels.Length); + } + + private static GroupPreset MakePreset(string name) => new( + name, "Test", "πŸ§ͺ", MultiAgentMode.OrchestratorReflect, + "claude-opus-4.6", new[] { "gpt-5", "claude-sonnet-4.5" }); +} diff --git a/PolyPilot/Models/ModelCapabilities.cs b/PolyPilot/Models/ModelCapabilities.cs index 259b3c6b91..98fcdaf294 100644 --- a/PolyPilot/Models/ModelCapabilities.cs +++ b/PolyPilot/Models/ModelCapabilities.cs @@ -267,7 +267,8 @@ public static GroupPreset[] GetAll(string baseDir, string? repoWorkingDirectory /// Save the current multi-agent group as a reusable preset. public static GroupPreset? SaveGroupAsPreset(string baseDir, string name, string description, - string emoji, SessionGroup group, List members, Func getEffectiveModel) + string emoji, SessionGroup group, List members, Func getEffectiveModel, + string? worktreeRoot = null) { var orchestrator = members.FirstOrDefault(m => m.Role == MultiAgentRole.Orchestrator); var workers = members.Where(m => m.Role != MultiAgentRole.Orchestrator).ToList(); @@ -278,10 +279,27 @@ public static GroupPreset[] GetAll(string baseDir, string? repoWorkingDirectory name, description, emoji, group.OrchestratorMode, orchestrator != null ? 
getEffectiveModel(orchestrator.SessionName) : "claude-opus-4.6", workers.Select(w => getEffectiveModel(w.SessionName)).ToArray()) - { IsUserDefined = true }; + { + IsUserDefined = true, + WorkerSystemPrompts = workers.Select(w => w.SystemPrompt).ToArray(), + SharedContext = group.SharedContext, + RoutingContext = group.RoutingContext, + }; + + // Write as .squad/ directory if worktree is available + if (!string.IsNullOrEmpty(worktreeRoot) && Directory.Exists(worktreeRoot)) + { + try + { + SquadWriter.WriteFromGroup(worktreeRoot, name, group, members, getEffectiveModel); + preset = preset with { IsRepoLevel = true, SourcePath = Path.Combine(worktreeRoot, ".squad") }; + } + catch { /* Fall through to JSON save */ } + } + // Always save to presets.json too (personal backup) var existing = Load(baseDir); - existing.RemoveAll(p => p.Name == name); // replace if same name + existing.RemoveAll(p => p.Name == name); existing.Add(preset); Save(baseDir, existing); return preset; diff --git a/PolyPilot/Models/SquadWriter.cs b/PolyPilot/Models/SquadWriter.cs new file mode 100644 index 0000000000..fb6d3c2fc4 --- /dev/null +++ b/PolyPilot/Models/SquadWriter.cs @@ -0,0 +1,128 @@ +using System.Text; + +namespace PolyPilot.Models; + +/// +/// Writes GroupPreset data as a bradygaster/squad .squad/ directory structure. +/// Produces: team.md, agents/{name}/charter.md, decisions.md (optional), routing.md (optional). +/// This is the inverse of SquadDiscovery β€” write what we read. +/// +public static class SquadWriter +{ + /// + /// Write a GroupPreset to .squad/ format in the given worktree root. + /// Creates .squad/ directory if it doesn't exist. Overwrites existing files. + /// + public static string WritePreset(string worktreeRoot, GroupPreset preset, + List<(string Name, string? 
SystemPrompt)> workers) + { + var squadDir = Path.Combine(worktreeRoot, ".squad"); + Directory.CreateDirectory(squadDir); + + WriteTeamFile(squadDir, preset.Name, workers); + WriteAgentCharters(squadDir, workers); + + if (!string.IsNullOrWhiteSpace(preset.SharedContext)) + File.WriteAllText(Path.Combine(squadDir, "decisions.md"), preset.SharedContext); + + if (!string.IsNullOrWhiteSpace(preset.RoutingContext)) + File.WriteAllText(Path.Combine(squadDir, "routing.md"), preset.RoutingContext); + + return squadDir; + } + + /// + /// Write a GroupPreset from live session data (orchestrator + workers with their system prompts and group context). + /// + public static string WriteFromGroup(string worktreeRoot, string teamName, + SessionGroup group, List members, Func getEffectiveModel) + { + var workers = members + .Where(m => m.Role != MultiAgentRole.Orchestrator) + .Select(m => (Name: SanitizeAgentName(m.SessionName, teamName), SystemPrompt: m.SystemPrompt)) + .ToList(); + + var preset = new GroupPreset( + teamName, "", "🫑", group.OrchestratorMode, + getEffectiveModel(members.FirstOrDefault(m => m.Role == MultiAgentRole.Orchestrator)?.SessionName ?? ""), + members.Where(m => m.Role != MultiAgentRole.Orchestrator) + .Select(m => getEffectiveModel(m.SessionName)).ToArray()) + { + SharedContext = group.SharedContext, + RoutingContext = group.RoutingContext, + }; + + return WritePreset(worktreeRoot, preset, workers); + } + + private static void WriteTeamFile(string squadDir, string teamName, + List<(string Name, string? 
SystemPrompt)> workers) + { + var sb = new StringBuilder(); + sb.AppendLine($"# {teamName}"); + sb.AppendLine(); + sb.AppendLine("| Member | Role |"); + sb.AppendLine("|--------|------|"); + foreach (var (name, prompt) in workers) + { + var role = DeriveRole(name, prompt); + sb.AppendLine($"| {name} | {role} |"); + } + File.WriteAllText(Path.Combine(squadDir, "team.md"), sb.ToString()); + } + + private static void WriteAgentCharters(string squadDir, + List<(string Name, string? SystemPrompt)> workers) + { + var agentsDir = Path.Combine(squadDir, "agents"); + Directory.CreateDirectory(agentsDir); + + foreach (var (name, prompt) in workers) + { + var agentDir = Path.Combine(agentsDir, name); + Directory.CreateDirectory(agentDir); + var charter = prompt ?? $"You are {name}. Complete assigned tasks thoroughly."; + File.WriteAllText(Path.Combine(agentDir, "charter.md"), charter); + } + } + + /// + /// Derive a short role description from the agent name or system prompt. + /// + private static string DeriveRole(string name, string? prompt) + { + if (!string.IsNullOrWhiteSpace(prompt)) + { + // Take first sentence of prompt as role, capped at 60 chars + var firstSentence = prompt.Split('.', '\n')[0].Trim(); + if (firstSentence.StartsWith("You are a ", StringComparison.OrdinalIgnoreCase)) + firstSentence = firstSentence[10..]; + else if (firstSentence.StartsWith("You are an ", StringComparison.OrdinalIgnoreCase)) + firstSentence = firstSentence[11..]; + if (firstSentence.Length > 60) + firstSentence = firstSentence[..57] + "..."; + if (!string.IsNullOrWhiteSpace(firstSentence)) + return firstSentence; + } + // Fall back to name-based role + return name.Replace("-", " ").Replace("_", " "); + } + + /// + /// Convert a session name like "Code Review Team-worker-1" into an agent name like "worker-1". + /// Strips the team name prefix and sanitizes for filesystem use. 
+ /// + internal static string SanitizeAgentName(string sessionName, string teamName) + { + var name = sessionName; + // Strip team name prefix (e.g., "Code Review Team-worker-1" β†’ "worker-1") + if (name.StartsWith(teamName + "-", StringComparison.OrdinalIgnoreCase)) + name = name[(teamName.Length + 1)..]; + + // Replace invalid path chars with hyphens + foreach (var c in Path.GetInvalidFileNameChars()) + name = name.Replace(c, '-'); + + return name.Trim('-').ToLowerInvariant(); + } +} diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index d9b53ddd76..bce6407029 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -1526,8 +1526,16 @@ private void AutoAdjustFromFeedback(string groupId, SessionGroup group, List m != null) .ToList(); + // Resolve worktree path for .squad/ write-back + string? worktreeRoot = null; + if (!string.IsNullOrEmpty(group.WorktreeId)) + { + var wt = _repoManager.Worktrees.FirstOrDefault(w => w.Id == group.WorktreeId); + if (wt != null) worktreeRoot = wt.Path; + } + return Models.UserPresets.SaveGroupAsPreset(PolyPilotBaseDir, name, description, emoji, - group, members!, GetEffectiveModel); + group, members!, GetEffectiveModel, worktreeRoot); } #endregion diff --git a/docs/multi-agent-orchestration.md b/docs/multi-agent-orchestration.md index dd5daebc56..37d949da15 100644 --- a/docs/multi-agent-orchestration.md +++ b/docs/multi-agent-orchestration.md @@ -15,11 +15,13 @@ PolyPilot's multi-agent system lets you create a **team of AI sessions** that wo | `PolyPilot/Models/ReflectionCycle.cs` | Reflection state, stall detection, sentinel parsing, evaluator prompts | | `PolyPilot/Models/ModelCapabilities.cs` | `GroupPreset`, `UserPresets` (three-tier merge), built-in presets | | `PolyPilot/Models/SquadDiscovery.cs` | Squad directory parser (`.squad/` β†’ `GroupPreset`) | +| `PolyPilot/Models/SquadWriter.cs` | Squad 
directory writer (`GroupPreset` β†’ `.squad/`) | | `PolyPilot/Services/CopilotService.Events.cs` | TCS completion (IsProcessing β†’ TrySetResult ordering) | | `PolyPilot/Components/Layout/SessionSidebar.razor` | Preset picker UI (sectioned: From Repo / Built-in / My Presets) | | `PolyPilot.Tests/MultiAgentRegressionTests.cs` | 37 regression tests covering all known bugs | | `PolyPilot.Tests/SessionOrganizationTests.cs` | 15 grouping stability tests | | `PolyPilot.Tests/SquadDiscoveryTests.cs` | 22 Squad discovery tests | +| `PolyPilot.Tests/SquadWriterTests.cs` | 15 Squad write-back tests | | `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json` | Executable CDP test scenarios | --- @@ -397,9 +399,24 @@ Built-in presets < User presets (~/.polypilot/presets.json) < Repo teams (.s Repo teams shadow built-in/user presets with the same name when working in that repo's worktree. +### Squad Write-Back + +When a user saves a multi-agent group as a preset and the group is associated with a worktree, PolyPilot writes the team definition back to `.squad/` format in the worktree root: + +1. **`SaveGroupAsPreset`** resolves the worktree path from the group's `WorktreeId` +2. **`SquadWriter.WriteFromGroup`** converts the live `SessionGroup` + `SessionMeta` into Squad files: + - `.squad/team.md` β€” Team name + agent roster table (Name | Role | Model) + - `.squad/agents/{name}/charter.md` β€” Worker system prompt as charter + - `.squad/decisions.md` β€” Shared context (from `GroupPreset.SharedContext`) + - `.squad/routing.md` β€” Routing context (from `GroupPreset.RoutingContext`) +3. The preset is also saved to `presets.json` as a personal backup + +Agent names are sanitized: team-name prefixes are stripped (e.g., "Code Review Team-worker-1" β†’ "worker-1"), names are lowercased and non-alphanumeric characters replaced with hyphens. Roles are derived from the first sentence of the system prompt, stripping "You are a/an" prefix. 
+ +This enables round-tripping: discover a Squad team β†’ modify it in PolyPilot β†’ save back β†’ others can use the updated team definition from the repo. + ### What PolyPilot Does NOT Do with Squad -- **Never writes to `.squad/`** β€” PolyPilot is read-only; the repo files are the source of truth - **No `history.md` persistence** β€” Squad agents accumulate learnings; PolyPilot sessions are stateless across restarts - **No Scribe agent** β€” Squad's silent decision-logger is not replicated - **No GitHub Actions integration** β€” Squad's label triage workflows are out of scope From 579e351fc8731ad44f8bf30188f6998c07cd2176 Mon Sep 17 00:00:00 2001 From: Shane Date: Sat, 21 Feb 2026 11:57:40 -0600 Subject: [PATCH 47/48] Feature review: fix bugs, add 19 tests, 5 scenarios, update docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugs fixed: - BuildCompletionSummary: reorder ternary so IsStalled takes priority over IsCancelled (stalls were showing as 'Cancelled by user') - SquadWriter: clean stale agent dirs before re-writing to prevent phantom agents on re-discovery Tests added (19 new, 1114 total passing): - MultiAgentGapTests.cs: ParseTaskAssignments (6), ModelCapabilities (4), BuildCompletionSummary (4) - ScenarioReferenceTests: structural validation + reflect loop checks (2) - SquadWriterTests: stale dir cleanup verification (1) - SessionOrganizationTests: SaveGroupAsPreset with worktree write-back (1) - ReflectionCycleTests: stalled summary priority (1) Scenarios added (5 new, 26 total): - sequential-mode-processes-in-order - pause-resume-reflection-cycle - dedicated-evaluator-session - routing-context-in-orchestrator-plan Docs fixed: - team.md format: 'Name | Role | Model' β†’ 'Member | Role' Verified live: Squad .squad/ discovery shows 'PolyPilot Review Squad' in 'πŸ“‚ From Repo' section with 🫑 badge in preset picker. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/MultiAgentGapTests.cs | 183 ++++++++++++++++++ PolyPilot.Tests/ScenarioReferenceTests.cs | 32 +++ .../Scenarios/multi-agent-scenarios.json | 70 +++++++ PolyPilot.Tests/SessionOrganizationTests.cs | 35 ++++ PolyPilot.Tests/SquadWriterTests.cs | 27 +++ PolyPilot/Models/ReflectionCycle.cs | 4 +- PolyPilot/Models/SquadWriter.cs | 3 + docs/multi-agent-orchestration.md | 2 +- 8 files changed, 353 insertions(+), 3 deletions(-) create mode 100644 PolyPilot.Tests/MultiAgentGapTests.cs diff --git a/PolyPilot.Tests/MultiAgentGapTests.cs b/PolyPilot.Tests/MultiAgentGapTests.cs new file mode 100644 index 0000000000..31b0391da8 --- /dev/null +++ b/PolyPilot.Tests/MultiAgentGapTests.cs @@ -0,0 +1,183 @@ +using PolyPilot.Models; +using PolyPilot.Services; + +namespace PolyPilot.Tests; + +/// +/// Gap-coverage tests for multi-agent parsing, model capabilities, and reflection summaries. +/// +public class MultiAgentGapTests +{ + // --- ParseTaskAssignments --- + + [Fact] + public void ParseTaskAssignments_EmptyInput_ReturnsEmpty() + { + var result = CopilotService.ParseTaskAssignments("", new List { "a", "b" }); + Assert.Empty(result); + } + + [Fact] + public void ParseTaskAssignments_SingleWorker_ExtractsTask() + { + var response = "@worker:alpha\nDo the thing.\n@end"; + var result = CopilotService.ParseTaskAssignments(response, new List { "alpha" }); + + Assert.Single(result); + Assert.Equal("alpha", result[0].WorkerName); + Assert.Contains("Do the thing", result[0].Task); + } + + [Fact] + public void ParseTaskAssignments_MultipleWorkers_ExtractsAll() + { + var response = @"@worker:w1 +Task one. +@end +@worker:w2 +Task two. +@end +@worker:w3 +Task three. 
+@end"; + var workers = new List { "w1", "w2", "w3" }; + var result = CopilotService.ParseTaskAssignments(response, workers); + + Assert.Equal(3, result.Count); + Assert.Equal("w1", result[0].WorkerName); + Assert.Equal("w2", result[1].WorkerName); + Assert.Equal("w3", result[2].WorkerName); + } + + [Fact] + public void ParseTaskAssignments_FuzzyMatch_FindsClosestWorker() + { + // "coder" is a substring of "coder-session" β†’ fuzzy match + var response = "@worker:coder\nWrite the code.\n@end"; + var result = CopilotService.ParseTaskAssignments(response, new List { "coder-session", "reviewer-session" }); + + Assert.Single(result); + Assert.Equal("coder-session", result[0].WorkerName); + } + + [Fact] + public void ParseTaskAssignments_UnknownWorker_IsIgnored() + { + var response = "@worker:ghost\nDo something.\n@end"; + var result = CopilotService.ParseTaskAssignments(response, new List { "alpha", "beta" }); + + Assert.Empty(result); + } + + [Fact] + public void ParseTaskAssignments_DuplicateWorker_TakesLast() + { + var response = @"@worker:alpha +First task. +@end +@worker:alpha +Second task. +@end"; + var result = CopilotService.ParseTaskAssignments(response, new List { "alpha" }); + + // The regex matches both blocks; both are added (last one wins in practice) + Assert.Equal(2, result.Count); + Assert.Contains("Second task", result[^1].Task); + } + + // --- ModelCapabilities --- + + [Theory] + [InlineData(null)] + [InlineData("")] + public void GetCapabilities_NullOrEmpty_ReturnsNone(string? 
slug) + { + var caps = ModelCapabilities.GetCapabilities(slug!); + Assert.Equal(ModelCapability.None, caps); + } + + [Fact] + public void GetCapabilities_KnownModel_ReturnsFlags() + { + var caps = ModelCapabilities.GetCapabilities("gpt-5"); + Assert.True(caps.HasFlag(ModelCapability.ReasoningExpert)); + Assert.True(caps.HasFlag(ModelCapability.CodeExpert)); + Assert.True(caps.HasFlag(ModelCapability.ToolUse)); + } + + [Fact] + public void GetRoleWarnings_UnknownModel_ReturnsWarning() + { + var warnings = ModelCapabilities.GetRoleWarnings("totally-unknown-model", MultiAgentRole.Worker); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Contains("Unknown model", StringComparison.OrdinalIgnoreCase)); + } + + [Fact] + public void GetRoleWarnings_WeakOrchestrator_ReturnsWarning() + { + // claude-haiku-4.5 is CostEfficient + Fast but not ReasoningExpert + var warnings = ModelCapabilities.GetRoleWarnings("claude-haiku-4.5", MultiAgentRole.Orchestrator); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Contains("reasoning", StringComparison.OrdinalIgnoreCase)); + } + + // --- BuildCompletionSummary --- + + [Fact] + public void BuildCompletionSummary_GoalMet_ShowsCheckmark() + { + var cycle = ReflectionCycle.Create("Ship the feature", maxIterations: 5); + cycle.Advance("Done!\n[[REFLECTION_COMPLETE]]"); + + var summary = cycle.BuildCompletionSummary(); + + Assert.Contains("βœ…", summary); + Assert.Contains("Goal met", summary); + } + + [Fact] + public void BuildCompletionSummary_Stalled_ShowsWarning() + { + var cycle = ReflectionCycle.Create("Improve quality", maxIterations: 10); + // Feed identical responses to trigger stall detection + cycle.Advance("Working on the task with specific details about implementation"); + cycle.Advance("Working on the task with specific details about implementation"); + cycle.Advance("Working on the task with specific details about implementation"); + + var summary = cycle.BuildCompletionSummary(); + + // IsStalled 
takes priority over IsCancelled in the ternary chain + Assert.Contains("⚠️", summary); + Assert.Contains("Stalled", summary); + Assert.DoesNotContain("⏹️", summary); + } + + [Fact] + public void BuildCompletionSummary_Cancelled_ShowsStop() + { + var cycle = ReflectionCycle.Create("Long task", maxIterations: 10); + cycle.Advance("First attempt with unique content here..."); + cycle.IsCancelled = true; + cycle.IsActive = false; + + var summary = cycle.BuildCompletionSummary(); + + Assert.Contains("⏹️", summary); + Assert.Contains("Cancelled", summary); + } + + [Fact] + public void BuildCompletionSummary_MaxIterations_ShowsClock() + { + var cycle = ReflectionCycle.Create("Goal", maxIterations: 2); + cycle.Advance("Trying with approach alpha..."); + cycle.Advance("Still trying with approach beta and new ideas..."); + + var summary = cycle.BuildCompletionSummary(); + + Assert.Contains("⏱️", summary); + Assert.Contains("Max iterations", summary); + Assert.Contains("2/2", summary); + } +} diff --git a/PolyPilot.Tests/ScenarioReferenceTests.cs b/PolyPilot.Tests/ScenarioReferenceTests.cs index a8e497dd3f..c07c70725b 100644 --- a/PolyPilot.Tests/ScenarioReferenceTests.cs +++ b/PolyPilot.Tests/ScenarioReferenceTests.cs @@ -247,4 +247,36 @@ public void MultiAgentScenarios_IncludeSquadWriteBack() Assert.Contains("round-trip-squad-write-read", ids); Assert.Contains("squad-write-sanitizes-names", ids); } + + [Fact] + public void MultiAgentScenarios_AllHaveRequiredFields() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var scenarios = doc.RootElement.GetProperty("scenarios").EnumerateArray().ToList(); + + Assert.NotEmpty(scenarios); + foreach (var s in scenarios) + { + Assert.True(s.TryGetProperty("id", out _), "Scenario missing 'id'"); + Assert.True(s.TryGetProperty("name", out _), "Scenario missing 'name'"); + Assert.True(s.TryGetProperty("steps", out var steps), "Scenario missing 'steps'"); + 
Assert.NotEqual(0, steps.GetArrayLength()); + } + } + + [Fact] + public void MultiAgentScenarios_IncludeReflectLoopScenarios() + { + var json = File.ReadAllText(Path.Combine(ScenariosDir, "multi-agent-scenarios.json")); + var doc = JsonDocument.Parse(json); + var ids = doc.RootElement.GetProperty("scenarios") + .EnumerateArray() + .Select(s => s.GetProperty("id").GetString()) + .ToHashSet(); + + Assert.Contains("reflect-loop-completes-goal-met", ids); + Assert.Contains("reflect-loop-max-iterations", ids); + Assert.Contains("stall-detection-triggers", ids); + } } diff --git a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json index 4b49161290..a1944a40b6 100644 --- a/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json +++ b/PolyPilot.Tests/Scenarios/multi-agent-scenarios.json @@ -339,6 +339,76 @@ { "action": "assertNoDirectoryContains", "path": ".squad/agents", "pattern": " " }, { "action": "shell", "command": "rm -rf .squad" } ] + }, + { + "id": "sequential-mode-processes-in-order", + "name": "Sequential mode sends to sessions one-by-one", + "description": "Verifies that Sequential mode dispatches prompts to sessions one at a time in order, not in parallel.", + "invariants": [ + "Sessions receive prompts sequentially, not simultaneously", + "Each session processes before the next receives its prompt" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Multi-Perspective Analysis" }, + { "action": "setMode", "mode": "Sequential" }, + { "action": "sendPrompt", "text": "Explain dependency injection" }, + { "action": "waitForAllResponses", "timeout": 120 }, + { "action": "assertAllSessionsResponded" } + ] + }, + { + "id": "pause-resume-reflection-cycle", + "name": "Pause and resume an active reflection loop", + "description": "Verifies that pausing a running OrchestratorReflect loop halts iteration dispatch without losing state, and resuming continues from where it left off.", + "invariants": [ + 
"IsPaused flag prevents new iterations from dispatching", + "Resuming continues from the current iteration count", + "Reflection state (CurrentIteration, goal) is preserved" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Quick Reflection Cycle" }, + { "action": "sendPrompt", "text": "Implement a binary search function" }, + { "action": "waitForPhase", "phase": "Reflecting", "timeout": 30 }, + { "action": "pauseReflection" }, + { "action": "assertReflectionPaused" }, + { "action": "resumeReflection" }, + { "action": "waitForCompletion", "timeout": 120 } + ] + }, + { + "id": "dedicated-evaluator-session", + "name": "OrchestratorReflect with separate evaluator", + "description": "Verifies that when an EvaluatorSessionName is set, the evaluator independently scores each iteration instead of the orchestrator self-evaluating.", + "invariants": [ + "Evaluator session receives synthesis output for scoring", + "Evaluator PASS/FAIL determines iteration continuation", + "Orchestrator and evaluator are different sessions" + ], + "steps": [ + { "action": "createGroupFromPreset", "preset": "Code Review Team" }, + { "action": "setEvaluator", "sessionName": "worker-1" }, + { "action": "sendPrompt", "text": "Review error handling in the auth module" }, + { "action": "waitForCompletion", "timeout": 120 }, + { "action": "assertEvaluatorWasUsed" } + ] + }, + { + "id": "routing-context-in-orchestrator-plan", + "name": "Routing context from routing.md injected into orchestrator", + "description": "Verifies that a Squad-discovered preset's routing.md content appears in the orchestrator's planning prompt.", + "invariants": [ + "RoutingContext from routing.md is prepended to orchestrator planning", + "Orchestrator uses routing hints to assign tasks to appropriate workers" + ], + "steps": [ + { "action": "createSquadDir", "agents": ["security-reviewer", "code-optimizer"], "routing": "Route security tasks to security-reviewer" }, + { "action": "selectWorktree", 
"worktree": "current" }, + { "action": "createGroupFromPreset", "preset": "PolyPilot Review Squad" }, + { "action": "sendPrompt", "text": "Review this code for security and performance" }, + { "action": "waitForPhase", "phase": "Planning", "timeout": 30 }, + { "action": "assertOrchestratorReceivedRoutingContext" }, + { "action": "shell", "command": "rm -rf .squad" } + ] } ] } diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index c1916dd455..7f44421f9b 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -1263,6 +1263,41 @@ public void SaveGroupAsPreset_CreatesFromMembers() if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); } } + + [Fact] + public void SaveGroupAsPreset_WithWorktreeRoot_WritesSquadDir() + { + var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + var worktreeRoot = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + try + { + Directory.CreateDirectory(worktreeRoot); + var group = new SessionGroup + { + Name = "SquadTeam", + IsMultiAgent = true, + OrchestratorMode = MultiAgentMode.OrchestratorReflect + }; + var members = new List + { + new() { SessionName = "orch", Role = MultiAgentRole.Orchestrator }, + new() { SessionName = "w1", Role = MultiAgentRole.Worker, SystemPrompt = "You are a coder." }, + }; + + var preset = UserPresets.SaveGroupAsPreset(tempDir, "SquadTeam", "desc", "πŸš€", + group, members, name => name == "orch" ? 
"claude-opus-4.6" : "gpt-5", + worktreeRoot: worktreeRoot); + + Assert.NotNull(preset); + Assert.True(Directory.Exists(Path.Combine(worktreeRoot, ".squad"))); + Assert.True(preset!.IsRepoLevel); + } + finally + { + if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true); + if (Directory.Exists(worktreeRoot)) Directory.Delete(worktreeRoot, true); + } + } } public class EvaluationTrackingTests diff --git a/PolyPilot.Tests/SquadWriterTests.cs b/PolyPilot.Tests/SquadWriterTests.cs index 86221857b9..00e1c087d5 100644 --- a/PolyPilot.Tests/SquadWriterTests.cs +++ b/PolyPilot.Tests/SquadWriterTests.cs @@ -250,6 +250,33 @@ public void WriteFromGroup_CreatesSquadFromSessionData() Assert.Equal(2, discovered[0].WorkerModels.Length); } + [Fact] + public void WritePreset_OverwriteCleansStaleAgents() + { + var preset = MakePreset("Team"); + var threeWorkers = new List<(string Name, string? SystemPrompt)> + { + ("alpha", "Alpha agent."), + ("beta", "Beta agent."), + ("gamma", "Gamma agent.") + }; + + SquadWriter.WritePreset(_tempDir, preset, threeWorkers); + Assert.True(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "gamma"))); + + // Overwrite with only 2 workers β€” gamma dir should be gone + var twoWorkers = new List<(string Name, string? 
SystemPrompt)> + { + ("alpha", "Alpha v2."), + ("beta", "Beta v2.") + }; + SquadWriter.WritePreset(_tempDir, preset, twoWorkers); + + Assert.True(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "alpha"))); + Assert.True(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "beta"))); + Assert.False(Directory.Exists(Path.Combine(_tempDir, ".squad", "agents", "gamma"))); + } + private static GroupPreset MakePreset(string name) => new( name, "Test", "πŸ§ͺ", MultiAgentMode.OrchestratorReflect, "claude-opus-4.6", new[] { "gpt-5", "claude-sonnet-4.5" }); diff --git a/PolyPilot/Models/ReflectionCycle.cs b/PolyPilot/Models/ReflectionCycle.cs index 616f6f0cf3..5d0c648c38 100644 --- a/PolyPilot/Models/ReflectionCycle.cs +++ b/PolyPilot/Models/ReflectionCycle.cs @@ -419,8 +419,8 @@ public bool AdvanceWithEvaluation(string response, bool evaluatorPassed, string? /// public string BuildCompletionSummary() { - var emoji = GoalMet ? "βœ…" : IsCancelled ? "⏹️" : IsStalled ? "⚠️" : "⏱️"; - var reasonText = GoalMet ? "Goal met" : IsCancelled ? "Cancelled by user" : IsStalled ? $"Stalled ({LastSimilarity:P0} similarity)" : $"Max iterations reached ({MaxIterations})"; + var emoji = GoalMet ? "βœ…" : IsStalled ? "⚠️" : IsCancelled ? "⏹️" : "⏱️"; + var reasonText = GoalMet ? "Goal met" : IsStalled ? $"Stalled ({LastSimilarity:P0} similarity)" : IsCancelled ? "Cancelled by user" : $"Max iterations reached ({MaxIterations})"; var durationText = ""; if (StartedAt.HasValue && CompletedAt.HasValue) { diff --git a/PolyPilot/Models/SquadWriter.cs b/PolyPilot/Models/SquadWriter.cs index fb6d3c2fc4..db2d032f61 100644 --- a/PolyPilot/Models/SquadWriter.cs +++ b/PolyPilot/Models/SquadWriter.cs @@ -75,6 +75,9 @@ private static void WriteAgentCharters(string squadDir, List<(string Name, string? 
SystemPrompt)> workers) { var agentsDir = Path.Combine(squadDir, "agents"); + // Clean stale agent dirs before re-writing to prevent phantom agents on re-discovery + if (Directory.Exists(agentsDir)) + Directory.Delete(agentsDir, true); Directory.CreateDirectory(agentsDir); foreach (var (name, prompt) in workers) diff --git a/docs/multi-agent-orchestration.md b/docs/multi-agent-orchestration.md index 37d949da15..cd973d8469 100644 --- a/docs/multi-agent-orchestration.md +++ b/docs/multi-agent-orchestration.md @@ -405,7 +405,7 @@ When a user saves a multi-agent group as a preset and the group is associated wi 1. **`SaveGroupAsPreset`** resolves the worktree path from the group's `WorktreeId` 2. **`SquadWriter.WriteFromGroup`** converts the live `SessionGroup` + `SessionMeta` into Squad files: - - `.squad/team.md` β€” Team name + agent roster table (Name | Role | Model) + - `.squad/team.md` β€” Team name + agent roster table (Member | Role) - `.squad/agents/{name}/charter.md` β€” Worker system prompt as charter - `.squad/decisions.md` β€” Shared context (from `GroupPreset.SharedContext`) - `.squad/routing.md` β€” Routing context (from `GroupPreset.RoutingContext`) From 669a435b62bed801014e303137a6788a03a47e90 Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Sat, 21 Feb 2026 16:32:50 -0600 Subject: [PATCH 48/48] fix: address multi-model review findings for multi-agent orchestration - SendPromptAndWaitAsync: use SendPromptAsync return value directly instead of capturing stale state TCS (prevents 10-min hang after reconnection) - Add RunContinuationsAsynchronously to reconnect TCS (matches normal path) - Fix WaitingForWorkers phase: fire BEFORE Task.WhenAll, not after - Deduplicate worker assignments before parallel dispatch (prevents concurrent send failure for same-worker duplicate @worker blocks) - Mark sessions as hidden during multi-agent group deletion to prevent ReconcileOrganization ghost sessions in default group - Clean up _modelSwitchLocks semaphore on 
session close (memory leak) - Fix cross-platform test: use Path.Combine instead of hardcoded backslash Findings from: Opus 4.6, Codex 5.3, Sonnet 4.6 reviews 1118/1118 tests passing Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/SessionOrganizationTests.cs | 7 ++- .../Services/CopilotService.Organization.cs | 52 +++++++++++-------- PolyPilot/Services/CopilotService.cs | 6 ++- 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index 7f44421f9b..4350d7cf8e 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -2225,8 +2225,11 @@ public void ShortenPath_TwoOrFewerSegments_ReturnsOriginal() [Fact] public void ShortenPath_LongPath_ShowsLastTwoSegments() { - var result = ShortenPathHelper(@"C:\Users\shneuvil\.polypilot\worktrees\my-repo"); - Assert.Equal(@"…\worktrees\my-repo", result); + // Use platform-native path to avoid separator mismatch + var path = System.IO.Path.Combine("C:", "Users", "shneuvil", ".polypilot", "worktrees", "my-repo"); + var result = ShortenPathHelper(path); + var sep = System.IO.Path.DirectorySeparatorChar; + Assert.Equal($"…{sep}worktrees{sep}my-repo", result); } [Fact] diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 3f847788b8..bc61b44112 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -369,14 +369,21 @@ public void DeleteGroup(string groupId) .Where(m => m.GroupId == groupId) .Select(m => m.SessionName) .ToList(); + // Remove org metadata first so UI updates immediately + Organization.Sessions.RemoveAll(m => sessionNames.Contains(m.SessionName)); + // Mark sessions as hidden so ReconcileOrganization won't re-add them + // to the default group while CloseSessionAsync is still running + foreach (var 
name in sessionNames) + { + if (_sessions.TryGetValue(name, out var s)) + s.Info.IsHidden = true; + } // Fire-and-forget: close sessions asynchronously _ = Task.Run(async () => { foreach (var name in sessionNames) await CloseSessionAsync(name); }); - // Remove from organization immediately so UI updates - Organization.Sessions.RemoveAll(m => sessionNames.Contains(m.SessionName)); } else { @@ -755,7 +762,12 @@ private async Task SendViaOrchestratorAsync(string groupId, List members var planResponse = await SendPromptAndWaitAsync(orchestratorName, planningPrompt, cancellationToken); // Phase 2: Parse task assignments from orchestrator response - var assignments = ParseTaskAssignments(planResponse, workerNames); + var rawAssignments = ParseTaskAssignments(planResponse, workerNames); + // Deduplicate: merge multiple tasks for the same worker into one prompt + var assignments = rawAssignments + .GroupBy(a => a.WorkerName, StringComparer.OrdinalIgnoreCase) + .Select(g => new TaskAssignment(g.Key, string.Join("\n\n---\n\n", g.Select(a => a.Task)))) + .ToList(); if (assignments.Count == 0) { // Orchestrator handled it without delegation β€” add a system note @@ -768,12 +780,12 @@ private async Task SendViaOrchestratorAsync(string groupId, List members InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Dispatching, $"Sending tasks to {assignments.Count} worker(s)")); + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.WaitingForWorkers, null)); + var workerTasks = assignments.Select(a => ExecuteWorkerAsync(a.WorkerName, a.Task, prompt, cancellationToken)); var results = await Task.WhenAll(workerTasks); - InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.WaitingForWorkers, null)); - // Phase 4: Synthesize β€” send worker results back to orchestrator InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Synthesizing, null)); @@ -890,19 +902,12 @@ private async Task 
ExecuteWorkerAsync(string workerName, string ta private async Task SendPromptAndWaitAsync(string sessionName, string prompt, CancellationToken cancellationToken) { - if (!_sessions.TryGetValue(sessionName, out var state)) - throw new InvalidOperationException($"Session '{sessionName}' not found."); - - await SendPromptAsync(sessionName, prompt, cancellationToken: cancellationToken); - - // Wait for the response to complete via the existing ResponseCompletion TCS - if (state.ResponseCompletion != null) - { - using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); - cts.CancelAfter(TimeSpan.FromMinutes(10)); - return await state.ResponseCompletion.Task.WaitAsync(cts.Token); - } - return ""; + // Use SendPromptAsync directly β€” it already awaits ResponseCompletion internally. + // Do NOT capture state and await its TCS separately: reconnection replaces the state + // object, orphaning the old TCS and causing a 10-minute hang. + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + cts.CancelAfter(TimeSpan.FromMinutes(10)); + return await SendPromptAsync(sessionName, prompt, cancellationToken: cts.Token); } private string BuildSynthesisPrompt(string originalPrompt, List results) @@ -1187,7 +1192,12 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List } var planResponse = await SendPromptAndWaitAsync(orchestratorName, planPrompt, ct); - var assignments = ParseTaskAssignments(planResponse, workerNames); + var rawAssignments = ParseTaskAssignments(planResponse, workerNames); + // Deduplicate: merge multiple tasks for the same worker into one prompt + var assignments = rawAssignments + .GroupBy(a => a.WorkerName, StringComparer.OrdinalIgnoreCase) + .Select(g => new TaskAssignment(g.Key, string.Join("\n\n---\n\n", g.Select(a => a.Task)))) + .ToList(); if (assignments.Count == 0) { @@ -1216,11 +1226,11 @@ private async Task SendViaOrchestratorReflectAsync(string groupId, List InvokeOnUI(() => 
OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Dispatching, $"Sending tasks to {assignments.Count} worker(s) β€” {iterDetail}")); + InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.WaitingForWorkers, iterDetail)); + var workerTasks = assignments.Select(a => ExecuteWorkerAsync(a.WorkerName, a.Task, prompt, ct)); var results = await Task.WhenAll(workerTasks); - InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.WaitingForWorkers, iterDetail)); - // Phase 4: Synthesize + Evaluate InvokeOnUI(() => OnOrchestratorPhaseChanged?.Invoke(groupId, OrchestratorPhase.Synthesizing, iterDetail)); diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index 47a52dc9c1..ab4c99ef2a 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -1594,7 +1594,7 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis Session = newSession, Info = state.Info }; - newState.ResponseCompletion = new TaskCompletionSource(); + newState.ResponseCompletion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); // Carry forward ProcessingGeneration so stale callbacks on the // orphaned old state can't pass generation checks on the new state. Interlocked.Exchange(ref newState.ProcessingGeneration, @@ -2066,6 +2066,10 @@ public async Task CloseSessionAsync(string name) _queuedImagePaths.TryRemove(name, out _); } + // Clean up per-session model switch lock + if (_modelSwitchLocks.TryRemove(name, out var sem)) + sem.Dispose(); + // Track as explicitly closed so merge doesn't re-add from file if (state.Info.SessionId != null) _closedSessionIds[state.Info.SessionId] = 0;