Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 38 additions & 18 deletions PolyPilot.Tests/ChatExperienceSafetyTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -426,30 +426,33 @@ private static int ComputeEffectiveTimeout(
bool isResumed, bool hasReceivedEvents)
{
var useResumeQuiescence = isResumed && !hasReceivedEvents && !hasActiveTool && !hasUsedTools;
var useToolTimeout = hasActiveTool || (isResumed && !useResumeQuiescence) || hasUsedTools || isMultiAgent;
var useToolTimeout = hasActiveTool || (isResumed && !useResumeQuiescence);
var useUsedToolsTimeout = !useToolTimeout && hasUsedTools && !hasActiveTool;
return useResumeQuiescence
? CopilotService.WatchdogResumeQuiescenceTimeoutSeconds
: useToolTimeout
? CopilotService.WatchdogToolExecutionTimeoutSeconds
: CopilotService.WatchdogInactivityTimeoutSeconds;
: useUsedToolsTimeout
? CopilotService.WatchdogUsedToolsIdleTimeoutSeconds
: CopilotService.WatchdogInactivityTimeoutSeconds;
}

/// <summary>
/// INV-5: HasUsedToolsThisTurn BETWEEN tool rounds must keep 180s timeout (used-tools idle tier).
/// This is the primary protection against "messages killed during long-running processes."
/// ActiveToolCallCount resets on AssistantTurnStartEvent between rounds — only
/// HasUsedToolsThisTurn persists and keeps the timeout above the 120s base.
/// </summary>
[Fact]
public void WatchdogTimeout_BetweenToolRounds_Uses180s()
{
    // Between tool rounds: ActiveToolCallCount=0, but HasUsedToolsThisTurn=true
    var timeout = ComputeEffectiveTimeout(
        hasActiveTool: false, hasUsedTools: true, isMultiAgent: false,
        isResumed: false, hasReceivedEvents: false);

    // Pin both the named constant and its literal value so a silent change
    // to the constant is caught as well as a change to the tier selection.
    Assert.Equal(CopilotService.WatchdogUsedToolsIdleTimeoutSeconds, timeout);
    Assert.Equal(180, timeout);
}

/// <summary>Active tool execution gets the 600s timeout.</summary>
Expand All @@ -462,14 +465,14 @@ public void WatchdogTimeout_ActiveTool_Uses600s()
Assert.Equal(600, timeout);
}

/// <summary>Multi-agent sessions without active tools get 120s base timeout (isMultiAgent alone no longer escalates).</summary>
[Fact]
public void WatchdogTimeout_MultiAgent_Uses120s()
{
    var timeout = ComputeEffectiveTimeout(
        hasActiveTool: false, hasUsedTools: false, isMultiAgent: true,
        isResumed: false, hasReceivedEvents: false);

    // Assert against the named constant as well as the literal, matching the
    // pattern used by the other tier tests in this class.
    Assert.Equal(CopilotService.WatchdogInactivityTimeoutSeconds, timeout);
    Assert.Equal(120, timeout);
}

/// <summary>Resumed session with no events → 30s quiescence (fast recovery).</summary>
Expand All @@ -483,6 +486,17 @@ public void WatchdogTimeout_ResumedNoEvents_Uses30sQuiescence()
Assert.Equal(30, timeout);
}

/// <summary>
/// With hasUsedTools set and no tool active, the effective timeout must be the
/// 180s middle tier (between the 600s active-tool tier and the 120s base tier).
/// </summary>
[Fact]
public void WatchdogTimeout_UsedToolsIdle_Uses180s()
{
    var effectiveSeconds = ComputeEffectiveTimeout(
        hasActiveTool: false,
        hasUsedTools: true,
        isMultiAgent: false,
        isResumed: false,
        hasReceivedEvents: false);

    Assert.Equal(CopilotService.WatchdogUsedToolsIdleTimeoutSeconds, effectiveSeconds);
    Assert.Equal(180, effectiveSeconds);
}

/// <summary>Resumed session with events flowing → 600s (session is active).</summary>
[Fact]
public void WatchdogTimeout_ResumedWithEvents_Uses600s()
Expand Down Expand Up @@ -511,12 +525,12 @@ public void WatchdogTimeout_BaseCase_Uses120s()
[Theory]
[InlineData(false, false, false, false, false, 120)] // base case
[InlineData(true, false, false, false, false, 600)] // active tool
[InlineData(false, true, false, false, false, 600)] // used tools (between rounds!)
[InlineData(false, false, true, false, false, 600)] // multi-agent
[InlineData(true, true, false, false, false, 600)] // active + used
[InlineData(false, true, false, false, false, 180)] // used tools (between rounds) → 180s middle tier
[InlineData(false, false, true, false, false, 120)] // multi-agent alone → base (no escalation)
[InlineData(true, true, false, false, false, 600)] // active + used → active wins (600s)
[InlineData(true, false, true, false, false, 600)] // active + multi
[InlineData(false, true, true, false, false, 600)] // used + multi
[InlineData(true, true, true, false, false, 600)] // all three
[InlineData(false, true, true, false, false, 180)] // used + multi → used-tools tier (180s)
[InlineData(true, true, true, false, false, 600)] // all three → active wins (600s)
public void WatchdogTimeout_AllCombinations(
bool hasActive, bool hasUsed, bool isMulti,
bool isResumed, bool hasEvents, int expected)
Expand Down Expand Up @@ -842,12 +856,18 @@ public void ReconnectPath_IncludesMcpServersAndSkills()
var source = File.ReadAllText(
Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs"));

// After extraction to BuildFreshSessionConfig, verify the reconnect path calls the helper
var sessionNotFoundIdx = source.IndexOf("Session not found", StringComparison.OrdinalIgnoreCase);
Assert.True(sessionNotFoundIdx > 0);

var afterNotFound = source.Substring(sessionNotFoundIdx, Math.Min(2000, source.Length - sessionNotFoundIdx));
Assert.Contains("McpServers", afterNotFound);
Assert.Contains("SkillDirectories", afterNotFound);
var afterNotFound = source.Substring(sessionNotFoundIdx, Math.Min(1000, source.Length - sessionNotFoundIdx));
Assert.Contains("BuildFreshSessionConfig", afterNotFound);

// And verify the helper itself includes MCP and Skills
var helperIdx = source.IndexOf("BuildFreshSessionConfig(SessionState state");
Assert.True(helperIdx > 0);
var helperBlock = source.Substring(helperIdx, Math.Min(2000, source.Length - helperIdx));
Assert.Contains("McpServers", helperBlock);
Assert.Contains("SkillDirectories", helperBlock);
}

// =========================================================================
Expand Down
54 changes: 25 additions & 29 deletions PolyPilot.Tests/ConnectionRecoveryTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -276,16 +276,20 @@ public void SendPromptAsync_FreshSessionConfig_IncludesMcpServers()
{
// STRUCTURAL REGRESSION GUARD: The "Session not found" fallback must assign
// McpServers in the freshConfig so MCP tools survive reconnection.
// After extraction to BuildFreshSessionConfig helper, verify the helper contains it.
var source = File.ReadAllText(Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs"));

// Anchor on the freshConfig initializer inside the "Session not found" reconnect path
var freshConfigIndex = source.IndexOf("freshConfig = new SessionConfig");
Assert.True(freshConfigIndex > 0, "Could not find freshConfig in reconnect path");
// Verify the reconnect path calls the helper
var sessionNotFoundIdx = source.IndexOf("Session not found", StringComparison.OrdinalIgnoreCase);
Assert.True(sessionNotFoundIdx > 0, "Could not find 'Session not found' in reconnect path");
var afterNotFound = source.Substring(sessionNotFoundIdx, Math.Min(1000, source.Length - sessionNotFoundIdx));
Assert.Contains("BuildFreshSessionConfig", afterNotFound);

// Extract the config block (generously sized to cover all fields)
var endIndex = Math.Min(freshConfigIndex + 600, source.Length);
var configBlock = source.Substring(freshConfigIndex, endIndex - freshConfigIndex);
Assert.Contains("McpServers = ", configBlock);
// Verify the helper body includes McpServers
var helperIdx = source.IndexOf("BuildFreshSessionConfig(SessionState state");
Assert.True(helperIdx > 0, "Could not find BuildFreshSessionConfig helper");
var helperBlock = source.Substring(helperIdx, Math.Min(2000, source.Length - helperIdx));
Assert.Contains("McpServers = ", helperBlock);
}

[Fact]
Expand All @@ -295,12 +299,10 @@ public void SendPromptAsync_FreshSessionConfig_IncludesSkillDirectories()
// SkillDirectories in the freshConfig so skills survive reconnection.
var source = File.ReadAllText(Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs"));

var freshConfigIndex = source.IndexOf("freshConfig = new SessionConfig");
Assert.True(freshConfigIndex > 0, "Could not find freshConfig in reconnect path");

var endIndex = Math.Min(freshConfigIndex + 600, source.Length);
var configBlock = source.Substring(freshConfigIndex, endIndex - freshConfigIndex);
Assert.Contains("SkillDirectories = ", configBlock);
var helperIdx = source.IndexOf("BuildFreshSessionConfig(SessionState state");
Assert.True(helperIdx > 0, "Could not find BuildFreshSessionConfig helper");
var helperBlock = source.Substring(helperIdx, Math.Min(2000, source.Length - helperIdx));
Assert.Contains("SkillDirectories = ", helperBlock);
}

[Fact]
Expand All @@ -310,39 +312,33 @@ public void SendPromptAsync_FreshSessionConfig_IncludesSystemMessage()
// SystemMessage so the session retains its system prompt after reconnection.
var source = File.ReadAllText(Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs"));

var freshConfigIndex = source.IndexOf("freshConfig = new SessionConfig");
Assert.True(freshConfigIndex > 0, "Could not find freshConfig in reconnect path");

var endIndex = Math.Min(freshConfigIndex + 600, source.Length);
var configBlock = source.Substring(freshConfigIndex, endIndex - freshConfigIndex);
Assert.Contains("SystemMessage = ", configBlock);
Assert.Contains("SystemMessageMode.Append", configBlock);
var helperIdx = source.IndexOf("BuildFreshSessionConfig(SessionState state");
Assert.True(helperIdx > 0, "Could not find BuildFreshSessionConfig helper");
var helperBlock = source.Substring(helperIdx, Math.Min(2000, source.Length - helperIdx));
Assert.Contains("SystemMessage = ", helperBlock);
Assert.Contains("SystemMessageMode.Append", helperBlock);
}

[Fact]
public void SendPromptAsync_FreshSessionConfig_MatchesCreateSessionFields()
{
    // STRUCTURAL REGRESSION GUARD: The BuildFreshSessionConfig helper must
    // set the same critical fields as the original CreateSessionAsync config.
    // This prevents "environment keeps going away" after connection loss.
    var source = File.ReadAllText(Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs"));

    // Ordinal search: this is an identifier lookup in source text, not a linguistic match.
    var helperIdx = source.IndexOf("BuildFreshSessionConfig(SessionState state", StringComparison.Ordinal);
    Assert.True(helperIdx >= 0, "Could not find BuildFreshSessionConfig helper");

    // Inspect a bounded window starting at the helper signature; clamp so the
    // substring never runs past the end of the file.
    var helperBlock = source.Substring(helperIdx, Math.Min(2000, source.Length - helperIdx));

    // All critical SessionConfig property assignments must be present
    var requiredAssignments = new[]
    {
        "Model = ", "WorkingDirectory = ", "McpServers = ", "SkillDirectories = ",
        "Tools = ", "SystemMessage = ", "OnPermissionRequest = "
    };
    foreach (var assignment in requiredAssignments)
    {
        Assert.Contains(assignment, helperBlock);
    }
}

Expand Down
54 changes: 50 additions & 4 deletions PolyPilot.Tests/MultiAgentGapTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,13 @@ Task three.
}

[Fact]
public void ParseTaskAssignments_ExactMatchOnly_RejectsSubstring()
{
    // With exact-match-only, "coder" does NOT match "coder-session"
    var response = "@worker:coder\nWrite the code.\n@end";
    var result = CopilotService.ParseTaskAssignments(response, new List<string> { "coder-session", "reviewer-session" });

    Assert.Empty(result); // No exact match
}

[Fact]
Expand Down Expand Up @@ -282,6 +281,53 @@ public void ParseTaskAssignments_CaseInsensitiveWorker_Resolves()
Assert.Equal("team-worker-1", result[0].WorkerName);
}

// --- JSON Parsing Tests ---

// Verifies that a bare JSON array of {worker, task} objects yields one
// assignment per element, in order, with both fields populated.
[Fact]
public void ParseTaskAssignments_JsonArray_ParsesCorrectly()
{
    var response = """[{"worker":"alpha","task":"Do task A"},{"worker":"beta","task":"Do task B"}]""";
    var result = CopilotService.ParseTaskAssignments(response, new List<string> { "alpha", "beta" });

    Assert.Equal(2, result.Count);
    Assert.Equal("alpha", result[0].WorkerName);
    Assert.Equal("Do task A", result[0].Task);
    Assert.Equal("beta", result[1].WorkerName);
    // Check the second task text too, so a parser that mixes up or drops
    // later elements' tasks cannot pass.
    Assert.Equal("Do task B", result[1].Task);
}

// Verifies that a JSON assignment array wrapped in a json code fence
// produces exactly one assignment for the named worker.
[Fact]
public void ParseTaskAssignments_JsonInCodeFence_ParsesCorrectly()
{
    var workers = new List<string> { "alpha", "beta" };
    var fencedJson = "```json\n[{\"worker\":\"alpha\",\"task\":\"Do task A\"}]\n```";

    var assignments = CopilotService.ParseTaskAssignments(fencedJson, workers);

    var only = Assert.Single(assignments);
    Assert.Equal("alpha", only.WorkerName);
}

// Verifies that a JSON entry naming a worker outside the supplied list
// ("ghost") is omitted, leaving only the entry for the known worker.
[Fact]
public void ParseTaskAssignments_JsonWithUnknownWorker_SkipsUnmatched()
{
    var workers = new List<string> { "alpha", "beta" };
    var json = """[{"worker":"alpha","task":"Do A"},{"worker":"ghost","task":"Do G"}]""";

    var assignments = CopilotService.ParseTaskAssignments(json, workers);

    var only = Assert.Single(assignments);
    Assert.Equal("alpha", only.WorkerName);
}

// Verifies that input beginning with unparseable JSON but containing an
// @worker/@end block still yields the assignment from that block.
[Fact]
public void ParseTaskAssignments_MalformedJson_FallsBackToRegex()
{
    var workers = new List<string> { "alpha" };
    var mixedReply = "[broken json\n@worker:alpha\nDo task A.\n@end";

    var assignments = CopilotService.ParseTaskAssignments(mixedReply, workers);

    var only = Assert.Single(assignments);
    Assert.Equal("alpha", only.WorkerName);
}

// Verifies that an empty JSON array produces an empty result.
[Fact]
public void TryParseJsonAssignments_EmptyArray_ReturnsEmpty()
{
    var workers = new List<string> { "alpha" };

    var assignments = CopilotService.TryParseJsonAssignments("[]", workers);

    Assert.Empty(assignments);
}

// --- BuildDelegationNudgePrompt ---

[Fact]
Expand Down
6 changes: 3 additions & 3 deletions PolyPilot.Tests/SessionOrganizationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -692,17 +692,17 @@ Implement the login form with email and password fields.
}

[Fact]
public void ParseTaskAssignments_ExactMatchOnly_NoFuzzy()
{
    // With exact-match-only, "session" does NOT match "session-alpha"
    var response = @"@worker:session
Do the work.
@end";

    var workers = new List<string> { "session-alpha", "session-beta" };
    var assignments = CopilotService.ParseTaskAssignments(response, workers);

    Assert.Empty(assignments); // No exact match for "session"
}

[Fact]
Expand Down
2 changes: 1 addition & 1 deletion PolyPilot/Components/Layout/SessionSidebar.razor
Original file line number Diff line number Diff line change
Expand Up @@ -1620,7 +1620,7 @@ else
// Show an actionable error instead and let the user decide whether to delete.
if (IsCorruptSessionError(ex.Message))
{
resumeError = "Session data appears corrupted. You can delete it manually from ~/.copilot/session-state if needed.";
resumeError = "Session is locked — likely in use by a Copilot CLI terminal session. Close the CLI session first, or delete the session data from ~/.copilot/session-state if it's stale.";
}
else
{
Expand Down
Loading