-
Notifications
You must be signed in to change notification settings - Fork 238
fix: prevent duplicate worker spawns from parallel tool calls #281
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -324,6 +324,25 @@ async fn check_worker_limit(state: &ChannelState) -> std::result::Result<(), Age | |
| reserve_worker_slot_local(active_worker_count, &state.channel_id, max_workers) | ||
| } | ||
|
|
||
| /// Reject spawn if an active worker already has the same task. | ||
| /// | ||
| /// This prevents duplicate workers when the LLM emits multiple spawn_worker | ||
| /// calls in a single response and one fails then gets retried on the next | ||
| /// depth. | ||
| async fn check_duplicate_task( | ||
| state: &ChannelState, | ||
| task: &str, | ||
| ) -> std::result::Result<(), AgentError> { | ||
| let status = state.status_block.read().await; | ||
| if let Some(existing_id) = status.find_duplicate_worker_task(task) { | ||
| return Err(AgentError::DuplicateWorkerTask { | ||
| channel_id: state.channel_id.to_string(), | ||
| existing_worker_id: existing_id.to_string(), | ||
| }); | ||
| } | ||
| Ok(()) | ||
| } | ||
|
Comment on lines
+332
to
+344
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Duplicate check is still racy (TOCTOU) under concurrent spawns. The check reads status and returns, but task registration happens later. Two parallel requests for the same task can both pass this check before either is registered, resulting in duplicate workers anyway. 🛠️ Proposed fix (atomic check + registration)-async fn check_duplicate_task(
- state: &ChannelState,
- task: &str,
-) -> std::result::Result<(), AgentError> {
- let status = state.status_block.read().await;
- if let Some(existing_id) = status.find_duplicate_worker_task(task) {
- return Err(AgentError::DuplicateWorkerTask {
- channel_id: state.channel_id.to_string(),
- existing_worker_id: existing_id.to_string(),
- });
- }
- Ok(())
-}
+async fn check_and_register_worker_task(
+ state: &ChannelState,
+ worker_id: WorkerId,
+ task_to_match: &str,
+ task_to_store: &str,
+) -> std::result::Result<(), AgentError> {
+ let mut status = state.status_block.write().await;
+ if let Some(existing_id) = status.find_duplicate_worker_task(task_to_match) {
+ return Err(AgentError::DuplicateWorkerTask {
+ channel_id: state.channel_id.to_string(),
+ existing_worker_id: existing_id.to_string(),
+ });
+ }
+ status.add_worker(worker_id, task_to_store, false);
+ Ok(())
+}- check_duplicate_task(state, &task).await?;
+ check_duplicate_task(state, &task).await?; // optional fast-fail
...
let worker_id = worker.id;
+ check_and_register_worker_task(state, worker_id, &task, &task).await?;
...
- {
- let mut status = state.status_block.write().await;
- status.add_worker(worker_id, &task, false);
- }- check_duplicate_task(state, &task).await?;
+ check_duplicate_task(state, &task).await?; // optional fast-fail
...
let worker_id = worker.id;
let opencode_task = format!("[opencode] {task}");
+ check_and_register_worker_task(state, worker_id, &task, &opencode_task).await?;
...
- {
- let mut status = state.status_block.write().await;
- status.add_worker(worker_id, &opencode_task, false);
- }🤖 Prompt for AI Agents |
||
|
|
||
| /// Spawn a worker from a ChannelState. Used by the SpawnWorkerTool. | ||
| pub async fn spawn_worker_from_state( | ||
| state: &ChannelState, | ||
|
|
@@ -332,8 +351,9 @@ pub async fn spawn_worker_from_state( | |
| suggested_skills: &[&str], | ||
| ) -> std::result::Result<WorkerId, AgentError> { | ||
| check_worker_limit(state).await?; | ||
| ensure_dispatch_readiness(state, "worker"); | ||
| let task = task.into(); | ||
| check_duplicate_task(state, &task).await?; | ||
| ensure_dispatch_readiness(state, "worker"); | ||
|
|
||
| let rc = &state.deps.runtime_config; | ||
| let prompt_engine = rc.prompts.load(); | ||
|
|
@@ -466,8 +486,9 @@ pub async fn spawn_opencode_worker_from_state( | |
| interactive: bool, | ||
| ) -> std::result::Result<crate::WorkerId, AgentError> { | ||
| check_worker_limit(state).await?; | ||
| ensure_dispatch_readiness(state, "opencode_worker"); | ||
| let task = task.into(); | ||
| check_duplicate_task(state, &task).await?; | ||
| ensure_dispatch_readiness(state, "opencode_worker"); | ||
| let directory = std::path::PathBuf::from(directory); | ||
|
|
||
| let rc = &state.deps.runtime_config; | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -263,6 +263,22 @@ impl StatusBlock { | |||||||||||||||||||||||||||||
| self.active_workers.iter().any(|w| w.id == worker_id) | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| /// Check if an active worker already exists with a matching task. | ||||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||||
| /// The status block stores OpenCode tasks with a `[opencode] ` prefix, so | ||||||||||||||||||||||||||||||
| /// comparisons strip that prefix before matching. Returns the existing | ||||||||||||||||||||||||||||||
| /// worker's ID if found. | ||||||||||||||||||||||||||||||
| pub fn find_duplicate_worker_task(&self, task: &str) -> Option<WorkerId> { | ||||||||||||||||||||||||||||||
| let normalized = task.strip_prefix("[opencode] ").unwrap_or(task); | ||||||||||||||||||||||||||||||
| self.active_workers.iter().find_map(|worker| { | ||||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be worth trimming/normalizing whitespace (and
Suggested change
|
||||||||||||||||||||||||||||||
| let existing = worker | ||||||||||||||||||||||||||||||
| .task | ||||||||||||||||||||||||||||||
| .strip_prefix("[opencode] ") | ||||||||||||||||||||||||||||||
| .unwrap_or(&worker.task); | ||||||||||||||||||||||||||||||
| (existing == normalized).then_some(worker.id) | ||||||||||||||||||||||||||||||
| }) | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| /// Get the number of active branches. | ||||||||||||||||||||||||||||||
| pub fn active_branch_count(&self) -> usize { | ||||||||||||||||||||||||||||||
| self.active_branches.len() | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Small race window here: this is a check-then-spawn, and the task only gets added to the status block later. If
spawn_workertool calls can actually execute concurrently, two identical spawns could still both pass. Reserving the task under a write lock (or a per-channel spawn mutex) would close that gap.