From 5ecac921e761a10465b99e83d856170fab8ce654 Mon Sep 17 00:00:00 2001 From: bowen628 Date: Sun, 22 Mar 2026 16:21:23 +0800 Subject: [PATCH] feat(agentic): round-boundary preemption, remove view_image, remote chat UI - Yield dialog turns after a model round when a new user message is queued (scheduler + execution engine + SessionRoundYieldFlags). - Remove view_image tool and ImageAnalysisCard; adjust flow-chat/mobile chat input and state machine. - Remote connect bot command routing, server/desktop wiring, and cron touch-up. --- src/apps/desktop/src/api/agentic_api.rs | 51 +- src/apps/desktop/src/lib.rs | 1 + src/apps/server/src/bootstrap.rs | 1 + src/apps/server/src/rpc_dispatcher.rs | 1 + .../core/src/agentic/agents/agentic_mode.rs | 1 - .../src/agentic/coordination/coordinator.rs | 11 + .../src/agentic/coordination/scheduler.rs | 215 ++++-- .../src/agentic/execution/execution_engine.rs | 23 +- .../core/src/agentic/execution/types.rs | 6 +- src/crates/core/src/agentic/mod.rs | 4 + src/crates/core/src/agentic/round_preempt.rs | 61 ++ .../src/agentic/tools/implementations/mod.rs | 2 - .../implementations/session_message_tool.rs | 1 + .../tools/implementations/view_image_tool.rs | 615 ------------------ src/crates/core/src/agentic/tools/registry.rs | 3 - src/crates/core/src/service/cron/service.rs | 3 +- .../remote_connect/bot/command_router.rs | 106 ++- .../src/service/remote_connect/bot/feishu.rs | 6 +- .../service/remote_connect/bot/telegram.rs | 4 +- .../src/service/remote_connect/bot/weixin.rs | 4 +- .../service/remote_connect/remote_server.rs | 106 +-- src/mobile-web/src/i18n/messages.ts | 4 + src/mobile-web/src/pages/ChatPage.tsx | 70 +- .../src/styles/components/chat-input.scss | 7 + .../src/styles/components/chat.scss | 4 + .../component-library/components/registry.tsx | 29 - .../src/flow_chat/components/ChatInput.scss | 8 +- .../src/flow_chat/components/ChatInput.tsx | 98 ++- .../src/flow_chat/components/FlowToolCard.tsx | 18 - .../src/flow_chat/hooks/useMessageSender.ts | 2 +- .../flow_chat/hooks/useSessionStateMachine.ts | 14 +- .../flow-chat-manager/EventHandlerModule.ts | 10 + .../flow-chat-manager/MessageModule.ts | 4 +- .../flow_chat/state-machine/derivedState.ts | 39 +- .../flow_chat/store/modernFlowChatStore.ts | 26 +- .../tool-cards/ImageAnalysisCard.scss | 92 --- .../tool-cards/ImageAnalysisCard.tsx | 209 ------ src/web-ui/src/flow_chat/tool-cards/index.ts | 14 - .../api/service-api/ImageContextTypes.ts | 2 +- src/web-ui/src/locales/en-US/flow-chat.json | 1 - src/web-ui/src/locales/zh-CN/flow-chat.json | 1 - 41 files changed, 673 insertions(+), 1204 deletions(-) create mode 100644 src/crates/core/src/agentic/round_preempt.rs delete mode 100644 src/crates/core/src/agentic/tools/implementations/view_image_tool.rs delete mode 100644 src/web-ui/src/flow_chat/tool-cards/ImageAnalysisCard.scss delete mode 100644 src/web-ui/src/flow_chat/tool-cards/ImageAnalysisCard.tsx diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs index bb47461b..75538d0c 100644 --- a/src/apps/desktop/src/api/agentic_api.rs +++ b/src/apps/desktop/src/api/agentic_api.rs @@ -236,7 +236,7 @@ pub async fn update_session_model( #[tauri::command] pub async fn start_dialog_turn( _app: AppHandle, - coordinator: State<'_, Arc>, + _coordinator: State<'_, Arc>, scheduler: State<'_, Arc>, request: StartDialogTurnRequest, ) -> Result { @@ -250,40 +250,31 @@ pub async fn start_dialog_turn( image_contexts, } = request; - if let Some(image_contexts) = image_contexts + let policy = DialogSubmissionPolicy::for_source(DialogTriggerSource::DesktopUi); + let resolved_images = if let Some(image_contexts) = image_contexts .as_ref() .filter(|images| !images.is_empty()) .cloned() { - let resolved_image_contexts = resolve_missing_image_payloads(image_contexts)?; - coordinator - .start_dialog_turn_with_image_contexts( - session_id, - user_input, - original_user_input, - resolved_image_contexts, - turn_id, - agent_type, - workspace_path, - DialogSubmissionPolicy::for_source(DialogTriggerSource::DesktopUi), - ) - .await - .map_err(|e| format!("Failed to start dialog turn: {}", e))?; + Some(resolve_missing_image_payloads(image_contexts)?) } else { - scheduler - .submit( - session_id, - user_input, - original_user_input, - turn_id, - agent_type, - workspace_path, - DialogSubmissionPolicy::for_source(DialogTriggerSource::DesktopUi), - None, - ) - .await - .map_err(|e| format!("Failed to start dialog turn: {}", e))?; - } + None + }; + + scheduler + .submit( + session_id, + user_input, + original_user_input, + turn_id, + agent_type, + workspace_path, + policy, + None, + resolved_images, + ) + .await + .map_err(|e| format!("Failed to start dialog turn: {}", e))?; Ok(StartDialogTurnResponse { success: true, diff --git a/src/apps/desktop/src/lib.rs b/src/apps/desktop/src/lib.rs index 94c2a37b..a0c47bb5 100644 --- a/src/apps/desktop/src/lib.rs +++ b/src/apps/desktop/src/lib.rs @@ -763,6 +763,7 @@ async fn init_agentic_system() -> anyhow::Result<( let scheduler = coordination::DialogScheduler::new(coordinator.clone(), session_manager.clone()); coordinator.set_scheduler_notifier(scheduler.outcome_sender()); + coordinator.set_round_preempt_source(scheduler.preempt_monitor()); coordination::set_global_scheduler(scheduler.clone()); let cron_service = diff --git a/src/apps/server/src/bootstrap.rs b/src/apps/server/src/bootstrap.rs index cdcb4354..13b72485 100644 --- a/src/apps/server/src/bootstrap.rs +++ b/src/apps/server/src/bootstrap.rs @@ -120,6 +120,7 @@ pub async fn initialize(workspace: Option) -> anyhow::Result, /// Notifies DialogScheduler of turn outcomes; injected after construction scheduler_notify_tx: OnceLock>, + /// Round-boundary yield (same source as scheduler's yield flags); injected after construction + round_preempt_source: OnceLock>, } impl ConversationCoordinator { @@ -247,6 +250,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet event_queue, event_router, scheduler_notify_tx: OnceLock::new(), + round_preempt_source: OnceLock::new(), } } @@ -256,6 +260,11 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet let _ = self.scheduler_notify_tx.set(tx); } + /// Wire round-boundary preempt (typically the scheduler's [`SessionRoundYieldFlags`](crate::agentic::round_preempt::SessionRoundYieldFlags)). + pub fn set_round_preempt_source(&self, source: Arc) { + let _ = self.round_preempt_source.set(source); + } + /// Create a new session pub async fn create_session( &self, @@ -1126,6 +1135,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet subagent_parent_info: None, skip_tool_confirmation: submission_policy.skip_tool_confirmation, workspace_services, + round_preempt: self.round_preempt_source.get().cloned(), }; // Auto-generate session title on first message @@ -1659,6 +1669,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet subagent_parent_info: Some(subagent_parent_info), skip_tool_confirmation: false, workspace_services: subagent_services, + round_preempt: self.round_preempt_source.get().cloned(), }; let initial_messages = vec![Message::user(task_description)]; diff --git a/src/crates/core/src/agentic/coordination/scheduler.rs b/src/crates/core/src/agentic/coordination/scheduler.rs index 71631529..208bfeec 100644 --- a/src/crates/core/src/agentic/coordination/scheduler.rs +++ b/src/crates/core/src/agentic/coordination/scheduler.rs @@ -13,6 +13,8 @@ use super::coordinator::{ConversationCoordinator, DialogTriggerSource}; use super::turn_outcome::{TurnOutcome, TurnOutcomeQueueAction, TurnOutcomeStatus}; use crate::agentic::core::{PromptEnvelope, SessionState}; +use crate::agentic::image_analysis::ImageContextData; +use crate::agentic::round_preempt::{DialogRoundPreemptSource, SessionRoundYieldFlags}; use crate::agentic::session::SessionManager; use dashmap::DashMap; use log::{debug, info, warn}; @@ -21,9 +23,24 @@ use std::sync::Arc; use std::sync::OnceLock; use std::time::SystemTime; use tokio::sync::mpsc; +use uuid::Uuid; const MAX_QUEUE_DEPTH: usize = 20; +/// Result of [`DialogScheduler::submit`]: whether this message began executing immediately +/// or was placed in the per-session queue. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DialogSubmitOutcome { + Started { + session_id: String, + turn_id: String, + }, + Queued { + session_id: String, + turn_id: String, + }, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum DialogQueuePriority { Low = 0, @@ -114,6 +131,7 @@ pub struct QueuedTurn { pub workspace_path: Option, pub policy: DialogSubmissionPolicy, pub reply_route: Option, + pub image_contexts: Option>, #[allow(dead_code)] pub enqueued_at: SystemTime, } @@ -132,6 +150,8 @@ pub struct DialogScheduler { active_turns: Arc>, /// Cloneable sender given to ConversationCoordinator for turn outcome notifications outcome_tx: mpsc::Sender<(String, TurnOutcome)>, + /// When a user submits while `Processing`, engine yields after the current model round. + round_yield_flags: Arc, } impl DialogScheduler { @@ -152,6 +172,7 @@ impl DialogScheduler { queues: Arc::new(DashMap::new()), active_turns: Arc::new(DashMap::new()), outcome_tx, + round_yield_flags: Arc::new(SessionRoundYieldFlags::default()), }); let scheduler_for_handler = Arc::clone(&scheduler); @@ -167,11 +188,29 @@ impl DialogScheduler { self.outcome_tx.clone() } + /// Pass to [`ConversationCoordinator::set_round_preempt_source`](super::coordinator::ConversationCoordinator::set_round_preempt_source). + pub fn preempt_monitor(&self) -> Arc { + self.round_yield_flags.clone() + } + + fn user_message_may_preempt(policy: &DialogSubmissionPolicy) -> bool { + matches!( + policy.trigger_source, + DialogTriggerSource::DesktopUi + | DialogTriggerSource::DesktopApi + | DialogTriggerSource::Cli + | DialogTriggerSource::RemoteRelay + | DialogTriggerSource::Bot + ) + } + /// Submit a user message for a session. /// /// - Session idle, queue empty → dispatched immediately. /// - Session idle, queue non-empty → enqueued then highest-priority queued message dispatched. - /// - Session processing → queued (up to MAX_QUEUE_DEPTH). + /// - Session processing → queued (up to MAX_QUEUE_DEPTH). For interactive sources + /// (desktop, CLI, bot, …), also requests a yield after the current model round so + /// the queued message can start sooner than a full multi-round turn. /// - Session error → queue cleared, dispatched immediately. /// /// Returns `Err(String)` if the queue is full or the coordinator returns an error. @@ -185,15 +224,18 @@ impl DialogScheduler { workspace_path: Option, policy: DialogSubmissionPolicy, reply_route: Option, - ) -> Result<(), String> { + image_contexts: Option>, + ) -> Result { + let resolved_turn_id = turn_id.unwrap_or_else(|| Uuid::new_v4().to_string()); let queued_turn = QueuedTurn { user_input, original_user_input, - turn_id, + turn_id: Some(resolved_turn_id.clone()), agent_type, workspace_path, policy, reply_route, + image_contexts, enqueued_at: SystemTime::now(), }; let state = self @@ -202,11 +244,21 @@ impl DialogScheduler { .map(|s| s.state.clone()); match state { - None => self.start_turn(&session_id, &queued_turn).await, + None => { + let tid = self.start_turn(&session_id, &queued_turn).await?; + Ok(DialogSubmitOutcome::Started { + session_id, + turn_id: tid, + }) + } Some(SessionState::Error { .. }) => { self.clear_queue(&session_id); - self.start_turn(&session_id, &queued_turn).await + let tid = self.start_turn(&session_id, &queued_turn).await?; + Ok(DialogSubmitOutcome::Started { + session_id, + turn_id: tid, + }) } Some(SessionState::Idle) => { @@ -217,16 +269,38 @@ impl DialogScheduler { .unwrap_or(false); if queue_non_empty { - self.enqueue(&session_id, queued_turn)?; - self.dispatch_next_if_idle(&session_id).await + self.enqueue(&session_id, queued_turn.clone())?; + let started_tid = self.try_start_next_queued(&session_id).await?; + let outcome = match started_tid { + Some(tid) if tid == resolved_turn_id => DialogSubmitOutcome::Started { + session_id: session_id.clone(), + turn_id: tid, + }, + _ => DialogSubmitOutcome::Queued { + session_id: session_id.clone(), + turn_id: resolved_turn_id, + }, + }; + Ok(outcome) } else { - self.start_turn(&session_id, &queued_turn).await + let tid = self.start_turn(&session_id, &queued_turn).await?; + Ok(DialogSubmitOutcome::Started { + session_id, + turn_id: tid, + }) } } Some(SessionState::Processing { .. }) => { + let may_preempt = Self::user_message_may_preempt(&queued_turn.policy); self.enqueue(&session_id, queued_turn)?; - Ok(()) + if may_preempt { + self.round_yield_flags.request_yield(&session_id); + } + Ok(DialogSubmitOutcome::Queued { + session_id, + turn_id: resolved_turn_id, + }) } } } @@ -303,25 +377,93 @@ impl DialogScheduler { .push_front(turn); } - async fn start_turn(&self, session_id: &str, queued_turn: &QueuedTurn) -> Result<(), String> { - self.coordinator - .start_dialog_turn( - session_id.to_string(), - queued_turn.user_input.clone(), - queued_turn.original_user_input.clone(), - queued_turn.turn_id.clone(), - queued_turn.agent_type.clone(), - queued_turn.workspace_path.clone(), - queued_turn.policy, - ) - .await - .map_err(|e| e.to_string())?; + async fn try_start_next_queued(&self, session_id: &str) -> Result, String> { + let state = self + .session_manager + .get_session(session_id) + .map(|s| s.state.clone()); + if matches!(state, Some(SessionState::Processing { .. })) { + return Ok(None); + } + + let Some(next_turn) = self.dequeue_next(session_id) else { + return Ok(None); + }; + + let remaining = self.queues.get(session_id).map(|q| q.len()).unwrap_or(0); + info!( + "Dispatching queued message: session_id={}, priority={:?}, remaining_queue_depth={}", + session_id, next_turn.policy.queue_priority, remaining + ); + + match self.start_turn(session_id, &next_turn).await { + Ok(tid) => Ok(Some(tid)), + Err(err) => { + self.requeue_front(session_id, next_turn); + Err(err) + } + } + } + + async fn start_turn(&self, session_id: &str, queued_turn: &QueuedTurn) -> Result { + let res = match queued_turn + .image_contexts + .as_ref() + .filter(|imgs| !imgs.is_empty()) + { + Some(imgs) => { + self.coordinator + .start_dialog_turn_with_image_contexts( + session_id.to_string(), + queued_turn.user_input.clone(), + queued_turn.original_user_input.clone(), + imgs.clone(), + queued_turn.turn_id.clone(), + queued_turn.agent_type.clone(), + queued_turn.workspace_path.clone(), + queued_turn.policy, + ) + .await + } + None => { + self.coordinator + .start_dialog_turn( + session_id.to_string(), + queued_turn.user_input.clone(), + queued_turn.original_user_input.clone(), + queued_turn.turn_id.clone(), + queued_turn.agent_type.clone(), + queued_turn.workspace_path.clone(), + queued_turn.policy, + ) + .await + } + }; + + res.map_err(|e| e.to_string())?; self.active_turns.insert( session_id.to_string(), ActiveTurn::from_queued_turn(queued_turn), ); - Ok(()) + + let resolved = self + .session_manager + .get_session(session_id) + .and_then(|s| match &s.state { + SessionState::Processing { + current_turn_id, .. + } => Some(current_turn_id.clone()), + _ => None, + }) + .ok_or_else(|| { + format!( + "Failed to resolve turn_id after starting dialog: session_id={}", + session_id + ) + })?; + + Ok(resolved) } async fn forward_agent_session_reply( @@ -356,6 +498,7 @@ impl DialogScheduler { Some(reply_route.source_workspace_path.clone()), DialogSubmissionPolicy::for_source(DialogTriggerSource::AgentSession), None, + None, ) .await { @@ -385,35 +528,15 @@ Status: {status}" } async fn dispatch_next_if_idle(&self, session_id: &str) -> Result<(), String> { - let state = self - .session_manager - .get_session(session_id) - .map(|s| s.state.clone()); - if matches!(state, Some(SessionState::Processing { .. })) { - return Ok(()); - } - - let Some(next_turn) = self.dequeue_next(session_id) else { - return Ok(()); - }; - - let remaining = self.queues.get(session_id).map(|q| q.len()).unwrap_or(0); - info!( - "Dispatching queued message: session_id={}, priority={:?}, remaining_queue_depth={}", - session_id, next_turn.policy.queue_priority, remaining - ); - - if let Err(err) = self.start_turn(session_id, &next_turn).await { - self.requeue_front(session_id, next_turn); - return Err(err); - } - + let _ = self.try_start_next_queued(session_id).await?; Ok(()) } /// Background loop that receives turn outcome notifications from the coordinator. async fn run_outcome_handler(&self, mut outcome_rx: mpsc::Receiver<(String, TurnOutcome)>) { while let Some((session_id, outcome)) = outcome_rx.recv().await { + self.round_yield_flags.clear(&session_id); + let active_turn = self.active_turns.remove(&session_id).map(|(_, turn)| turn); if let Some(active_turn) = active_turn.as_ref() { self.forward_agent_session_reply(&session_id, active_turn, &outcome) diff --git a/src/crates/core/src/agentic/execution/execution_engine.rs b/src/crates/core/src/agentic/execution/execution_engine.rs index a9104e84..bf342c11 100644 --- a/src/crates/core/src/agentic/execution/execution_engine.rs +++ b/src/crates/core/src/agentic/execution/execution_engine.rs @@ -657,9 +657,7 @@ impl ExecutionEngine { let enable_context_compression = session.config.enable_context_compression; let compression_threshold = session.config.compression_threshold; // Detect whether the primary model supports multimodal image inputs. - // This is used by tools like `view_image` to decide between: - // - attaching image content for the primary model to analyze directly, or - // - using a dedicated vision model to pre-analyze into text. + // When false, multimodal user messages are converted to text placeholders before the provider call. let (resolved_primary_model_id, primary_supports_image_understanding) = { let config_service = get_global_config_service().await.ok(); if let Some(service) = config_service { @@ -921,6 +919,8 @@ impl ExecutionEngine { round_result.tool_result_messages.len() ); + total_tools += round_result.tool_calls.len(); + // If no more rounds, dialog turn ends if !round_result.has_more_rounds { debug!( @@ -930,8 +930,21 @@ impl ExecutionEngine { break; } - // Count tools - total_tools += round_result.tool_calls.len(); + // Queued user message while this turn was running: stop after a full model round + // (AI response + tool execution for this round are already persisted). + // No special deferral for tool-confirmation phases: we do not require the user to + // finish confirming before this boundary check runs; the check applies as soon as + // this `execute_round` completes (same as any other round). + if let Some(preempt) = context.round_preempt.as_ref() { + if preempt.should_yield_after_round(&context.session_id) { + preempt.clear_yield_after_round(&context.session_id); + info!( + "Yielding dialog turn after model round (queued user message): session_id={}, dialog_turn_id={}, round_index={}", + context.session_id, context.dialog_turn_id, round_index + ); + break; + } + } // Check if cancelled after each round let dialog_turn_cancelled = diff --git a/src/crates/core/src/agentic/execution/types.rs b/src/crates/core/src/agentic/execution/types.rs index 10cd13d9..68e6581c 100644 --- a/src/crates/core/src/agentic/execution/types.rs +++ b/src/crates/core/src/agentic/execution/types.rs @@ -1,15 +1,17 @@ //! Execution Engine Type Definitions use crate::agentic::core::Message; +use crate::agentic::round_preempt::DialogRoundPreemptSource; use crate::agentic::tools::pipeline::SubagentParentInfo; use crate::agentic::workspace::WorkspaceServices; use crate::agentic::WorkspaceBinding; use serde_json::Value; use std::collections::HashMap; +use std::sync::Arc; use tokio_util::sync::CancellationToken; /// Execution context -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct ExecutionContext { pub session_id: String, pub dialog_turn_id: String, @@ -21,6 +23,8 @@ pub struct ExecutionContext { pub skip_tool_confirmation: bool, /// Workspace I/O services (filesystem + shell) injected into tools pub workspace_services: Option, + /// When set, engine may end the turn after a full model round if a user message was queued. + pub round_preempt: Option>, } /// Round context diff --git a/src/crates/core/src/agentic/mod.rs b/src/crates/core/src/agentic/mod.rs index 92570d63..c435b2da 100644 --- a/src/crates/core/src/agentic/mod.rs +++ b/src/crates/core/src/agentic/mod.rs @@ -19,6 +19,9 @@ pub mod tools; // Coordination module pub mod coordination; +/// Round-boundary yield when user queues a message during an active turn +pub mod round_preempt; + // Image analysis module pub mod image_analysis; @@ -36,6 +39,7 @@ pub mod insights; pub use agents::*; pub use coordination::*; +pub use round_preempt::{DialogRoundPreemptSource, NoopDialogRoundPreemptSource, SessionRoundYieldFlags}; pub use core::*; pub use events::{queue, router, types as event_types}; pub use execution::*; diff --git a/src/crates/core/src/agentic/round_preempt.rs b/src/crates/core/src/agentic/round_preempt.rs new file mode 100644 index 00000000..f8eced67 --- /dev/null +++ b/src/crates/core/src/agentic/round_preempt.rs @@ -0,0 +1,61 @@ +//! Yield dialog execution at model-round boundaries when a new user message is queued. +//! +//! The [`DialogRoundPreemptSource`] is implemented by [`DialogScheduler`](super::scheduler::DialogScheduler) +//! and read by [`ExecutionEngine`](super::execution::ExecutionEngine) after each completed model round. + +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +/// Observes whether the current dialog turn should end after the latest model round +/// (so a queued user message can start as a new turn). +pub trait DialogRoundPreemptSource: Send + Sync { + fn should_yield_after_round(&self, session_id: &str) -> bool; + fn clear_yield_after_round(&self, session_id: &str); +} + +/// Used when no scheduler is wired (e.g. tests, isolated execution). +pub struct NoopDialogRoundPreemptSource; + +impl DialogRoundPreemptSource for NoopDialogRoundPreemptSource { + fn should_yield_after_round(&self, _session_id: &str) -> bool { + false + } + + fn clear_yield_after_round(&self, _session_id: &str) {} +} + +/// Shared flag storage keyed by session; scheduler sets, engine reads and clears. +#[derive(Debug, Default)] +pub struct SessionRoundYieldFlags { + inner: dashmap::DashMap>, +} + +impl SessionRoundYieldFlags { + pub fn request_yield(&self, session_id: &str) { + self.inner + .entry(session_id.to_string()) + .or_insert_with(|| Arc::new(AtomicBool::new(false))) + .store(true, Ordering::SeqCst); + } + + pub fn should_yield(&self, session_id: &str) -> bool { + self.inner + .get(session_id) + .map(|r| r.value().load(Ordering::SeqCst)) + .unwrap_or(false) + } + + pub fn clear(&self, session_id: &str) { + self.inner.remove(session_id); + } +} + +impl DialogRoundPreemptSource for SessionRoundYieldFlags { + fn should_yield_after_round(&self, session_id: &str) -> bool { + self.should_yield(session_id) + } + + fn clear_yield_after_round(&self, session_id: &str) { + self.clear(session_id); + } +} diff --git a/src/crates/core/src/agentic/tools/implementations/mod.rs b/src/crates/core/src/agentic/tools/implementations/mod.rs index c6e702d2..75b96d46 100644 --- a/src/crates/core/src/agentic/tools/implementations/mod.rs +++ b/src/crates/core/src/agentic/tools/implementations/mod.rs @@ -26,7 +26,6 @@ pub mod task_tool; pub mod terminal_control_tool; pub mod todo_write_tool; pub mod util; -pub mod view_image_tool; pub mod web_tools; pub use ask_user_question_tool::AskUserQuestionTool; @@ -53,5 +52,4 @@ pub use skill_tool::SkillTool; pub use task_tool::TaskTool; pub use terminal_control_tool::TerminalControlTool; pub use todo_write_tool::TodoWriteTool; -pub use view_image_tool::ViewImageTool; pub use web_tools::{WebFetchTool, WebSearchTool}; diff --git a/src/crates/core/src/agentic/tools/implementations/session_message_tool.rs b/src/crates/core/src/agentic/tools/implementations/session_message_tool.rs index eb3e04da..31850d38 100644 --- a/src/crates/core/src/agentic/tools/implementations/session_message_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/session_message_tool.rs @@ -376,6 +376,7 @@ When overriding an existing session's agent_type, only switching between "agenti source_session_id, source_workspace_path: source_workspace, }), + None, ) .await .map_err(BitFunError::tool)?; diff --git a/src/crates/core/src/agentic/tools/implementations/view_image_tool.rs b/src/crates/core/src/agentic/tools/implementations/view_image_tool.rs deleted file mode 100644 index 57ef2d18..00000000 --- a/src/crates/core/src/agentic/tools/implementations/view_image_tool.rs +++ /dev/null @@ -1,615 +0,0 @@ -//! view_image tool - analyzes image content for text-only or multimodal main models. -//! -//! Current default behavior is to convert image content into structured text analysis. -//! This keeps the tool useful for text-only primary models while preserving an interface -//! that can evolve toward direct multimodal attachment in the future. - -use async_trait::async_trait; -use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; -use image::GenericImageView; -use log::{debug, info, trace}; -use serde::Deserialize; -use serde_json::{json, Value}; -use uuid::Uuid; - -use crate::agentic::image_analysis::{ - build_multimodal_message, decode_data_url, detect_mime_type_from_bytes, load_image_from_path, - optimize_image_for_provider, resolve_image_path, resolve_vision_model_from_global_config, - ImageContextData as ModelImageContextData, -}; -use crate::agentic::tools::framework::{ - Tool, ToolRenderOptions, ToolResult, ToolUseContext, ValidationResult, -}; -use crate::infrastructure::ai::get_global_ai_client_factory; -use crate::util::errors::{BitFunError, BitFunResult}; - -#[derive(Debug, Deserialize)] -struct ViewImageInput { - #[serde(default)] - image_path: Option, - #[serde(default)] - data_url: Option, - #[serde(default)] - image_id: Option, - #[serde(default)] - analysis_prompt: Option, - #[serde(default)] - focus_areas: Option>, - #[serde(default)] - detail_level: Option, -} - -pub struct ViewImageTool; - -impl ViewImageTool { - pub fn new() -> Self { - Self - } - - fn primary_model_supports_images(context: &ToolUseContext) -> bool { - context - .options - .as_ref() - .and_then(|o| o.custom_data.as_ref()) - .and_then(|m| m.get("primary_model_supports_image_understanding")) - .and_then(|v| v.as_bool()) - .unwrap_or(false) - } - - fn primary_model_provider(context: &ToolUseContext) -> Option<&str> { - context - .options - .as_ref() - .and_then(|o| o.custom_data.as_ref()) - .and_then(|m| m.get("primary_model_provider")) - .and_then(|v| v.as_str()) - .filter(|s| !s.is_empty()) - } - - fn build_prompt( - &self, - analysis_prompt: Option<&str>, - focus_areas: &Option>, - detail_level: &Option, - ) -> String { - let mut prompt = String::new(); - - prompt.push_str( - analysis_prompt - .filter(|s| !s.trim().is_empty()) - .unwrap_or("Please analyze this image and describe the relevant details."), - ); - prompt.push_str("\n\n"); - - if let Some(areas) = focus_areas { - if !areas.is_empty() { - prompt.push_str("Please pay special attention to the following aspects:\n"); - for area in areas { - prompt.push_str(&format!("- {}\n", area)); - } - prompt.push('\n'); - } - } - - let detail_guide = match detail_level.as_deref() { - Some("brief") => "Please answer concisely in 1-2 sentences.", - Some("detailed") => { - "Please provide a detailed analysis including all relevant details." - } - _ => "Please provide a moderate level of analysis detail.", - }; - prompt.push_str(detail_guide); - - prompt - } - - async fn build_attachment_image_context( - &self, - input_data: &ViewImageInput, - context: &ToolUseContext, - primary_provider: &str, - ) -> BitFunResult<(ModelImageContextData, String)> { - let workspace_path = context.workspace_root().map(|path| path.to_path_buf()); - - if let Some(image_id) = &input_data.image_id { - let provider = context.image_context_provider.as_ref().ok_or_else(|| { - BitFunError::tool( - "image_id mode requires ImageContextProvider support, but no provider was injected.\n\ - Please inject image_context_provider when calling the tool, or use image_path/data_url mode." - .to_string(), - ) - })?; - - let ctx = provider.get_image(image_id).ok_or_else(|| { - BitFunError::tool(format!( - "Image context not found: image_id={}. Image may have expired (5-minute validity) or was never uploaded.", - image_id - )) - })?; - - let crate::agentic::tools::image_context::ImageContextData { - id: ctx_id, - image_path: ctx_image_path, - data_url: ctx_data_url, - mime_type: ctx_mime_type, - image_name: ctx_image_name, - file_size: ctx_file_size, - width: ctx_width, - height: ctx_height, - source: ctx_source, - } = ctx; - - let description = format!("{} (clipboard)", ctx_image_name); - - if let Some(path_str) = ctx_image_path.as_ref().filter(|s| !s.is_empty()) { - let path = resolve_image_path(path_str, workspace_path.as_deref())?; - let metadata = json!({ - "name": ctx_image_name, - "width": ctx_width, - "height": ctx_height, - "file_size": ctx_file_size, - "source": ctx_source, - "origin": "image_id", - "image_id": ctx_id.clone(), - }); - - return Ok(( - ModelImageContextData { - id: ctx_id, - image_path: Some(path.display().to_string()), - data_url: None, - mime_type: ctx_mime_type, - metadata: Some(metadata), - }, - description, - )); - } - - if let Some(data_url) = ctx_data_url.as_ref().filter(|s| !s.is_empty()) { - let (data, data_url_mime) = decode_data_url(data_url)?; - let fallback_mime = data_url_mime - .as_deref() - .or_else(|| Some(ctx_mime_type.as_str())); - let processed = optimize_image_for_provider(data, primary_provider, fallback_mime)?; - let optimized_data_url = format!( - "data:{};base64,{}", - processed.mime_type, - BASE64.encode(&processed.data) - ); - - let metadata = json!({ - "name": ctx_image_name, - "width": processed.width, - "height": processed.height, - "file_size": processed.data.len(), - "source": ctx_source, - "origin": "image_id", - "image_id": ctx_id.clone(), - }); - - return Ok(( - ModelImageContextData { - id: ctx_id, - image_path: None, - data_url: Some(optimized_data_url), - mime_type: processed.mime_type, - metadata: Some(metadata), - }, - description, - )); - } - - return Err(BitFunError::tool(format!( - "Image context {} has neither data_url nor image_path", - image_id - ))); - } - - if let Some(data_url) = &input_data.data_url { - let (data, data_url_mime) = decode_data_url(data_url)?; - let processed = - optimize_image_for_provider(data, primary_provider, data_url_mime.as_deref())?; - let optimized_data_url = format!( - "data:{};base64,{}", - processed.mime_type, - BASE64.encode(&processed.data) - ); - let metadata = json!({ - "name": "clipboard_image", - "width": processed.width, - "height": processed.height, - "file_size": processed.data.len(), - "source": "data_url", - "origin": "data_url" - }); - - return Ok(( - ModelImageContextData { - id: format!("img-view-{}", Uuid::new_v4()), - image_path: None, - data_url: Some(optimized_data_url), - mime_type: processed.mime_type, - metadata: Some(metadata), - }, - "clipboard_image".to_string(), - )); - } - - if let Some(image_path_str) = &input_data.image_path { - let abs_path = resolve_image_path(image_path_str, workspace_path.as_deref())?; - let data = load_image_from_path(&abs_path, workspace_path.as_deref()).await?; - - let mime_type = detect_mime_type_from_bytes(&data, None)?; - let dynamic = image::load_from_memory(&data).map_err(|e| { - BitFunError::validation(format!("Failed to decode image data: {}", e)) - })?; - let (width, height) = dynamic.dimensions(); - - let name = abs_path - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or("image") - .to_string(); - - let metadata = json!({ - "name": name, - "width": width, - "height": height, - "file_size": data.len(), - "source": "local_path", - "origin": "image_path" - }); - - return Ok(( - ModelImageContextData { - id: format!("img-view-{}", Uuid::new_v4()), - image_path: Some(abs_path.display().to_string()), - data_url: None, - mime_type, - metadata: Some(metadata), - }, - abs_path.display().to_string(), - )); - } - - Err(BitFunError::validation( - "Must provide one of image_path, data_url, or image_id", - )) - } - - async fn load_source( - &self, - input_data: &ViewImageInput, - context: &ToolUseContext, - ) -> BitFunResult<(Vec, Option, String)> { - let workspace_path = context.workspace_root().map(|path| path.to_path_buf()); - - if let Some(image_id) = &input_data.image_id { - let provider = context.image_context_provider.as_ref().ok_or_else(|| { - BitFunError::tool( - "image_id mode requires ImageContextProvider support, but no provider was injected.\n\ - Please inject image_context_provider when calling the tool, or use image_path/data_url mode.".to_string() - ) - })?; - - let image_context = provider.get_image(image_id).ok_or_else(|| { - BitFunError::tool(format!( - "Image context not found: image_id={}. Image may have expired (5-minute validity) or was never uploaded.", - image_id - )) - })?; - - if let Some(data_url) = &image_context.data_url { - let (data, data_url_mime) = decode_data_url(data_url)?; - let fallback_mime = data_url_mime.or_else(|| Some(image_context.mime_type.clone())); - return Ok(( - data, - fallback_mime, - format!("{} (clipboard)", image_context.image_name), - )); - } - - if let Some(image_path_str) = &image_context.image_path { - let image_path = resolve_image_path(image_path_str, workspace_path.as_deref())?; - let data = load_image_from_path(&image_path, workspace_path.as_deref()).await?; - let detected_mime = - detect_mime_type_from_bytes(&data, Some(&image_context.mime_type)).ok(); - return Ok((data, detected_mime, image_path.display().to_string())); - } - - return Err(BitFunError::tool(format!( - "Image context {} has neither data_url nor image_path", - image_id - ))); - } - - if let Some(data_url) = &input_data.data_url { - let (data, data_url_mime) = decode_data_url(data_url)?; - return Ok((data, data_url_mime, "clipboard_image".to_string())); - } - - if let Some(image_path_str) = &input_data.image_path { - let image_path = resolve_image_path(image_path_str, workspace_path.as_deref())?; - let data = load_image_from_path(&image_path, workspace_path.as_deref()).await?; - let detected_mime = detect_mime_type_from_bytes(&data, None).ok(); - return Ok((data, detected_mime, image_path.display().to_string())); - } - - Err(BitFunError::validation( - "Must provide one of image_path, data_url, or image_id", - )) - } -} - -#[async_trait] -impl Tool for ViewImageTool { - fn name(&self) -> &str { - "view_image" - } - - async fn description(&self) -> BitFunResult { - Ok(r#"Analyzes image content and returns detailed text descriptions. - -Use this tool when the user provides an image (file path, data URL, or uploaded clipboard image_id) and asks questions about it. - -Current behavior: -- For text-only primary models, this tool converts image content to structured text (uses the configured image understanding model). -- For multimodal primary models, this tool attaches the image for the primary model to analyze directly. - -Parameters: -- image_path / data_url / image_id: provide one image source -- analysis_prompt: optional custom analysis goal -- focus_areas: optional analysis focus list -- detail_level: brief / normal / detailed"#.to_string()) - } - - fn input_schema(&self) -> Value { - json!({ - "type": "object", - "properties": { - "image_path": { - "type": "string", - "description": "Path to image file (relative to workspace or absolute path). Example: 'screenshot.png'" - }, - "data_url": { - "type": "string", - "description": "Base64-encoded image data URL. Example: 'data:image/png;base64,...'" - }, - "image_id": { - "type": "string", - "description": "Temporary image ID from clipboard upload. Example: 'img-clipboard-1234567890-abc123'" - }, - "analysis_prompt": { - "type": "string", - "description": "Optional custom prompt describing what to extract from the image" - }, - "focus_areas": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Optional list of aspects to emphasize" - }, - "detail_level": { - "type": "string", - "enum": ["brief", "normal", "detailed"], - "description": "Optional detail level" - } - } - }) - } - - fn is_readonly(&self) -> bool { - true - } - - fn is_concurrency_safe(&self, _input: Option<&Value>) -> bool { - true - } - - fn needs_permissions(&self, _input: Option<&Value>) -> bool { - false - } - - async fn validate_input( - &self, - input: &Value, - context: Option<&ToolUseContext>, - ) -> ValidationResult { - let has_path = input - .get("image_path") - .and_then(|v| v.as_str()) - .is_some_and(|s| !s.is_empty()); - let has_data_url = input - .get("data_url") - .and_then(|v| v.as_str()) - .is_some_and(|s| !s.is_empty()); - let has_image_id = input - .get("image_id") - .and_then(|v| v.as_str()) - .is_some_and(|s| !s.is_empty()); - - if !has_path && !has_data_url && !has_image_id { - return ValidationResult { - result: false, - message: Some("Must provide one of image_path, data_url, or image_id".to_string()), - error_code: Some(400), - meta: None, - }; - } - - if let Some(image_path) = input.get("image_path").and_then(|v| v.as_str()) { - if !image_path.is_empty() { - let workspace_path = context.and_then(|ctx| ctx.workspace_root()); - match resolve_image_path(image_path, workspace_path) { - Ok(path) => { - if !path.exists() { - return ValidationResult { - result: false, - message: Some(format!("Image file does not exist: {}", image_path)), - error_code: Some(404), - meta: None, - }; - } - - if !path.is_file() { - return ValidationResult { - result: false, - message: Some(format!("Path is not a file: {}", image_path)), - error_code: Some(400), - meta: None, - }; - } - } - Err(e) => { - return ValidationResult { - result: false, - message: Some(format!("Path parsing failed: {}", e)), - error_code: Some(400), - meta: None, - }; - } - } - } - } - - ValidationResult::default() - } - - fn render_tool_use_message(&self, input: &Value, options: &ToolRenderOptions) -> String { - let image_source = if let Some(path) = input.get("image_path").and_then(|v| v.as_str()) { - if !path.is_empty() { - path.to_string() - } else { - "Clipboard image".to_string() - } - } else if input - .get("image_id") - .and_then(|v| v.as_str()) - .is_some_and(|id| !id.is_empty()) - { - "Clipboard image (image_id)".to_string() - } else if input.get("data_url").is_some() { - "Clipboard image".to_string() - } else { - "unknown".to_string() - }; - - if options.verbose { - let prompt = input - .get("analysis_prompt") - .and_then(|v| v.as_str()) - .unwrap_or("default analysis"); - format!("Viewing image: {} (prompt: {})", image_source, prompt) - } else { - format!("Viewing image: {}", image_source) - } - } - - async fn call_impl( - &self, - input: &Value, - context: &ToolUseContext, - ) -> BitFunResult> { - let start = std::time::Instant::now(); - - let input_data: ViewImageInput = serde_json::from_value(input.clone()) - .map_err(|e| BitFunError::parse(format!("Failed to parse input: {}", e)))?; - - let primary_provider = Self::primary_model_provider(context).unwrap_or("openai"); - if Self::primary_model_supports_images(context) { - let (image, image_source_description) = self - .build_attachment_image_context(&input_data, context, primary_provider) - .await?; - - let result_for_assistant = format!( - "Image attached for primary model analysis ({})", - image_source_description - ); - - return Ok(vec![ToolResult::Result { - data: json!({ - "success": true, - "mode": "attached_to_primary_model", - "image_source": image_source_description, - "image": image, - }), - result_for_assistant: Some(result_for_assistant), - }]); - } - - let (image_data, fallback_mime, image_source_description) = - self.load_source(&input_data, context).await?; - - let vision_model = resolve_vision_model_from_global_config().await?; - debug!( - "Using image understanding model: id={}, name={}, provider={}", - vision_model.id, vision_model.name, vision_model.provider - ); - - let processed = optimize_image_for_provider( - image_data, - &vision_model.provider, - fallback_mime.as_deref(), - )?; - - let prompt = self.build_prompt( - input_data.analysis_prompt.as_deref(), - &input_data.focus_areas, - &input_data.detail_level, - ); - trace!("Full view_image prompt: {}", prompt); - - let messages = build_multimodal_message( - &prompt, - &processed.data, - &processed.mime_type, - &vision_model.provider, - )?; - - let ai_client_factory = get_global_ai_client_factory() - .await - .map_err(|e| BitFunError::service(format!("Failed to get AI client factory: {}", e)))?; - let ai_client = ai_client_factory - .get_client_by_id(&vision_model.id) - .await - .map_err(|e| { - BitFunError::service(format!( - "Failed to create vision model client for {}: {}", - vision_model.id, e - )) - })?; - - debug!("Calling vision model for image analysis..."); - let ai_response = ai_client - .send_message(messages, None) - .await - .map_err(|e| BitFunError::service(format!("AI call failed: {}", e)))?; - - let elapsed = start.elapsed(); - info!("view_image completed: duration={:?}", elapsed); - - let result_for_assistant = format!( - "Image analysis result ({})\n\n{}", - image_source_description, ai_response.text - ); - - Ok(vec![ToolResult::Result { - data: json!({ - "success": true, - "image_source": image_source_description, - "analysis": ai_response.text, - "metadata": { - "mime_type": processed.mime_type, - "file_size": processed.data.len(), - "width": processed.width, - "height": processed.height, - "analysis_time_ms": elapsed.as_millis() as u64, - "model_used": vision_model.name, - "prompt_used": input_data.analysis_prompt.unwrap_or_else(|| "default".to_string()), - } - }), - result_for_assistant: Some(result_for_assistant), - }]) - } -} diff --git a/src/crates/core/src/agentic/tools/registry.rs b/src/crates/core/src/agentic/tools/registry.rs index 8c5f2617..a58cb1ab 100644 --- a/src/crates/core/src/agentic/tools/registry.rs +++ b/src/crates/core/src/agentic/tools/registry.rs @@ -122,9 +122,6 @@ impl ToolRegistry { // Log tool self.register_tool(Arc::new(LogTool::new())); - // Image analysis / viewing tool - self.register_tool(Arc::new(ViewImageTool::new())); - // Git version control tool self.register_tool(Arc::new(GitTool::new())); diff --git a/src/crates/core/src/service/cron/service.rs b/src/crates/core/src/service/cron/service.rs index 2d931996..e7b7b313 100644 --- a/src/crates/core/src/service/cron/service.rs +++ b/src/crates/core/src/service/cron/service.rs @@ -499,6 +499,7 @@ impl CronService { Some(enqueue_input.workspace_path), scheduled_job_policy(), None, + None, ) .await; @@ -508,7 +509,7 @@ impl CronService { }; match submit_result { - Ok(()) => { + Ok(_) => { job.state.active_turn_id = Some(enqueue_input.turn_id); job.state.pending_trigger_at_ms = None; job.state.retry_at_ms = None; diff --git a/src/crates/core/src/service/remote_connect/bot/command_router.rs b/src/crates/core/src/service/remote_connect/bot/command_router.rs index 38512240..0962b3f9 100644 --- a/src/crates/core/src/service/remote_connect/bot/command_router.rs +++ b/src/crates/core/src/service/remote_connect/bot/command_router.rs @@ -2507,6 +2507,34 @@ async fn handle_chat_message( let session_id = state.current_session_id.clone().unwrap(); let turn_id = format!("turn_{}", uuid::Uuid::new_v4()); + + let session_busy = { + use crate::agentic::coordination::get_global_coordinator; + use crate::agentic::core::SessionState; + get_global_coordinator() + .and_then(|c| c.get_session_manager().get_session(&session_id)) + .is_some_and(|s| matches!(s.state, SessionState::Processing { .. })) + }; + + if session_busy { + return HandleResult { + reply: if language.is_chinese() { + "消息已加入队列,将在当前助手步骤结束后自动处理。".to_string() + } else { + "Your message was queued and will run after the current assistant step finishes." + .to_string() + }, + actions: vec![], + forward_to_session: Some(ForwardRequest { + session_id, + content: message.to_string(), + agent_type: "agentic".to_string(), + turn_id, + image_contexts, + }), + }; + } + let cancel_command = format!("/cancel_task {}", turn_id); HandleResult { reply: format!( @@ -2520,7 +2548,7 @@ async fn handle_chat_message( format!("如需停止本次请求,请发送 `{}`。", cancel_command) } else { format!("If needed, send `{}` to stop this request.", cancel_command) - } + }, ), actions: cancel_task_actions(language, cancel_command), forward_to_session: Some(ForwardRequest { @@ -2559,6 +2587,8 @@ pub async fn execute_forwarded_turn( let tracker = dispatcher.ensure_tracker(&forward.session_id); let mut event_rx = tracker.subscribe(); + let target_turn_id = forward.turn_id.clone(); + if let Err(e) = dispatcher .send_message( &forward.session_id, @@ -2566,7 +2596,7 @@ pub async fn execute_forwarded_turn( Some(&forward.agent_type), forward.image_contexts, DialogSubmissionPolicy::for_source(DialogTriggerSource::Bot), - Some(forward.turn_id), + Some(forward.turn_id.clone()), ) .await { @@ -2588,13 +2618,26 @@ pub async fn execute_forwarded_turn( let mut tool_params_cache: std::collections::HashMap> = std::collections::HashMap::new(); + let streams_our_turn = || { + tracker + .snapshot_active_turn() + .map(|s| s.turn_id == target_turn_id) + .unwrap_or(false) + }; + loop { match event_rx.recv().await { Ok(event) => match event { TrackerEvent::ThinkingChunk(chunk) => { + if !streams_our_turn() { + continue; + } thinking_buf.push_str(&chunk); } TrackerEvent::ThinkingEnd => { + if !streams_our_turn() { + continue; + } if verbose_mode && !thinking_buf.trim().is_empty() { if let Some(sender) = message_sender.as_ref() { let content = truncate_at_char_boundary(&thinking_buf, 500); @@ -2609,6 +2652,9 @@ pub async fn execute_forwarded_turn( thinking_buf.clear(); } TrackerEvent::TextChunk(t) => { + if !streams_our_turn() { + continue; + } response.push_str(&t); } TrackerEvent::ToolStarted { @@ -2616,6 +2662,9 @@ pub async fn execute_forwarded_turn( tool_name, params, } => { + if !streams_our_turn() { + continue; + } if tool_name == "AskUserQuestion" { if let Some(questions_value) = params.and_then(|p| p.get("questions").cloned()) @@ -2647,6 +2696,9 @@ pub async fn execute_forwarded_turn( duration_ms, success, } => { + if !streams_our_turn() { + continue; + } if verbose_mode { if let Some(sender) = message_sender.as_ref() { let params_str = tool_params_cache @@ -2675,28 +2727,36 @@ pub async fn execute_forwarded_turn( } } } - TrackerEvent::TurnCompleted => break, - TrackerEvent::TurnFailed(e) => { - let msg = if language.is_chinese() { - format!("错误: {e}") - } else { - format!("Error: {e}") - }; - return ForwardedTurnResult { - display_text: msg.clone(), - full_text: msg, - }; + TrackerEvent::TurnCompleted { turn_id } => { + if turn_id == target_turn_id { + break; + } } - TrackerEvent::TurnCancelled => { - let msg = if language.is_chinese() { - "任务已取消。".to_string() - } else { - "Task was cancelled.".to_string() - }; - return ForwardedTurnResult { - display_text: msg.clone(), - full_text: msg, - }; + TrackerEvent::TurnFailed { turn_id, error } => { + if turn_id == target_turn_id { + let msg = if language.is_chinese() { + format!("错误: {error}") + } else { + format!("Error: {error}") + }; + return ForwardedTurnResult { + display_text: msg.clone(), + full_text: msg, + }; + } + } + TrackerEvent::TurnCancelled { turn_id } => { + if turn_id == target_turn_id { + let msg = if language.is_chinese() { + "任务已取消。".to_string() + } else { + "Task was cancelled.".to_string() + }; + return ForwardedTurnResult { + display_text: msg.clone(), + full_text: msg, + }; + } } }, Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => { diff --git a/src/crates/core/src/service/remote_connect/bot/feishu.rs b/src/crates/core/src/service/remote_connect/bot/feishu.rs index 5ce027c4..9153f81a 100644 --- a/src/crates/core/src/service/remote_connect/bot/feishu.rs +++ b/src/crates/core/src/service/remote_connect/bot/feishu.rs @@ -1563,8 +1563,10 @@ impl FeishuBot { }); let verbose_mode = load_bot_persistence().verbose_mode; let result = execute_forwarded_turn(forward, Some(handler), Some(sender), verbose_mode).await; - if let Err(err) = bot.send_message(&cid, &result.display_text).await { - warn!("Failed to send Feishu final message to {cid}: {err}"); + if !result.display_text.is_empty() { + if let Err(err) = bot.send_message(&cid, &result.display_text).await { + warn!("Failed to send Feishu final message to {cid}: {err}"); + } } bot.notify_files_ready(&cid, &result.full_text).await; }); diff --git a/src/crates/core/src/service/remote_connect/bot/telegram.rs b/src/crates/core/src/service/remote_connect/bot/telegram.rs index 7ebe07ba..d0c89d2e 100644 --- a/src/crates/core/src/service/remote_connect/bot/telegram.rs +++ b/src/crates/core/src/service/remote_connect/bot/telegram.rs @@ -705,7 +705,9 @@ impl TelegramBot { }); let verbose_mode = load_bot_persistence().verbose_mode; let result = execute_forwarded_turn(forward, Some(handler), Some(sender), verbose_mode).await; - bot.send_message(chat_id, &result.display_text).await.ok(); + if !result.display_text.is_empty() { + bot.send_message(chat_id, &result.display_text).await.ok(); + } bot.notify_files_ready(chat_id, &result.full_text).await; }); } diff --git a/src/crates/core/src/service/remote_connect/bot/weixin.rs b/src/crates/core/src/service/remote_connect/bot/weixin.rs index c5c9db80..497f534d 100644 --- a/src/crates/core/src/service/remote_connect/bot/weixin.rs +++ b/src/crates/core/src/service/remote_connect/bot/weixin.rs @@ -1715,7 +1715,9 @@ impl WeixinBot { let verbose_mode = load_bot_persistence().verbose_mode; let turn_result = execute_forwarded_turn(forward, Some(handler), Some(sender), verbose_mode).await; - let _ = bot.send_text(&peer, &turn_result.display_text).await; + if !turn_result.display_text.is_empty() { + let _ = bot.send_text(&peer, &turn_result.display_text).await; + } bot.notify_files_ready(&peer, &turn_result.full_text).await; }); } diff --git a/src/crates/core/src/service/remote_connect/remote_server.rs b/src/crates/core/src/service/remote_connect/remote_server.rs index 3faf40d6..21d34e95 100644 --- a/src/crates/core/src/service/remote_connect/remote_server.rs +++ b/src/crates/core/src/service/remote_connect/remote_server.rs @@ -953,9 +953,9 @@ pub enum TrackerEvent { duration_ms: Option, success: bool, }, - TurnCompleted, - TurnFailed(String), - TurnCancelled, + TurnCompleted { turn_id: String }, + TurnFailed { turn_id: String, error: String }, + TurnCancelled { turn_id: String }, } /// Tracks the real-time state of a session for polling by the mobile client. @@ -1480,32 +1480,39 @@ impl RemoteSessionStateTracker { drop(s); self.bump_version(); } - AE::DialogTurnCompleted { .. } if is_direct => { + AE::DialogTurnCompleted { turn_id, .. } if is_direct => { let mut s = self.state.write().unwrap(); s.turn_status = "completed".to_string(); s.session_state = "idle".to_string(); s.persistence_dirty = true; drop(s); self.bump_version(); - let _ = self.event_tx.send(TrackerEvent::TurnCompleted); + let _ = self.event_tx.send(TrackerEvent::TurnCompleted { + turn_id: turn_id.clone(), + }); } - AE::DialogTurnFailed { error, .. } if is_direct => { + AE::DialogTurnFailed { turn_id, error, .. } if is_direct => { let mut s = self.state.write().unwrap(); s.turn_status = "failed".to_string(); s.session_state = "idle".to_string(); s.persistence_dirty = true; drop(s); self.bump_version(); - let _ = self.event_tx.send(TrackerEvent::TurnFailed(error.clone())); + let _ = self.event_tx.send(TrackerEvent::TurnFailed { + turn_id: turn_id.clone(), + error: error.clone(), + }); } - AE::DialogTurnCancelled { .. } if is_direct => { + AE::DialogTurnCancelled { turn_id, .. } if is_direct => { let mut s = self.state.write().unwrap(); s.turn_status = "cancelled".to_string(); s.session_state = "idle".to_string(); s.persistence_dirty = true; drop(s); self.bump_version(); - let _ = self.event_tx.send(TrackerEvent::TurnCancelled); + let _ = self.event_tx.send(TrackerEvent::TurnCancelled { + turn_id: turn_id.clone(), + }); } AE::ModelRoundStarted { round_index, .. } if is_direct => { let mut s = self.state.write().unwrap(); @@ -1620,9 +1627,12 @@ impl RemoteExecutionDispatcher { } } - /// Dispatch a SendMessage command: ensure tracker, restore session, start dialog turn. - /// Returns `(session_id, turn_id)` on success. - /// If `turn_id` is `None`, one is auto-generated. + /// Dispatch a SendMessage command: ensure tracker, restore session, submit via + /// [`DialogScheduler`](crate::agentic::coordination::DialogScheduler) (same as desktop). + /// When the session is already processing, the message is queued and the current turn + /// may yield after the current model round (for interactive `submission_policy` sources). + /// Returns whether this message started immediately or was only queued, plus ids. + /// If `turn_id` is `None`, one is auto-generated before queueing. /// /// All platforms (desktop, mobile, bot) use the same `ImageContextData` format. pub async fn send_message( @@ -1633,12 +1643,15 @@ impl RemoteExecutionDispatcher { image_contexts: Vec, submission_policy: crate::agentic::coordination::DialogSubmissionPolicy, turn_id: Option, - ) -> std::result::Result<(String, String), String> { - use crate::agentic::coordination::get_global_coordinator; + ) -> std::result::Result { + use crate::agentic::coordination::{get_global_coordinator, get_global_scheduler}; let coordinator = get_global_coordinator() .ok_or_else(|| "Desktop session system not ready".to_string())?; + let scheduler = get_global_scheduler() + .ok_or_else(|| "Dialog scheduler is not initialized".to_string())?; + self.ensure_tracker(session_id); let session_mgr = coordinator.get_session_manager(); @@ -1707,36 +1720,25 @@ impl RemoteExecutionDispatcher { let turn_id = turn_id.unwrap_or_else(|| format!("turn_{}", chrono::Utc::now().timestamp_millis())); - if image_contexts.is_empty() { - coordinator - .start_dialog_turn( - session_id.to_string(), - content.clone(), - None, - Some(turn_id.clone()), - resolved_agent_type, - binding_workspace.clone(), - submission_policy, - ) - .await - .map_err(|e| e.to_string())?; + let image_payload = if image_contexts.is_empty() { + None } else { - coordinator - .start_dialog_turn_with_image_contexts( - session_id.to_string(), - content.clone(), - None, - image_contexts, - Some(turn_id.clone()), - resolved_agent_type, - binding_workspace, - submission_policy, - ) - .await - .map_err(|e| e.to_string())?; - } + Some(image_contexts) + }; - Ok((session_id.to_string(), turn_id)) + scheduler + .submit( + session_id.to_string(), + content, + None, + Some(turn_id.clone()), + resolved_agent_type, + binding_workspace, + submission_policy, + None, + image_payload, + ) + .await } /// Cancel a running dialog turn. @@ -2741,10 +2743,22 @@ impl RemoteServer { ) .await { - Ok((sid, turn_id)) => RemoteResponse::MessageSent { - session_id: sid, - turn_id, - }, + Ok(outcome) => { + let (sid, turn_id) = match outcome { + crate::agentic::coordination::DialogSubmitOutcome::Started { + session_id, + turn_id, + } + | crate::agentic::coordination::DialogSubmitOutcome::Queued { + session_id, + turn_id, + } => (session_id, turn_id), + }; + RemoteResponse::MessageSent { + session_id: sid, + turn_id, + } + } Err(e) => RemoteResponse::Error { message: e }, } } diff --git a/src/mobile-web/src/i18n/messages.ts b/src/mobile-web/src/i18n/messages.ts index 1cd88889..1adb816b 100644 --- a/src/mobile-web/src/i18n/messages.ts +++ b/src/mobile-web/src/i18n/messages.ts @@ -111,6 +111,8 @@ export const messages: Record = { analyzingImage: 'Analyzing image with image understanding model...', inputPlaceholder: 'How can I help you...', workingPlaceholder: 'BitFun is working...', + streamingTapToQueue: 'Assistant is replying — tap to type a follow-up (queued)', + messageQueued: 'Message queued; it will run after the current step', imageAnalyzingPlaceholder: 'Analyzing image...', imageAttachmentFallback: '(see attached images)', modelSelection: 'Select model', @@ -262,6 +264,8 @@ export const messages: Record = { analyzingImage: '正在使用图像理解模型分析图片...', inputPlaceholder: '有什么可以帮您...', workingPlaceholder: 'BitFun 正在处理中...', + streamingTapToQueue: '助手正在回复 — 点击输入后续消息(将排队)', + messageQueued: '已加入队列,将在当前步骤结束后处理', imageAnalyzingPlaceholder: '正在分析图片...', imageAttachmentFallback: '(见附带图片)', modelSelection: '选择模型', diff --git a/src/mobile-web/src/pages/ChatPage.tsx b/src/mobile-web/src/pages/ChatPage.tsx index df3f5f42..e7830e38 100644 --- a/src/mobile-web/src/pages/ChatPage.tsx +++ b/src/mobile-web/src/pages/ChatPage.tsx @@ -1984,6 +1984,7 @@ const ChatPage: React.FC = ({ sessionMgr, sessionId, sessionName, const messagesEndRef = useRef(null); const messagesContainerRef = useRef(null); const [expandedMsgIds, setExpandedMsgIds] = useState>(new Set()); + const [infoToast, setInfoToast] = useState(null); const isStreaming = activeTurn != null && activeTurn.status === 'active'; @@ -2091,6 +2092,12 @@ const ChatPage: React.FC = ({ sessionMgr, sessionId, sessionName, return () => clearTimeout(t); }, [error, setError]); + useEffect(() => { + if (!infoToast) return; + const timer = setTimeout(() => setInfoToast(null), 3200); + return () => clearTimeout(timer); + }, [infoToast]); + const loadMessages = useCallback(async (beforeId?: string) => { if (isLoadingMoreRef.current || (!hasMoreRef.current && beforeId)) return; try { @@ -2243,10 +2250,13 @@ const ChatPage: React.FC = ({ sessionMgr, sessionId, sessionName, const handleSend = useCallback(async () => { const text = input.trim(); const imgs = pendingImages; - if ((!text && imgs.length === 0) || isStreaming || imageAnalyzing) return; + if ((!text && imgs.length === 0) || imageAnalyzing) return; + const wasStreaming = isStreaming; setInput(''); setPendingImages([]); - setInputExpanded(false); + if (!wasStreaming) { + setInputExpanded(false); + } const hasImages = imgs.length > 0; const imageContexts = hasImages @@ -2278,6 +2288,9 @@ const ChatPage: React.FC = ({ sessionMgr, sessionId, sessionName, imageContexts, ); pollerRef.current?.nudge(); + if (wasStreaming) { + setInfoToast(t('chat.messageQueued')); + } } catch (e: any) { setError(e.message); } finally { @@ -2659,7 +2672,7 @@ const ChatPage: React.FC = ({ sessionMgr, sessionId, sessionName, >
{/* Input area */}
@@ -2674,14 +2687,14 @@ const ChatPage: React.FC = ({ sessionMgr, sessionId, sessionName, onCompositionStart={handleCompositionStart} onCompositionEnd={handleCompositionEnd} rows={1} - disabled={isStreaming || imageAnalyzing} + disabled={imageAnalyzing} /> ) : ( {imageAnalyzing ? t('chat.imageAnalyzingPlaceholder') : isStreaming - ? t('chat.workingPlaceholder') + ? t('chat.streamingTapToQueue') : t('chat.inputPlaceholder')} )} @@ -2694,7 +2707,7 @@ const ChatPage: React.FC = ({ sessionMgr, sessionId, sessionName, )} @@ -2719,14 +2732,14 @@ const ChatPage: React.FC = ({ sessionMgr, sessionId, sessionName, const idx = modes.indexOf(agentMode); setAgentMode(modes[(idx + 1) % modes.length]); }} - disabled={isStreaming} + disabled={imageAnalyzing} > {modeOptions.find(m => m.id === agentMode)?.label} )} - {isStreaming || imageAnalyzing ? ( - + ) : isStreaming ? ( +
+ + + +
) : (
)} + {infoToast && ( +
setInfoToast(null)}> + {infoToast} +
+ )}
); }; diff --git a/src/mobile-web/src/styles/components/chat-input.scss b/src/mobile-web/src/styles/components/chat-input.scss index e1f44626..de71d137 100644 --- a/src/mobile-web/src/styles/components/chat-input.scss +++ b/src/mobile-web/src/styles/components/chat-input.scss @@ -474,6 +474,13 @@ } } +.chat-page__stream-actions { + display: inline-flex; + align-items: center; + gap: 6px; + flex-shrink: 0; +} + // ── Image previews ─────────────────────────────────────────────────────────── .chat-page__image-preview-row { diff --git a/src/mobile-web/src/styles/components/chat.scss b/src/mobile-web/src/styles/components/chat.scss index 6ece7a96..60246b57 100644 --- a/src/mobile-web/src/styles/components/chat.scss +++ b/src/mobile-web/src/styles/components/chat.scss @@ -283,6 +283,10 @@ white-space: pre-line; animation: toastSlideIn 0.25s ease-out; backdrop-filter: blur(8px); + + &--info { + background: rgba(22, 110, 75, 0.94); + } } @keyframes toastSlideIn { diff --git a/src/web-ui/src/component-library/components/registry.tsx b/src/web-ui/src/component-library/components/registry.tsx index 32d56441..468851b3 100644 --- a/src/web-ui/src/component-library/components/registry.tsx +++ b/src/web-ui/src/component-library/components/registry.tsx @@ -32,7 +32,6 @@ import { LSDisplay } from '@/flow_chat/tool-cards/LSDisplay'; import { MCPToolDisplay } from '@/flow_chat/tool-cards/MCPToolDisplay'; import { MermaidInteractiveDisplay } from '@/flow_chat/tool-cards/MermaidInteractiveDisplay'; import { ContextCompressionDisplay } from '@/flow_chat/tool-cards/ContextCompressionDisplay'; -import { ImageAnalysisCard } from '@/flow_chat/tool-cards/ImageAnalysisCard'; import { SkillDisplay } from '@/flow_chat/tool-cards/SkillDisplay'; import { AskUserQuestionCard } from '@/flow_chat/tool-cards/AskUserQuestionCard'; import { GitToolDisplay } from '@/flow_chat/tool-cards/GitToolDisplay'; @@ -1583,34 +1582,6 @@ console.log(user.greet());`); ), }, - { - id: 'image-analysis-card', - name: 'view_image - 图片分析', - description: '图片查看分析工具', - category: 'flowchat-cards', - component: () => ( -
-

图片分析 - 成功

- -
- ), - }, { id: 'skill-card', name: 'Skill - 技能调用', diff --git a/src/web-ui/src/flow_chat/components/ChatInput.scss b/src/web-ui/src/flow_chat/components/ChatInput.scss index 3f6da031..607a35fd 100644 --- a/src/web-ui/src/flow_chat/components/ChatInput.scss +++ b/src/web-ui/src/flow_chat/components/ChatInput.scss @@ -112,7 +112,6 @@ .bitfun-chat-input__target-switcher, .bitfun-chat-input__actions-left, .bitfun-chat-input__agent-boost, - .bitfun-chat-input__queued-indicator, .bitfun-chat-input__actions-right { display: none !important; } @@ -1267,6 +1266,13 @@ height: 100%; animation: bitfun-stacked-reveal 0.24s cubic-bezier(0.4, 0, 0.2, 1) 0.22s both; } + + &__split-actions { + display: inline-flex; + align-items: center; + gap: 6px; + flex-shrink: 0; + } &__send-button { position: relative; diff --git a/src/web-ui/src/flow_chat/components/ChatInput.tsx b/src/web-ui/src/flow_chat/components/ChatInput.tsx index 81973a17..fe972f98 100644 --- a/src/web-ui/src/flow_chat/components/ChatInput.tsx +++ b/src/web-ui/src/flow_chat/components/ChatInput.tsx @@ -119,7 +119,10 @@ export const ChatInput: React.FC = ({ // Get input history for current session (after currentSessionId is defined) const inputHistory = effectiveTargetSessionId ? getSessionHistory(effectiveTargetSessionId) : []; - const derivedState = useSessionDerivedState(effectiveTargetSessionId); + const derivedState = useSessionDerivedState( + effectiveTargetSessionId, + inputState.value.trim() + ); const { transition, setQueuedInput } = useSessionStateMachineActions(effectiveTargetSessionId); const stateMachine = useSessionStateMachine(effectiveTargetSessionId); @@ -499,18 +502,25 @@ export const ChatInput: React.FC = ({ React.useEffect(() => { const queuedInput = stateMachine?.context?.queuedInput; - if (queuedInput && effectiveTargetSessionId) { + if (!queuedInput?.trim() || !effectiveTargetSessionId) { + return; + } + // Sync machine queue into the input (e.g. failed turn restored by EventHandlerModule). + // `queuedInput` is cleared on successful send via `setQueuedInput(null)` so we do not fight CLEAR_VALUE. + if (inputState.value !== queuedInput) { log.debug('Detected queuedInput, restoring message to input', { queuedInput }); dispatchInput({ type: 'ACTIVATE' }); dispatchInput({ type: 'SET_VALUE', payload: queuedInput }); - - setQueuedInput(null); - if (richTextInputRef.current) { richTextInputRef.current.focus(); } } - }, [stateMachine?.context?.queuedInput, effectiveTargetSessionId, setQueuedInput, stateMachine?.context]); + }, [ + stateMachine?.context?.queuedInput, + effectiveTargetSessionId, + inputState.value, + stateMachine?.context, + ]); React.useEffect(() => { const handleClickOutside = (event: MouseEvent) => { @@ -782,8 +792,11 @@ export const ChatInput: React.FC = ({ if (!derivedState) return; const { sendButtonMode } = derivedState; - - if (sendButtonMode === 'cancel') { + const draftTrimmed = inputState.value.trim(); + + // While generating, an empty control in `cancel` mode means stop. If the user has typed a follow-up, + // never treat this path as cancel — that would call cancel_dialog_turn and abort the current round early. + if (sendButtonMode === 'cancel' && !draftTrimmed) { await transition(SessionExecutionEvent.USER_CANCEL); return; } @@ -792,9 +805,9 @@ export const ChatInput: React.FC = ({ await transition(SessionExecutionEvent.RESET); } - if (!inputState.value.trim()) return; + if (!draftTrimmed) return; - const message = inputState.value.trim(); + const message = draftTrimmed; if (message.toLowerCase().startsWith('/btw')) { // When idle, /btw can be sent via the normal send button. @@ -810,7 +823,9 @@ export const ChatInput: React.FC = ({ setSavedDraft(''); dispatchInput({ type: 'CLEAR_VALUE' }); - + // Clear machine queue too; otherwise the queuedInput→input sync effect puts the text back after send. + setQueuedInput(null); + try { await sendMessage(message); dispatchInput({ type: 'CLEAR_VALUE' }); @@ -819,8 +834,20 @@ export const ChatInput: React.FC = ({ log.error('Failed to send message', { error }); dispatchInput({ type: 'ACTIVATE' }); dispatchInput({ type: 'SET_VALUE', payload: message }); + if (derivedState?.isProcessing) { + setQueuedInput(message); + } } - }, [inputState.value, derivedState, transition, sendMessage, addToHistory, effectiveTargetSessionId, setQueuedInput, submitBtwFromInput]); + }, [ + inputState.value, + derivedState, + transition, + sendMessage, + addToHistory, + effectiveTargetSessionId, + setQueuedInput, + submitBtwFromInput, + ]); const getFilteredIncrementalModes = useCallback(() => { if (!canSwitchModes) return []; @@ -1160,6 +1187,8 @@ export const ChatInput: React.FC = ({ } if (derivedState?.isProcessing) { + if (!inputState.value.trim()) return; + void handleSendOrCancel(); return; } @@ -1394,6 +1423,34 @@ export const ChatInput: React.FC = ({ ); } + + if (sendButtonMode === 'split') { + return ( +
+ +
{ + void transition(SessionExecutionEvent.USER_CANCEL); + }} + data-testid="chat-input-cancel-btn" + > +
+
+ + + + +
+ ); + } return ( = ({
); })()} - - {derivedState?.hasQueuedInput && ( -
- {t('input.willSendAfterStop')} - { - dispatchInput({ type: 'CLEAR_VALUE' }); - setQueuedInput(null); - }} - > - {t('input.clear')} - -
- )} | undefined; - const mode = - raw?.mode ?? - (raw?.result as Record | undefined)?.mode ?? - (raw?.data as Record | undefined)?.mode; - return mode === 'attached_to_primary_model'; -} - interface FlowToolCardProps { toolItem: FlowToolItem; onConfirm?: (toolId: string, updatedInput?: any) => void; @@ -45,10 +31,6 @@ export const FlowToolCard: React.FC = React.memo(({ sessionId, className = '' }) => { - if (isViewImageAttachedMode(toolItem)) { - return null; - } - const config = getToolCardConfig(toolItem.toolName); const CardComponent = getToolCardComponent(toolItem.toolName); diff --git a/src/web-ui/src/flow_chat/hooks/useMessageSender.ts b/src/web-ui/src/flow_chat/hooks/useMessageSender.ts index 074ecc4c..e1c471bf 100644 --- a/src/web-ui/src/flow_chat/hooks/useMessageSender.ts +++ b/src/web-ui/src/flow_chat/hooks/useMessageSender.ts @@ -155,7 +155,7 @@ export function useMessageSender(props: UseMessageSenderProps): UseMessageSender return `[Code Snippet: ${ctx.filePath}:${ctx.startLine}-${ctx.endLine}]`; case 'image': // Images are sent out-of-band via `imageContexts` so the backend can attach them - // (multimodal) or let the model call `view_image` (text-only). Avoid embedding + // for multimodal models or convert to text placeholders for text-only models. Avoid embedding // "Image ID" references into the user prompt, which can cause redundant tool calls. return ''; case 'terminal-command': diff --git a/src/web-ui/src/flow_chat/hooks/useSessionStateMachine.ts b/src/web-ui/src/flow_chat/hooks/useSessionStateMachine.ts index 344371a3..30218a2a 100644 --- a/src/web-ui/src/flow_chat/hooks/useSessionStateMachine.ts +++ b/src/web-ui/src/flow_chat/hooks/useSessionStateMachine.ts @@ -41,14 +41,22 @@ export function useSessionStateMachine(sessionId: string | null) { /** * Derived session state. + * @param processingInputDraftTrimmed - trimmed chat input while generating; keeps send UI in `split` when user has typed a follow-up (see derivedState). */ -export function useSessionDerivedState(sessionId: string | null): SessionDerivedState | null { +export function useSessionDerivedState( + sessionId: string | null, + processingInputDraftTrimmed?: string +): SessionDerivedState | null { const snapshot = useSessionStateMachine(sessionId); const derivedState = useMemo(() => { if (!snapshot) return null; - return deriveSessionState(snapshot); - }, [snapshot]); + const opts = + processingInputDraftTrimmed !== undefined + ? { processingInputDraftTrimmed } + : undefined; + return deriveSessionState(snapshot, opts); + }, [snapshot, processingInputDraftTrimmed]); return derivedState; } diff --git a/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts index 2d42f7f1..1c55057f 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts @@ -524,6 +524,16 @@ function handleDialogTurnStarted(context: FlowChatContext, event: any): void { backendTurnIndex: turnIndex, })); } + + // User may have pre-added this turn from the composer while the previous turn was still running; + // START was skipped then. When the backend dispatches this turn, move the state machine to PROCESSING. + const machine = stateMachineManager.get(sessionId); + if (machine && machine.getCurrentState() === SessionExecutionState.IDLE) { + void stateMachineManager.transition(sessionId, SessionExecutionEvent.START, { + taskId: sessionId, + dialogTurnId: turnId, + }); + } } /** diff --git a/src/web-ui/src/flow_chat/services/flow-chat-manager/MessageModule.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/MessageModule.ts index 0d260172..5bb69af3 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/MessageModule.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/MessageModule.ts @@ -135,8 +135,8 @@ export async function sendMessage( images: options?.imageDisplayData, }, modelRounds: [], - // Images are handled by the agent/tooling (e.g. `view_image`) or sent directly to multimodal - // primary models. We don't run a separate frontend "image pre-analysis" phase here. + // Images are attached for multimodal primary models or reduced to text placeholders for text-only models. + // We don't run a separate frontend "image pre-analysis" phase here. status: 'pending', startTime: Date.now() }; diff --git a/src/web-ui/src/flow_chat/state-machine/derivedState.ts b/src/web-ui/src/flow_chat/state-machine/derivedState.ts index bee6ad4e..1a214773 100644 --- a/src/web-ui/src/flow_chat/state-machine/derivedState.ts +++ b/src/web-ui/src/flow_chat/state-machine/derivedState.ts @@ -14,9 +14,22 @@ import { SessionDerivedState, } from './types'; -export function deriveSessionState(machine: SessionStateMachine): SessionDerivedState { +/** Optional live chat input draft while PROCESSING (mirrors input box); used so send mode stays `split` when user has typed a follow-up. */ +export type DeriveSessionOptions = { + processingInputDraftTrimmed?: string; +}; + +export function deriveSessionState( + machine: SessionStateMachine, + options?: DeriveSessionOptions +): SessionDerivedState { const { currentState, context } = machine; const { processingPhase } = context; + const draftTrimmed = + currentState === SessionExecutionState.PROCESSING || + currentState === SessionExecutionState.ERROR + ? options?.processingInputDraftTrimmed?.trim() ?? '' + : ''; const plannerStats = context.planner?.todos ? { @@ -40,7 +53,13 @@ export function deriveSessionState(machine: SessionStateMachine): SessionDerived showSendButton: !isProcessing, showCancelButton: isProcessing, - sendButtonMode: getSendButtonMode(currentState, processingPhase, context.queuedInput, context.pendingToolConfirmations.size > 0), + sendButtonMode: getSendButtonMode( + currentState, + processingPhase, + context.queuedInput, + context.pendingToolConfirmations.size > 0, + draftTrimmed + ), inputPlaceholder: 'How can I help you...', @@ -66,7 +85,11 @@ export function deriveSessionState(machine: SessionStateMachine): SessionDerived canCancel: isProcessing, canSendNewMessage: isIdle || isError, - hasQueuedInput: context.queuedInput !== null && context.queuedInput.trim() !== '', + hasQueuedInput: + (context.queuedInput?.trim()?.length ?? 0) > 0 || + ((currentState === SessionExecutionState.PROCESSING || + currentState === SessionExecutionState.ERROR) && + draftTrimmed.length > 0), hasError: isError, errorType: context.errorMessage ? detectErrorType(context.errorMessage) : null, @@ -78,17 +101,21 @@ function getSendButtonMode( state: SessionExecutionState, phase: ProcessingPhase | null, queuedInput: string | null, - hasPendingConfirmations: boolean + hasPendingConfirmations: boolean, + processingDraftTrimmed: string ): SessionDerivedState['sendButtonMode'] { if (state === SessionExecutionState.ERROR) { - return queuedInput ? 'split' : 'retry'; + const hasQueued = (queuedInput?.trim()?.length ?? 0) > 0 || processingDraftTrimmed.length > 0; + return hasQueued ? 'split' : 'retry'; } if (state === SessionExecutionState.PROCESSING) { if (phase === ProcessingPhase.TOOL_CONFIRMING || hasPendingConfirmations) { return 'confirm'; } - return queuedInput ? 'split' : 'cancel'; + const hasFollowUpDraft = + (queuedInput?.trim()?.length ?? 0) > 0 || processingDraftTrimmed.length > 0; + return hasFollowUpDraft ? 'split' : 'cancel'; } return 'send'; diff --git a/src/web-ui/src/flow_chat/store/modernFlowChatStore.ts b/src/web-ui/src/flow_chat/store/modernFlowChatStore.ts index 903fea1f..2ab5b4d3 100644 --- a/src/web-ui/src/flow_chat/store/modernFlowChatStore.ts +++ b/src/web-ui/src/flow_chat/store/modernFlowChatStore.ts @@ -6,7 +6,7 @@ import { create } from 'zustand'; import { immer } from 'zustand/middleware/immer'; -import type { Session, DialogTurn, ModelRound, FlowToolItem } from '../types/flow-chat'; +import type { Session, DialogTurn, ModelRound, FlowItem, FlowToolItem, FlowTextItem } from '../types/flow-chat'; import { isCollapsibleTool, READ_TOOL_NAMES, SEARCH_TOOL_NAMES } from '../tool-cards'; /** @@ -25,10 +25,14 @@ export interface ExploreGroupStats { export interface ExploreGroupData { groupId: string; rounds: ModelRound[]; - allItems: import('../types/flow-chat').FlowItem[]; + allItems: FlowItem[]; stats: ExploreGroupStats; isGroupStreaming: boolean; isLastGroupInTurn: boolean; + /** + * When true, ExploreGroupRenderer auto-collapses after a critical follow-up round (e.g. Mermaid). + * Set false if the group contains assistant `text` items so narrative stays visible. + */ isFollowedByCritical: boolean; } @@ -131,6 +135,18 @@ function computeRoundStats(round: ModelRound): { readCount: number; searchCount: return { readCount, searchCount, thinkingCount }; } +/** + * True when the merged explore group includes assistant markdown/text with real content. + * Auto-collapse on "followed by critical tool" must not hide this narrative. + */ +function exploreGroupHasNarrativeText(items: FlowItem[]): boolean { + return items.some( + (item) => + item.type === 'text' && + String((item as FlowTextItem).content || '').trim().length > 0 + ); +} + let cachedSession: Session | null = null; let cachedDialogTurnsRef: DialogTurn[] | null = null; let cachedVirtualItems: VirtualItem[] = []; @@ -185,7 +201,7 @@ export function sessionToVirtualItems(session: Session | null): VirtualItem[] { interface TempExploreGroup { rounds: ModelRound[]; - allItems: import('../types/flow-chat').FlowItem[]; + allItems: FlowItem[]; readCount: number; searchCount: number; thinkingCount: number; @@ -252,6 +268,10 @@ export function sessionToVirtualItems(session: Session | null): VirtualItem[] { isFollowedByCritical = !isExploreOnlyRound(nextRound); } } + + if (exploreGroupHasNarrativeText(group.allItems)) { + isFollowedByCritical = false; + } const isGroupStreaming = group.rounds.some(r => r.isStreaming); diff --git a/src/web-ui/src/flow_chat/tool-cards/ImageAnalysisCard.scss b/src/web-ui/src/flow_chat/tool-cards/ImageAnalysisCard.scss deleted file mode 100644 index 055c557e..00000000 --- a/src/web-ui/src/flow_chat/tool-cards/ImageAnalysisCard.scss +++ /dev/null @@ -1,92 +0,0 @@ -/** - * ImageAnalysis tool specific styles - * Extended styles based on CompactToolCard - */ - -@use './_tool-card-common.scss'; -.image-analysis-expanded-content { - /* Parent container .compact-tool-card-expanded already has margin and padding */ - max-height: 400px; - overflow-y: auto; - - .section-label { - font-size: 10px; - font-weight: 600; - color: var(--color-text-muted); - text-transform: uppercase; - letter-spacing: 0.5px; - margin-bottom: 6px; - display: flex; - align-items: center; - gap: 6px; - - &::before { - content: "▸"; - margin-right: 4px; - font-size: 10px; - opacity: 0.5; - } - } - - .section-content { - color: var(--color-text-secondary); - font-size: 12px; - line-height: 1.6; - padding: 0; - } - -} - -.analysis-prompt-section { - margin-bottom: 10px; -} - -.focus-areas-section { - margin-bottom: 10px; -} - -.focus-areas-tags { - display: flex; - flex-wrap: wrap; - gap: 6px; -} - -.focus-area-tag { - display: inline-flex; - align-items: center; - padding: 3px 8px; - background: var(--element-bg-soft); - color: var(--color-text-secondary); - font-size: 11px; - font-weight: 500; - border-radius: 10px; - white-space: nowrap; -} - -.analysis-result-section { - margin-bottom: 0; -} - -.model-badge { - display: inline-flex; - align-items: center; - padding: 2px 6px; - margin-left: 6px; - background: var(--element-bg-soft); - color: var(--color-text-muted); - font-size: 9px; - font-weight: 500; - border-radius: 8px; - text-transform: none; - letter-spacing: 0; -} - -.analysis-content { - color: var(--color-text-secondary); - font-size: 12px; - line-height: 1.6; - padding: 0; - white-space: pre-wrap; - word-break: break-word; -} - diff --git a/src/web-ui/src/flow_chat/tool-cards/ImageAnalysisCard.tsx b/src/web-ui/src/flow_chat/tool-cards/ImageAnalysisCard.tsx deleted file mode 100644 index fbf78ece..00000000 --- a/src/web-ui/src/flow_chat/tool-cards/ImageAnalysisCard.tsx +++ /dev/null @@ -1,209 +0,0 @@ -/** - * Image analysis tool card - compact mode - * Used for view_image tool - */ - -import React, { useState, useMemo, useEffect, useCallback } from 'react'; -import { Loader2, Clock, Check } from 'lucide-react'; -import { useTranslation } from 'react-i18next'; -import type { ToolCardProps } from '../types/flow-chat'; -import { CompactToolCard, CompactToolCardHeader } from './CompactToolCard'; -import { useToolCardHeightContract } from './useToolCardHeightContract'; -import './ImageAnalysisCard.scss'; - -const imageAnalysisExpandedStateCache = new Map(); - -export const ImageAnalysisCard: React.FC = ({ - toolItem, - onExpand -}) => { - const { t } = useTranslation('flow-chat'); - const { toolCall, toolResult, status } = toolItem; - const toolId = toolItem.id || toolCall?.id; - const [isExpanded, setIsExpanded] = useState(false); - const { cardRootRef, applyExpandedState } = useToolCardHeightContract({ - toolId, - toolName: toolItem.toolName, - }); - - useEffect(() => { - if (!toolId) return; - const cached = imageAnalysisExpandedStateCache.get(toolId); - setIsExpanded(cached ?? false); - }, [toolId]); - - const getStatusIcon = () => { - switch (status) { - case 'running': - case 'streaming': - return ; - case 'completed': - return ; - case 'pending': - default: - return ; - } - }; - - const getAnalysisInfo = () => { - const input = toolCall?.input; - - if (!input) { - return { - prompt: t('toolCards.imageAnalysis.parsingAnalysisInfo'), - imagePath: null, - imageName: t('toolCards.imageAnalysis.unknownImage'), - focusAreas: [], - detailLevel: 'normal' - }; - } - - const imagePath = input.image_path || input.image_id || null; - let imageName = t('toolCards.imageAnalysis.unknownImage'); - if (imagePath) { - if (imagePath.startsWith('data:')) { - imageName = t('toolCards.imageAnalysis.clipboardImage'); - } else { - const parts = imagePath.split(/[/\\]/); - imageName = parts[parts.length - 1]; - } - } - - return { - prompt: input.analysis_prompt || t('toolCards.imageAnalysis.analyzeImageContent'), - imagePath, - imageName, - focusAreas: input.focus_areas || [], - detailLevel: input.detail_level || 'normal' - }; - }; - - const getAnalysisResult = () => { - if (!toolResult?.result) return null; - - const raw = toolResult.result; - const result = - (raw?.analysis || raw?.description || raw?.content) ? raw : - (raw?.result?.analysis || raw?.result?.description || raw?.result?.content) ? raw.result : - (raw?.data?.analysis || raw?.data?.description || raw?.data?.content) ? raw.data : - null; - - if (result) { - return { - analysis: result.analysis || result.description || result.content, - modelUsed: result.model_used || result.model, - imagePath: result.image_path - }; - } - - return null; - }; - - const handleToggleExpand = useCallback(() => { - const nextExpanded = !isExpanded; - applyExpandedState(isExpanded, nextExpanded, setIsExpanded, { - onExpand, - }); - if (toolId) { - imageAnalysisExpandedStateCache.set(toolId, nextExpanded); - } - }, [applyExpandedState, isExpanded, onExpand, toolId]); - - const analysisInfo = useMemo(() => getAnalysisInfo(), [toolCall?.input]); - const analysisResult = useMemo(() => getAnalysisResult(), [toolResult?.result]); - const hasResultData = toolResult?.result !== undefined && toolResult?.result !== null; - const canExpand = status === 'completed' && !!analysisResult; - - const handleCardClick = () => { - if (canExpand) { - handleToggleExpand(); - } - }; - - const renderContent = () => { - if (status === 'completed') { - return ( - <> - {t('toolCards.imageAnalysis.imageAnalysis')}: {analysisInfo.imageName} - {hasResultData && ( - - → {t('toolCards.imageAnalysis.completed')} - - )} - - ); - } - if (status === 'running' || status === 'streaming') { - return <>{t('toolCards.imageAnalysis.analyzing')} {analysisInfo.imageName}...; - } - if (status === 'pending') { - return <>{t('toolCards.imageAnalysis.preparing')} {analysisInfo.imageName}; - } - return null; - }; - - const expandedContent = useMemo(() => { - if (!analysisResult) return null; - - return ( -
- {analysisInfo.prompt && ( -
-
{t('toolCards.imageAnalysis.analysisPrompt')}
-
{analysisInfo.prompt}
-
- )} - - {analysisInfo.focusAreas && analysisInfo.focusAreas.length > 0 && ( -
-
{t('toolCards.imageAnalysis.focusAreas')}
-
- {analysisInfo.focusAreas.map((area: string, index: number) => ( - {area} - ))} -
-
- )} - -
-
- {t('toolCards.imageAnalysis.analysisResult')} - {analysisResult.modelUsed && ( - {analysisResult.modelUsed} - )} -
-
- {analysisResult.analysis} -
-
-
- ); - }, [analysisInfo, analysisResult, t]); - - // Important: do not conditionally skip hooks (e.g. useMemo) across renders. - // Returning early here is safe because all hooks above have already run. - if ((status as string) === 'error') { - return null; - } - - const normalizedStatus = status === 'analyzing' ? 'running' : status; - - return ( -
- - } - expandedContent={expandedContent ?? undefined} - /> -
- ); -}; diff --git a/src/web-ui/src/flow_chat/tool-cards/index.ts b/src/web-ui/src/flow_chat/tool-cards/index.ts index f733ad7e..42f9cecd 100644 --- a/src/web-ui/src/flow_chat/tool-cards/index.ts +++ b/src/web-ui/src/flow_chat/tool-cards/index.ts @@ -19,7 +19,6 @@ import { CodeReviewToolCard } from './CodeReviewToolCard'; import { FileOperationToolCard } from './FileOperationToolCard'; import { DefaultToolCard } from './DefaultToolCard'; import { WebSearchCard } from './WebSearchCard'; // Temporary until WebSearchDisplay exists. -import { ImageAnalysisCard } from './ImageAnalysisCard'; import { ContextCompressionDisplay } from './ContextCompressionDisplay'; import { MCPToolDisplay } from './MCPToolDisplay'; import { SkillDisplay } from './SkillDisplay'; @@ -173,16 +172,6 @@ export const TOOL_CARD_CONFIGS: Record = { displayMode: 'compact', primaryColor: '#8b5cf6' }, - 'view_image': { - toolName: 'view_image', - displayName: 'Image Analysis', - icon: 'IMG', - requiresConfirmation: false, - resultDisplayType: 'detailed', - description: 'Analyze images and extract details', - displayMode: 'compact', - primaryColor: '#ec4899' - }, 'ContextCompression': { toolName: 'ContextCompression', displayName: 'Context Compression', @@ -351,9 +340,6 @@ export const TOOL_CARD_COMPONENTS = { 'submit_code_review': CodeReviewToolCard, - // Image analysis tools - 'view_image': ImageAnalysisCard, - // Context compression 'ContextCompression': ContextCompressionDisplay, diff --git a/src/web-ui/src/infrastructure/api/service-api/ImageContextTypes.ts b/src/web-ui/src/infrastructure/api/service-api/ImageContextTypes.ts index cf338617..ff532e24 100644 --- a/src/web-ui/src/infrastructure/api/service-api/ImageContextTypes.ts +++ b/src/web-ui/src/infrastructure/api/service-api/ImageContextTypes.ts @@ -2,7 +2,7 @@ // // Note: This is intentionally small and does not include the legacy image analysis APIs // (`analyze_images`, `send_enhanced_message`). Image handling is done by the backend -// coordinator + tools (e.g. `view_image`). +// coordinator / execution pipeline. export interface ImageContextData { id: string; diff --git a/src/web-ui/src/locales/en-US/flow-chat.json b/src/web-ui/src/locales/en-US/flow-chat.json index 916d804f..1f80ca67 100644 --- a/src/web-ui/src/locales/en-US/flow-chat.json +++ b/src/web-ui/src/locales/en-US/flow-chat.json @@ -118,7 +118,6 @@ "imageAddedSuccess": "Successfully added {{count}} images", "imageAddedSingle": "Added clipboard image: {{name}}", "imagePasteFailed": "Image paste failed", - "willSendAfterStop": "Will send after stop", "openWorkspaceFolder": "Open workspace folder", "openWorkspaceFolderFailed": "Failed to open workspace folder: {{error}}", "spaceToActivate": "Press Space to type" diff --git a/src/web-ui/src/locales/zh-CN/flow-chat.json b/src/web-ui/src/locales/zh-CN/flow-chat.json index 311bd438..0b3c576e 100644 --- a/src/web-ui/src/locales/zh-CN/flow-chat.json +++ b/src/web-ui/src/locales/zh-CN/flow-chat.json @@ -118,7 +118,6 @@ "imageAddedSuccess": "成功添加 {{count}} 张图片", "imageAddedSingle": "已添加剪贴板图片: {{name}}", "imagePasteFailed": "图片粘贴失败", - "willSendAfterStop": "将在停止后发送", "openWorkspaceFolder": "打开工作区文件夹", "openWorkspaceFolderFailed": "打开工作区文件夹失败:{{error}}", "spaceToActivate": "按空格键快速键入"