From 780a94d3f8ef702192cdc5ef99b20122435decb1 Mon Sep 17 00:00:00 2001
From: MaximEdogawa
Date: Sun, 19 Apr 2026 02:20:04 +0200
Subject: [PATCH 1/2] feat: enhance Ollama model handling with cloud support

- Updated the Ollama model types to include `OllamaModelInfo` and
  `OllamaModelKind`.
- Modified the DashboardPage to use the new model structure to better
  represent the available models.
- Implemented cloud model discovery and caching in the new `cloud` module,
  allowing for seamless integration of cloud models into the existing
  workflow.
- Added fallback logic for cloud model rate limits, ensuring a smooth user
  experience by automatically switching to the last used local model when
  necessary.
- Enhanced the AppState to track the last local model for improved error
  handling during cloud interactions.
---
 src-tauri/src/app.rs                        |   7 +
 src-tauri/src/infrastructure/http_server.rs |  42 ++++--
 src-tauri/src/modules/bot/agent.rs          |  69 ++++++++-
 src-tauri/src/modules/ollama/cloud.rs       | 147 ++++++++++++++++++++
 src-tauri/src/modules/ollama/mod.rs         |   1 +
 src-tauri/src/modules/ollama/service.rs     | 117 +++++++++++++++-
 src-tauri/src/shared/state.rs               |   5 +
 src/modules/ollama/index.ts                 |   2 +-
 src/modules/ollama/types.ts                 |   9 +-
 src/pages/DashboardPage.tsx                 |   7 +-
 10 files changed, 382 insertions(+), 24 deletions(-)
 create mode 100644 src-tauri/src/modules/ollama/cloud.rs

diff --git a/src-tauri/src/app.rs b/src-tauri/src/app.rs
index 584027c..bd55b9e 100644
--- a/src-tauri/src/app.rs
+++ b/src-tauri/src/app.rs
@@ -3,6 +3,7 @@ use crate::infrastructure::http_server;
 use crate::modules::bot::{commands, repository, service as bot_service};
 use crate::modules::cron::{repository as cron_repository, scheduler as cron_scheduler};
 use crate::modules::mcp::service as mcp_service;
+use crate::modules::ollama::cloud as ollama_cloud;
 use crate::modules::secure_store;
 use crate::shared::state::{AppState, ConnectionData};
 use std::path::PathBuf;
@@ -182,6 +183,12 @@ pub fn run() {
                 http_server::start_server(server_state).await;
             });
 
+            // Pre-warm the Ollama cloud catalog so the first dashboard refresh
+            // returns cloud entries without the user waiting on ollama.com.
+            tauri::async_runtime::spawn(async move {
+                let _ = ollama_cloud::list_cloud_models().await;
+            });
+
             Ok(())
         })
         .invoke_handler(tauri::generate_handler![
diff --git a/src-tauri/src/infrastructure/http_server.rs b/src-tauri/src/infrastructure/http_server.rs
index cb0e4ab..e1bcad4 100644
--- a/src-tauri/src/infrastructure/http_server.rs
+++ b/src-tauri/src/infrastructure/http_server.rs
@@ -87,12 +87,20 @@ pub struct PutMcpFilesystemBody {
     pub paths: Vec<String>,
 }
 
+#[derive(Serialize)]
+pub struct OllamaModelDto {
+    pub name: String,
+    /// `"local"` (locally pulled Ollama models) or `"cloud"` (Ollama Cloud —
+    /// surfaced after `ollama signin` with names like `gpt-oss:120b-cloud`).
+    pub kind: &'static str,
+}
+
 #[derive(Serialize)]
 pub struct OllamaModelsResponse {
     pub reachable: bool,
     pub active_model: Option<String>,
     pub selected_model: Option<String>,
-    pub models: Vec<String>,
+    pub models: Vec<OllamaModelDto>,
 }
 
 #[derive(Deserialize)]
@@ -372,7 +380,14 @@ async fn handle_ollama_models(State(state): State<AppState>) -> Json<OllamaModelsResponse> {
             Json(OllamaModelsResponse {
                 reachable: false,
@@ -436,17 +451,21 @@ async fn handle_ollama_model_put(
         .map(|m| m.trim().to_string())
         .filter(|m| !m.is_empty());
 
+    let mut selected_kind: Option<ollama_service::ModelKind> = None;
     if let Some(ref model) = normalized {
         let catalog = ollama_service::model_catalog(3000)
             .await
             .map_err(|e| (StatusCode::BAD_GATEWAY, Json(ErrorResponse { error: e })))?;
-        if !catalog.models.iter().any(|m| m == model) {
-            return Err((
-                StatusCode::BAD_REQUEST,
-                Json(ErrorResponse {
-                    error: format!("model '{model}' is not available in Ollama"),
-                }),
-            ));
+        match catalog.models.iter().find(|m| &m.name == model) {
+            Some(m) => selected_kind = Some(m.kind),
+            None => {
+                return Err((
+                    StatusCode::BAD_REQUEST,
+                    Json(ErrorResponse {
+                        error: format!("model '{model}' is not available in Ollama"),
+                    }),
+                ));
+            }
         }
     }
@@ -455,6 +474,11 @@ async fn handle_ollama_model_put(
         *lock = normalized.clone();
     }
 
+    if let (Some(name), Some(ollama_service::ModelKind::Local)) = (&normalized, selected_kind) {
+        let mut last = state.last_local_model.write().await;
+        *last = Some(name.clone());
+    }
+
     state
         .emit_log(
             "run",
diff --git a/src-tauri/src/modules/bot/agent.rs b/src-tauri/src/modules/bot/agent.rs
index c1ea4b8..1f1894d 100644
--- a/src-tauri/src/modules/bot/agent.rs
+++ b/src-tauri/src/modules/bot/agent.rs
@@ -87,6 +87,59 @@ fn chat_options_for_agent_step(
 /// not truncated before sending to the user.
 const TOOL_OUTPUT_CHAR_CAP: usize = 4000;
 
+/// Run a chat call; if the request goes to a cloud model and the daemon
+/// returns a rate-limit error, downgrade to the user's last local model and
+/// retry once. The downgraded model is also written back to
+/// `preferred_ollama_model` so the rest of the turn (and future turns) stay
+/// local until the user picks again.
+async fn chat_with_cloud_fallback(
+    state: &AppState,
+    model: &mut String,
+    messages: &serde_json::Value,
+    tools: &serde_json::Value,
+    options: &ChatOptions,
+) -> Result<ollama::ChatResult, String> {
+    match ollama::chat_with_tools(model, messages, tools, options).await {
+        Ok(r) => Ok(r),
+        Err(err) => {
+            if ollama::classify_model(model) != ollama::ModelKind::Cloud
+                || !ollama::is_rate_limit_error(&err)
+            {
+                return Err(err);
+            }
+            let last_local = state.last_local_model.read().await.clone();
+            let catalog = ollama::model_catalog(3000).await.ok();
+            let fallback = catalog
+                .as_ref()
+                .and_then(|c| ollama::pick_local_fallback(c, None, last_local.as_deref()));
+            let Some(local) = fallback else {
+                state
+                    .emit_log(
+                        "ollama",
+                        &format!("cloud limit on '{model}', no local model available"),
+                    )
+                    .await;
+                return Err(err);
+            };
+            if local == *model {
+                return Err(err);
+            }
+            state
+                .emit_log(
+                    "ollama",
+                    &format!("cloud limit on '{model}' — switching to local '{local}'"),
+                )
+                .await;
+            {
+                let mut pref = state.preferred_ollama_model.write().await;
+                *pref = Some(local.clone());
+            }
+            *model = local;
+            ollama::chat_with_tools(model, messages, tools, options).await
+        }
+    }
+}
+
 fn tool_name_is_fetch(name: &str) -> bool {
     name.eq_ignore_ascii_case("fetch")
         || name
@@ -645,7 +698,7 @@ async fn run_model_turn(
     think: bool,
     skills_slug_filter: Option<&[String]>,
 ) -> Result<String, String> {
-    let model = match state.preferred_ollama_model.read().await.clone() {
+    let mut model = match state.preferred_ollama_model.read().await.clone() {
         Some(m) => m,
         None => ollama::active_model().await?,
     };
@@ -745,7 +798,9 @@ async fn run_model_turn(
         push_ephemeral_post_tool_reminder(&mut messages);
     }
 
-    let result = ollama::chat_with_tools(&model, &messages, effective_tools, &chat_opts).await;
+    let result =
+        chat_with_cloud_fallback(state, &mut model, &messages, effective_tools, &chat_opts)
+            .await;
     if inject_post_tool {
         pop_ephemeral_post_tool_reminder(&mut messages);
     }
@@ -991,8 +1046,14 @@ async fn run_model_turn(
         ..ChatOptions::default()
     };
     let t0 = Instant::now();
-    let result =
-        ollama::chat_with_tools(&model, &summary_messages, &json!([]), &summary_opts).await?;
+    let result = chat_with_cloud_fallback(
+        state,
+        &mut model,
+        &summary_messages,
+        &json!([]),
+        &summary_opts,
+    )
+    .await?;
     let tokens = fmt_tokens(result.prompt_tokens, result.eval_tokens);
     state
         .emit_log(
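Since `classify_model` and `is_rate_limit_error` are both public, the trigger condition guarding the downgrade above can be pinned down in a plain unit test. A minimal sketch that could sit in service.rs (the model names and error strings are invented):

    #[cfg(test)]
    mod cloud_fallback_tests {
        use super::{classify_model, is_rate_limit_error, ModelKind};

        #[test]
        fn downgrades_only_on_cloud_rate_limits() {
            // Mirrors the guard at the top of chat_with_cloud_fallback's Err arm.
            let should_downgrade = |model: &str, err: &str| {
                classify_model(model) == ModelKind::Cloud && is_rate_limit_error(err)
            };
            // Cloud model hitting a 429 body: retry with a local model.
            assert!(should_downgrade("gpt-oss:120b-cloud", "HTTP 429: too many requests"));
            // Local-model errors and non-limit cloud errors pass through unchanged.
            assert!(!should_downgrade("llama3.1:8b", "HTTP 429: too many requests"));
            assert!(!should_downgrade("gpt-oss:120b-cloud", "connection refused"));
        }
    }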
diff --git a/src-tauri/src/modules/ollama/cloud.rs b/src-tauri/src/modules/ollama/cloud.rs
new file mode 100644
index 0000000..fe0f7ea
--- /dev/null
+++ b/src-tauri/src/modules/ollama/cloud.rs
@@ -0,0 +1,147 @@
+//! Cloud model discovery from ollama.com.
+//!
+//! The local Ollama daemon's `/api/tags` only lists models that have been
+//! pulled to disk. Cloud models (accessible after `ollama signin`) live in
+//! the upstream catalog at `https://ollama.com/library/` and use
+//! `:cloud` or `-cloud` tags. This module enumerates them by scraping
+//! the cloud category page and each model's detail page, then caches the
+//! result so the dashboard picker can show them without re-fetching every
+//! few seconds.
+
+use regex::Regex;
+use std::sync::OnceLock;
+use std::time::{Duration, Instant};
+use tokio::sync::Mutex;
+
+const CLOUD_SEARCH_URL: &str = "https://ollama.com/search?c=cloud";
+const CLOUD_LIBRARY_PREFIX: &str = "https://ollama.com/library/";
+/// A stale cloud catalog is fine — Ollama publishes new cloud tags rarely. One
+/// hour keeps the dashboard responsive and avoids hammering ollama.com.
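The two regexes carry the whole scrape, so their expected matches are worth pinning in a test at the bottom of cloud.rs. A sketch against fabricated HTML fragments (not real ollama.com markup):

    #[cfg(test)]
    mod scrape_tests {
        use super::{run_re, slug_re};

        #[test]
        fn extracts_slugs_and_cloud_tags() {
            // Slugs come from anchor hrefs on the search page.
            let search_html =
                r#"<a href="/library/glm-4.6">GLM</a> <a href="/library/qwen3-coder">Qwen</a>"#;
            let slugs: Vec<&str> = slug_re()
                .captures_iter(search_html)
                .map(|c| c.get(1).unwrap().as_str())
                .collect();
            assert_eq!(slugs, ["glm-4.6", "qwen3-coder"]);

            // Full model names come from "ollama run/pull" snippets on detail pages.
            let detail_html = "<code>ollama run qwen3-coder:480b-cloud</code>";
            let caps = run_re().captures(detail_html).unwrap();
            assert_eq!(&caps[1], "qwen3-coder:480b-cloud");
        }
    }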
+const CACHE_TTL: Duration = Duration::from_secs(60 * 60);
+const SEARCH_TIMEOUT: Duration = Duration::from_secs(5);
+const DETAIL_TIMEOUT: Duration = Duration::from_secs(4);
+
+struct CacheEntry {
+    fetched_at: Instant,
+    models: Vec<String>,
+}
+
+static CACHE: OnceLock<Mutex<Option<CacheEntry>>> = OnceLock::new();
+static SLUG_RE: OnceLock<Regex> = OnceLock::new();
+static RUN_RE: OnceLock<Regex> = OnceLock::new();
+
+fn cache() -> &'static Mutex<Option<CacheEntry>> {
+    CACHE.get_or_init(|| Mutex::new(None))
+}
+
+fn slug_re() -> &'static Regex {
+    SLUG_RE.get_or_init(|| Regex::new(r#"href="/library/([a-z0-9._-]+)""#).unwrap())
+}
+
+fn run_re() -> &'static Regex {
+    RUN_RE.get_or_init(|| {
+        Regex::new(r#"ollama\s+(?:run|pull)\s+([a-z0-9._-]+(?::[a-z0-9._-]+)?)"#).unwrap()
+    })
+}
+
+/// Returns cloud-tagged model names (e.g. `glm-4.6:cloud`,
+/// `qwen3-coder:480b-cloud`). Falls back to a stale cache, then to an empty
+/// list, when the upstream catalog is unreachable.
+pub async fn list_cloud_models() -> Vec<String> {
+    {
+        let guard = cache().lock().await;
+        if let Some(ref entry) = *guard {
+            if entry.fetched_at.elapsed() < CACHE_TTL {
+                return entry.models.clone();
+            }
+        }
+    }
+    match fetch_cloud_models().await {
+        Ok(models) => {
+            let mut guard = cache().lock().await;
+            *guard = Some(CacheEntry {
+                fetched_at: Instant::now(),
+                models: models.clone(),
+            });
+            models
+        }
+        Err(e) => {
+            log::warn!("ollama cloud catalog fetch failed: {e}");
+            cache()
+                .lock()
+                .await
+                .as_ref()
+                .map(|c| c.models.clone())
+                .unwrap_or_default()
+        }
+    }
+}
+
+async fn fetch_cloud_models() -> Result<Vec<String>, String> {
+    let client = reqwest::Client::builder()
+        .timeout(DETAIL_TIMEOUT)
+        .user_agent("pengine/1.0")
+        .build()
+        .map_err(|e| e.to_string())?;
+    let body = client
+        .get(CLOUD_SEARCH_URL)
+        .timeout(SEARCH_TIMEOUT)
+        .send()
+        .await
+        .map_err(|e| e.to_string())?
+        .text()
+        .await
+        .map_err(|e| e.to_string())?;
+    let mut slugs: Vec<String> = slug_re()
+        .captures_iter(&body)
+        .map(|c| c[1].to_string())
+        .collect();
+    slugs.sort();
+    slugs.dedup();
+    if slugs.is_empty() {
+        return Err("no cloud slugs found in /search?c=cloud".to_string());
+    }
+
+    let mut tasks = Vec::with_capacity(slugs.len());
+    for slug in slugs {
+        let client = client.clone();
+        tasks.push(tokio::spawn(async move {
+            cloud_models_for_slug(&client, &slug).await
+        }));
+    }
+    let mut out: Vec<String> = Vec::new();
+    for t in tasks {
+        if let Ok(Ok(names)) = t.await {
+            out.extend(names);
+        }
+    }
+    out.sort();
+    out.dedup();
+    Ok(out)
+}
+
+async fn cloud_models_for_slug(
+    client: &reqwest::Client,
+    slug: &str,
+) -> Result<Vec<String>, String> {
+    let url = format!("{CLOUD_LIBRARY_PREFIX}{slug}");
+    let body = client
+        .get(&url)
+        .send()
+        .await
+        .map_err(|e| e.to_string())?
+        .text()
+        .await
+        .map_err(|e| e.to_string())?;
+    let mut out = Vec::new();
+    for cap in run_re().captures_iter(&body) {
+        let name = &cap[1];
+        let tag = name.split_once(':').map(|(_, t)| t).unwrap_or("");
+        if tag == "cloud" || tag.ends_with("-cloud") {
+            out.push(name.to_string());
+        }
+    }
+    out.sort();
+    out.dedup();
+    Ok(out)
+}
diff --git a/src-tauri/src/modules/ollama/mod.rs b/src-tauri/src/modules/ollama/mod.rs
index 48dbcb1..0276eba 100644
--- a/src-tauri/src/modules/ollama/mod.rs
+++ b/src-tauri/src/modules/ollama/mod.rs
@@ -1,3 +1,4 @@
+pub mod cloud;
 pub mod constants;
 pub mod keywords;
 pub mod service;
diff --git a/src-tauri/src/modules/ollama/service.rs b/src-tauri/src/modules/ollama/service.rs
index 09b782e..b7e62f4 100644
--- a/src-tauri/src/modules/ollama/service.rs
+++ b/src-tauri/src/modules/ollama/service.rs
@@ -1,3 +1,4 @@
+use crate::modules::ollama::cloud;
 use crate::modules::ollama::constants::{OLLAMA_CHAT_URL, OLLAMA_PS_URL, OLLAMA_TAGS_URL};
 use crate::shared::text::normalize_assistant_message_content;
 use std::sync::OnceLock;
@@ -8,10 +9,43 @@ fn http_client() -> &'static reqwest::Client {
     HTTP.get_or_init(reqwest::Client::new)
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ModelKind {
+    Local,
+    Cloud,
+}
+
+impl ModelKind {
+    pub fn as_str(self) -> &'static str {
+        match self {
+            ModelKind::Local => "local",
+            ModelKind::Cloud => "cloud",
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct ModelInfo {
+    pub name: String,
+    pub kind: ModelKind,
+}
+
 #[derive(Debug, Clone)]
 pub struct ModelCatalog {
     pub active: Option<String>,
-    pub models: Vec<String>,
+    pub models: Vec<ModelInfo>,
 }
 
+/// Cloud models are surfaced by the local Ollama daemon after `ollama signin`
+/// and are tagged with `-cloud` (e.g. `gpt-oss:120b-cloud`) or the bare tag
+/// `cloud`. The tag is the part after the first `:` (defaulting to `latest`).
+pub fn classify_model(name: &str) -> ModelKind {
+    let tag = name.split_once(':').map(|(_, t)| t).unwrap_or("");
+    if tag == "cloud" || tag.ends_with("-cloud") {
+        ModelKind::Cloud
+    } else {
+        ModelKind::Local
+    }
+}
+
 /// Returns active model and the full pulled model list (`/api/tags`).
@@ -46,7 +80,7 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
         Err(e) => log::warn!("ollama {}: request error: {e}", OLLAMA_PS_URL),
     }
 
-    let mut models: Vec<String> = Vec::new();
+    let mut models: Vec<ModelInfo> = Vec::new();
     match client.get(OLLAMA_TAGS_URL).timeout(timeout).send().await {
         Ok(resp) => {
             if !resp.status().is_success() {
@@ -62,7 +96,12 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
                         .as_array()
                         .map(|arr| {
                             arr.iter()
-                                .filter_map(|m| m["name"].as_str().map(|s| s.to_string()))
+                                .filter_map(|m| {
+                                    m["name"].as_str().map(|s| ModelInfo {
+                                        name: s.to_string(),
+                                        kind: classify_model(s),
+                                    })
+                                })
                                 .collect()
                         })
                         .unwrap_or_default();
@@ -77,18 +116,72 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
     }
 
     if let Some(ref a) = active {
-        if !models.iter().any(|m| m == a) {
-            models.insert(0, a.clone());
+        if !models.iter().any(|m| &m.name == a) {
+            models.insert(
+                0,
+                ModelInfo {
+                    name: a.clone(),
+                    kind: classify_model(a),
+                },
+            );
         }
     }
 
+    // Cloud models are proxied through the local daemon, so if local Ollama
+    // is unreachable they aren't usable either — keep the original error.
     if active.is_none() && models.is_empty() {
         return Err("ollama unreachable: no active model and no pulled models".to_string());
     }
 
+    for cloud_name in cloud::list_cloud_models().await {
+        if !models.iter().any(|m| m.name == cloud_name) {
+            models.push(ModelInfo {
+                name: cloud_name,
+                kind: ModelKind::Cloud,
+            });
+        }
+    }
+
     Ok(ModelCatalog { active, models })
 }
 
+/// Best-guess local fallback model when a cloud rate-limit forces a downgrade.
+/// Prefers `preferred` if local, then `last_local`, then the active model if
+/// local, then the first local entry in the catalog.
+pub fn pick_local_fallback(
+    catalog: &ModelCatalog,
+    preferred: Option<&str>,
+    last_local: Option<&str>,
+) -> Option<String> {
+    let local_named = |name: &str| {
+        catalog
+            .models
+            .iter()
+            .find(|m| m.name == name && m.kind == ModelKind::Local)
+            .map(|m| m.name.clone())
+    };
+    if let Some(p) = preferred {
+        if let Some(m) = local_named(p) {
+            return Some(m);
+        }
+    }
+    if let Some(p) = last_local {
+        if let Some(m) = local_named(p) {
+            return Some(m);
+        }
+    }
+    if let Some(active) = catalog.active.as_deref() {
+        if let Some(m) = local_named(active) {
+            return Some(m);
+        }
+    }
+    catalog
+        .models
+        .iter()
+        .find(|m| m.kind == ModelKind::Local)
+        .map(|m| m.name.clone())
+}
+
 /// Returns the currently loaded model (from `/api/ps`), falling back to the
 /// first pulled model (from `/api/tags`) if nothing is loaded yet.
 pub async fn active_model() -> Result<String, String> {
@@ -99,10 +192,22 @@ pub async fn active_model() -> Result<String, String> {
     catalog
         .models
         .first()
-        .cloned()
+        .map(|m| m.name.clone())
         .ok_or_else(|| "no models pulled in ollama".to_string())
 }
 
+/// Detect rate-limit / quota errors returned for cloud models. The local
+/// daemon proxies the upstream HTTP status (typically 429) and may also embed
+/// a textual hint in the response body.
+pub fn is_rate_limit_error(err: &str) -> bool {
+    let lower = err.to_ascii_lowercase();
+    lower.contains("http 429")
+        || lower.contains("rate limit")
+        || lower.contains("rate-limit")
+        || lower.contains("quota")
+        || lower.contains("too many requests")
+}
+
 /// Outcome of a single chat call so the caller knows whether tools were included in the request.
 pub struct ChatResult {
     pub message: serde_json::Value,
diff --git a/src-tauri/src/shared/state.rs b/src-tauri/src/shared/state.rs
index 657ab9d..a092865 100644
--- a/src-tauri/src/shared/state.rs
+++ b/src-tauri/src/shared/state.rs
@@ -82,6 +82,10 @@ pub struct AppState {
     /// Ensures only one MCP registry rebuild (stdio connects) runs at a time.
     pub mcp_rebuild_mutex: Arc<Mutex<()>>,
     pub preferred_ollama_model: Arc<RwLock<Option<String>>>,
+    /// Last user-selected **local** Ollama model. Used as the automatic
+    /// downgrade target when a cloud model returns a rate-limit error so the
+    /// agent can keep replying without the user having to repick.
+    pub last_local_model: Arc<RwLock<Option<String>>>,
     pub cached_filesystem_paths: Arc<RwLock<Vec<String>>>,
     pub tool_engine_mutex: Arc<Mutex<()>>,
     /// Active memory-session recording (toggled by keyword commands; see `bot::agent`).
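`classify_model` and `pick_local_fallback` are pure functions, which makes the documented precedence chain easy to verify. A sketch with a hand-built catalog (the model names are examples only):

    #[cfg(test)]
    mod fallback_pick_tests {
        use super::{classify_model, pick_local_fallback, ModelCatalog, ModelInfo};

        fn info(name: &str) -> ModelInfo {
            ModelInfo { name: name.to_string(), kind: classify_model(name) }
        }

        #[test]
        fn respects_the_precedence_chain() {
            let catalog = ModelCatalog {
                active: Some("qwen2.5:7b".to_string()),
                models: vec![
                    info("gpt-oss:120b-cloud"),
                    info("llama3.1:8b"),
                    info("qwen2.5:7b"),
                ],
            };
            // A remembered last-local model beats the active one.
            let pick = pick_local_fallback(&catalog, None, Some("llama3.1:8b"));
            assert_eq!(pick.as_deref(), Some("llama3.1:8b"));
            // With nothing remembered, the active model wins if it is local.
            let pick = pick_local_fallback(&catalog, None, None);
            assert_eq!(pick.as_deref(), Some("qwen2.5:7b"));
        }
    }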
@@ -136,6 +140,7 @@ impl AppState {
             mcp_config_mutex: Arc::new(Mutex::new(())),
             mcp_rebuild_mutex: Arc::new(Mutex::new(())),
             preferred_ollama_model: Arc::new(RwLock::new(None)),
+            last_local_model: Arc::new(RwLock::new(None)),
             cached_filesystem_paths: Arc::new(RwLock::new(Vec::new())),
             tool_engine_mutex: Arc::new(Mutex::new(())),
             memory_session: Arc::new(RwLock::new(None)),
diff --git a/src/modules/ollama/index.ts b/src/modules/ollama/index.ts
index 1af34a0..bc39531 100644
--- a/src/modules/ollama/index.ts
+++ b/src/modules/ollama/index.ts
@@ -1,2 +1,2 @@
 export { fetchOllamaModel, fetchOllamaModels, setPreferredOllamaModel } from "./api";
-export type { OllamaModelsResponse, OllamaProbe } from "./types";
+export type { OllamaModelInfo, OllamaModelKind, OllamaModelsResponse, OllamaProbe } from "./types";
diff --git a/src/modules/ollama/types.ts b/src/modules/ollama/types.ts
index ccf2986..63302b5 100644
--- a/src/modules/ollama/types.ts
+++ b/src/modules/ollama/types.ts
@@ -1,8 +1,15 @@
 export type OllamaProbe = { reachable: boolean; model: string | null };
 
+export type OllamaModelKind = "local" | "cloud";
+
+export type OllamaModelInfo = {
+  name: string;
+  kind: OllamaModelKind;
+};
+
 export type OllamaModelsResponse = {
   reachable: boolean;
   active_model: string | null;
   selected_model: string | null;
-  models: string[];
+  models: OllamaModelInfo[];
 };
diff --git a/src/pages/DashboardPage.tsx b/src/pages/DashboardPage.tsx
index ba9ffcc..3b0b95e 100644
--- a/src/pages/DashboardPage.tsx
+++ b/src/pages/DashboardPage.tsx
@@ -5,6 +5,7 @@ import { useAppSessionStore } from "../modules/bot/store/appSessionStore";
 import { CronPanel } from "../modules/cron";
 import { McpToolsPanel } from "../modules/mcp/components/McpToolsPanel";
 import { fetchOllamaModels, setPreferredOllamaModel } from "../modules/ollama/api";
+import type { OllamaModelInfo } from "../modules/ollama/types";
 import { SkillsPanel } from "../modules/skills";
 import { ToolEnginePanel } from "../modules/toolengine/components/ToolEnginePanel";
 import { UpdateIndicator } from "../modules/updater";
@@ -21,7 +22,7 @@ export function DashboardPage() {
   const isDeviceConnected = useAppSessionStore((state) => state.isDeviceConnected);
   const disconnectDevice = useAppSessionStore((state) => state.disconnectDevice);
   const botUsername = useAppSessionStore((state) => state.botUsername);
-  const [availableModels, setAvailableModels] = useState<string[]>([]);
+  const [availableModels, setAvailableModels] = useState<OllamaModelInfo[]>([]);
   const [selectedModel, setSelectedModel] = useState<string | null>(null);
   const [activeModel, setActiveModel] = useState<string | null>(null);
   const [savingModel, setSavingModel] = useState(false);
@@ -182,8 +183,8 @@ export function DashboardPage() {
             <option value="">
               {activeModel ? `Active (${activeModel})` : "Active model"}
             </option>
             {availableModels.map((model) => (
-              <option key={model} value={model}>
-                {model}
+              <option key={model.name} value={model.name}>
+                {model.kind === "cloud" ? `${model.name} (cloud)` : model.name}
               </option>
             ))}

From c8126dff99ea570f06ba46e37e2bc3a531a667e0 Mon Sep 17 00:00:00 2001
From: MaximEdogawa
Date: Sun, 19 Apr 2026 03:01:18 +0200
Subject: [PATCH 2/2] update: address review feedback

---
 src-tauri/src/modules/bot/agent.rs      |  6 ++---
 src-tauri/src/modules/ollama/cloud.rs   | 32 +++++++++++++++----------
 src-tauri/src/modules/ollama/service.rs | 32 +++++++++++++++++--------
 3 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/src-tauri/src/modules/bot/agent.rs b/src-tauri/src/modules/bot/agent.rs
index 1f1894d..9a41060 100644
--- a/src-tauri/src/modules/bot/agent.rs
+++ b/src-tauri/src/modules/bot/agent.rs
@@ -103,7 +103,7 @@ async fn chat_with_cloud_fallback(
         Ok(r) => Ok(r),
         Err(err) => {
             if ollama::classify_model(model) != ollama::ModelKind::Cloud
-                || !ollama::is_rate_limit_error(&err)
+                || !ollama::is_cloud_unavailable_error(&err)
             {
                 return Err(err);
             }
@@ -116,7 +116,7 @@ async fn chat_with_cloud_fallback(
                 state
                     .emit_log(
                         "ollama",
-                        &format!("cloud limit on '{model}', no local model available"),
+                        &format!("cloud '{model}' unavailable ({err}); no local fallback"),
                     )
                     .await;
                 return Err(err);
@@ -127,7 +127,7 @@ async fn chat_with_cloud_fallback(
             state
                 .emit_log(
                     "ollama",
-                    &format!("cloud limit on '{model}' — switching to local '{local}'"),
+                    &format!("cloud '{model}' unavailable, switching to local '{local}': {err}"),
                 )
                 .await;
             {
diff --git a/src-tauri/src/modules/ollama/cloud.rs b/src-tauri/src/modules/ollama/cloud.rs
index fe0f7ea..958365f 100644
--- a/src-tauri/src/modules/ollama/cloud.rs
+++ b/src-tauri/src/modules/ollama/cloud.rs
@@ -8,6 +8,7 @@
 //! result so the dashboard picker can show them without re-fetching every
 //! few seconds.
 
+use futures::stream::{self, StreamExt};
 use regex::Regex;
 use std::sync::OnceLock;
 use std::time::{Duration, Instant};
@@ -20,6 +21,8 @@ const CLOUD_LIBRARY_PREFIX: &str = "https://ollama.com/library/";
 const CACHE_TTL: Duration = Duration::from_secs(60 * 60);
 const SEARCH_TIMEOUT: Duration = Duration::from_secs(5);
 const DETAIL_TIMEOUT: Duration = Duration::from_secs(4);
+/// Cap parallel detail-page fetches to avoid bursting ollama.com.
+const CLOUD_DETAIL_CONCURRENCY: usize = 8;
 
 struct CacheEntry {
     fetched_at: Instant,
@@ -102,21 +105,24 @@ async fn fetch_cloud_models() -> Result<Vec<String>, String> {
         return Err("no cloud slugs found in /search?c=cloud".to_string());
     }
 
-    let mut tasks = Vec::with_capacity(slugs.len());
-    for slug in slugs {
-        let client = client.clone();
-        tasks.push(tokio::spawn(async move {
-            cloud_models_for_slug(&client, &slug).await
-        }));
-    }
-    let mut out: Vec<String> = Vec::new();
-    for t in tasks {
-        if let Ok(Ok(names)) = t.await {
-            out.extend(names);
-        }
-    }
+    let results: Vec<Result<Vec<String>, String>> = stream::iter(slugs)
+        .map(|slug| {
+            let client = client.clone();
+            async move { cloud_models_for_slug(&client, &slug).await }
+        })
+        .buffer_unordered(CLOUD_DETAIL_CONCURRENCY)
+        .collect()
+        .await;
+    let mut out: Vec<String> = results
+        .into_iter()
+        .filter_map(Result::ok)
+        .flatten()
+        .collect();
     out.sort();
     out.dedup();
+    if out.is_empty() {
+        return Err("cloud catalog detail scrape returned no model names".to_string());
+    }
     Ok(out)
 }
diff --git a/src-tauri/src/modules/ollama/service.rs b/src-tauri/src/modules/ollama/service.rs
index b7e62f4..3306bf5 100644
--- a/src-tauri/src/modules/ollama/service.rs
+++ b/src-tauri/src/modules/ollama/service.rs
@@ -54,6 +54,7 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
     let timeout = std::time::Duration::from_millis(timeout_ms);
 
     let mut active: Option<String> = None;
+    let mut daemon_reachable = false;
     match client.get(OLLAMA_PS_URL).timeout(timeout).send().await {
         Ok(resp) => {
             if !resp.status().is_success() {
@@ -63,6 +64,7 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
                     resp.status()
                 );
             } else {
+                daemon_reachable = true;
                 match resp.json::<serde_json::Value>().await {
                     Ok(body) => {
                         active = body["models"]
@@ -90,6 +92,7 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
                     resp.status()
                 );
             } else {
+                daemon_reachable = true;
                 match resp.json::<serde_json::Value>().await {
                     Ok(body) => {
                         models = body["models"]
@@ -127,12 +130,6 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
         }
     }
 
-    // Cloud models are proxied through the local daemon, so if local Ollama
-    // is unreachable they aren't usable either — keep the original error.
-    if active.is_none() && models.is_empty() {
-        return Err("ollama unreachable: no active model and no pulled models".to_string());
-    }
-
     for cloud_name in cloud::list_cloud_models().await {
         if !models.iter().any(|m| m.name == cloud_name) {
             models.push(ModelInfo {
@@ -142,6 +139,10 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
         }
     }
 
+    if !daemon_reachable && models.is_empty() {
+        return Err("ollama unreachable: no active model and no pulled models".to_string());
+    }
+
     Ok(ModelCatalog { active, models })
 }
 
@@ -196,16 +197,27 @@ pub async fn active_model() -> Result<String, String> {
         .ok_or_else(|| "no models pulled in ollama".to_string())
 }
 
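On the concurrency swap: `buffer_unordered(n)` keeps at most `n` futures in flight and yields results in completion order, which is why the later sort/dedup still matters. The same shape in isolation, as a toy example rather than project code:

    use futures::stream::{self, StreamExt};

    #[tokio::main]
    async fn main() {
        // At most 8 "fetches" run concurrently; outputs arrive as they finish.
        let results: Vec<u32> = stream::iter(0u32..100)
            .map(|i| async move { i * 2 })
            .buffer_unordered(8)
            .collect()
            .await;
        assert_eq!(results.len(), 100);
    }

Compared with the earlier `tokio::spawn` loop, this bounds the burst against ollama.com without needing a semaphore or manual task bookkeeping.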
-/// Detect rate-limit / quota errors returned for cloud models. The local
-/// daemon proxies the upstream HTTP status (typically 429) and may also embed
-/// a textual hint in the response body.
-pub fn is_rate_limit_error(err: &str) -> bool {
+/// Detect cloud-side failures that warrant downgrading to a local model.
+/// Covers explicit rate limits (429 / "rate limit" / "quota"), upstream
+/// outages proxied as 5xx with the cloud's `ref: <id>` envelope, and the
+/// "sign in / unauthorized" responses returned when the user hasn't run
+/// `ollama signin`. Any of these mean the picked cloud model can't serve
+/// this turn — the local fallback keeps the agent responsive.
+pub fn is_cloud_unavailable_error(err: &str) -> bool {
     let lower = err.to_ascii_lowercase();
     lower.contains("http 429")
         || lower.contains("rate limit")
         || lower.contains("rate-limit")
         || lower.contains("quota")
         || lower.contains("too many requests")
+        || lower.contains("http 500")
+        || lower.contains("http 502")
+        || lower.contains("http 503")
+        || lower.contains("http 504")
+        || lower.contains("internal server error")
+        || lower.contains("unauthorized")
+        || lower.contains("sign in")
+        || lower.contains("not signed in")
 }
 
 /// Outcome of a single chat call so the caller knows whether tools were included in the request.
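The widened matcher is substring-based, so its contract is easiest to read off from examples. A sketch (the error strings are invented stand-ins for what the daemon proxies back):

    #[cfg(test)]
    mod cloud_error_tests {
        use super::is_cloud_unavailable_error;

        #[test]
        fn matches_limits_outages_and_auth_failures() {
            for err in [
                "HTTP 429: too many requests",
                "rate limit exceeded, ref: abc123",
                "HTTP 503 Service Unavailable",
                "upstream error: please sign in with `ollama signin`",
            ] {
                assert!(is_cloud_unavailable_error(err), "should downgrade on: {err}");
            }
            // Plain transport failures do not trigger the local fallback.
            assert!(!is_cloud_unavailable_error("connection refused (os error 61)"));
        }
    }

Keeping the matcher string-based is a pragmatic choice while the daemon's error surface is unstructured; if it ever returns typed errors, this substring list can shrink.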