From 780a94d3f8ef702192cdc5ef99b20122435decb1 Mon Sep 17 00:00:00 2001
From: MaximEdogawa
Date: Sun, 19 Apr 2026 02:20:04 +0200
Subject: [PATCH 1/2] feat: enhance Ollama model handling with cloud support

- Updated the Ollama model types to include `OllamaModelInfo` and
  `OllamaModelKind`.
- Modified the DashboardPage to use the new model structure to better
  represent the available models.
- Implemented cloud model discovery and caching in the new `cloud` module,
  allowing for seamless integration of cloud models into the existing
  workflow.
- Added fallback logic for cloud model rate limits, ensuring a smooth user
  experience by automatically switching to the last used local model when
  necessary.
- Enhanced the AppState to track the last local model for improved error
  handling during cloud interactions.
---
 src-tauri/src/app.rs                        |   7 +
 src-tauri/src/infrastructure/http_server.rs |  42 ++++--
 src-tauri/src/modules/bot/agent.rs          |  69 ++++++++-
 src-tauri/src/modules/ollama/cloud.rs       | 147 ++++++++++++++++++++
 src-tauri/src/modules/ollama/mod.rs         |   1 +
 src-tauri/src/modules/ollama/service.rs     | 117 +++++++++++++++-
 src-tauri/src/shared/state.rs               |   5 +
 src/modules/ollama/index.ts                 |   2 +-
 src/modules/ollama/types.ts                 |   9 +-
 src/pages/DashboardPage.tsx                 |   7 +-
 10 files changed, 382 insertions(+), 24 deletions(-)
 create mode 100644 src-tauri/src/modules/ollama/cloud.rs

diff --git a/src-tauri/src/app.rs b/src-tauri/src/app.rs
index 584027c..bd55b9e 100644
--- a/src-tauri/src/app.rs
+++ b/src-tauri/src/app.rs
@@ -3,6 +3,7 @@ use crate::infrastructure::http_server;
 use crate::modules::bot::{commands, repository, service as bot_service};
 use crate::modules::cron::{repository as cron_repository, scheduler as cron_scheduler};
 use crate::modules::mcp::service as mcp_service;
+use crate::modules::ollama::cloud as ollama_cloud;
 use crate::modules::secure_store;
 use crate::shared::state::{AppState, ConnectionData};
 use std::path::PathBuf;
@@ -182,6 +183,12 @@ pub fn run() {
                 http_server::start_server(server_state).await;
             });
 
+            // Pre-warm the Ollama cloud catalog so the first dashboard refresh
+            // returns cloud entries without the user waiting on ollama.com.
+            tauri::async_runtime::spawn(async move {
+                let _ = ollama_cloud::list_cloud_models().await;
+            });
+
             Ok(())
         })
         .invoke_handler(tauri::generate_handler![
diff --git a/src-tauri/src/infrastructure/http_server.rs b/src-tauri/src/infrastructure/http_server.rs
index cb0e4ab..e1bcad4 100644
--- a/src-tauri/src/infrastructure/http_server.rs
+++ b/src-tauri/src/infrastructure/http_server.rs
@@ -87,12 +87,20 @@ pub struct PutMcpFilesystemBody {
     pub paths: Vec<String>,
 }
 
+#[derive(Serialize)]
+pub struct OllamaModelDto {
+    pub name: String,
+    /// `"local"` (locally pulled Ollama models) or `"cloud"` (Ollama Cloud —
+    /// surfaced after `ollama signin` with names like `gpt-oss:120b-cloud`).
+    pub kind: &'static str,
+}
+
 #[derive(Serialize)]
 pub struct OllamaModelsResponse {
     pub reachable: bool,
     pub active_model: Option<String>,
     pub selected_model: Option<String>,
-    pub models: Vec<String>,
+    pub models: Vec<OllamaModelDto>,
 }
 
 #[derive(Deserialize)]
@@ -372,7 +380,14 @@ async fn handle_ollama_models(State(state): State<AppState>) -> Json<OllamaModelsResponse> {
             Json(OllamaModelsResponse {
                 reachable: false,
@@ -436,17 +451,21 @@ async fn handle_ollama_model_put(
         .map(|m| m.trim().to_string())
         .filter(|m| !m.is_empty());
 
+    let mut selected_kind: Option<ollama_service::ModelKind> = None;
     if let Some(ref model) = normalized {
         let catalog = ollama_service::model_catalog(3000)
             .await
             .map_err(|e| (StatusCode::BAD_GATEWAY, Json(ErrorResponse { error: e })))?;
-        if !catalog.models.iter().any(|m| m == model) {
-            return Err((
-                StatusCode::BAD_REQUEST,
-                Json(ErrorResponse {
-                    error: format!("model '{model}' is not available in Ollama"),
-                }),
-            ));
+        match catalog.models.iter().find(|m| &m.name == model) {
+            Some(m) => selected_kind = Some(m.kind),
+            None => {
+                return Err((
+                    StatusCode::BAD_REQUEST,
+                    Json(ErrorResponse {
+                        error: format!("model '{model}' is not available in Ollama"),
+                    }),
+                ));
+            }
         }
     }
@@ -455,6 +474,11 @@ async fn handle_ollama_model_put(
         *lock = normalized.clone();
     }
 
+    if let (Some(name), Some(ollama_service::ModelKind::Local)) = (&normalized, selected_kind) {
+        let mut last = state.last_local_model.write().await;
+        *last = Some(name.clone());
+    }
+
     state
         .emit_log(
             "run",
diff --git a/src-tauri/src/modules/bot/agent.rs b/src-tauri/src/modules/bot/agent.rs
index c1ea4b8..1f1894d 100644
--- a/src-tauri/src/modules/bot/agent.rs
+++ b/src-tauri/src/modules/bot/agent.rs
@@ -87,6 +87,59 @@ fn chat_options_for_agent_step(
 /// not truncated before sending to the user.
 const TOOL_OUTPUT_CHAR_CAP: usize = 4000;
 
+/// Run a chat call; if the request goes to a cloud model and the daemon
+/// returns a rate-limit error, downgrade to the user's last local model and
+/// retry once. The downgraded model is also written back to
+/// `preferred_ollama_model` so the rest of the turn (and future turns) stay
+/// local until the user picks again.
+async fn chat_with_cloud_fallback(
+    state: &AppState,
+    model: &mut String,
+    messages: &serde_json::Value,
+    tools: &serde_json::Value,
+    options: &ChatOptions,
+) -> Result<ollama::ChatResult, String> {
+    match ollama::chat_with_tools(model, messages, tools, options).await {
+        Ok(r) => Ok(r),
+        Err(err) => {
+            if ollama::classify_model(model) != ollama::ModelKind::Cloud
+                || !ollama::is_rate_limit_error(&err)
+            {
+                return Err(err);
+            }
+            let last_local = state.last_local_model.read().await.clone();
+            let catalog = ollama::model_catalog(3000).await.ok();
+            let fallback = catalog
+                .as_ref()
+                .and_then(|c| ollama::pick_local_fallback(c, None, last_local.as_deref()));
+            let Some(local) = fallback else {
+                state
+                    .emit_log(
+                        "ollama",
+                        &format!("cloud limit on '{model}', no local model available"),
+                    )
+                    .await;
+                return Err(err);
+            };
+            if local == *model {
+                return Err(err);
+            }
+            state
+                .emit_log(
+                    "ollama",
+                    &format!("cloud limit on '{model}' — switching to local '{local}'"),
+                )
+                .await;
+            {
+                let mut pref = state.preferred_ollama_model.write().await;
+                *pref = Some(local.clone());
+            }
+            *model = local;
+            ollama::chat_with_tools(model, messages, tools, options).await
+        }
+    }
+}
+
 fn tool_name_is_fetch(name: &str) -> bool {
     name.eq_ignore_ascii_case("fetch")
         || name
@@ -645,7 +698,7 @@ async fn run_model_turn(
     think: bool,
     skills_slug_filter: Option<&[String]>,
 ) -> Result<String, String> {
-    let model = match state.preferred_ollama_model.read().await.clone() {
+    let mut model = match state.preferred_ollama_model.read().await.clone() {
         Some(m) => m,
         None => ollama::active_model().await?,
     };
@@ -745,7 +798,9 @@ async fn run_model_turn(
         push_ephemeral_post_tool_reminder(&mut messages);
     }
 
-    let result = ollama::chat_with_tools(&model, &messages, effective_tools, &chat_opts).await;
+    let result =
+        chat_with_cloud_fallback(state, &mut model, &messages, effective_tools, &chat_opts)
+            .await;
     if inject_post_tool {
         pop_ephemeral_post_tool_reminder(&mut messages);
     }
@@ -991,8 +1046,14 @@ async fn run_model_turn(
         ..ChatOptions::default()
     };
     let t0 = Instant::now();
-    let result =
-        ollama::chat_with_tools(&model, &summary_messages, &json!([]), &summary_opts).await?;
+    let result = chat_with_cloud_fallback(
+        state,
+        &mut model,
+        &summary_messages,
+        &json!([]),
+        &summary_opts,
+    )
+    .await?;
     let tokens = fmt_tokens(result.prompt_tokens, result.eval_tokens);
     state
         .emit_log(
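Since `classify_model` and `is_rate_limit_error` are both public, the trigger condition guarding the downgrade above can be pinned down in a plain unit test. A minimal sketch that could sit in service.rs (the model names and error strings are invented):

    #[cfg(test)]
    mod cloud_fallback_tests {
        use super::{classify_model, is_rate_limit_error, ModelKind};

        #[test]
        fn downgrades_only_on_cloud_rate_limits() {
            // Mirrors the guard at the top of chat_with_cloud_fallback's Err arm.
            let should_downgrade = |model: &str, err: &str| {
                classify_model(model) == ModelKind::Cloud && is_rate_limit_error(err)
            };
            // Cloud model hitting a 429 body: retry with a local model.
            assert!(should_downgrade("gpt-oss:120b-cloud", "HTTP 429: too many requests"));
            // Local-model errors and non-limit cloud errors pass through unchanged.
            assert!(!should_downgrade("llama3.1:8b", "HTTP 429: too many requests"));
            assert!(!should_downgrade("gpt-oss:120b-cloud", "connection refused"));
        }
    }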
diff --git a/src-tauri/src/modules/ollama/cloud.rs b/src-tauri/src/modules/ollama/cloud.rs
new file mode 100644
index 0000000..fe0f7ea
--- /dev/null
+++ b/src-tauri/src/modules/ollama/cloud.rs
@@ -0,0 +1,147 @@
+//! Cloud model discovery from ollama.com.
+//!
+//! The local Ollama daemon's `/api/tags` only lists models that have been
+//! pulled to disk. Cloud models (accessible after `ollama signin`) live in
+//! the upstream catalog at `https://ollama.com/library/` and use
+//! `:cloud` or `-cloud` tags. This module enumerates them by scraping
+//! the cloud category page and each model's detail page, then caches the
+//! result so the dashboard picker can show them without re-fetching every
+//! few seconds.
+
+use regex::Regex;
+use std::sync::OnceLock;
+use std::time::{Duration, Instant};
+use tokio::sync::Mutex;
+
+const CLOUD_SEARCH_URL: &str = "https://ollama.com/search?c=cloud";
+const CLOUD_LIBRARY_PREFIX: &str = "https://ollama.com/library/";
+/// A stale cloud catalog is fine — Ollama publishes new cloud tags rarely. One
+/// hour keeps the dashboard responsive and avoids hammering ollama.com.
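The two regexes carry the whole scrape, so their expected matches are worth pinning in a test at the bottom of cloud.rs. A sketch against fabricated HTML fragments (not real ollama.com markup):

    #[cfg(test)]
    mod scrape_tests {
        use super::{run_re, slug_re};

        #[test]
        fn extracts_slugs_and_cloud_tags() {
            // Slugs come from anchor hrefs on the search page.
            let search_html =
                r#"<a href="/library/glm-4.6">GLM</a> <a href="/library/qwen3-coder">Qwen</a>"#;
            let slugs: Vec<&str> = slug_re()
                .captures_iter(search_html)
                .map(|c| c.get(1).unwrap().as_str())
                .collect();
            assert_eq!(slugs, ["glm-4.6", "qwen3-coder"]);

            // Full model names come from "ollama run/pull" snippets on detail pages.
            let detail_html = "<code>ollama run qwen3-coder:480b-cloud</code>";
            let caps = run_re().captures(detail_html).unwrap();
            assert_eq!(&caps[1], "qwen3-coder:480b-cloud");
        }
    }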
+const CACHE_TTL: Duration = Duration::from_secs(60 * 60);
+const SEARCH_TIMEOUT: Duration = Duration::from_secs(5);
+const DETAIL_TIMEOUT: Duration = Duration::from_secs(4);
+
+struct CacheEntry {
+    fetched_at: Instant,
+    models: Vec<String>,
+}
+
+static CACHE: OnceLock<Mutex<Option<CacheEntry>>> = OnceLock::new();
+static SLUG_RE: OnceLock<Regex> = OnceLock::new();
+static RUN_RE: OnceLock<Regex> = OnceLock::new();
+
+fn cache() -> &'static Mutex<Option<CacheEntry>> {
+    CACHE.get_or_init(|| Mutex::new(None))
+}
+
+fn slug_re() -> &'static Regex {
+    SLUG_RE.get_or_init(|| Regex::new(r#"href="/library/([a-z0-9._-]+)""#).unwrap())
+}
+
+fn run_re() -> &'static Regex {
+    RUN_RE.get_or_init(|| {
+        Regex::new(r#"ollama\s+(?:run|pull)\s+([a-z0-9._-]+(?::[a-z0-9._-]+)?)"#).unwrap()
+    })
+}
+
+/// Returns cloud-tagged model names (e.g. `glm-4.6:cloud`,
+/// `qwen3-coder:480b-cloud`). Falls back to a stale cache, then to an empty
+/// list, when the upstream catalog is unreachable.
+pub async fn list_cloud_models() -> Vec<String> {
+    {
+        let guard = cache().lock().await;
+        if let Some(ref entry) = *guard {
+            if entry.fetched_at.elapsed() < CACHE_TTL {
+                return entry.models.clone();
+            }
+        }
+    }
+    match fetch_cloud_models().await {
+        Ok(models) => {
+            let mut guard = cache().lock().await;
+            *guard = Some(CacheEntry {
+                fetched_at: Instant::now(),
+                models: models.clone(),
+            });
+            models
+        }
+        Err(e) => {
+            log::warn!("ollama cloud catalog fetch failed: {e}");
+            cache()
+                .lock()
+                .await
+                .as_ref()
+                .map(|c| c.models.clone())
+                .unwrap_or_default()
+        }
+    }
+}
+
+async fn fetch_cloud_models() -> Result<Vec<String>, String> {
+    let client = reqwest::Client::builder()
+        .timeout(DETAIL_TIMEOUT)
+        .user_agent("pengine/1.0")
+        .build()
+        .map_err(|e| e.to_string())?;
+    let body = client
+        .get(CLOUD_SEARCH_URL)
+        .timeout(SEARCH_TIMEOUT)
+        .send()
+        .await
+        .map_err(|e| e.to_string())?
+        .text()
+        .await
+        .map_err(|e| e.to_string())?;
+    let mut slugs: Vec<String> = slug_re()
+        .captures_iter(&body)
+        .map(|c| c[1].to_string())
+        .collect();
+    slugs.sort();
+    slugs.dedup();
+    if slugs.is_empty() {
+        return Err("no cloud slugs found in /search?c=cloud".to_string());
+    }
+
+    let mut tasks = Vec::with_capacity(slugs.len());
+    for slug in slugs {
+        let client = client.clone();
+        tasks.push(tokio::spawn(async move {
+            cloud_models_for_slug(&client, &slug).await
+        }));
+    }
+    let mut out: Vec<String> = Vec::new();
+    for t in tasks {
+        if let Ok(Ok(names)) = t.await {
+            out.extend(names);
+        }
+    }
+    out.sort();
+    out.dedup();
+    Ok(out)
+}
+
+async fn cloud_models_for_slug(
+    client: &reqwest::Client,
+    slug: &str,
+) -> Result<Vec<String>, String> {
+    let url = format!("{CLOUD_LIBRARY_PREFIX}{slug}");
+    let body = client
+        .get(&url)
+        .send()
+        .await
+        .map_err(|e| e.to_string())?
+        .text()
+        .await
+        .map_err(|e| e.to_string())?;
+    let mut out = Vec::new();
+    for cap in run_re().captures_iter(&body) {
+        let name = &cap[1];
+        let tag = name.split_once(':').map(|(_, t)| t).unwrap_or("");
+        if tag == "cloud" || tag.ends_with("-cloud") {
+            out.push(name.to_string());
+        }
+    }
+    out.sort();
+    out.dedup();
+    Ok(out)
+}
diff --git a/src-tauri/src/modules/ollama/mod.rs b/src-tauri/src/modules/ollama/mod.rs
index 48dbcb1..0276eba 100644
--- a/src-tauri/src/modules/ollama/mod.rs
+++ b/src-tauri/src/modules/ollama/mod.rs
@@ -1,3 +1,4 @@
+pub mod cloud;
 pub mod constants;
 pub mod keywords;
 pub mod service;
diff --git a/src-tauri/src/modules/ollama/service.rs b/src-tauri/src/modules/ollama/service.rs
index 09b782e..b7e62f4 100644
--- a/src-tauri/src/modules/ollama/service.rs
+++ b/src-tauri/src/modules/ollama/service.rs
@@ -1,3 +1,4 @@
+use crate::modules::ollama::cloud;
 use crate::modules::ollama::constants::{OLLAMA_CHAT_URL, OLLAMA_PS_URL, OLLAMA_TAGS_URL};
 use crate::shared::text::normalize_assistant_message_content;
 use std::sync::OnceLock;
@@ -8,10 +9,43 @@ fn http_client() -> &'static reqwest::Client {
     HTTP.get_or_init(reqwest::Client::new)
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ModelKind {
+    Local,
+    Cloud,
+}
+
+impl ModelKind {
+    pub fn as_str(self) -> &'static str {
+        match self {
+            ModelKind::Local => "local",
+            ModelKind::Cloud => "cloud",
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct ModelInfo {
+    pub name: String,
+    pub kind: ModelKind,
+}
+
 #[derive(Debug, Clone)]
 pub struct ModelCatalog {
     pub active: Option<String>,
-    pub models: Vec<String>,
+    pub models: Vec<ModelInfo>,
 }
 
+/// Cloud models are surfaced by the local Ollama daemon after `ollama signin`
+/// and are tagged with `-cloud` (e.g. `gpt-oss:120b-cloud`) or the bare tag
+/// `cloud`. The tag is the part after the first `:` (defaulting to `latest`).
+pub fn classify_model(name: &str) -> ModelKind {
+    let tag = name.split_once(':').map(|(_, t)| t).unwrap_or("");
+    if tag == "cloud" || tag.ends_with("-cloud") {
+        ModelKind::Cloud
+    } else {
+        ModelKind::Local
+    }
+}
+
 /// Returns active model and the full pulled model list (`/api/tags`).
@@ -46,7 +80,7 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
         Err(e) => log::warn!("ollama {}: request error: {e}", OLLAMA_PS_URL),
     }
 
-    let mut models: Vec<String> = Vec::new();
+    let mut models: Vec<ModelInfo> = Vec::new();
     match client.get(OLLAMA_TAGS_URL).timeout(timeout).send().await {
         Ok(resp) => {
             if !resp.status().is_success() {
@@ -62,7 +96,12 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
                         .as_array()
                         .map(|arr| {
                             arr.iter()
-                                .filter_map(|m| m["name"].as_str().map(|s| s.to_string()))
+                                .filter_map(|m| {
+                                    m["name"].as_str().map(|s| ModelInfo {
+                                        name: s.to_string(),
+                                        kind: classify_model(s),
+                                    })
+                                })
                                 .collect()
                         })
                         .unwrap_or_default();
@@ -77,18 +116,72 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
     }
 
     if let Some(ref a) = active {
-        if !models.iter().any(|m| m == a) {
-            models.insert(0, a.clone());
+        if !models.iter().any(|m| &m.name == a) {
+            models.insert(
+                0,
+                ModelInfo {
+                    name: a.clone(),
+                    kind: classify_model(a),
+                },
+            );
         }
     }
 
+    // Cloud models are proxied through the local daemon, so if local Ollama
+    // is unreachable they aren't usable either — keep the original error.
     if active.is_none() && models.is_empty() {
         return Err("ollama unreachable: no active model and no pulled models".to_string());
     }
 
+    for cloud_name in cloud::list_cloud_models().await {
+        if !models.iter().any(|m| m.name == cloud_name) {
+            models.push(ModelInfo {
+                name: cloud_name,
+                kind: ModelKind::Cloud,
+            });
+        }
+    }
+
     Ok(ModelCatalog { active, models })
 }
 
+/// Best-guess local fallback model when a cloud rate-limit forces a downgrade.
+/// Prefers `preferred` if local, then `last_local`, then the active model if
+/// local, then the first local entry in the catalog.
+pub fn pick_local_fallback(
+    catalog: &ModelCatalog,
+    preferred: Option<&str>,
+    last_local: Option<&str>,
+) -> Option<String> {
+    let local_named = |name: &str| {
+        catalog
+            .models
+            .iter()
+            .find(|m| m.name == name && m.kind == ModelKind::Local)
+            .map(|m| m.name.clone())
+    };
+    if let Some(p) = preferred {
+        if let Some(m) = local_named(p) {
+            return Some(m);
+        }
+    }
+    if let Some(p) = last_local {
+        if let Some(m) = local_named(p) {
+            return Some(m);
+        }
+    }
+    if let Some(active) = catalog.active.as_deref() {
+        if let Some(m) = local_named(active) {
+            return Some(m);
+        }
+    }
+    catalog
+        .models
+        .iter()
+        .find(|m| m.kind == ModelKind::Local)
+        .map(|m| m.name.clone())
+}
+
 /// Returns the currently loaded model (from `/api/ps`), falling back to the
 /// first pulled model (from `/api/tags`) if nothing is loaded yet.
 pub async fn active_model() -> Result<String, String> {
@@ -99,10 +192,22 @@ pub async fn active_model() -> Result<String, String> {
     catalog
         .models
         .first()
-        .cloned()
+        .map(|m| m.name.clone())
         .ok_or_else(|| "no models pulled in ollama".to_string())
 }
 
+/// Detect rate-limit / quota errors returned for cloud models. The local
+/// daemon proxies the upstream HTTP status (typically 429) and may also embed
+/// a textual hint in the response body.
+pub fn is_rate_limit_error(err: &str) -> bool {
+    let lower = err.to_ascii_lowercase();
+    lower.contains("http 429")
+        || lower.contains("rate limit")
+        || lower.contains("rate-limit")
+        || lower.contains("quota")
+        || lower.contains("too many requests")
+}
+
 /// Outcome of a single chat call so the caller knows whether tools were included in the request.
 pub struct ChatResult {
     pub message: serde_json::Value,
diff --git a/src-tauri/src/shared/state.rs b/src-tauri/src/shared/state.rs
index 657ab9d..a092865 100644
--- a/src-tauri/src/shared/state.rs
+++ b/src-tauri/src/shared/state.rs
@@ -82,6 +82,10 @@ pub struct AppState {
     /// Ensures only one MCP registry rebuild (stdio connects) runs at a time.
     pub mcp_rebuild_mutex: Arc<Mutex<()>>,
     pub preferred_ollama_model: Arc<RwLock<Option<String>>>,
+    /// Last user-selected **local** Ollama model. Used as the automatic
+    /// downgrade target when a cloud model returns a rate-limit error so the
+    /// agent can keep replying without the user having to repick.
+    pub last_local_model: Arc<RwLock<Option<String>>>,
     pub cached_filesystem_paths: Arc<RwLock<Vec<String>>>,
     pub tool_engine_mutex: Arc<Mutex<()>>,
     /// Active memory-session recording (toggled by keyword commands; see `bot::agent`).
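`classify_model` and `pick_local_fallback` are pure functions, which makes the documented precedence chain easy to verify. A sketch with a hand-built catalog (the model names are examples only):

    #[cfg(test)]
    mod fallback_pick_tests {
        use super::{classify_model, pick_local_fallback, ModelCatalog, ModelInfo};

        fn info(name: &str) -> ModelInfo {
            ModelInfo { name: name.to_string(), kind: classify_model(name) }
        }

        #[test]
        fn respects_the_precedence_chain() {
            let catalog = ModelCatalog {
                active: Some("qwen2.5:7b".to_string()),
                models: vec![
                    info("gpt-oss:120b-cloud"),
                    info("llama3.1:8b"),
                    info("qwen2.5:7b"),
                ],
            };
            // A remembered last-local model beats the active one.
            let pick = pick_local_fallback(&catalog, None, Some("llama3.1:8b"));
            assert_eq!(pick.as_deref(), Some("llama3.1:8b"));
            // With nothing remembered, the active model wins if it is local.
            let pick = pick_local_fallback(&catalog, None, None);
            assert_eq!(pick.as_deref(), Some("qwen2.5:7b"));
        }
    }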
@@ -136,6 +140,7 @@ impl AppState {
             mcp_config_mutex: Arc::new(Mutex::new(())),
             mcp_rebuild_mutex: Arc::new(Mutex::new(())),
             preferred_ollama_model: Arc::new(RwLock::new(None)),
+            last_local_model: Arc::new(RwLock::new(None)),
             cached_filesystem_paths: Arc::new(RwLock::new(Vec::new())),
             tool_engine_mutex: Arc::new(Mutex::new(())),
             memory_session: Arc::new(RwLock::new(None)),
diff --git a/src/modules/ollama/index.ts b/src/modules/ollama/index.ts
index 1af34a0..bc39531 100644
--- a/src/modules/ollama/index.ts
+++ b/src/modules/ollama/index.ts
@@ -1,2 +1,2 @@
 export { fetchOllamaModel, fetchOllamaModels, setPreferredOllamaModel } from "./api";
-export type { OllamaModelsResponse, OllamaProbe } from "./types";
+export type { OllamaModelInfo, OllamaModelKind, OllamaModelsResponse, OllamaProbe } from "./types";
diff --git a/src/modules/ollama/types.ts b/src/modules/ollama/types.ts
index ccf2986..63302b5 100644
--- a/src/modules/ollama/types.ts
+++ b/src/modules/ollama/types.ts
@@ -1,8 +1,15 @@
 export type OllamaProbe = { reachable: boolean; model: string | null };
 
+export type OllamaModelKind = "local" | "cloud";
+
+export type OllamaModelInfo = {
+  name: string;
+  kind: OllamaModelKind;
+};
+
 export type OllamaModelsResponse = {
   reachable: boolean;
   active_model: string | null;
   selected_model: string | null;
-  models: string[];
+  models: OllamaModelInfo[];
 };
diff --git a/src/pages/DashboardPage.tsx b/src/pages/DashboardPage.tsx
index ba9ffcc..3b0b95e 100644
--- a/src/pages/DashboardPage.tsx
+++ b/src/pages/DashboardPage.tsx
@@ -5,6 +5,7 @@ import { useAppSessionStore } from "../modules/bot/store/appSessionStore";
 import { CronPanel } from "../modules/cron";
 import { McpToolsPanel } from "../modules/mcp/components/McpToolsPanel";
 import { fetchOllamaModels, setPreferredOllamaModel } from "../modules/ollama/api";
+import type { OllamaModelInfo } from "../modules/ollama/types";
 import { SkillsPanel } from "../modules/skills";
 import { ToolEnginePanel } from "../modules/toolengine/components/ToolEnginePanel";
 import { UpdateIndicator } from "../modules/updater";
@@ -21,7 +22,7 @@ export function DashboardPage() {
   const isDeviceConnected = useAppSessionStore((state) => state.isDeviceConnected);
   const disconnectDevice = useAppSessionStore((state) => state.disconnectDevice);
   const botUsername = useAppSessionStore((state) => state.botUsername);
-  const [availableModels, setAvailableModels] = useState<string[]>([]);
+  const [availableModels, setAvailableModels] = useState<OllamaModelInfo[]>([]);
   const [selectedModel, setSelectedModel] = useState<string | null>(null);
   const [activeModel, setActiveModel] = useState<string | null>(null);
   const [savingModel, setSavingModel] = useState(false);
@@ -182,8 +183,8 @@ export function DashboardPage() {
             <option value="">
               {activeModel ? `Active (${activeModel})` : "Active model"}
             </option>
             {availableModels.map((model) => (
-              <option key={model} value={model}>
-                {model}
+              <option key={model.name} value={model.name}>
+                {model.kind === "cloud" ? `${model.name} (cloud)` : model.name}
               </option>
             ))}

From c8126dff99ea570f06ba46e37e2bc3a531a667e0 Mon Sep 17 00:00:00 2001
From: MaximEdogawa
Date: Sun, 19 Apr 2026 03:01:18 +0200
Subject: [PATCH 2/2] update: address review feedback

---
 src-tauri/src/modules/bot/agent.rs      |  6 ++---
 src-tauri/src/modules/ollama/cloud.rs   | 32 +++++++++++++++----------
 src-tauri/src/modules/ollama/service.rs | 32 +++++++++++++++++--------
 3 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/src-tauri/src/modules/bot/agent.rs b/src-tauri/src/modules/bot/agent.rs
index 1f1894d..9a41060 100644
--- a/src-tauri/src/modules/bot/agent.rs
+++ b/src-tauri/src/modules/bot/agent.rs
@@ -103,7 +103,7 @@ async fn chat_with_cloud_fallback(
         Ok(r) => Ok(r),
         Err(err) => {
             if ollama::classify_model(model) != ollama::ModelKind::Cloud
-                || !ollama::is_rate_limit_error(&err)
+                || !ollama::is_cloud_unavailable_error(&err)
             {
                 return Err(err);
             }
@@ -116,7 +116,7 @@ async fn chat_with_cloud_fallback(
                 state
                     .emit_log(
                         "ollama",
-                        &format!("cloud limit on '{model}', no local model available"),
+                        &format!("cloud '{model}' unavailable ({err}); no local fallback"),
                     )
                     .await;
                 return Err(err);
@@ -127,7 +127,7 @@ async fn chat_with_cloud_fallback(
             state
                 .emit_log(
                     "ollama",
-                    &format!("cloud limit on '{model}' — switching to local '{local}'"),
+                    &format!("cloud '{model}' unavailable, switching to local '{local}': {err}"),
                 )
                 .await;
             {
diff --git a/src-tauri/src/modules/ollama/cloud.rs b/src-tauri/src/modules/ollama/cloud.rs
index fe0f7ea..958365f 100644
--- a/src-tauri/src/modules/ollama/cloud.rs
+++ b/src-tauri/src/modules/ollama/cloud.rs
@@ -8,6 +8,7 @@
 //! result so the dashboard picker can show them without re-fetching every
 //! few seconds.
 
+use futures::stream::{self, StreamExt};
 use regex::Regex;
 use std::sync::OnceLock;
 use std::time::{Duration, Instant};
@@ -20,6 +21,8 @@ const CLOUD_LIBRARY_PREFIX: &str = "https://ollama.com/library/";
 const CACHE_TTL: Duration = Duration::from_secs(60 * 60);
 const SEARCH_TIMEOUT: Duration = Duration::from_secs(5);
 const DETAIL_TIMEOUT: Duration = Duration::from_secs(4);
+/// Cap parallel detail-page fetches to avoid bursting ollama.com.
+const CLOUD_DETAIL_CONCURRENCY: usize = 8;
 
 struct CacheEntry {
     fetched_at: Instant,
@@ -102,21 +105,24 @@ async fn fetch_cloud_models() -> Result<Vec<String>, String> {
         return Err("no cloud slugs found in /search?c=cloud".to_string());
     }
 
-    let mut tasks = Vec::with_capacity(slugs.len());
-    for slug in slugs {
-        let client = client.clone();
-        tasks.push(tokio::spawn(async move {
-            cloud_models_for_slug(&client, &slug).await
-        }));
-    }
-    let mut out: Vec<String> = Vec::new();
-    for t in tasks {
-        if let Ok(Ok(names)) = t.await {
-            out.extend(names);
-        }
-    }
+    let results: Vec<Result<Vec<String>, String>> = stream::iter(slugs)
+        .map(|slug| {
+            let client = client.clone();
+            async move { cloud_models_for_slug(&client, &slug).await }
+        })
+        .buffer_unordered(CLOUD_DETAIL_CONCURRENCY)
+        .collect()
+        .await;
+    let mut out: Vec<String> = results
+        .into_iter()
+        .filter_map(Result::ok)
+        .flatten()
+        .collect();
     out.sort();
     out.dedup();
+    if out.is_empty() {
+        return Err("cloud catalog detail scrape returned no model names".to_string());
+    }
     Ok(out)
 }
diff --git a/src-tauri/src/modules/ollama/service.rs b/src-tauri/src/modules/ollama/service.rs
index b7e62f4..3306bf5 100644
--- a/src-tauri/src/modules/ollama/service.rs
+++ b/src-tauri/src/modules/ollama/service.rs
@@ -54,6 +54,7 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
     let timeout = std::time::Duration::from_millis(timeout_ms);
 
     let mut active: Option<String> = None;
+    let mut daemon_reachable = false;
     match client.get(OLLAMA_PS_URL).timeout(timeout).send().await {
         Ok(resp) => {
             if !resp.status().is_success() {
@@ -63,6 +64,7 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
                     resp.status()
                 );
             } else {
+                daemon_reachable = true;
                 match resp.json::<serde_json::Value>().await {
                     Ok(body) => {
                         active = body["models"]
@@ -90,6 +92,7 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
                     resp.status()
                 );
             } else {
+                daemon_reachable = true;
                 match resp.json::<serde_json::Value>().await {
                     Ok(body) => {
                         models = body["models"]
@@ -127,12 +130,6 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
         }
     }
 
-    // Cloud models are proxied through the local daemon, so if local Ollama
-    // is unreachable they aren't usable either — keep the original error.
-    if active.is_none() && models.is_empty() {
-        return Err("ollama unreachable: no active model and no pulled models".to_string());
-    }
-
     for cloud_name in cloud::list_cloud_models().await {
         if !models.iter().any(|m| m.name == cloud_name) {
             models.push(ModelInfo {
@@ -142,6 +139,10 @@ pub async fn model_catalog(timeout_ms: u64) -> Result<ModelCatalog, String> {
         }
     }
 
+    if !daemon_reachable && models.is_empty() {
+        return Err("ollama unreachable: no active model and no pulled models".to_string());
+    }
+
     Ok(ModelCatalog { active, models })
 }
 
@@ -196,16 +197,27 @@ pub async fn active_model() -> Result<String, String> {
         .ok_or_else(|| "no models pulled in ollama".to_string())
 }
 
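On the concurrency swap: `buffer_unordered(n)` keeps at most `n` futures in flight and yields results in completion order, which is why the later sort/dedup still matters. The same shape in isolation, as a toy example rather than project code:

    use futures::stream::{self, StreamExt};

    #[tokio::main]
    async fn main() {
        // At most 8 "fetches" run concurrently; outputs arrive as they finish.
        let results: Vec<u32> = stream::iter(0u32..100)
            .map(|i| async move { i * 2 })
            .buffer_unordered(8)
            .collect()
            .await;
        assert_eq!(results.len(), 100);
    }

Compared with the earlier `tokio::spawn` loop, this bounds the burst against ollama.com without needing a semaphore or manual task bookkeeping.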
-/// Detect rate-limit / quota errors returned for cloud models. The local
-/// daemon proxies the upstream HTTP status (typically 429) and may also embed
-/// a textual hint in the response body.
-pub fn is_rate_limit_error(err: &str) -> bool {
+/// Detect cloud-side failures that warrant downgrading to a local model.
+/// Covers explicit rate limits (429 / "rate limit" / "quota"), upstream
+/// outages proxied as 5xx with the cloud's `ref: <id>` envelope, and the
+/// "sign in / unauthorized" responses returned when the user hasn't run
+/// `ollama signin`. Any of these mean the picked cloud model can't serve
+/// this turn — the local fallback keeps the agent responsive.
+pub fn is_cloud_unavailable_error(err: &str) -> bool {
     let lower = err.to_ascii_lowercase();
     lower.contains("http 429")
         || lower.contains("rate limit")
         || lower.contains("rate-limit")
         || lower.contains("quota")
         || lower.contains("too many requests")
+        || lower.contains("http 500")
+        || lower.contains("http 502")
+        || lower.contains("http 503")
+        || lower.contains("http 504")
+        || lower.contains("internal server error")
+        || lower.contains("unauthorized")
+        || lower.contains("sign in")
+        || lower.contains("not signed in")
 }
 
 /// Outcome of a single chat call so the caller knows whether tools were included in the request.
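The widened matcher is substring-based, so its contract is easiest to read off from examples. A sketch (the error strings are invented stand-ins for what the daemon proxies back):

    #[cfg(test)]
    mod cloud_error_tests {
        use super::is_cloud_unavailable_error;

        #[test]
        fn matches_limits_outages_and_auth_failures() {
            for err in [
                "HTTP 429: too many requests",
                "rate limit exceeded, ref: abc123",
                "HTTP 503 Service Unavailable",
                "upstream error: please sign in with `ollama signin`",
            ] {
                assert!(is_cloud_unavailable_error(err), "should downgrade on: {err}");
            }
            // Plain transport failures do not trigger the local fallback.
            assert!(!is_cloud_unavailable_error("connection refused (os error 61)"));
        }
    }

Keeping the matcher string-based is a pragmatic choice while the daemon's error surface is unstructured; if it ever returns typed errors, this substring list can shrink.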