7 changes: 7 additions & 0 deletions src-tauri/src/app.rs
@@ -3,6 +3,7 @@ use crate::infrastructure::http_server;
use crate::modules::bot::{commands, repository, service as bot_service};
use crate::modules::cron::{repository as cron_repository, scheduler as cron_scheduler};
use crate::modules::mcp::service as mcp_service;
use crate::modules::ollama::cloud as ollama_cloud;
use crate::modules::secure_store;
use crate::shared::state::{AppState, ConnectionData};
use std::path::PathBuf;
@@ -182,6 +183,12 @@ pub fn run() {
http_server::start_server(server_state).await;
});

// Pre-warm the Ollama cloud catalog so the first dashboard refresh
// returns cloud entries without the user waiting on ollama.com.
tauri::async_runtime::spawn(async move {
let _ = ollama_cloud::list_cloud_models().await;
});

Ok(())
})
.invoke_handler(tauri::generate_handler![
42 changes: 33 additions & 9 deletions src-tauri/src/infrastructure/http_server.rs
@@ -87,12 +87,20 @@ pub struct PutMcpFilesystemBody {
pub paths: Vec<String>,
}

#[derive(Serialize)]
pub struct OllamaModelDto {
pub name: String,
/// `"local"` (default Ollama models) or `"cloud"` (Ollama Cloud — surfaced
/// after `ollama signin` with names like `gpt-oss:120b-cloud`).
pub kind: &'static str,
}

#[derive(Serialize)]
pub struct OllamaModelsResponse {
pub reachable: bool,
pub active_model: Option<String>,
pub selected_model: Option<String>,
-    pub models: Vec<String>,
+    pub models: Vec<OllamaModelDto>,
}
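// Illustrative JSON for `OllamaModelsResponse` (the local model name here is
// hypothetical, not taken from the codebase): a signed-in daemon with one
// pulled model and one cloud model would serialize roughly as
//
//   {
//     "reachable": true,
//     "active_model": "llama3.1:8b",
//     "selected_model": null,
//     "models": [
//       { "name": "llama3.1:8b", "kind": "local" },
//       { "name": "gpt-oss:120b-cloud", "kind": "cloud" }
//     ]
//   }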

#[derive(Deserialize)]
@@ -372,7 +380,14 @@ async fn handle_ollama_models(State(state): State<AppState>) -> Json<OllamaModel
reachable: true,
active_model: catalog.active,
selected_model,
-            models: catalog.models,
+            models: catalog
+                .models
+                .into_iter()
+                .map(|m| OllamaModelDto {
+                    name: m.name,
+                    kind: m.kind.as_str(),
+                })
+                .collect(),
}),
Err(_) => Json(OllamaModelsResponse {
reachable: false,
@@ -436,17 +451,21 @@ async fn handle_ollama_model_put(
.map(|m| m.trim().to_string())
.filter(|m| !m.is_empty());

let mut selected_kind: Option<ollama_service::ModelKind> = None;
if let Some(ref model) = normalized {
let catalog = ollama_service::model_catalog(3000)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, Json(ErrorResponse { error: e })))?;
-        if !catalog.models.iter().any(|m| m == model) {
-            return Err((
-                StatusCode::BAD_REQUEST,
-                Json(ErrorResponse {
-                    error: format!("model '{model}' is not available in Ollama"),
-                }),
-            ));
-        }
+        match catalog.models.iter().find(|m| &m.name == model) {
+            Some(m) => selected_kind = Some(m.kind),
+            None => {
+                return Err((
+                    StatusCode::BAD_REQUEST,
+                    Json(ErrorResponse {
+                        error: format!("model '{model}' is not available in Ollama"),
+                    }),
+                ));
+            }
+        }
}

@@ -455,6 +474,11 @@
*lock = normalized.clone();
}

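    // Remember the freshest local pick so the agent's cloud fallback
    // (`chat_with_cloud_fallback` in `bot/agent.rs`) has a model to return to.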
if let (Some(name), Some(ollama_service::ModelKind::Local)) = (&normalized, selected_kind) {
let mut last = state.last_local_model.write().await;
*last = Some(name.clone());
}

state
.emit_log(
"run",
69 changes: 65 additions & 4 deletions src-tauri/src/modules/bot/agent.rs
@@ -87,6 +87,59 @@ fn chat_options_for_agent_step(
/// not truncated before sending to the user.
const TOOL_OUTPUT_CHAR_CAP: usize = 4000;

/// Run a chat call; if the request goes to a cloud model and the daemon
/// returns a rate-limit error, downgrade to the user's last local model and
/// retry once. The downgraded model is also written back to
/// `preferred_ollama_model` so the rest of the turn (and future turns) stay
/// local until the user picks again.
async fn chat_with_cloud_fallback(
state: &AppState,
model: &mut String,
messages: &serde_json::Value,
tools: &serde_json::Value,
options: &ChatOptions,
) -> Result<ollama::ChatResult, String> {
match ollama::chat_with_tools(model, messages, tools, options).await {
Ok(r) => Ok(r),
Err(err) => {
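            // Only intercept rate-limit/unavailable errors on cloud models;
            // any other failure propagates to the caller unchanged.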
if ollama::classify_model(model) != ollama::ModelKind::Cloud
|| !ollama::is_cloud_unavailable_error(&err)
{
return Err(err);
}
let last_local = state.last_local_model.read().await.clone();
let catalog = ollama::model_catalog(3000).await.ok();
let fallback = catalog
.as_ref()
.and_then(|c| ollama::pick_local_fallback(c, None, last_local.as_deref()));
let Some(local) = fallback else {
state
.emit_log(
"ollama",
&format!("cloud '{model}' unavailable ({err}); no local fallback"),
)
.await;
return Err(err);
};
if local == *model {
return Err(err);
}
state
.emit_log(
"ollama",
&format!("cloud '{model}' unavailable, switching to local '{local}': {err}"),
)
.await;
{
let mut pref = state.preferred_ollama_model.write().await;
*pref = Some(local.clone());
}
*model = local;
ollama::chat_with_tools(model, messages, tools, options).await
}
}
}

fn tool_name_is_fetch(name: &str) -> bool {
name.eq_ignore_ascii_case("fetch")
|| name
@@ -645,7 +698,7 @@ async fn run_model_turn(
think: bool,
skills_slug_filter: Option<&[String]>,
) -> Result<TurnResult, String> {
-    let model = match state.preferred_ollama_model.read().await.clone() {
+    let mut model = match state.preferred_ollama_model.read().await.clone() {
Some(m) => m,
None => ollama::active_model().await?,
};
@@ -745,7 +798,9 @@
push_ephemeral_post_tool_reminder(&mut messages);
}

-        let result = ollama::chat_with_tools(&model, &messages, effective_tools, &chat_opts).await;
+        let result =
+            chat_with_cloud_fallback(state, &mut model, &messages, effective_tools, &chat_opts)
+                .await;
if inject_post_tool {
pop_ephemeral_post_tool_reminder(&mut messages);
}
@@ -991,8 +1046,14 @@
..ChatOptions::default()
};
let t0 = Instant::now();
-        let result =
-            ollama::chat_with_tools(&model, &summary_messages, &json!([]), &summary_opts).await?;
+        let result = chat_with_cloud_fallback(
+            state,
+            &mut model,
+            &summary_messages,
+            &json!([]),
+            &summary_opts,
+        )
+        .await?;
let tokens = fmt_tokens(result.prompt_tokens, result.eval_tokens);
state
.emit_log(
153 changes: 153 additions & 0 deletions src-tauri/src/modules/ollama/cloud.rs
@@ -0,0 +1,153 @@
//! Cloud model discovery from ollama.com.
//!
//! The local Ollama daemon's `/api/tags` only lists models that have been
//! pulled to disk. Cloud models (accessible after `ollama signin`) live in
//! the upstream catalog at `https://ollama.com/library/<slug>` and use
//! `:cloud` or `<size>-cloud` tags. This module enumerates them by scraping
//! the cloud category page and each model's detail page, then caches the
//! result so the dashboard picker can show them without re-fetching every
//! few seconds.

use futures::stream::{self, StreamExt};
use regex::Regex;
use std::sync::OnceLock;
use std::time::{Duration, Instant};
use tokio::sync::Mutex;

const CLOUD_SEARCH_URL: &str = "https://ollama.com/search?c=cloud";
const CLOUD_LIBRARY_PREFIX: &str = "https://ollama.com/library/";
/// Stale cloud catalog is fine — Ollama publishes new cloud tags rarely. One
/// hour keeps the dashboard responsive and avoids hammering ollama.com.
const CACHE_TTL: Duration = Duration::from_secs(60 * 60);
const SEARCH_TIMEOUT: Duration = Duration::from_secs(5);
const DETAIL_TIMEOUT: Duration = Duration::from_secs(4);
/// Cap parallel detail-page fetches to avoid bursting ollama.com.
const CLOUD_DETAIL_CONCURRENCY: usize = 8;

struct CacheEntry {
fetched_at: Instant,
models: Vec<String>,
}

static CACHE: OnceLock<Mutex<Option<CacheEntry>>> = OnceLock::new();
static SLUG_RE: OnceLock<Regex> = OnceLock::new();
static RUN_RE: OnceLock<Regex> = OnceLock::new();

fn cache() -> &'static Mutex<Option<CacheEntry>> {
CACHE.get_or_init(|| Mutex::new(None))
}

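/// Matches library links on the cloud search page, e.g.
/// `href="/library/gpt-oss"` captures `gpt-oss`.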
fn slug_re() -> &'static Regex {
SLUG_RE.get_or_init(|| Regex::new(r#"href="/library/([a-z0-9._-]+)""#).unwrap())
}

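/// Matches the copy-paste commands on a model's detail page, e.g.
/// `ollama run qwen3-coder:480b-cloud` captures `qwen3-coder:480b-cloud`.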
fn run_re() -> &'static Regex {
RUN_RE.get_or_init(|| {
Regex::new(r#"ollama\s+(?:run|pull)\s+([a-z0-9._-]+(?::[a-z0-9._-]+)?)"#).unwrap()
})
}

/// Returns cloud-tagged model names (e.g. `glm-4.6:cloud`,
/// `qwen3-coder:480b-cloud`). Falls back to a stale cache, then to an empty
/// list, when the upstream catalog is unreachable.
pub async fn list_cloud_models() -> Vec<String> {
{
let guard = cache().lock().await;
if let Some(ref entry) = *guard {
if entry.fetched_at.elapsed() < CACHE_TTL {
return entry.models.clone();
}
}
}
match fetch_cloud_models().await {
Ok(models) => {
let mut guard = cache().lock().await;
*guard = Some(CacheEntry {
fetched_at: Instant::now(),
models: models.clone(),
});
models
}
Err(e) => {
log::warn!("ollama cloud catalog fetch failed: {e}");
cache()
.lock()
.await
.as_ref()
.map(|c| c.models.clone())
.unwrap_or_default()
}
}
}

async fn fetch_cloud_models() -> Result<Vec<String>, String> {
let client = reqwest::Client::builder()
.timeout(DETAIL_TIMEOUT)
.user_agent("pengine/1.0")
.build()
.map_err(|e| e.to_string())?;
let body = client
.get(CLOUD_SEARCH_URL)
.timeout(SEARCH_TIMEOUT)
.send()
.await
.map_err(|e| e.to_string())?
.text()
.await
.map_err(|e| e.to_string())?;
let mut slugs: Vec<String> = slug_re()
.captures_iter(&body)
.map(|c| c[1].to_string())
.collect();
slugs.sort();
slugs.dedup();
if slugs.is_empty() {
return Err("no cloud slugs found in /search?c=cloud".to_string());
}

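    // Second pass: fetch each slug's detail page with bounded concurrency and
    // harvest cloud-tagged names from its `ollama run`/`ollama pull` snippets.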
let results: Vec<Result<Vec<String>, String>> = stream::iter(slugs)
.map(|slug| {
let client = client.clone();
async move { cloud_models_for_slug(&client, &slug).await }
})
.buffer_unordered(CLOUD_DETAIL_CONCURRENCY)
.collect()
.await;
let mut out: Vec<String> = results
.into_iter()
.filter_map(Result::ok)
.flatten()
.collect();
out.sort();
out.dedup();
if out.is_empty() {
return Err("cloud catalog detail scrape returned no model names".to_string());
}
Ok(out)
}

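/// Scrapes `https://ollama.com/library/<slug>` and keeps only model names
/// whose tag is `cloud` or ends in `-cloud` (e.g. `glm-4.6:cloud`).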
async fn cloud_models_for_slug(
client: &reqwest::Client,
slug: &str,
) -> Result<Vec<String>, String> {
let url = format!("{CLOUD_LIBRARY_PREFIX}{slug}");
let body = client
.get(&url)
.send()
.await
.map_err(|e| e.to_string())?
.text()
.await
.map_err(|e| e.to_string())?;
let mut out = Vec::new();
for cap in run_re().captures_iter(&body) {
let name = &cap[1];
let tag = name.split_once(':').map(|(_, t)| t).unwrap_or("");
if tag == "cloud" || tag.ends_with("-cloud") {
out.push(name.to_string());
}
}
out.sort();
out.dedup();
Ok(out)
}
1 change: 1 addition & 0 deletions src-tauri/src/modules/ollama/mod.rs
@@ -1,3 +1,4 @@
pub mod cloud;
pub mod constants;
pub mod keywords;
pub mod service;