7 changes: 7 additions & 0 deletions src-tauri/src/app.rs
@@ -3,6 +3,7 @@ use crate::infrastructure::http_server;
use crate::modules::bot::{commands, repository, service as bot_service};
use crate::modules::cron::{repository as cron_repository, scheduler as cron_scheduler};
use crate::modules::mcp::service as mcp_service;
use crate::modules::ollama::cloud as ollama_cloud;
use crate::modules::secure_store;
use crate::shared::state::{AppState, ConnectionData};
use std::path::PathBuf;
@@ -182,6 +183,12 @@ pub fn run() {
http_server::start_server(server_state).await;
});

// Pre-warm the Ollama cloud catalog so the first dashboard refresh
// returns cloud entries without the user waiting on ollama.com.
tauri::async_runtime::spawn(async move {
let _ = ollama_cloud::list_cloud_models().await;
});

Ok(())
})
.invoke_handler(tauri::generate_handler![
42 changes: 33 additions & 9 deletions src-tauri/src/infrastructure/http_server.rs
@@ -87,12 +87,20 @@ pub struct PutMcpFilesystemBody {
pub paths: Vec<String>,
}

#[derive(Serialize)]
pub struct OllamaModelDto {
pub name: String,
/// `"local"` (default Ollama models) or `"cloud"` (Ollama Cloud — surfaced
/// after `ollama signin` with names like `gpt-oss:120b-cloud`).
pub kind: &'static str,
}

#[derive(Serialize)]
pub struct OllamaModelsResponse {
pub reachable: bool,
pub active_model: Option<String>,
pub selected_model: Option<String>,
-    pub models: Vec<String>,
+    pub models: Vec<OllamaModelDto>,
}
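// Illustrative JSON for `OllamaModelsResponse` (the local model name here is
// hypothetical, not taken from the codebase): a signed-in daemon with one
// pulled model and one cloud model would serialize roughly as
//
//   {
//     "reachable": true,
//     "active_model": "llama3.1:8b",
//     "selected_model": null,
//     "models": [
//       { "name": "llama3.1:8b", "kind": "local" },
//       { "name": "gpt-oss:120b-cloud", "kind": "cloud" }
//     ]
//   }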

#[derive(Deserialize)]
@@ -372,7 +380,14 @@ async fn handle_ollama_models(State(state): State<AppState>) -> Json<OllamaModel
reachable: true,
active_model: catalog.active,
selected_model,
-            models: catalog.models,
+            models: catalog
+                .models
+                .into_iter()
+                .map(|m| OllamaModelDto {
+                    name: m.name,
+                    kind: m.kind.as_str(),
+                })
+                .collect(),
}),
Err(_) => Json(OllamaModelsResponse {
reachable: false,
@@ -436,17 +451,21 @@ async fn handle_ollama_model_put(
.map(|m| m.trim().to_string())
.filter(|m| !m.is_empty());

let mut selected_kind: Option<ollama_service::ModelKind> = None;
if let Some(ref model) = normalized {
let catalog = ollama_service::model_catalog(3000)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, Json(ErrorResponse { error: e })))?;
-        if !catalog.models.iter().any(|m| m == model) {
-            return Err((
-                StatusCode::BAD_REQUEST,
-                Json(ErrorResponse {
-                    error: format!("model '{model}' is not available in Ollama"),
-                }),
-            ));
-        }
+        match catalog.models.iter().find(|m| &m.name == model) {
+            Some(m) => selected_kind = Some(m.kind),
+            None => {
+                return Err((
+                    StatusCode::BAD_REQUEST,
+                    Json(ErrorResponse {
+                        error: format!("model '{model}' is not available in Ollama"),
+                    }),
+                ));
+            }
+        }
}

@@ -455,6 +474,11 @@
*lock = normalized.clone();
}

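    // Remember the freshest local pick so the agent's cloud fallback
    // (`chat_with_cloud_fallback` in `bot/agent.rs`) has a model to return to.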
if let (Some(name), Some(ollama_service::ModelKind::Local)) = (&normalized, selected_kind) {
let mut last = state.last_local_model.write().await;
*last = Some(name.clone());
}

state
.emit_log(
"run",
69 changes: 65 additions & 4 deletions src-tauri/src/modules/bot/agent.rs
@@ -87,6 +87,59 @@ fn chat_options_for_agent_step(
/// not truncated before sending to the user.
const TOOL_OUTPUT_CHAR_CAP: usize = 4000;

/// Run a chat call; if the request goes to a cloud model and the daemon
/// returns a rate-limit error, downgrade to the user's last local model and
/// retry once. The downgraded model is also written back to
/// `preferred_ollama_model` so the rest of the turn (and future turns) stay
/// local until the user picks again.
async fn chat_with_cloud_fallback(
state: &AppState,
model: &mut String,
messages: &serde_json::Value,
tools: &serde_json::Value,
options: &ChatOptions,
) -> Result<ollama::ChatResult, String> {
match ollama::chat_with_tools(model, messages, tools, options).await {
Ok(r) => Ok(r),
Err(err) => {
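            // Only intercept rate-limit/unavailable errors on cloud models;
            // any other failure propagates to the caller unchanged.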
if ollama::classify_model(model) != ollama::ModelKind::Cloud
|| !ollama::is_cloud_unavailable_error(&err)
{
return Err(err);
}
let last_local = state.last_local_model.read().await.clone();
let catalog = ollama::model_catalog(3000).await.ok();
let fallback = catalog
.as_ref()
.and_then(|c| ollama::pick_local_fallback(c, None, last_local.as_deref()));
let Some(local) = fallback else {
state
.emit_log(
"ollama",
&format!("cloud '{model}' unavailable ({err}); no local fallback"),
)
.await;
return Err(err);
};
if local == *model {
return Err(err);
}
state
.emit_log(
"ollama",
&format!("cloud '{model}' unavailable, switching to local '{local}': {err}"),
)
.await;
{
let mut pref = state.preferred_ollama_model.write().await;
*pref = Some(local.clone());
}
*model = local;
ollama::chat_with_tools(model, messages, tools, options).await
}
}
}

fn tool_name_is_fetch(name: &str) -> bool {
name.eq_ignore_ascii_case("fetch")
|| name
@@ -645,7 +698,7 @@ async fn run_model_turn(
think: bool,
skills_slug_filter: Option<&[String]>,
) -> Result<TurnResult, String> {
-    let model = match state.preferred_ollama_model.read().await.clone() {
+    let mut model = match state.preferred_ollama_model.read().await.clone() {
Some(m) => m,
None => ollama::active_model().await?,
};
@@ -745,7 +798,9 @@
push_ephemeral_post_tool_reminder(&mut messages);
}

-        let result = ollama::chat_with_tools(&model, &messages, effective_tools, &chat_opts).await;
+        let result =
+            chat_with_cloud_fallback(state, &mut model, &messages, effective_tools, &chat_opts)
+                .await;
if inject_post_tool {
pop_ephemeral_post_tool_reminder(&mut messages);
}
@@ -991,8 +1046,14 @@
..ChatOptions::default()
};
let t0 = Instant::now();
-        let result =
-            ollama::chat_with_tools(&model, &summary_messages, &json!([]), &summary_opts).await?;
+        let result = chat_with_cloud_fallback(
+            state,
+            &mut model,
+            &summary_messages,
+            &json!([]),
+            &summary_opts,
+        )
+        .await?;
let tokens = fmt_tokens(result.prompt_tokens, result.eval_tokens);
state
.emit_log(
153 changes: 153 additions & 0 deletions src-tauri/src/modules/ollama/cloud.rs
@@ -0,0 +1,153 @@
//! Cloud model discovery from ollama.com.
//!
//! The local Ollama daemon's `/api/tags` only lists models that have been
//! pulled to disk. Cloud models (accessible after `ollama signin`) live in
//! the upstream catalog at `https://ollama.com/library/<slug>` and use
//! `:cloud` or `<size>-cloud` tags. This module enumerates them by scraping
//! the cloud category page and each model's detail page, then caches the
//! result so the dashboard picker can show them without re-fetching every
//! few seconds.

use futures::stream::{self, StreamExt};
use regex::Regex;
use std::sync::OnceLock;
use std::time::{Duration, Instant};
use tokio::sync::Mutex;

const CLOUD_SEARCH_URL: &str = "https://ollama.com/search?c=cloud";
const CLOUD_LIBRARY_PREFIX: &str = "https://ollama.com/library/";
/// Stale cloud catalog is fine — Ollama publishes new cloud tags rarely. One
/// hour keeps the dashboard responsive and avoids hammering ollama.com.
const CACHE_TTL: Duration = Duration::from_secs(60 * 60);
const SEARCH_TIMEOUT: Duration = Duration::from_secs(5);
const DETAIL_TIMEOUT: Duration = Duration::from_secs(4);
/// Cap parallel detail-page fetches to avoid bursting ollama.com.
const CLOUD_DETAIL_CONCURRENCY: usize = 8;

struct CacheEntry {
fetched_at: Instant,
models: Vec<String>,
}

static CACHE: OnceLock<Mutex<Option<CacheEntry>>> = OnceLock::new();
static SLUG_RE: OnceLock<Regex> = OnceLock::new();
static RUN_RE: OnceLock<Regex> = OnceLock::new();

fn cache() -> &'static Mutex<Option<CacheEntry>> {
CACHE.get_or_init(|| Mutex::new(None))
}

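/// Matches library links on the cloud search page, e.g.
/// `href="/library/gpt-oss"` captures `gpt-oss`.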
fn slug_re() -> &'static Regex {
SLUG_RE.get_or_init(|| Regex::new(r#"href="/library/([a-z0-9._-]+)""#).unwrap())
}

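/// Matches the copy-paste commands on a model's detail page, e.g.
/// `ollama run qwen3-coder:480b-cloud` captures `qwen3-coder:480b-cloud`.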
fn run_re() -> &'static Regex {
RUN_RE.get_or_init(|| {
Regex::new(r#"ollama\s+(?:run|pull)\s+([a-z0-9._-]+(?::[a-z0-9._-]+)?)"#).unwrap()
})
}

/// Returns cloud-tagged model names (e.g. `glm-4.6:cloud`,
/// `qwen3-coder:480b-cloud`). Falls back to a stale cache, then to an empty
/// list, when the upstream catalog is unreachable.
pub async fn list_cloud_models() -> Vec<String> {
{
let guard = cache().lock().await;
if let Some(ref entry) = *guard {
if entry.fetched_at.elapsed() < CACHE_TTL {
return entry.models.clone();
}
}
}
match fetch_cloud_models().await {
Ok(models) => {
let mut guard = cache().lock().await;
*guard = Some(CacheEntry {
fetched_at: Instant::now(),
models: models.clone(),
});
models
}
Err(e) => {
log::warn!("ollama cloud catalog fetch failed: {e}");
cache()
.lock()
.await
.as_ref()
.map(|c| c.models.clone())
.unwrap_or_default()
}
}
}

async fn fetch_cloud_models() -> Result<Vec<String>, String> {
let client = reqwest::Client::builder()
.timeout(DETAIL_TIMEOUT)
.user_agent("pengine/1.0")
.build()
.map_err(|e| e.to_string())?;
let body = client
.get(CLOUD_SEARCH_URL)
.timeout(SEARCH_TIMEOUT)
.send()
.await
.map_err(|e| e.to_string())?
.text()
.await
.map_err(|e| e.to_string())?;
let mut slugs: Vec<String> = slug_re()
.captures_iter(&body)
.map(|c| c[1].to_string())
.collect();
slugs.sort();
slugs.dedup();
if slugs.is_empty() {
return Err("no cloud slugs found in /search?c=cloud".to_string());
}

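    // Second pass: fetch each slug's detail page with bounded concurrency and
    // harvest cloud-tagged names from its `ollama run`/`ollama pull` snippets.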
let results: Vec<Result<Vec<String>, String>> = stream::iter(slugs)
.map(|slug| {
let client = client.clone();
async move { cloud_models_for_slug(&client, &slug).await }
})
.buffer_unordered(CLOUD_DETAIL_CONCURRENCY)
.collect()
.await;
let mut out: Vec<String> = results
.into_iter()
.filter_map(Result::ok)
.flatten()
.collect();
out.sort();
out.dedup();
if out.is_empty() {
return Err("cloud catalog detail scrape returned no model names".to_string());
}
Ok(out)
}

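/// Scrapes `https://ollama.com/library/<slug>` and keeps only model names
/// whose tag is `cloud` or ends in `-cloud` (e.g. `glm-4.6:cloud`).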
async fn cloud_models_for_slug(
client: &reqwest::Client,
slug: &str,
) -> Result<Vec<String>, String> {
let url = format!("{CLOUD_LIBRARY_PREFIX}{slug}");
let body = client
.get(&url)
.send()
.await
.map_err(|e| e.to_string())?
.text()
.await
.map_err(|e| e.to_string())?;
let mut out = Vec::new();
for cap in run_re().captures_iter(&body) {
let name = &cap[1];
let tag = name.split_once(':').map(|(_, t)| t).unwrap_or("");
if tag == "cloud" || tag.ends_with("-cloud") {
out.push(name.to_string());
}
}
out.sort();
out.dedup();
Ok(out)
}
1 change: 1 addition & 0 deletions src-tauri/src/modules/ollama/mod.rs
@@ -1,3 +1,4 @@
pub mod cloud;
pub mod constants;
pub mod keywords;
pub mod service;