From 951b18f0146d0fdcf2b28ce3e5f1cd0ba50dee94 Mon Sep 17 00:00:00 2001 From: Marenz Date: Sat, 21 Feb 2026 11:54:15 +0100 Subject: [PATCH 1/4] Add local Whisper STT backend via whisper-rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When routing.voice = "whisper-local://<spec>", audio attachments are transcribed locally instead of via the LLM provider HTTP path. <spec> is either: - A known size name (tiny/base/small/medium/large) — fetched from ggerganov/whisper.cpp on HuggingFace via hf-hub, using the existing HF cache if already present - An absolute path to a GGML model file The WhisperContext is loaded once and cached in a OnceLock for the process lifetime. Audio decoding (ogg, opus, mp3, flac, wav, m4a) is handled by symphonia with linear resampling to 16 kHz mono f32. All three deps (whisper-rs, hf-hub, symphonia) are optional behind the stt-whisper feature flag. --- Cargo.lock | 1390 +++++++++++++++--------------------------- Cargo.toml | 4 + src/agent/channel.rs | 1233 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 + src/stt.rs | 275 +++++++++ 5 files changed, 1995 insertions(+), 909 deletions(-) create mode 100644 src/stt.rs diff --git a/Cargo.lock b/Cargo.lock index 9be359324..a41956207 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -241,18 +241,6 @@ dependencies = [ "syn 2.0.114", ] -[[package]] -name = "argon2" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072" -dependencies = [ - "base64ct", - "blake2", - "cpufeatures", - "password-hash", -] - [[package]] name = "arraydeque" version = "0.5.1" @@ -265,12 +253,6 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - [[package]] name = "arrayvec" version = "0.7.6" @@ -363,7 +345,7 @@ dependencies = [ "chrono", "comfy-table", "half", - "lexical-core 1.0.6", + "lexical-core", "num-traits", "ryu", ] @@ -427,7 +409,7 @@ dependencies = [ "half", "indexmap 2.13.0", "itoa", - "lexical-core 1.0.6", + "lexical-core", "memchr", "num-traits", "ryu", @@ -654,11 +636,11 @@ dependencies = [ "aligned", "anyhow", "arg_enum_proc_macro", - "arrayvec 0.7.6", + "arrayvec", "log", "num-rational", "num-traits", - "pastey 0.1.1", + "pastey", "rayon", "thiserror 2.0.18", "v_frame", @@ -672,7 +654,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8cfddb07216410377231960af4fcab838eaa12e013417781b78bd95ee22077f8" dependencies = [ "anyhow", - "arrayvec 0.7.6", + "arrayvec", "log", "nom 8.0.0", "num-rational", @@ -685,7 +667,7 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "375082f007bd67184fb9c0374614b29f9aaa604ec301635f72338bb65386a53d" dependencies = [ - "arrayvec 0.7.6", + "arrayvec", ] [[package]] @@ -720,10 +702,10 @@ dependencies = [ "bytes", "form_urlencoded", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-util", "itoa", "matchit", @@ -736,7 +718,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", "tower", "tower-layer", @@ -752,12 +734,12 @@ checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "mime", "pin-project-lite", - "sync_wrapper 1.0.2", + "sync_wrapper", "tower-layer", "tower-service", "tracing", @@ -769,12 +751,6 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ -800,6 +776,26 @@ dependencies = [ "num-traits", ] +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags 2.10.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.114", +] + [[package]] name = "bit_field" version = "0.10.3" @@ -867,7 +863,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", - "arrayvec 0.7.6", + "arrayvec", "cc", "cfg-if", "constant_time_eq 0.4.2", @@ -913,9 +909,9 @@ dependencies = [ "futures-core", "futures-util", "hex", - "http 1.4.0", + "http", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-named-pipe", "hyper-util", "hyperlocal", @@ -961,7 +957,7 @@ version = "3.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365" dependencies = [ - "darling 0.23.0", + "darling 0.21.3", "ident_case", "prettyplease", "proc-macro2", @@ -1001,12 +997,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bufstream" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40e38929add23cdf8a366df9b0e088953150724bcbe5fc330b0d8eb3b328eec8" - [[package]] name = "built" version = "0.8.0" @@ -1136,10 +1126,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" [[package]] -name = "cesu8" -version = "1.1.0" +name = "cexpr" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom 7.1.3", +] [[package]] name = "cff-parser" @@ -1159,16 +1152,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" -[[package]] -name = "charset" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1f927b07c74ba84c7e5fe4db2baeb3e996ab2688992e39ac68ce3220a677c7e" -dependencies = [ - "base64 0.22.1", - "encoding_rs", -] - [[package]] name = "chromiumoxide" version = "0.8.0" @@ -1179,14 +1162,13 @@ dependencies = [ "base64 0.22.1", "cfg-if", "chromiumoxide_cdp", - "chromiumoxide_fetcher", "chromiumoxide_types", "dunce", "fnv", "futures", "futures-timer", "pin-project-lite", - "reqwest 0.12.28", + "reqwest", "serde", "serde_json", "thiserror 1.0.69", @@ -1209,23 +1191,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "chromiumoxide_fetcher" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e39b54dfcb6973284f55cf3639d44e84d23feed4e2e7d1faa4a9029a365737" -dependencies = [ - "anyhow", - "directories", - "reqwest 0.12.28", - "serde", - "thiserror 1.0.69", - "tokio", - "tracing", - "windows-version", - "zip 0.6.6", -] - [[package]] name = "chromiumoxide_pdl" version = "0.8.0" @@ -1274,17 +1239,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" dependencies = [ "chrono", - "phf 0.12.1", -] - -[[package]] -name = "chumsky" -version = "0.9.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" -dependencies = [ - "hashbrown 0.14.5", - "stacker", + "phf", ] [[package]] @@ -1297,6 +1252,17 @@ dependencies = [ "inout", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.5.58" @@ -1367,16 +1333,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "combine" -version = "4.6.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" -dependencies = [ - "bytes", - "memchr", -] - [[package]] name = "comfy-table" version = "7.2.2" @@ -1461,6 +1417,19 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "console" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.61.2", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -1517,6 +1486,35 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "cookie" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" +dependencies = [ + "percent-encoding", + "time", + "version_check", +] + +[[package]] +name = "cookie_store" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15b2c103cf610ec6cae3da84a766285b42fd16aad564758459e6ecf128c75206" +dependencies = [ + "cookie", + "document-features", + "idna", + "indexmap 2.13.0", + "log", + "serde", + "serde_derive", + 
"serde_json", + "time", + "url", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -1585,17 +1583,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "cron" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f8c3e73077b4b4a6ab1ea5047c37c57aee77657bc8ecd6f29b0af082d0b0c07" -dependencies = [ - "chrono", - "nom 7.1.3", - "once_cell", -] - [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -1693,7 +1680,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73736a89c4aff73035ba2ed2e565061954da00d4970fc9ac25dcc85a2a20d790" dependencies = [ "dispatch2", - "nix 0.30.1", + "nix", "windows-sys 0.61.2", ] @@ -1726,16 +1713,6 @@ dependencies = [ "darling_macro 0.21.3", ] -[[package]] -name = "darling" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" -dependencies = [ - "darling_core 0.23.0", - "darling_macro 0.23.0", -] - [[package]] name = "darling_core" version = "0.20.11" @@ -1764,19 +1741,6 @@ dependencies = [ "syn 2.0.114", ] -[[package]] -name = "darling_core" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" -dependencies = [ - "ident_case", - "proc-macro2", - "quote", - "strsim 0.11.1", - "syn 2.0.114", -] - [[package]] name = "darling_macro" version = "0.20.11" @@ -1799,17 +1763,6 @@ dependencies = [ "syn 2.0.114", ] -[[package]] -name = "darling_macro" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" -dependencies = [ - "darling_core 0.23.0", - "quote", - "syn 2.0.114", -] - [[package]] name = "dary_heap" version = "0.3.8" @@ -2498,7 +2451,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "eddada51c8576df9d6a8450c351ff63042b092c9458b8ac7d20f89cbd0ffd313" dependencies = [ - "arrayvec 0.7.6", + "arrayvec", "proc-macro2", "quote", "strsim 0.10.0", @@ -2610,7 +2563,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "658bce805d770f407bc62102fca7c2c64ceef2fbcb2b8bd19d2765ce093980de" dependencies = [ - "console", + "console 0.15.11", "shell-words", "tempfile", "thiserror 1.0.69", @@ -2629,15 +2582,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "directories" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f5094c54661b38d03bd7e50df373292118db60b585c08a411c6d840017fe7d" -dependencies = [ - "dirs-sys", -] - [[package]] name = "dirs" version = "6.0.0" @@ -2691,6 +2635,15 @@ dependencies = [ "const-random", ] +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + [[package]] name = "dotenvy" version = "0.15.7" @@ -2753,31 +2706,6 @@ dependencies = [ "serde", ] -[[package]] -name = "email-encoding" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9298e6504d9b9e780ed3f7dfd43a61be8cd0e09eb07f7706a945b0072b6670b6" -dependencies = [ - "base64 0.22.1", - "memchr", -] - -[[package]] -name = "email_address" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e079f19b08ca6239f47f8ba8509c11cf3ea30095831f7fed61441475edd8c449" - -[[package]] -name = "emojis" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c1c1870b766fc398e5f0526498d09c94b6de15be5fd769a28bbc804fb1b05d" -dependencies = [ - "phf 0.13.1", -] - [[package]] name = "encode_unicode" version = "1.0.0" @@ -2956,6 +2884,12 @@ dependencies = [ "zune-inflate", ] +[[package]] 
+name = "extended" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af9673d8203fcb076b19dfd17e38b3d4ae9f44959416ea532ce72415a6020365" + [[package]] name = "fast-float2" version = "0.2.3" @@ -2975,7 +2909,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04c269a76bfc6cea69553b7d040acb16c793119cebd97c756d21e08d0f075ff8" dependencies = [ "anyhow", - "hf-hub", + "hf-hub 0.4.3", "image", "ndarray", "ort", @@ -3509,25 +3443,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "h2" -version = "0.3.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.12", - "indexmap 2.13.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "h2" version = "0.4.13" @@ -3539,7 +3454,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.4.0", + "http", "indexmap 2.13.0", "slab", "tokio", @@ -3651,20 +3566,44 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" dependencies = [ "dirs", - "http 1.4.0", - "indicatif", + "http", + "indicatif 0.17.11", "libc", "log", "native-tls", "rand 0.9.2", - "reqwest 0.12.28", + "reqwest", "serde", "serde_json", "thiserror 2.0.18", - "ureq", + "ureq 2.12.1", "windows-sys 0.60.2", ] +[[package]] +name = "hf-hub" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef3982638978efa195ff11b305f51f1f22f4f0a6cabee7af79b383ebee6a213" +dependencies = [ + "dirs", + "futures", + "http", + "indicatif 0.18.4", + "libc", + "log", + "native-tls", + "num_cpus", + "rand 0.9.2", + "reqwest", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "ureq 3.2.0", + "windows-sys 0.61.2", +] + [[package]] name = 
"hkdf" version = "0.12.4" @@ -3692,34 +3631,12 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "hostname" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "617aaa3557aef3810a6369d0a99fac8a080891b68bd9f9812a1eeda0c0730cbd" -dependencies = [ - "cfg-if", - "libc", - "windows-link 0.2.1", -] - [[package]] name = "htmlescape" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - [[package]] name = "http" version = "1.4.0" @@ -3730,17 +3647,6 @@ dependencies = [ "itoa", ] -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http 0.2.12", - "pin-project-lite", -] - [[package]] name = "http-body" version = "1.0.1" @@ -3748,7 +3654,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.4.0", + "http", ] [[package]] @@ -3759,8 +3665,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "pin-project-lite", ] @@ -3790,47 +3696,23 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hyper" -version = "0.14.32" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +checksum = 
"2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ + "atomic-waker", "bytes", "futures-channel", "futures-core", - "futures-util", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", + "h2", + "http", + "http-body", "httparse", "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.10", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "hyper" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" -dependencies = [ - "atomic-waker", - "bytes", - "futures-channel", - "futures-core", - "h2 0.4.13", - "http 1.4.0", - "http-body 1.0.1", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "pin-utils", - "smallvec", + "pin-utils", + "smallvec", "tokio", "want", ] @@ -3842,7 +3724,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" dependencies = [ "hex", - "hyper 1.8.1", + "hyper", "hyper-util", "pin-project-lite", "tokio", @@ -3850,28 +3732,14 @@ dependencies = [ "winapi", ] -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.12", - "hyper 0.14.32", - "rustls 0.21.12", - "tokio", - "tokio-rustls 0.24.1", -] - [[package]] name = "hyper-rustls" version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http 1.4.0", - "hyper 1.8.1", + "http", + "hyper", "hyper-util", "log", "rustls 0.23.36", @@ -3891,7 +3759,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-util", 
"native-tls", "tokio", @@ -3909,15 +3777,15 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "hyper 1.8.1", + "http", + "http-body", + "hyper", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.2", - "system-configuration 0.7.0", + "socket2", + "system-configuration", "tokio", "tower-service", "tracing", @@ -3932,7 +3800,7 @@ checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" dependencies = [ "hex", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-util", "pin-project-lite", "tokio", @@ -4185,31 +4053,6 @@ dependencies = [ "quick-error", ] -[[package]] -name = "imap" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c617c55def8c42129e0dd503f11d7ee39d73f5c7e01eff55768b3879ff1d107d" -dependencies = [ - "base64 0.13.1", - "bufstream", - "chrono", - "imap-proto", - "lazy_static", - "native-tls", - "nom 5.1.3", - "regex", -] - -[[package]] -name = "imap-proto" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16a6def1d5ac8975d70b3fd101d57953fe3278ef2ee5d7816cba54b1d1dfc22f" -dependencies = [ - "nom 5.1.3", -] - [[package]] name = "imgref" version = "1.12.0" @@ -4264,13 +4107,26 @@ version = "0.17.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" dependencies = [ - "console", + "console 0.15.11", "number_prefix", "portable-atomic", "unicode-width", "web-time", ] +[[package]] +name = "indicatif" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" +dependencies = [ + "console 0.16.2", + "portable-atomic", + "unicode-width", + "unit-prefix", + "web-time", +] + [[package]] name = "indoc" version = "2.0.7" @@ -4454,28 +4310,6 @@ dependencies = [ "jiff-tzdb", ] 
-[[package]] -name = "jni" -version = "0.21.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" -dependencies = [ - "cesu8", - "cfg-if", - "combine", - "jni-sys", - "log", - "thiserror 1.0.69", - "walkdir", - "windows-sys 0.45.0", -] - -[[package]] -name = "jni-sys" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" - [[package]] name = "jobserver" version = "0.1.34" @@ -4996,7 +4830,7 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2acdba67f84190067532fce07b51a435dd390d7cdc1129a05003e5cb3274cf0" dependencies = [ - "reqwest 0.12.28", + "reqwest", "serde", "serde_json", "serde_repr", @@ -5136,53 +4970,12 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8" -[[package]] -name = "lettre" -version = "0.11.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e13e10e8818f8b2a60f52cb127041d388b89f3a96a62be9ceaffa22262fef7f" -dependencies = [ - "async-trait", - "base64 0.22.1", - "chumsky", - "email-encoding", - "email_address", - "fastrand", - "futures-io", - "futures-util", - "hostname", - "httpdate", - "idna", - "mime", - "native-tls", - "nom 8.0.0", - "percent-encoding", - "quoted_printable", - "socket2 0.6.2", - "tokio", - "tokio-native-tls", - "url", -] - [[package]] name = "levenshtein_automata" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" -[[package]] -name = "lexical-core" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" -dependencies = [ - 
"arrayvec 0.5.2", - "bitflags 1.3.2", - "cfg-if", - "ryu", - "static_assertions", -] - [[package]] name = "lexical-core" version = "1.0.6" @@ -5242,9 +5035,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.180" +version = "0.2.181" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5" [[package]] name = "libfuzzer-sys" @@ -5256,6 +5049,16 @@ dependencies = [ "cc", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] + [[package]] name = "libm" version = "0.2.16" @@ -5302,6 +5105,12 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + [[package]] name = "lock_api" version = "0.4.14" @@ -5453,17 +5262,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" -[[package]] -name = "mailparse" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60819a97ddcb831a5614eb3b0174f3620e793e97e09195a395bfa948fd68ed2f" -dependencies = [ - "charset", - "data-encoding", - "quoted_printable", -] - [[package]] name = "matchers" version = "0.2.0" @@ -5671,7 +5469,7 @@ dependencies = [ "bytes", "encoding_rs", "futures-util", - "http 1.4.0", + "http", "httparse", "memchr", "mime", @@ -5750,29 +5548,6 @@ dependencies = [ "libc", ] -[[package]] -name = 
"nix" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66" -dependencies = [ - "bitflags 2.10.0", - "cfg-if", - "cfg_aliases", - "libc", -] - -[[package]] -name = "nom" -version = "5.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b" -dependencies = [ - "lexical-core 0.7.6", - "memchr", - "version_check", -] - [[package]] name = "nom" version = "7.1.3" @@ -6011,7 +5786,7 @@ dependencies = [ "bytes", "chrono", "futures", - "http 1.4.0", + "http", "humantime", "itertools 0.14.0", "parking_lot", @@ -6154,9 +5929,9 @@ checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed" dependencies = [ "async-trait", "bytes", - "http 1.4.0", + "http", "opentelemetry", - "reqwest 0.12.28", + "reqwest", "tracing", ] @@ -6167,13 +5942,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656" dependencies = [ "futures-core", - "http 1.4.0", + "http", "opentelemetry", "opentelemetry-http", "opentelemetry-proto", "opentelemetry_sdk", "prost 0.13.5", - "reqwest 0.12.28", + "reqwest", "thiserror 2.0.18", ] @@ -6261,7 +6036,7 @@ dependencies = [ "pkg-config", "sha2", "tar", - "ureq", + "ureq 2.12.1", ] [[package]] @@ -6302,17 +6077,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "password-hash" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" -dependencies = [ - "base64ct", - "rand_core 0.6.4", - "subtle", -] - [[package]] name = "paste" version = "1.0.15" @@ -6325,12 +6089,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec" 
-[[package]] -name = "pastey" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b867cad97c0791bbd3aaa6472142568c6c9e8f71937e98379f584cfb0cf35bec" - [[package]] name = "path_abs" version = "0.5.1" @@ -6458,16 +6216,7 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" dependencies = [ - "phf_shared 0.12.1", -] - -[[package]] -name = "phf" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" -dependencies = [ - "phf_shared 0.13.1", + "phf_shared", ] [[package]] @@ -6479,15 +6228,6 @@ dependencies = [ "siphasher", ] -[[package]] -name = "phf_shared" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" -dependencies = [ - "siphasher", -] - [[package]] name = "pin-project" version = "1.1.10" @@ -6682,20 +6422,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "process-wrap" -version = "9.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccd9713fe2c91c3c85ac388b31b89de339365d2c995146e630b5e0da9d06526a" -dependencies = [ - "futures", - "indexmap 2.13.0", - "nix 0.31.1", - "tokio", - "tracing", - "windows", -] - [[package]] name = "profiling" version = "1.0.17" @@ -6868,7 +6594,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.36", - "socket2 0.6.2", + "socket2", "thiserror 2.0.18", "tokio", "tracing", @@ -6881,7 +6607,6 @@ version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ - "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", @@ -6906,7 +6631,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 
0.6.2", + "socket2", "tracing", "windows-sys 0.60.2", ] @@ -6920,12 +6645,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "quoted_printable" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "640c9bd8497b02465aeef5375144c26062e0dcd5939dfcbb0f5db76cb8c17c73" - [[package]] name = "r-efi" version = "5.3.0" @@ -7054,7 +6773,7 @@ dependencies = [ "aligned-vec", "arbitrary", "arg_enum_proc_macro", - "arrayvec 0.7.6", + "arrayvec", "av-scenechange", "av1-grain", "bitstream-io", @@ -7234,47 +6953,6 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" -[[package]] -name = "reqwest" -version = "0.11.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" -dependencies = [ - "base64 0.21.7", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.32", - "hyper-rustls 0.24.2", - "ipnet", - "js-sys", - "log", - "mime", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls 0.21.12", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper 0.1.2", - "system-configuration 0.5.1", - "tokio", - "tokio-rustls 0.24.1", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "webpki-roots 0.25.4", - "winreg", -] - [[package]] name = "reqwest" version = "0.12.28" @@ -7287,12 +6965,12 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.4.13", - "http 1.4.0", - "http-body 1.0.1", + "h2", + "http", + "http-body", "http-body-util", - "hyper 1.8.1", - "hyper-rustls 0.27.7", + "hyper", + "hyper-rustls", "hyper-tls", "hyper-util", "js-sys", @@ -7309,7 +6987,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 1.0.2", + "sync_wrapper", 
"tokio", "tokio-native-tls", "tokio-rustls 0.26.4", @@ -7320,51 +6998,11 @@ dependencies = [ "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams 0.4.2", + "wasm-streams", "web-sys", "webpki-roots 1.0.6", ] -[[package]] -name = "reqwest" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" -dependencies = [ - "base64 0.22.1", - "bytes", - "futures-core", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "hyper 1.8.1", - "hyper-rustls 0.27.7", - "hyper-util", - "js-sys", - "log", - "percent-encoding", - "pin-project-lite", - "quinn", - "rustls 0.23.36", - "rustls-pki-types", - "rustls-platform-verifier", - "serde", - "serde_json", - "sync_wrapper 1.0.2", - "tokio", - "tokio-rustls 0.26.4", - "tokio-util", - "tower", - "tower-http", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-streams 0.5.0", - "web-sys", -] - [[package]] name = "rgb" version = "0.8.52" @@ -7389,13 +7027,13 @@ dependencies = [ "futures", "futures-timer", "glob", - "http 1.4.0", + "http", "mime", "mime_guess", "nanoid", "ordered-float", "pin-project-lite", - "reqwest 0.12.28", + "reqwest", "rig-derive", "schemars 1.2.1", "serde", @@ -7437,50 +7075,10 @@ dependencies = [ ] [[package]] -name = "rmcp" -version = "0.16.0" +name = "roaring" +version = "0.10.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc4c9c94680f75470ee8083a0667988b5d7b5beb70b9f998a8e51de7c682ce60" -dependencies = [ - "async-trait", - "base64 0.22.1", - "chrono", - "futures", - "http 1.4.0", - "pastey 0.2.1", - "pin-project-lite", - "process-wrap", - "reqwest 0.13.2", - "rmcp-macros", - "schemars 1.2.1", - "serde", - "serde_json", - "sse-stream", - "thiserror 2.0.18", - "tokio", - "tokio-stream", - "tokio-util", - "tracing", -] - -[[package]] -name = "rmcp-macros" -version = "0.16.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "90c23c8f26cae4da838fbc3eadfaecf2d549d97c04b558e7bd90526a9c28b42a" -dependencies = [ - "darling 0.23.0", - "proc-macro2", - "quote", - "serde_json", - "syn 2.0.114", -] - -[[package]] -name = "roaring" -version = "0.10.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" +checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" dependencies = [ "bytemuck", "byteorder", @@ -7727,15 +7325,6 @@ dependencies = [ "security-framework 3.5.1", ] -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -7746,33 +7335,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "rustls-platform-verifier" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" -dependencies = [ - "core-foundation 0.10.1", - "core-foundation-sys", - "jni", - "log", - "once_cell", - "rustls 0.23.36", - "rustls-native-certs", - "rustls-platform-verifier-android", - "rustls-webpki 0.103.9", - "security-framework 3.5.1", - "security-framework-sys", - "webpki-root-certs", - "windows-sys 0.61.2", -] - -[[package]] -name = "rustls-platform-verifier-android" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" - [[package]] name = "rustls-webpki" version = "0.101.7" @@ -7886,7 +7448,6 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ - 
"chrono", "dyn-clone", "ref-cast", "schemars_derive 1.2.1", @@ -8166,7 +7727,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bde37f42765dfdc34e2a039e0c84afbf79a3101c1941763b0beb816c2f17541" dependencies = [ - "arrayvec 0.7.6", + "arrayvec", "async-trait", "base64 0.22.1", "bitflags 2.10.0", @@ -8178,7 +7739,7 @@ dependencies = [ "mime_guess", "parking_lot", "percent-encoding", - "reqwest 0.12.28", + "reqwest", "rustc-hash", "secrecy", "serde", @@ -8362,10 +7923,10 @@ dependencies = [ "futures-util", "hex", "hmac", - "http 1.4.0", + "http", "http-body-util", - "hyper 1.8.1", - "hyper-rustls 0.27.7", + "hyper", + "hyper-rustls", "hyper-util", "lazy_static", "mime", @@ -8417,16 +7978,6 @@ dependencies = [ "syn 2.0.114", ] -[[package]] -name = "socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - [[package]] name = "socket2" version = "0.6.2" @@ -8450,12 +8001,11 @@ dependencies = [ [[package]] name = "spacebot" -version = "0.2.2" +version = "0.1.12" dependencies = [ "aes-gcm", "anyhow", "arc-swap", - "argon2", "arrow-array", "arrow-schema", "async-stream", @@ -8466,30 +8016,21 @@ dependencies = [ "chromiumoxide", "chromiumoxide_cdp", "chrono", - "chrono-tz", "clap", "config", - "cron", "daemonize", "dialoguer", "dirs", - "emojis", "fastembed", - "flate2", "futures", - "hex", + "hf-hub 0.5.0", "ignore", - "imap", "indoc", "lance-index", "lancedb", - "lettre", "libc", - "mailparse", "mime_guess", "minijinja", - "moka", - "native-tls", "notify", "open", "opentelemetry", @@ -8502,13 +8043,11 @@ dependencies = [ "rand 0.9.2", "redb", "regex", - "reqwest 0.12.28", + "reqwest", "rig-core", - "rmcp", "rust-embed", "rustls 0.23.36", "schemars 0.8.22", - "security-framework 3.5.1", "semver", "serde", "serde_json", @@ -8516,13 +8055,13 @@ dependencies 
= [ "sha2", "slack-morphism", "sqlx", + "symphonia", "teloxide", "tempfile", "thiserror 2.0.18", "tokio", "tokio-stream", "tokio-test", - "tokio-tungstenite 0.28.0", "toml 0.8.23", "toml_edit 0.22.27", "tower-http", @@ -8533,7 +8072,8 @@ dependencies = [ "twitch-irc", "urlencoding", "uuid", - "zip 2.4.2", + "whisper-rs", + "zip", ] [[package]] @@ -8796,19 +8336,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "sse-stream" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb4dc4d33c68ec1f27d386b5610a351922656e1fdf5c05bbaad930cd1519479a" -dependencies = [ - "bytes", - "futures-util", - "http-body 1.0.1", - "http-body-util", - "pin-project-lite", -] - [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -8897,6 +8424,178 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "symphonia" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5773a4c030a19d9bfaa090f49746ff35c75dfddfa700df7a5939d5e076a57039" +dependencies = [ + "lazy_static", + "symphonia-bundle-flac", + "symphonia-bundle-mp3", + "symphonia-codec-aac", + "symphonia-codec-adpcm", + "symphonia-codec-pcm", + "symphonia-codec-vorbis", + "symphonia-core", + "symphonia-format-isomp4", + "symphonia-format-mkv", + "symphonia-format-ogg", + "symphonia-format-riff", + "symphonia-metadata", +] + +[[package]] +name = "symphonia-bundle-flac" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91565e180aea25d9b80a910c546802526ffd0072d0b8974e3ebe59b686c9976" +dependencies = [ + "log", + "symphonia-core", + "symphonia-metadata", + "symphonia-utils-xiph", +] + +[[package]] +name = "symphonia-bundle-mp3" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4872dd6bb56bf5eac799e3e957aa1981086c3e613b27e0ac23b176054f7c57ed" +dependencies = [ + "lazy_static", + "log", + "symphonia-core", + "symphonia-metadata", +] + +[[package]] +name = "symphonia-codec-aac" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c263845aa86881416849c1729a54c7f55164f8b96111dba59de46849e73a790" +dependencies = [ + "lazy_static", + "log", + "symphonia-core", +] + +[[package]] +name = "symphonia-codec-adpcm" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dddc50e2bbea4cfe027441eece77c46b9f319748605ab8f3443350129ddd07f" +dependencies = [ + "log", + "symphonia-core", +] + +[[package]] +name = "symphonia-codec-pcm" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e89d716c01541ad3ebe7c91ce4c8d38a7cf266a3f7b2f090b108fb0cb031d95" +dependencies = [ + "log", + "symphonia-core", +] + +[[package]] +name = "symphonia-codec-vorbis" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f025837c309cd69ffef572750b4a2257b59552c5399a5e49707cc5b1b85d1c73" +dependencies = [ + "log", + "symphonia-core", + "symphonia-utils-xiph", +] + +[[package]] +name = "symphonia-core" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea00cc4f79b7f6bb7ff87eddc065a1066f3a43fe1875979056672c9ef948c2af" +dependencies = [ + "arrayvec", + "bitflags 1.3.2", + "bytemuck", + "lazy_static", + "log", +] + +[[package]] +name = "symphonia-format-isomp4" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "243739585d11f81daf8dac8d9f3d18cc7898f6c09a259675fc364b382c30e0a5" +dependencies = [ + "encoding_rs", + "log", + "symphonia-core", + "symphonia-metadata", + "symphonia-utils-xiph", +] + +[[package]] +name = "symphonia-format-mkv" +version = "0.5.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "122d786d2c43a49beb6f397551b4a050d8229eaa54c7ddf9ee4b98899b8742d0" +dependencies = [ + "lazy_static", + "log", + "symphonia-core", + "symphonia-metadata", + "symphonia-utils-xiph", +] + +[[package]] +name = "symphonia-format-ogg" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b4955c67c1ed3aa8ae8428d04ca8397fbef6a19b2b051e73b5da8b1435639cb" +dependencies = [ + "log", + "symphonia-core", + "symphonia-metadata", + "symphonia-utils-xiph", +] + +[[package]] +name = "symphonia-format-riff" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d7c3df0e7d94efb68401d81906eae73c02b40d5ec1a141962c592d0f11a96f" +dependencies = [ + "extended", + "log", + "symphonia-core", + "symphonia-metadata", +] + +[[package]] +name = "symphonia-metadata" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36306ff42b9ffe6e5afc99d49e121e0bd62fe79b9db7b9681d48e29fa19e6b16" +dependencies = [ + "encoding_rs", + "lazy_static", + "log", + "symphonia-core", +] + +[[package]] +name = "symphonia-utils-xiph" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27c85ab799a338446b68eec77abf42e1a6f1bb490656e121c6e27bfbab9f16" +dependencies = [ + "symphonia-core", + "symphonia-metadata", +] + [[package]] name = "syn" version = "1.0.109" @@ -8919,12 +8618,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - [[package]] name = "sync_wrapper" version = "1.0.2" @@ -8945,17 +8638,6 @@ dependencies = [ "syn 2.0.114", ] -[[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation 0.9.4", - "system-configuration-sys 0.5.0", -] - [[package]] name = "system-configuration" version = "0.7.0" @@ -8964,17 +8646,7 @@ checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ "bitflags 2.10.0", "core-foundation 0.9.4", - "system-configuration-sys 0.6.0", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", + "system-configuration-sys", ] [[package]] @@ -9210,7 +8882,7 @@ dependencies = [ "once_cell", "pin-project", "rc-box", - "reqwest 0.12.28", + "reqwest", "rgb", "serde", "serde_json", @@ -9420,7 +9092,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.2", + "socket2", "tokio-macros", "tracing", "windows-sys 0.61.2", @@ -9649,8 +9321,8 @@ dependencies = [ "async-trait", "base64 0.22.1", "bytes", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "percent-encoding", "pin-project", @@ -9670,7 +9342,7 @@ dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", "tower-layer", "tower-service", @@ -9688,8 +9360,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "http-range-header", "httpdate", @@ -9849,7 +9521,7 @@ dependencies = [ "byteorder", "bytes", "data-encoding", - "http 1.4.0", + "http", "httparse", "log", "rand 0.8.5", @@ -9869,7 +9541,7 @@ checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442" dependencies = [ "bytes", "data-encoding", - "http 1.4.0", + "http", "httparse", "log", "rand 0.9.2", @@ -9892,8 +9564,6 @@ 
dependencies = [ "either", "enum_dispatch", "futures-util", - "reqwest 0.11.27", - "serde", "smallvec", "thiserror 1.0.69", "tokio", @@ -10041,6 +9711,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "unit-prefix" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" + [[package]] name = "universal-hash" version = "0.5.1" @@ -10077,6 +9753,42 @@ dependencies = [ "webpki-roots 0.26.11", ] +[[package]] +name = "ureq" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc" +dependencies = [ + "base64 0.22.1", + "cookie_store", + "der", + "flate2", + "log", + "native-tls", + "percent-encoding", + "rustls 0.23.36", + "rustls-pki-types", + "serde", + "serde_json", + "socks", + "ureq-proto", + "utf-8", + "webpki-root-certs", + "webpki-roots 1.0.6", +] + +[[package]] +name = "ureq-proto" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" +dependencies = [ + "base64 0.22.1", + "http", + "httparse", + "log", +] + [[package]] name = "url" version = "2.5.8" @@ -10304,19 +10016,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "wasm-streams" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "wasmparser" version = "0.244.0" @@ -10351,9 +10050,9 @@ dependencies = [ [[package]] name = "webpki-root-certs" -version = "1.0.5" +version = "1.0.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "36a29fc0408b113f68cf32637857ab740edfafdf460c326cd2afaa2d84cc05dc" +checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" dependencies = [ "rustls-pki-types", ] @@ -10399,6 +10098,27 @@ dependencies = [ "winsafe", ] +[[package]] +name = "whisper-rs" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71ea5d2401f30f51d08126a2d133fee4c1955136519d7ac6cf6f5ac0a91e6bc8" +dependencies = [ + "whisper-rs-sys", +] + +[[package]] +name = "whisper-rs-sys" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e2a6e06e7ac7b8f53c53a5f50bb0bc823ba69b63ecd887339f807a5598bbd2" +dependencies = [ + "bindgen", + "cfg-if", + "cmake", + "fs_extra", +] + [[package]] name = "whoami" version = "1.6.1" @@ -10440,27 +10160,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" -dependencies = [ - "windows-collections", - "windows-core", - "windows-future", - "windows-numerics", -] - -[[package]] -name = "windows-collections" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" -dependencies = [ - "windows-core", -] - [[package]] name = "windows-core" version = "0.62.2" @@ -10474,17 +10173,6 @@ dependencies = [ "windows-strings 0.5.1", ] -[[package]] -name = "windows-future" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" -dependencies = [ - "windows-core", - "windows-link 0.2.1", - 
"windows-threading", -] - [[package]] name = "windows-implement" version = "0.60.2" @@ -10519,16 +10207,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-numerics" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" -dependencies = [ - "windows-core", - "windows-link 0.2.1", -] - [[package]] name = "windows-registry" version = "0.5.3" @@ -10587,15 +10265,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -10641,21 +10310,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.48.5" @@ -10704,30 +10358,6 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] -[[package]] -name = "windows-threading" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" -dependencies = [ - "windows-link 0.2.1", -] - -[[package]] -name = "windows-version" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "e4060a1da109b9d0326b7262c8e12c84df67cc0dbc9e33cf49e01ccc2eb63631" -dependencies = [ - "windows-link 0.2.1", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -10746,12 +10376,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -10770,12 +10394,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -10806,12 +10424,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -10830,12 +10442,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" -[[package]] -name = "windows_x86_64_gnu" -version = 
"0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -10854,12 +10460,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -10878,12 +10478,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -10920,16 +10514,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "winsafe" version = "0.0.19" @@ -11223,18 +10807,6 @@ dependencies = [ "syn 2.0.114", ] -[[package]] -name = "zip" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" -dependencies = [ - "byteorder", - "crc32fast", - "crossbeam-utils", - "flate2", -] - [[package]] name = "zip" version = "2.4.2" diff --git a/Cargo.toml b/Cargo.toml index 62709453b..6834d0c2f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -150,12 +150,16 @@ 
tempfile = "3" # Prometheus metrics (optional, behind "metrics" feature) prometheus = { version = "0.13", optional = true } +whisper-rs = { version = "0.15", optional = true } +hf-hub = { version = "0.5", optional = true } +symphonia = { version = "0.5", features = ["mp3", "aac", "flac", "ogg", "wav", "isomp4"], optional = true } pdf-extract = "0.10.0" open = "5.3.3" urlencoding = "2.1.3" moka = "0.12.13" [features] +stt-whisper = ["dep:whisper-rs", "dep:hf-hub", "dep:symphonia"] metrics = ["dep:prometheus"] [lints.clippy] diff --git a/src/agent/channel.rs b/src/agent/channel.rs index 7ced52b81..d838a0d25 100644 --- a/src/agent/channel.rs +++ b/src/agent/channel.rs @@ -1793,3 +1793,1236 @@ impl Channel { } } } + + +/// Spawn a branch from a ChannelState. Used by the BranchTool. +pub async fn spawn_branch_from_state( + state: &ChannelState, + description: impl Into, +) -> std::result::Result { + let description = description.into(); + let rc = &state.deps.runtime_config; + let prompt_engine = rc.prompts.load(); + let system_prompt = prompt_engine + .render_branch_prompt( + &rc.instance_dir.display().to_string(), + &rc.workspace_dir.display().to_string(), + ) + .map_err(|e| AgentError::Other(anyhow::anyhow!("{e}")))?; + + spawn_branch( + state, + &description, + &description, + &system_prompt, + &description, + ) + .await +} + +/// Spawn a silent memory persistence branch. +/// +/// Uses the same branching infrastructure as regular branches but with a +/// dedicated prompt focused on memory recall + save. The result is not injected +/// into channel history — the channel handles these branch IDs specially. 
+async fn spawn_memory_persistence_branch( + state: &ChannelState, + deps: &AgentDeps, +) -> std::result::Result { + let prompt_engine = deps.runtime_config.prompts.load(); + let system_prompt = prompt_engine + .render_static("memory_persistence") + .map_err(|e| AgentError::Other(anyhow::anyhow!("{e}")))?; + let prompt = prompt_engine + .render_system_memory_persistence() + .map_err(|e| AgentError::Other(anyhow::anyhow!("{e}")))?; + + spawn_branch( + state, + "memory persistence", + &prompt, + &system_prompt, + "persisting memories...", + ) + .await +} + +/// Shared branch spawning logic. +/// +/// Checks the branch limit, clones history, creates a Branch, spawns it as +/// a tokio task, and registers it in the channel's active branches and status block. +async fn spawn_branch( + state: &ChannelState, + description: &str, + prompt: &str, + system_prompt: &str, + status_label: &str, +) -> std::result::Result { + let max_branches = **state.deps.runtime_config.max_concurrent_branches.load(); + { + let branches = state.active_branches.read().await; + if branches.len() >= max_branches { + return Err(AgentError::BranchLimitReached { + channel_id: state.channel_id.to_string(), + max: max_branches, + }); + } + } + + let history = { + let h = state.history.read().await; + h.clone() + }; + + let tool_server = crate::tools::create_branch_tool_server( + state.deps.memory_search.clone(), + state.conversation_logger.clone(), + state.channel_store.clone(), + ); + let branch_max_turns = **state.deps.runtime_config.branch_max_turns.load(); + + let branch = Branch::new( + state.channel_id.clone(), + description, + state.deps.clone(), + system_prompt, + history, + tool_server, + branch_max_turns, + ); + + let branch_id = branch.id; + let prompt = prompt.to_owned(); + + let branch_span = tracing::info_span!( + "branch.run", + branch_id = %branch_id, + channel_id = %state.channel_id, + description = %description, + ); + let handle = tokio::spawn( + async move { + if let Err(error) = 
branch.run(&prompt).await { + tracing::error!(branch_id = %branch_id, %error, "branch failed"); + } + } + .instrument(branch_span), + ); + + { + let mut branches = state.active_branches.write().await; + branches.insert(branch_id, handle); + } + + { + let mut status = state.status_block.write().await; + status.add_branch(branch_id, status_label); + } + + #[cfg(feature = "metrics")] + crate::telemetry::Metrics::global() + .active_branches + .with_label_values(&[&*state.deps.agent_id]) + .inc(); + + state + .deps + .event_tx + .send(crate::ProcessEvent::BranchStarted { + agent_id: state.deps.agent_id.clone(), + branch_id, + channel_id: state.channel_id.clone(), + description: status_label.to_string(), + reply_to_message_id: *state.reply_target_message_id.read().await, + }) + .ok(); + + tracing::info!(branch_id = %branch_id, description = %status_label, "branch spawned"); + + Ok(branch_id) +} + +/// Check whether the channel has capacity for another worker. +async fn check_worker_limit(state: &ChannelState) -> std::result::Result<(), AgentError> { + let max_workers = **state.deps.runtime_config.max_concurrent_workers.load(); + let workers = state.active_workers.read().await; + if workers.len() >= max_workers { + return Err(AgentError::WorkerLimitReached { + channel_id: state.channel_id.to_string(), + max: max_workers, + }); + } + Ok(()) +} + +/// Spawn a worker from a ChannelState. Used by the SpawnWorkerTool. 
+pub async fn spawn_worker_from_state( + state: &ChannelState, + task: impl Into, + interactive: bool, + skill_name: Option<&str>, +) -> std::result::Result { + check_worker_limit(state).await?; + let task = task.into(); + + let rc = &state.deps.runtime_config; + let prompt_engine = rc.prompts.load(); + let worker_system_prompt = prompt_engine + .render_worker_prompt( + &rc.instance_dir.display().to_string(), + &rc.workspace_dir.display().to_string(), + ) + .map_err(|e| AgentError::Other(anyhow::anyhow!("{e}")))?; + let skills = rc.skills.load(); + let browser_config = (**rc.browser_config.load()).clone(); + let brave_search_key = (**rc.brave_search_key.load()).clone(); + + // Build the worker system prompt, optionally prepending skill instructions + let system_prompt = if let Some(name) = skill_name { + if let Some(skill_prompt) = skills.render_worker_prompt(name, &prompt_engine) { + format!("{}\n\n{}", worker_system_prompt, skill_prompt) + } else { + tracing::warn!(skill = %name, "skill not found, spawning worker without skill context"); + worker_system_prompt + } + } else { + worker_system_prompt + }; + + let worker = if interactive { + let (worker, input_tx) = Worker::new_interactive( + Some(state.channel_id.clone()), + &task, + &system_prompt, + state.deps.clone(), + browser_config.clone(), + state.screenshot_dir.clone(), + brave_search_key.clone(), + state.logs_dir.clone(), + ); + let worker_id = worker.id; + state + .worker_inputs + .write() + .await + .insert(worker_id, input_tx); + worker + } else { + Worker::new( + Some(state.channel_id.clone()), + &task, + &system_prompt, + state.deps.clone(), + browser_config, + state.screenshot_dir.clone(), + brave_search_key, + state.logs_dir.clone(), + ) + }; + + let worker_id = worker.id; + + let worker_span = tracing::info_span!( + "worker.run", + worker_id = %worker_id, + channel_id = %state.channel_id, + task = %task, + ); + let handle = spawn_worker_task( + worker_id, + state.deps.event_tx.clone(), + 
state.deps.agent_id.clone(), + Some(state.channel_id.clone()), + worker.run().instrument(worker_span), + ); + + state.worker_handles.write().await.insert(worker_id, handle); + + { + let mut status = state.status_block.write().await; + status.add_worker(worker_id, &task, false); + } + + state + .deps + .event_tx + .send(crate::ProcessEvent::WorkerStarted { + agent_id: state.deps.agent_id.clone(), + worker_id, + channel_id: Some(state.channel_id.clone()), + task: task.clone(), + }) + .ok(); + + tracing::info!(worker_id = %worker_id, task = %task, "worker spawned"); + + Ok(worker_id) +} + +/// Spawn an OpenCode-backed worker for coding tasks. +/// +/// Instead of a Rig agent loop, this spawns an OpenCode subprocess that has its +/// own codebase exploration, context management, and tool suite. The worker +/// communicates with OpenCode via HTTP + SSE. +pub async fn spawn_opencode_worker_from_state( + state: &ChannelState, + task: impl Into, + directory: &str, + interactive: bool, +) -> std::result::Result { + check_worker_limit(state).await?; + let task = task.into(); + let directory = std::path::PathBuf::from(directory); + + let rc = &state.deps.runtime_config; + let opencode_config = rc.opencode.load(); + + if !opencode_config.enabled { + return Err(AgentError::Other(anyhow::anyhow!( + "OpenCode workers are not enabled in config" + ))); + } + + let server_pool = rc.opencode_server_pool.clone(); + + let worker = if interactive { + let (worker, input_tx) = crate::opencode::OpenCodeWorker::new_interactive( + Some(state.channel_id.clone()), + state.deps.agent_id.clone(), + &task, + directory, + server_pool, + state.deps.event_tx.clone(), + ); + let worker_id = worker.id; + state + .worker_inputs + .write() + .await + .insert(worker_id, input_tx); + worker + } else { + crate::opencode::OpenCodeWorker::new( + Some(state.channel_id.clone()), + state.deps.agent_id.clone(), + &task, + directory, + server_pool, + state.deps.event_tx.clone(), + ) + }; + + let worker_id = 
worker.id; + + let worker_span = tracing::info_span!( + "worker.run", + worker_id = %worker_id, + channel_id = %state.channel_id, + task = %task, + worker_type = "opencode", + ); + let handle = spawn_worker_task( + worker_id, + state.deps.event_tx.clone(), + state.deps.agent_id.clone(), + Some(state.channel_id.clone()), + async move { + let result = worker.run().await?; + Ok::(result.result_text) + } + .instrument(worker_span), + ); + + state.worker_handles.write().await.insert(worker_id, handle); + + let opencode_task = format!("[opencode] {task}"); + { + let mut status = state.status_block.write().await; + status.add_worker(worker_id, &opencode_task, false); + } + + state + .deps + .event_tx + .send(crate::ProcessEvent::WorkerStarted { + agent_id: state.deps.agent_id.clone(), + worker_id, + channel_id: Some(state.channel_id.clone()), + task: opencode_task, + }) + .ok(); + + tracing::info!(worker_id = %worker_id, task = %task, "OpenCode worker spawned"); + + Ok(worker_id) +} + +/// Spawn a future as a tokio task that sends a `WorkerComplete` event on completion. +/// +/// Handles both success and error cases, logging failures and sending the +/// appropriate event. Used by both builtin workers and OpenCode workers. +/// Returns the JoinHandle so the caller can store it for cancellation. 
+fn spawn_worker_task( + worker_id: WorkerId, + event_tx: broadcast::Sender, + agent_id: crate::AgentId, + channel_id: Option, + future: F, +) -> tokio::task::JoinHandle<()> +where + F: std::future::Future> + Send + 'static, + E: std::fmt::Display + Send + 'static, +{ + tokio::spawn(async move { + #[cfg(feature = "metrics")] + let worker_start = std::time::Instant::now(); + + #[cfg(feature = "metrics")] + crate::telemetry::Metrics::global() + .active_workers + .with_label_values(&[&*agent_id]) + .inc(); + + let (result_text, notify) = match future.await { + Ok(text) => (text, true), + Err(error) => { + tracing::error!(worker_id = %worker_id, %error, "worker failed"); + (format!("Worker failed: {error}"), true) + } + }; + #[cfg(feature = "metrics")] + { + let metrics = crate::telemetry::Metrics::global(); + metrics + .active_workers + .with_label_values(&[&*agent_id]) + .dec(); + metrics + .worker_duration_seconds + .with_label_values(&[&*agent_id, "builtin"]) + .observe(worker_start.elapsed().as_secs_f64()); + } + + let _ = event_tx.send(ProcessEvent::WorkerComplete { + agent_id, + worker_id, + channel_id, + result: result_text, + notify, + }); + }) +} + +/// Some models emit tool call syntax as plain text instead of making actual tool calls. +/// When the text starts with a tool-like prefix (e.g. `[reply]`, `(reply)`), try to +/// extract the reply content so we can send it cleanly instead of showing raw JSON. +/// Returns `None` if the text doesn't match or can't be parsed — the caller falls +/// back to sending the original text as-is. 
+fn extract_reply_from_tool_syntax(text: &str) -> Option { + // Match patterns like "[reply]\n{...}" or "(reply)\n{...}" (with optional whitespace) + let tool_prefixes = [ + "[reply]", + "(reply)", + "[react]", + "(react)", + "[skip]", + "(skip)", + "[branch]", + "(branch)", + "[spawn_worker]", + "(spawn_worker)", + "[route]", + "(route)", + "[cancel]", + "(cancel)", + ]; + + let lower = text.to_lowercase(); + let matched_prefix = tool_prefixes.iter().find(|p| lower.starts_with(*p))?; + let is_reply = matched_prefix.contains("reply"); + let is_skip = matched_prefix.contains("skip"); + + // For skip, just return empty — the user shouldn't see anything + if is_skip { + return Some(String::new()); + } + + // For non-reply tools (react, branch, etc.), suppress entirely + if !is_reply { + return Some(String::new()); + } + + // Try to extract "content" from the JSON payload after the prefix + let rest = text[matched_prefix.len()..].trim(); + if let Ok(parsed) = serde_json::from_str::(rest) + && let Some(content) = parsed.get("content").and_then(|v| v.as_str()) + { + return Some(content.to_string()); + } + + // If we can't parse JSON, the rest might just be the message itself (no JSON wrapper) + if !rest.is_empty() && !rest.starts_with('{') { + return Some(rest.to_string()); + } + + None +} + +/// Format a user message with sender attribution from message metadata. +/// +/// In multi-user channels, this lets the LLM distinguish who said what. +/// System-generated messages (re-triggers) are passed through as-is. 
+fn format_user_message(raw_text: &str, message: &InboundMessage) -> String { + if message.source == "system" { + return raw_text.to_string(); + } + + // Use platform-formatted author if available, fall back to metadata + let display_name = message + .formatted_author + .as_deref() + .or_else(|| { + message + .metadata + .get("sender_display_name") + .and_then(|v| v.as_str()) + }) + .unwrap_or(&message.sender_id); + + let bot_tag = if message + .metadata + .get("sender_is_bot") + .and_then(|v| v.as_bool()) + .unwrap_or(false) + { + " (bot)" + } else { + "" + }; + + let reply_context = message + .metadata + .get("reply_to_author") + .and_then(|v| v.as_str()) + .map(|author| { + let content_preview = message + .metadata + .get("reply_to_text") + .or_else(|| message.metadata.get("reply_to_content")) + .and_then(|v| v.as_str()) + .unwrap_or(""); + if content_preview.is_empty() { + format!(" (replying to {author})") + } else { + format!(" (replying to {author}: \"{content_preview}\")") + } + }) + .unwrap_or_default(); + + format!("{display_name}{bot_tag}{reply_context}: {raw_text}") +} + +fn extract_discord_message_id(message: &InboundMessage) -> Option { + if message.source != "discord" { + return None; + } + + message + .metadata + .get("discord_message_id") + .and_then(|value| value.as_u64()) +} + +/// Check if a ProcessEvent is targeted at a specific channel. +/// +/// Events from branches and workers carry a channel_id. We only process events +/// that originated from this channel — otherwise broadcast events from one +/// channel's workers would leak into sibling channels (e.g. threads). +fn event_is_for_channel(event: &ProcessEvent, channel_id: &ChannelId) -> bool { + match event { + ProcessEvent::BranchResult { + channel_id: event_channel, + .. + } => event_channel == channel_id, + ProcessEvent::WorkerComplete { + channel_id: event_channel, + .. + } => event_channel.as_ref() == Some(channel_id), + ProcessEvent::WorkerStatus { + channel_id: event_channel, + .. 
+ } => event_channel.as_ref() == Some(channel_id), + // Status block updates, tool events, etc. — match on agent_id which + // is already filtered by the event bus subscription. Let them through. + _ => true, + } +} + +/// Image MIME types we support for vision. +const IMAGE_MIME_PREFIXES: &[&str] = &["image/jpeg", "image/png", "image/gif", "image/webp"]; + +/// Text-based MIME types where we inline the content. +const TEXT_MIME_PREFIXES: &[&str] = &[ + "text/", + "application/json", + "application/xml", + "application/javascript", + "application/typescript", + "application/toml", + "application/yaml", +]; + +/// Download attachments and convert them to LLM-ready UserContent parts. +/// +/// Images become `UserContent::Image` (base64). Text files get inlined. +/// Other file types get a metadata-only description. +async fn download_attachments( + deps: &AgentDeps, + attachments: &[crate::Attachment], +) -> Vec { + let http = deps.llm_manager.http_client(); + let mut parts = Vec::new(); + + for attachment in attachments { + let is_image = IMAGE_MIME_PREFIXES + .iter() + .any(|p| attachment.mime_type.starts_with(p)); + let is_text = TEXT_MIME_PREFIXES + .iter() + .any(|p| attachment.mime_type.starts_with(p)); + + let content = if is_image { + download_image_attachment(http, attachment).await + } else if is_text { + download_text_attachment(http, attachment).await + } else if attachment.mime_type.starts_with("audio/") { + transcribe_audio_attachment(deps, http, attachment).await + } else { + let size_str = attachment + .size_bytes + .map(|s| format!("{:.1} KB", s as f64 / 1024.0)) + .unwrap_or_else(|| "unknown size".into()); + UserContent::text(format!( + "[Attachment: {} ({}, {})]", + attachment.filename, attachment.mime_type, size_str + )) + }; + + parts.push(content); + } + + parts +} + +/// Download an image attachment and encode it as base64 for the LLM. 
+async fn download_image_attachment( + http: &reqwest::Client, + attachment: &crate::Attachment, +) -> UserContent { + let response = match http.get(&attachment.url).send().await { + Ok(r) => r, + Err(error) => { + tracing::warn!(%error, filename = %attachment.filename, "failed to download image"); + return UserContent::text(format!( + "[Failed to download image: {}]", + attachment.filename + )); + } + }; + + let bytes = match response.bytes().await { + Ok(b) => b, + Err(error) => { + tracing::warn!(%error, filename = %attachment.filename, "failed to read image bytes"); + return UserContent::text(format!( + "[Failed to download image: {}]", + attachment.filename + )); + } + }; + + use base64::Engine as _; + let base64_data = base64::engine::general_purpose::STANDARD.encode(&bytes); + let media_type = ImageMediaType::from_mime_type(&attachment.mime_type); + + tracing::info!( + filename = %attachment.filename, + mime = %attachment.mime_type, + size = bytes.len(), + "downloaded image attachment" + ); + + UserContent::image_base64(base64_data, media_type, None) +} + +/// Download an audio attachment and transcribe it with the configured voice model. 
+async fn transcribe_audio_attachment( + deps: &AgentDeps, + http: &reqwest::Client, + attachment: &crate::Attachment, +) -> UserContent { + let response = match http.get(&attachment.url).send().await { + Ok(r) => r, + Err(error) => { + tracing::warn!(%error, filename = %attachment.filename, "failed to download audio"); + return UserContent::text(format!( + "[Failed to download audio: {}]", + attachment.filename + )); + } + }; + + let bytes = match response.bytes().await { + Ok(b) => b, + Err(error) => { + tracing::warn!(%error, filename = %attachment.filename, "failed to read audio bytes"); + return UserContent::text(format!( + "[Failed to download audio: {}]", + attachment.filename + )); + } + }; + + tracing::info!( + filename = %attachment.filename, + mime = %attachment.mime_type, + size = bytes.len(), + "downloaded audio attachment" + ); + + let routing = deps.runtime_config.routing.load(); + let voice_model = routing.voice.trim(); + if voice_model.is_empty() { + return UserContent::text(format!( + "[Audio attachment received but no voice model is configured in routing.voice: {}]", + attachment.filename + )); + } + + // Local Whisper backend — bypass the LLM provider path entirely. 
+ #[cfg(feature = "stt-whisper")] + if let Some(model_spec) = voice_model.strip_prefix("whisper-local://") { + let transcript = match crate::stt::transcribe(model_spec, &bytes).await { + Ok(text) if text.is_empty() => { + tracing::warn!(filename = %attachment.filename, "local Whisper returned empty transcript"); + return UserContent::text(format!( + "[Audio transcription returned empty text for {}]", + attachment.filename + )); + } + Ok(text) => text, + Err(error) => { + tracing::warn!(%error, filename = %attachment.filename, "local Whisper transcription failed"); + return UserContent::text(format!( + "[Audio transcription failed for {}: {}]", + attachment.filename, error + )); + } + }; + return UserContent::text(format!( + "\n{}\n", + attachment.filename, attachment.mime_type, transcript + )); + } + + let (provider_id, model_name) = match deps.llm_manager.resolve_model(voice_model) { + Ok(parts) => parts, + Err(error) => { + tracing::warn!(%error, model = %voice_model, "invalid voice model route"); + return UserContent::text(format!( + "[Audio transcription failed for {}: invalid voice model '{}']", + attachment.filename, voice_model + )); + } + }; + + let provider = match deps.llm_manager.get_provider(&provider_id) { + Ok(provider) => provider, + Err(error) => { + tracing::warn!(%error, provider = %provider_id, "voice provider not configured"); + return UserContent::text(format!( + "[Audio transcription failed for {}: provider '{}' is not configured]", + attachment.filename, provider_id + )); + } + }; + + if provider.api_type == ApiType::Anthropic { + return UserContent::text(format!( + "[Audio transcription failed for {}: provider '{}' does not support input_audio on this endpoint]", + attachment.filename, provider_id + )); + } + + let format = audio_format_for_attachment(attachment); + use base64::Engine as _; + let base64_audio = base64::engine::general_purpose::STANDARD.encode(&bytes); + + let endpoint = format!( + "{}/v1/chat/completions", + 
provider.base_url.trim_end_matches('/') + ); + let body = serde_json::json!({ + "model": model_name, + "messages": [{ + "role": "user", + "content": [ + { + "type": "text", + "text": "Transcribe this audio verbatim. Return only the transcription text." + }, + { + "type": "input_audio", + "input_audio": { + "data": base64_audio, + "format": format, + } + } + ] + }], + "temperature": 0 + }); + + let response = match http + .post(&endpoint) + .header("authorization", format!("Bearer {}", provider.api_key)) + .header("content-type", "application/json") + .json(&body) + .send() + .await + { + Ok(response) => response, + Err(error) => { + tracing::warn!(%error, model = %voice_model, "voice transcription request failed"); + return UserContent::text(format!( + "[Audio transcription failed for {}]", + attachment.filename + )); + } + }; + + let status = response.status(); + let response_body = match response.json::().await { + Ok(body) => body, + Err(error) => { + tracing::warn!(%error, model = %voice_model, "invalid transcription response"); + return UserContent::text(format!( + "[Audio transcription failed for {}]", + attachment.filename + )); + } + }; + + if !status.is_success() { + let message = response_body["error"]["message"] + .as_str() + .unwrap_or("unknown error"); + tracing::warn!( + status = %status, + model = %voice_model, + error = %message, + "voice transcription provider returned error" + ); + return UserContent::text(format!( + "[Audio transcription failed for {}: {}]", + attachment.filename, message + )); + } + + let transcript = extract_transcript_text(&response_body); + if transcript.is_empty() { + tracing::warn!(model = %voice_model, "empty transcription returned"); + return UserContent::text(format!( + "[Audio transcription returned empty text for {}]", + attachment.filename + )); + } + + UserContent::text(format!( + "\n{}\n", + attachment.filename, attachment.mime_type, transcript + )) +} + +fn audio_format_for_attachment(attachment: 
&crate::Attachment) -> &'static str { + let mime = attachment.mime_type.to_lowercase(); + if mime.contains("mpeg") || mime.contains("mp3") { + return "mp3"; + } + if mime.contains("wav") { + return "wav"; + } + if mime.contains("flac") { + return "flac"; + } + if mime.contains("aac") { + return "aac"; + } + if mime.contains("ogg") { + return "ogg"; + } + if mime.contains("mp4") || mime.contains("m4a") { + return "m4a"; + } + + match attachment + .filename + .rsplit('.') + .next() + .unwrap_or_default() + .to_lowercase() + .as_str() + { + "mp3" => "mp3", + "wav" => "wav", + "flac" => "flac", + "aac" => "aac", + "m4a" | "mp4" => "m4a", + "oga" | "ogg" => "ogg", + _ => "ogg", + } +} + +fn extract_transcript_text(body: &serde_json::Value) -> String { + if let Some(text) = body["choices"][0]["message"]["content"].as_str() { + return text.trim().to_string(); + } + + let Some(parts) = body["choices"][0]["message"]["content"].as_array() else { + return String::new(); + }; + + parts + .iter() + .filter_map(|part| { + if part["type"].as_str() == Some("text") { + part["text"].as_str().map(str::trim) + } else { + None + } + }) + .filter(|text| !text.is_empty()) + .collect::>() + .join("\n") +} + +/// Download a text attachment and inline its content for the LLM. 
+async fn download_text_attachment( + http: &reqwest::Client, + attachment: &crate::Attachment, +) -> UserContent { + let response = match http.get(&attachment.url).send().await { + Ok(r) => r, + Err(error) => { + tracing::warn!(%error, filename = %attachment.filename, "failed to download text file"); + return UserContent::text(format!( + "[Failed to download file: {}]", + attachment.filename + )); + } + }; + + let content = match response.text().await { + Ok(c) => c, + Err(error) => { + tracing::warn!(%error, filename = %attachment.filename, "failed to read text file"); + return UserContent::text(format!("[Failed to read file: {}]", attachment.filename)); + } + }; + + // Truncate very large files to avoid blowing up context + let truncated = if content.len() > 50_000 { + format!( + "{}...\n[truncated — {} bytes total]", + &content[..50_000], + content.len() + ) + } else { + content + }; + + tracing::info!( + filename = %attachment.filename, + mime = %attachment.mime_type, + "downloaded text attachment" + ); + + UserContent::text(format!( + "\n{}\n", + attachment.filename, attachment.mime_type, truncated + )) +} + +/// Write history back after the agentic loop completes. +/// +/// On success or `MaxTurnsError`, the history Rig built is consistent and safe +/// to keep. On `PromptCancelled` or hard errors, it must be rolled back: +/// +/// - `PromptCancelled`: Rig snapshots history *before* the tool batch runs, so +/// the carried history has the assistant's tool-call message but no tool +/// results. Writing it back leaves a dangling tool-call that poisons every +/// subsequent turn with "tool call result does not follow tool call (2013)". +/// - Hard errors: Rig mutates history in-place and may have appended a +/// tool-call message before the error was raised. +/// +/// `MaxTurnsError` is safe — Rig pushes all tool results into a `User` message +/// before raising it, so history is consistent. 
+fn apply_history_after_turn( + result: &std::result::Result, + guard: &mut Vec, + history: Vec, + history_len_before: usize, + channel_id: &str, +) { + match result { + Ok(_) | Err(rig::completion::PromptError::MaxTurnsError { .. }) => { + *guard = history; + } + Err(rig::completion::PromptError::PromptCancelled { .. }) | Err(_) => { + tracing::debug!( + channel_id = %channel_id, + rolled_back = history.len().saturating_sub(history_len_before), + "rolling back history after cancelled or failed turn" + ); + guard.truncate(history_len_before); + } + } +} + +#[cfg(test)] +mod tests { + use super::apply_history_after_turn; + use rig::completion::{CompletionError, PromptError}; + use rig::message::Message; + use rig::tool::ToolSetError; + + fn user_msg(text: &str) -> Message { + Message::User { + content: rig::OneOrMany::one(rig::message::UserContent::text(text)), + } + } + + fn assistant_msg(text: &str) -> Message { + Message::Assistant { + id: None, + content: rig::OneOrMany::one(rig::message::AssistantContent::text(text)), + } + } + + fn make_history(msgs: &[&str]) -> Vec { + msgs.iter() + .enumerate() + .map(|(i, text)| { + if i % 2 == 0 { + user_msg(text) + } else { + assistant_msg(text) + } + }) + .collect() + } + + /// On success, the full post-turn history is written back. + #[test] + fn ok_writes_history_back() { + let mut guard = make_history(&["hello"]); + let history = make_history(&["hello", "hi there", "how are you?"]); + let len_before = 1; + + apply_history_after_turn( + &Ok("hi there".to_string()), + &mut guard, + history.clone(), + len_before, + "test", + ); + + assert_eq!(guard, history); + } + + /// MaxTurnsError carries consistent history (tool results included) — write it back. 
+ #[test] + fn max_turns_writes_history_back() { + let mut guard = make_history(&["hello"]); + let history = make_history(&["hello", "hi there", "how are you?"]); + let len_before = 1; + + let err = Err(PromptError::MaxTurnsError { + max_turns: 5, + chat_history: Box::new(history.clone()), + prompt: Box::new(user_msg("prompt")), + }); + + apply_history_after_turn(&err, &mut guard, history.clone(), len_before, "test"); + + assert_eq!(guard, history); + } + + /// PromptCancelled carries history missing tool results — roll back to snapshot. + #[test] + fn prompt_cancelled_rolls_back() { + let initial = make_history(&["hello", "thinking..."]); + let mut guard = initial.clone(); + // Rig appended a tool-call message before cancelling — simulated by + // the longer history passed as `history`. + let mut history = initial.clone(); + history.push(user_msg("[dangling tool-call]")); + let len_before = initial.len(); + + let err = Err(PromptError::PromptCancelled { + chat_history: Box::new(history.clone()), + reason: "reply delivered".to_string(), + }); + + apply_history_after_turn(&err, &mut guard, history, len_before, "test"); + + assert_eq!( + guard, initial, + "history should be rolled back to pre-turn snapshot" + ); + } + + /// Hard completion errors also roll back to prevent dangling tool-calls. + #[test] + fn completion_error_rolls_back() { + let initial = make_history(&["hello", "thinking..."]); + let mut guard = initial.clone(); + let mut history = initial.clone(); + history.push(user_msg("[dangling tool-call]")); + let len_before = initial.len(); + + let err = Err(PromptError::CompletionError( + CompletionError::ResponseError("API error".to_string()), + )); + + apply_history_after_turn(&err, &mut guard, history, len_before, "test"); + + assert_eq!( + guard, initial, + "history should be rolled back after hard error" + ); + } + + /// ToolError (tool not found) rolls back — same catch-all arm as hard errors. 
+ #[test] + fn tool_error_rolls_back() { + let initial = make_history(&["hello", "thinking..."]); + let mut guard = initial.clone(); + let mut history = initial.clone(); + history.push(user_msg("[dangling tool-call]")); + let len_before = initial.len(); + + let err = Err(PromptError::ToolError(ToolSetError::ToolNotFoundError( + "nonexistent_tool".to_string(), + ))); + + apply_history_after_turn(&err, &mut guard, history, len_before, "test"); + + assert_eq!( + guard, initial, + "history should be rolled back after tool error" + ); + } + + /// Rollback on empty history is a no-op and must not panic. + #[test] + fn rollback_on_empty_history_is_noop() { + let mut guard: Vec = vec![]; + let history: Vec = vec![]; + let len_before = 0; + + let err = Err(PromptError::PromptCancelled { + chat_history: Box::new(history.clone()), + reason: "reply delivered".to_string(), + }); + + apply_history_after_turn(&err, &mut guard, history, len_before, "test"); + + assert!( + guard.is_empty(), + "empty history should stay empty after rollback" + ); + } + + /// Rollback when nothing was appended is also a no-op (len unchanged). + #[test] + fn rollback_when_nothing_appended_is_noop() { + let initial = make_history(&["hello", "thinking..."]); + let mut guard = initial.clone(); + // history has same length as before — Rig cancelled before appending anything + let history = initial.clone(); + let len_before = initial.len(); + + let err = Err(PromptError::PromptCancelled { + chat_history: Box::new(history.clone()), + reason: "skip delivered".to_string(), + }); + + apply_history_after_turn(&err, &mut guard, history, len_before, "test"); + + assert_eq!( + guard, initial, + "history should be unchanged when nothing was appended" + ); + } + + /// After rollback, the next turn starts clean with no dangling messages. 
+ #[test] + fn next_turn_is_clean_after_prompt_cancelled() { + let initial = make_history(&["hello", "thinking..."]); + let mut guard = initial.clone(); + let mut poisoned_history = initial.clone(); + poisoned_history.push(user_msg("[dangling tool-call without result]")); + let len_before = initial.len(); + + // First turn: cancelled (reply tool fired) + apply_history_after_turn( + &Err(PromptError::PromptCancelled { + chat_history: Box::new(poisoned_history.clone()), + reason: "reply delivered".to_string(), + }), + &mut guard, + poisoned_history, + len_before, + "test", + ); + + // Second turn: new user message appended, successful response + guard.push(user_msg("follow-up question")); + let len_before2 = guard.len(); + let mut history2 = guard.clone(); + history2.push(assistant_msg("clean response")); + + apply_history_after_turn( + &Ok("clean response".to_string()), + &mut guard, + history2.clone(), + len_before2, + "test", + ); + + assert_eq!( + guard, history2, + "second turn should succeed with clean history" + ); + // Crucially: no dangling tool-call in history + let has_dangling = guard.iter().any(|m| { + if let Message::User { content } = m { + content.iter().any(|c| { + if let rig::message::UserContent::Text(t) = c { + t.text.contains("dangling") + } else { + false + } + }) + } else { + false + } + }); + assert!( + !has_dangling, + "no dangling tool-call messages in history after rollback" + ); + } +} + diff --git a/src/lib.rs b/src/lib.rs index 3557849b0..bb1e7b1b2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,8 @@ pub mod tasks; #[cfg(feature = "metrics")] pub mod telemetry; pub mod tools; +#[cfg(feature = "stt-whisper")] +pub mod stt; pub mod update; pub use error::{Error, Result}; diff --git a/src/stt.rs b/src/stt.rs new file mode 100644 index 000000000..2e0e5a8cc --- /dev/null +++ b/src/stt.rs @@ -0,0 +1,275 @@ +//! Local Whisper speech-to-text via whisper-rs. +//! +//! Only compiled when the `stt-whisper` feature is enabled. +//! 
Exposed as a single async `transcribe` function that lazily loads and caches +//! the model context for the lifetime of the process. + +#[cfg(feature = "stt-whisper")] +pub use local::transcribe; + +#[cfg(feature = "stt-whisper")] +mod local { + use std::sync::OnceLock; + + use hf_hub::api::sync::Api; + use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters}; + + /// Known model size names and their GGML filenames on `ggerganov/whisper.cpp`. + const KNOWN_SIZES: &[(&str, &str)] = &[ + ("tiny", "ggml-tiny.bin"), + ("tiny.en", "ggml-tiny.en.bin"), + ("base", "ggml-base.bin"), + ("base.en", "ggml-base.en.bin"), + ("small", "ggml-small.bin"), + ("small.en", "ggml-small.en.bin"), + ("medium", "ggml-medium.bin"), + ("medium.en", "ggml-medium.en.bin"), + ("large", "ggml-large-v3.bin"), + ("large-v1", "ggml-large-v1.bin"), + ("large-v2", "ggml-large-v2.bin"), + ("large-v3", "ggml-large-v3.bin"), + ]; + + /// Cached (model_spec, WhisperContext) — one per process. + /// + /// If the user changes `routing.voice` at runtime we just keep using the + /// already-loaded model; a restart is required to switch models. + static CONTEXT: OnceLock<(String, WhisperContext)> = OnceLock::new(); + + #[derive(Debug, thiserror::Error)] + pub enum WhisperError { + #[error("model not found and could not be downloaded: {0}")] + ModelNotFound(String), + #[error("hf-hub error: {0}")] + HfHub(String), + #[error("failed to load whisper model: {0}")] + Load(String), + #[error("failed to create whisper state: {0}")] + State(String), + #[error("transcription failed: {0}")] + Transcription(String), + #[error("audio decode error: {0}")] + Decode(String), + } + + /// Transcribe raw audio bytes using the local Whisper model. + /// + /// `model_spec` is the part after `whisper-local://`: + /// - A known size name (`small`, `medium`, `large`, …) — downloaded from HF + /// into the HF cache on first use. + /// - An absolute path (`/path/to/ggml-small.bin`) — loaded directly. 
+ pub async fn transcribe(model_spec: &str, audio: &[u8]) -> Result { + let model_spec = model_spec.to_owned(); + let audio = audio.to_vec(); + + // Whisper inference is CPU-bound and blocking — run on a thread pool. + tokio::task::spawn_blocking(move || transcribe_blocking(&model_spec, &audio)) + .await + .map_err(|e| WhisperError::Transcription(e.to_string()))? + } + + fn transcribe_blocking(model_spec: &str, audio: &[u8]) -> Result { + let ctx = get_or_load_context(model_spec)?; + + let mut state = ctx + .create_state() + .map_err(|e| WhisperError::State(e.to_string()))?; + + let samples = decode_to_f32(audio)?; + + let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); + params.set_language(Some("auto")); + params.set_print_progress(false); + params.set_print_realtime(false); + params.set_print_timestamps(false); + + state + .full(params, &samples) + .map_err(|e| WhisperError::Transcription(e.to_string()))?; + + let n = state.full_n_segments(); + let mut parts = Vec::with_capacity(n as usize); + for i in 0..n { + if let Some(segment) = state.get_segment(i) { + if let Ok(text) = segment.to_str() { + let trimmed = text.trim(); + if !trimmed.is_empty() { + parts.push(trimmed.to_owned()); + } + } + } + } + + Ok(parts.join(" ")) + } + + /// Return the cached context, loading it first if necessary. 
+ fn get_or_load_context(model_spec: &str) -> Result<&'static WhisperContext, WhisperError> { + if let Some((_, ctx)) = CONTEXT.get() { + return Ok(ctx); + } + + let model_path = resolve_model_path(model_spec)?; + + tracing::info!(model_path = %model_path, "loading local Whisper model"); + + let params = WhisperContextParameters::default(); + let ctx = WhisperContext::new_with_params(&model_path, params) + .map_err(|e| WhisperError::Load(e.to_string()))?; + + let _ = CONTEXT.set((model_spec.to_owned(), ctx)); + + tracing::info!(model_path = %model_path, "Whisper model loaded and cached"); + + Ok(&CONTEXT.get().unwrap().1) + } + + /// Resolve a model spec to an absolute path on disk, downloading via hf-hub if needed. + fn resolve_model_path(spec: &str) -> Result { + // Absolute path — use directly. + if spec.starts_with('/') { + if std::path::Path::new(spec).exists() { + return Ok(spec.to_owned()); + } + return Err(WhisperError::ModelNotFound(format!( + "model file not found: {spec}" + ))); + } + + // Known size name — fetch via hf-hub (uses HF_HOME cache, downloads if missing). + let filename = KNOWN_SIZES + .iter() + .find(|(name, _)| *name == spec) + .map(|(_, file)| *file) + .ok_or_else(|| { + WhisperError::ModelNotFound(format!( + "unknown model size '{spec}'; use one of: {}", + KNOWN_SIZES + .iter() + .map(|(n, _)| *n) + .collect::>() + .join(", ") + )) + })?; + + tracing::info!(model = %spec, filename = %filename, "fetching Whisper model via hf-hub"); + + let api = Api::new().map_err(|e| WhisperError::HfHub(e.to_string()))?; + let repo = api.model("ggerganov/whisper.cpp".to_owned()); + let path = repo + .get(filename) + .map_err(|e| WhisperError::HfHub(e.to_string()))?; + + Ok(path.to_string_lossy().to_string()) + } + + /// Decode arbitrary audio bytes to 16 kHz mono f32 samples for Whisper. + /// + /// Uses symphonia so ogg/opus, mp3, flac, wav, etc. all work without manual + /// format detection. 
+ fn decode_to_f32(audio: &[u8]) -> Result, WhisperError> { + use symphonia::core::codecs::DecoderOptions; + use symphonia::core::formats::FormatOptions; + use symphonia::core::io::MediaSourceStream; + use symphonia::core::meta::MetadataOptions; + use symphonia::core::probe::Hint; + + let cursor = std::io::Cursor::new(audio.to_vec()); + let mss = MediaSourceStream::new(Box::new(cursor), Default::default()); + + let probed = symphonia::default::get_probe() + .format( + &Hint::new(), + mss, + &FormatOptions::default(), + &MetadataOptions::default(), + ) + .map_err(|e| WhisperError::Decode(e.to_string()))?; + + let mut format = probed.format; + let track = format + .tracks() + .iter() + .find(|t| { + t.codec_params.codec != symphonia::core::codecs::CODEC_TYPE_NULL + }) + .ok_or_else(|| WhisperError::Decode("no audio track found".into()))? + .clone(); + + let mut decoder = symphonia::default::get_codecs() + .make(&track.codec_params, &DecoderOptions::default()) + .map_err(|e| WhisperError::Decode(e.to_string()))?; + + let track_id = track.id; + let sample_rate = track.codec_params.sample_rate.unwrap_or(16000); + let channels = track + .codec_params + .channels + .map(|c| c.count()) + .unwrap_or(1); + + let mut raw_samples: Vec = Vec::new(); + + loop { + let packet = match format.next_packet() { + Ok(p) => p, + Err(symphonia::core::errors::Error::IoError(_)) => break, + Err(symphonia::core::errors::Error::ResetRequired) => break, + Err(e) => return Err(WhisperError::Decode(e.to_string())), + }; + + if packet.track_id() != track_id { + continue; + } + + let decoded = decoder + .decode(&packet) + .map_err(|e| WhisperError::Decode(e.to_string()))?; + + // Convert to f32 mono using a sample-converting audio buffer. + use symphonia::core::audio::{AudioBuffer, Signal as _}; + + let mut f32_buf: AudioBuffer = AudioBuffer::new( + decoded.capacity() as u64, + decoded.spec().clone(), + ); + decoded.convert(&mut f32_buf); + + // Mix down to mono. 
+ let frames = f32_buf.frames(); + for frame in 0..frames { + let mut sum = 0f32; + for ch in 0..channels { + sum += f32_buf.chan(ch)[frame]; + } + raw_samples.push(sum / channels as f32); + } + } + + // Resample to 16 kHz if needed. + if sample_rate != 16000 { + raw_samples = resample(raw_samples, sample_rate, 16000); + } + + Ok(raw_samples) + } + + /// Simple linear resampler (good enough for speech; not for music). + fn resample(samples: Vec, from_hz: u32, to_hz: u32) -> Vec { + if from_hz == to_hz { + return samples; + } + let ratio = from_hz as f64 / to_hz as f64; + let out_len = (samples.len() as f64 / ratio) as usize; + let mut out = Vec::with_capacity(out_len); + for i in 0..out_len { + let pos = i as f64 * ratio; + let idx = pos as usize; + let frac = (pos - idx as f64) as f32; + let a = samples.get(idx).copied().unwrap_or(0.0); + let b = samples.get(idx + 1).copied().unwrap_or(0.0); + out.push(a + frac * (b - a)); + } + out + } +} From 5271a117ad8f81beb41b03df83c1966da66f15f4 Mon Sep 17 00:00:00 2001 From: Marenz Date: Sat, 21 Feb 2026 13:20:17 +0100 Subject: [PATCH 2/4] Enable Vulkan GPU backend and Ogg/Opus decode for local Whisper STT --- Cargo.lock | 32 +++++++++++++++++++++++ Cargo.toml | 6 +++-- src/stt.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 111 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a41956207..1846f49df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -621,6 +621,17 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "audiopus_sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62314a1546a2064e033665d658e88c620a62904be945f8147e6b16c3db9f8651" +dependencies = [ + "cmake", + "log", + "pkg-config", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -5800,6 +5811,15 @@ dependencies = [ "web-time", ] 
+[[package]] +name = "ogg" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdab8dcd8d4052eaacaf8fb07a3ccd9a6e26efadb42878a413c68fc4af1dee2b" +dependencies = [ + "byteorder", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -5996,6 +6016,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "opus" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d3809943dff6fbad5f0484449ea26bdb9cb7d8efdf26ed50d3c7f227f69eb5c" +dependencies = [ + "audiopus_sys", +] + [[package]] name = "ordered-float" version = "5.1.0" @@ -8032,11 +8061,13 @@ dependencies = [ "mime_guess", "minijinja", "notify", + "ogg", "open", "opentelemetry", "opentelemetry-otlp", "opentelemetry-semantic-conventions", "opentelemetry_sdk", + "opus", "pdf-extract", "pin-project", "prometheus", @@ -10104,6 +10135,7 @@ version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71ea5d2401f30f51d08126a2d133fee4c1955136519d7ac6cf6f5ac0a91e6bc8" dependencies = [ + "libc", "whisper-rs-sys", ] diff --git a/Cargo.toml b/Cargo.toml index 6834d0c2f..9c39b0af9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -150,16 +150,18 @@ tempfile = "3" # Prometheus metrics (optional, behind "metrics" feature) prometheus = { version = "0.13", optional = true } -whisper-rs = { version = "0.15", optional = true } +whisper-rs = { version = "0.15", optional = true, features = ["vulkan"] } hf-hub = { version = "0.5", optional = true } symphonia = { version = "0.5", features = ["mp3", "aac", "flac", "ogg", "wav", "isomp4"], optional = true } +ogg = { version = "0.9", optional = true } +opus = { version = "0.3", optional = true } pdf-extract = "0.10.0" open = "5.3.3" urlencoding = "2.1.3" moka = "0.12.13" [features] -stt-whisper = ["dep:whisper-rs", "dep:hf-hub", 
"dep:symphonia"] +stt-whisper = ["dep:whisper-rs", "dep:hf-hub", "dep:symphonia", "dep:ogg", "dep:opus"] metrics = ["dep:prometheus"] [lints.clippy] diff --git a/src/stt.rs b/src/stt.rs index 2e0e5a8cc..d07afa400 100644 --- a/src/stt.rs +++ b/src/stt.rs @@ -165,9 +165,13 @@ mod local { /// Decode arbitrary audio bytes to 16 kHz mono f32 samples for Whisper. /// - /// Uses symphonia so ogg/opus, mp3, flac, wav, etc. all work without manual - /// format detection. + /// Ogg/Opus (Telegram voice messages) is handled directly via the `ogg` + + /// `opus` crates. Everything else falls through to symphonia. fn decode_to_f32(audio: &[u8]) -> Result, WhisperError> { + if is_ogg_opus(audio) { + return decode_ogg_opus(audio); + } + use symphonia::core::codecs::DecoderOptions; use symphonia::core::formats::FormatOptions; use symphonia::core::io::MediaSourceStream; @@ -254,6 +258,75 @@ mod local { Ok(raw_samples) } + /// Check if the audio is an Ogg container with an Opus stream. + fn is_ogg_opus(audio: &[u8]) -> bool { + // OggS capture pattern at offset 0, and OpusHead magic at offset 28 + // (first packet of the first logical stream). + audio.starts_with(b"OggS") && audio.len() > 36 && &audio[28..36] == b"OpusHead" + } + + /// Decode Ogg/Opus audio to 16 kHz mono f32 samples. + fn decode_ogg_opus(audio: &[u8]) -> Result, WhisperError> { + use ogg::reading::PacketReader; + + let cursor = std::io::Cursor::new(audio); + let mut reader = PacketReader::new(cursor); + + // Skip the OpusHead and OpusTags header packets. + let mut header_packets = 0; + let mut decoder: Option = None; + let mut sample_rate = 48000u32; + let mut channels = 1usize; + let mut samples: Vec = Vec::new(); + + while let Ok(Some(packet)) = reader.read_packet() { + if header_packets < 2 { + if header_packets == 0 { + // Parse OpusHead to get channel count and pre-skip. 
+ if packet.data.len() >= 11 && &packet.data[0..8] == b"OpusHead" { + channels = packet.data[9] as usize; + // Output sample rate is always 48000 for libopus. + sample_rate = 48000; + } + decoder = Some( + opus::Decoder::new(sample_rate, if channels == 2 { + opus::Channels::Stereo + } else { + opus::Channels::Mono + }) + .map_err(|e| WhisperError::Decode(e.to_string()))?, + ); + } + header_packets += 1; + continue; + } + + let dec = decoder.as_mut().unwrap(); + // Max Opus frame: 120ms at 48kHz = 5760 samples per channel. + let max_samples = 5760 * channels; + let mut pcm = vec![0f32; max_samples]; + let n = dec + .decode_float(&packet.data, &mut pcm, false) + .map_err(|e| WhisperError::Decode(e.to_string()))?; + + // Mix down to mono. + if channels == 1 { + samples.extend_from_slice(&pcm[..n]); + } else { + for frame in 0..n { + let mut sum = 0f32; + for ch in 0..channels { + sum += pcm[frame * channels + ch]; + } + samples.push(sum / channels as f32); + } + } + } + + // Resample from 48 kHz to 16 kHz. + Ok(resample(samples, sample_rate, 16000)) + } + /// Simple linear resampler (good enough for speech; not for music). 
fn resample(samples: Vec, from_hz: u32, to_hz: u32) -> Vec { if from_hz == to_hz { From c12dc1fc4d1489b9f2220e3177cd3e0e87e2eb52 Mon Sep 17 00:00:00 2001 From: Marenz Date: Sat, 21 Feb 2026 13:36:15 +0100 Subject: [PATCH 3/4] docs: document local Whisper STT backend in README --- README.md | 24 ++++++++++++++++++++++++ src/lib.rs | 4 ++-- src/stt.rs | 29 ++++++++++++----------------- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index fad6dd1e0..59dfdc786 100644 --- a/README.md +++ b/README.md @@ -197,6 +197,30 @@ channel = "my-provider/my-model" Additional built-in providers include **Kilo Gateway**, **OpenCode Go**, **NVIDIA**, **MiniMax**, **Moonshot AI (Kimi)**, and **Z.AI Coding Plan** — configure with `kilo_key`, `opencode_go_key`, `nvidia_key`, `minimax_key`, `moonshot_key`, or `zai_coding_plan_key` in `[llm]`. +### Voice Transcription + +Audio attachments (voice messages, audio files) are transcribed before being passed to the channel. Set `routing.voice` to choose the backend: + +**Provider-based** — route through any configured LLM provider that supports audio input: + +```toml +[defaults.routing] +voice = "openai/whisper-1" +``` + +**Local Whisper** (`stt-whisper` feature, requires `--features stt-whisper` at build time) — run inference locally via [whisper-rs](https://codeberg.org/tazz4843/whisper-rs), no API call needed: + +```toml +[defaults.routing] +voice = "whisper-local://small" +``` + +The model is downloaded automatically from [`ggerganov/whisper.cpp`](https://huggingface.co/ggerganov/whisper.cpp) on first use and cached in `~/.cache/huggingface/hub`. Supported size names: `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`, `large-v1`, `large-v2`, `large-v3`. An absolute path to a GGML model file also works. + +GPU acceleration via Vulkan is enabled automatically when a compatible device is detected. 
The loaded model is cached for the process lifetime — restart to switch models. + +Ogg/Opus audio (Telegram voice messages) is decoded natively. All other formats are handled via symphonia. + ### Skills Extensible skill system integrated with [skills.sh](https://skills.sh): diff --git a/src/lib.rs b/src/lib.rs index bb1e7b1b2..ca5540d82 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,11 +24,11 @@ pub mod secrets; pub mod settings; pub mod skills; pub mod tasks; +#[cfg(feature = "stt-whisper")] +pub mod stt; #[cfg(feature = "metrics")] pub mod telemetry; pub mod tools; -#[cfg(feature = "stt-whisper")] -pub mod stt; pub mod update; pub use error::{Error, Result}; diff --git a/src/stt.rs b/src/stt.rs index d07afa400..14860f11e 100644 --- a/src/stt.rs +++ b/src/stt.rs @@ -194,9 +194,7 @@ mod local { let track = format .tracks() .iter() - .find(|t| { - t.codec_params.codec != symphonia::core::codecs::CODEC_TYPE_NULL - }) + .find(|t| t.codec_params.codec != symphonia::core::codecs::CODEC_TYPE_NULL) .ok_or_else(|| WhisperError::Decode("no audio track found".into()))? .clone(); @@ -206,11 +204,7 @@ mod local { let track_id = track.id; let sample_rate = track.codec_params.sample_rate.unwrap_or(16000); - let channels = track - .codec_params - .channels - .map(|c| c.count()) - .unwrap_or(1); + let channels = track.codec_params.channels.map(|c| c.count()).unwrap_or(1); let mut raw_samples: Vec = Vec::new(); @@ -233,10 +227,8 @@ mod local { // Convert to f32 mono using a sample-converting audio buffer. use symphonia::core::audio::{AudioBuffer, Signal as _}; - let mut f32_buf: AudioBuffer = AudioBuffer::new( - decoded.capacity() as u64, - decoded.spec().clone(), - ); + let mut f32_buf: AudioBuffer = + AudioBuffer::new(decoded.capacity() as u64, decoded.spec().clone()); decoded.convert(&mut f32_buf); // Mix down to mono. 
@@ -289,11 +281,14 @@ mod local { sample_rate = 48000; } decoder = Some( - opus::Decoder::new(sample_rate, if channels == 2 { - opus::Channels::Stereo - } else { - opus::Channels::Mono - }) + opus::Decoder::new( + sample_rate, + if channels == 2 { + opus::Channels::Stereo + } else { + opus::Channels::Mono + }, + ) .map_err(|e| WhisperError::Decode(e.to_string()))?, ); } From 89b57ea0cb683cb857f0a06a649529dc2dadd39d Mon Sep 17 00:00:00 2001 From: Marenz Date: Mon, 23 Feb 2026 22:01:14 +0100 Subject: [PATCH 4/4] feat(stt): add transcribe_audio worker tool and unify STT dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workers can now call transcribe_audio(path) to transcribe local audio files. The tool uses whatever is configured in routing.voice — local Whisper (whisper-local://) or any OpenAI-compatible HTTP provider. The transcription logic is extracted from channel.rs into stt.rs as transcribe_bytes(), shared by both the channel attachment handler and the new tool. The stt module is now always compiled (not gated on stt-whisper) since it handles all provider paths. --- .../tools/transcribe_audio_description.md.j2 | 1 + src/agent/channel.rs | 225 ++---------------- src/agent/worker.rs | 10 +- src/prompts/text.rs | 3 + src/stt.rs | 188 ++++++++++++++- src/tools.rs | 16 ++ src/tools/transcribe_audio.rs | 117 +++++++++ 7 files changed, 349 insertions(+), 211 deletions(-) create mode 100644 prompts/en/tools/transcribe_audio_description.md.j2 create mode 100644 src/tools/transcribe_audio.rs diff --git a/prompts/en/tools/transcribe_audio_description.md.j2 b/prompts/en/tools/transcribe_audio_description.md.j2 new file mode 100644 index 000000000..b00515fba --- /dev/null +++ b/prompts/en/tools/transcribe_audio_description.md.j2 @@ -0,0 +1 @@ +Transcribe an audio file to text using local speech-to-text. Provide the path to the audio file. Supports ogg, opus, mp3, flac, wav, and m4a formats. 
Use this instead of external whisper CLI tools. diff --git a/src/agent/channel.rs b/src/agent/channel.rs index d838a0d25..7522827b2 100644 --- a/src/agent/channel.rs +++ b/src/agent/channel.rs @@ -13,6 +13,8 @@ use crate::agent::channel_prompt::{ use crate::agent::compactor::Compactor; use crate::agent::status::StatusBlock; use crate::agent::worker::Worker; + + use crate::conversation::{ChannelStore, ConversationLogger, ProcessRunLogger}; use crate::error::{AgentError, Result}; use crate::hooks::SpacebotHook; @@ -2491,219 +2493,32 @@ async fn transcribe_audio_attachment( ); let routing = deps.runtime_config.routing.load(); - let voice_model = routing.voice.trim(); - if voice_model.is_empty() { - return UserContent::text(format!( - "[Audio attachment received but no voice model is configured in routing.voice: {}]", - attachment.filename - )); - } + let voice_model = routing.voice.clone(); - // Local Whisper backend — bypass the LLM provider path entirely. - #[cfg(feature = "stt-whisper")] - if let Some(model_spec) = voice_model.strip_prefix("whisper-local://") { - let transcript = match crate::stt::transcribe(model_spec, &bytes).await { - Ok(text) if text.is_empty() => { - tracing::warn!(filename = %attachment.filename, "local Whisper returned empty transcript"); - return UserContent::text(format!( - "[Audio transcription returned empty text for {}]", - attachment.filename - )); - } - Ok(text) => text, - Err(error) => { - tracing::warn!(%error, filename = %attachment.filename, "local Whisper transcription failed"); - return UserContent::text(format!( - "[Audio transcription failed for {}: {}]", - attachment.filename, error - )); - } - }; - return UserContent::text(format!( + match crate::stt::transcribe_bytes(&voice_model, &bytes, &attachment.mime_type, &deps.llm_manager, http).await { + Ok(transcript) => UserContent::text(format!( "\n{}\n", attachment.filename, attachment.mime_type, transcript - )); - } - - let (provider_id, model_name) = match 
deps.llm_manager.resolve_model(voice_model) { - Ok(parts) => parts, - Err(error) => { - tracing::warn!(%error, model = %voice_model, "invalid voice model route"); - return UserContent::text(format!( - "[Audio transcription failed for {}: invalid voice model '{}']", - attachment.filename, voice_model - )); - } - }; - - let provider = match deps.llm_manager.get_provider(&provider_id) { - Ok(provider) => provider, - Err(error) => { - tracing::warn!(%error, provider = %provider_id, "voice provider not configured"); - return UserContent::text(format!( - "[Audio transcription failed for {}: provider '{}' is not configured]", - attachment.filename, provider_id - )); - } - }; - - if provider.api_type == ApiType::Anthropic { - return UserContent::text(format!( - "[Audio transcription failed for {}: provider '{}' does not support input_audio on this endpoint]", - attachment.filename, provider_id - )); - } - - let format = audio_format_for_attachment(attachment); - use base64::Engine as _; - let base64_audio = base64::engine::general_purpose::STANDARD.encode(&bytes); - - let endpoint = format!( - "{}/v1/chat/completions", - provider.base_url.trim_end_matches('/') - ); - let body = serde_json::json!({ - "model": model_name, - "messages": [{ - "role": "user", - "content": [ - { - "type": "text", - "text": "Transcribe this audio verbatim. Return only the transcription text." 
- }, - { - "type": "input_audio", - "input_audio": { - "data": base64_audio, - "format": format, - } - } - ] - }], - "temperature": 0 - }); - - let response = match http - .post(&endpoint) - .header("authorization", format!("Bearer {}", provider.api_key)) - .header("content-type", "application/json") - .json(&body) - .send() - .await - { - Ok(response) => response, - Err(error) => { - tracing::warn!(%error, model = %voice_model, "voice transcription request failed"); - return UserContent::text(format!( - "[Audio transcription failed for {}]", + )), + Err(crate::stt::SttError::NotConfigured) => UserContent::text(format!( + "[Audio attachment received but no voice model is configured in routing.voice: {}]", + attachment.filename + )), + Err(crate::stt::SttError::EmptyResult) => { + tracing::warn!(filename = %attachment.filename, "transcription returned empty text"); + UserContent::text(format!( + "[Audio transcription returned empty text for {}]", attachment.filename - )); + )) } - }; - - let status = response.status(); - let response_body = match response.json::().await { - Ok(body) => body, Err(error) => { - tracing::warn!(%error, model = %voice_model, "invalid transcription response"); - return UserContent::text(format!( - "[Audio transcription failed for {}]", - attachment.filename - )); + tracing::warn!(%error, filename = %attachment.filename, "audio transcription failed"); + UserContent::text(format!( + "[Audio transcription failed for {}: {}]", + attachment.filename, error + )) } - }; - - if !status.is_success() { - let message = response_body["error"]["message"] - .as_str() - .unwrap_or("unknown error"); - tracing::warn!( - status = %status, - model = %voice_model, - error = %message, - "voice transcription provider returned error" - ); - return UserContent::text(format!( - "[Audio transcription failed for {}: {}]", - attachment.filename, message - )); - } - - let transcript = extract_transcript_text(&response_body); - if transcript.is_empty() { - 
tracing::warn!(model = %voice_model, "empty transcription returned"); - return UserContent::text(format!( - "[Audio transcription returned empty text for {}]", - attachment.filename - )); } - - UserContent::text(format!( - "\n{}\n", - attachment.filename, attachment.mime_type, transcript - )) -} - -fn audio_format_for_attachment(attachment: &crate::Attachment) -> &'static str { - let mime = attachment.mime_type.to_lowercase(); - if mime.contains("mpeg") || mime.contains("mp3") { - return "mp3"; - } - if mime.contains("wav") { - return "wav"; - } - if mime.contains("flac") { - return "flac"; - } - if mime.contains("aac") { - return "aac"; - } - if mime.contains("ogg") { - return "ogg"; - } - if mime.contains("mp4") || mime.contains("m4a") { - return "m4a"; - } - - match attachment - .filename - .rsplit('.') - .next() - .unwrap_or_default() - .to_lowercase() - .as_str() - { - "mp3" => "mp3", - "wav" => "wav", - "flac" => "flac", - "aac" => "aac", - "m4a" | "mp4" => "m4a", - "oga" | "ogg" => "ogg", - _ => "ogg", - } -} - -fn extract_transcript_text(body: &serde_json::Value) -> String { - if let Some(text) = body["choices"][0]["message"]["content"].as_str() { - return text.trim().to_string(); - } - - let Some(parts) = body["choices"][0]["message"]["content"].as_array() else { - return String::new(); - }; - - parts - .iter() - .filter_map(|part| { - if part["type"].as_str() == Some("text") { - part["text"].as_str().map(str::trim) - } else { - None - } - }) - .filter(|text| !text.is_empty()) - .collect::>() - .join("\n") } /// Download a text attachment and inline its content for the LLM. diff --git a/src/agent/worker.rs b/src/agent/worker.rs index 90c6e2e89..45bca97b6 100644 --- a/src/agent/worker.rs +++ b/src/agent/worker.rs @@ -1,5 +1,7 @@ //! Worker: Independent task execution process. 
+use std::sync::Arc; + use crate::agent::compactor::estimate_history_tokens; use crate::config::BrowserConfig; use crate::error::Result; @@ -193,6 +195,9 @@ impl Worker { let mcp_tools = self.deps.mcp_manager.get_tools().await; // Create per-worker ToolServer with task tools + let routing = self.deps.runtime_config.routing.load(); + let voice_model = routing.voice.clone(); + let worker_tool_server = crate::tools::create_worker_tool_server( self.deps.agent_id.clone(), self.id, @@ -206,9 +211,10 @@ impl Worker { self.deps.sandbox.clone(), mcp_tools, self.deps.runtime_config.clone(), + voice_model, + Arc::clone(&self.deps.llm_manager), + self.deps.llm_manager.http_client().clone(), ); - - let routing = self.deps.runtime_config.routing.load(); let model_name = routing.resolve(ProcessType::Worker, None).to_string(); let model = SpacebotModel::make(&self.deps.llm_manager, &model_name) .with_context(&*self.deps.agent_id, "worker") diff --git a/src/prompts/text.rs b/src/prompts/text.rs index 177d6132a..f65f3eafe 100644 --- a/src/prompts/text.rs +++ b/src/prompts/text.rs @@ -189,6 +189,9 @@ fn lookup(lang: &str, key: &str) -> &'static str { ("en", "tools/task_update") => { include_str!("../../prompts/en/tools/task_update_description.md.j2") } + ("en", "tools/transcribe_audio") => { + include_str!("../../prompts/en/tools/transcribe_audio_description.md.j2") + } // Fallback: unknown language or key -> try English (lang, key) if lang != "en" => { diff --git a/src/stt.rs b/src/stt.rs index 14860f11e..735fc5bdf 100644 --- a/src/stt.rs +++ b/src/stt.rs @@ -1,12 +1,192 @@ -//! Local Whisper speech-to-text via whisper-rs. +//! Speech-to-text transcription. //! -//! Only compiled when the `stt-whisper` feature is enabled. -//! Exposed as a single async `transcribe` function that lazily loads and caches -//! the model context for the lifetime of the process. +//! Provides a unified `transcribe_bytes` function that dispatches to either: +//! 
- The local Whisper backend (`whisper-local://`) when the `stt-whisper` +//! feature is enabled. +//! - An OpenAI-compatible HTTP provider (anything else) via `input_audio`. + +use crate::llm::manager::LlmManager; +use crate::config::ApiType; #[cfg(feature = "stt-whisper")] pub use local::transcribe; +/// Unified error type for all STT backends. +#[derive(Debug, thiserror::Error)] +pub enum SttError { + #[error("no voice model configured in routing.voice")] + NotConfigured, + #[error("local Whisper STT is not available in this build")] + WhisperNotBuilt, + #[error("whisper error: {0}")] + #[cfg(feature = "stt-whisper")] + Whisper(#[from] local::WhisperError), + #[error("provider '{0}' is not configured")] + ProviderNotConfigured(String), + #[error("provider '{0}' does not support audio transcription on this endpoint")] + ProviderUnsupported(String), + #[error("invalid voice model spec '{0}': {1}")] + InvalidModel(String, String), + #[error("transcription request failed: {0}")] + Http(String), + #[error("transcription returned empty result")] + EmptyResult, +} + +/// Transcribe raw audio bytes using the configured voice model. +/// +/// `voice_model` is the full value from `routing.voice`, e.g.: +/// - `"whisper-local://small"` — local Whisper +/// - `"openai/whisper-1"` — OpenAI-compatible HTTP provider +/// +/// `mime_type` is used to set the audio format hint for HTTP providers. +pub async fn transcribe_bytes( + voice_model: &str, + audio: &[u8], + mime_type: &str, + llm_manager: &LlmManager, + http: &reqwest::Client, +) -> Result { + let voice_model = voice_model.trim(); + if voice_model.is_empty() { + return Err(SttError::NotConfigured); + } + + // Local Whisper backend. 
+ if let Some(model_spec) = voice_model.strip_prefix("whisper-local://") { + #[cfg(feature = "stt-whisper")] + { + return local::transcribe(model_spec, audio) + .await + .map_err(SttError::Whisper); + } + #[cfg(not(feature = "stt-whisper"))] + { + let _ = (model_spec, audio); + return Err(SttError::WhisperNotBuilt); + } + } + + // HTTP provider path. + let (provider_id, model_name) = llm_manager + .resolve_model(voice_model) + .map_err(|e| SttError::InvalidModel(voice_model.to_string(), e.to_string()))?; + + let provider = llm_manager + .get_provider(&provider_id) + .map_err(|_| SttError::ProviderNotConfigured(provider_id.clone()))?; + + if provider.api_type == ApiType::Anthropic { + return Err(SttError::ProviderUnsupported(provider_id)); + } + + let format = audio_format_for_mime(mime_type); + use base64::Engine as _; + let base64_audio = base64::engine::general_purpose::STANDARD.encode(audio); + + let endpoint = format!( + "{}/v1/chat/completions", + provider.base_url.trim_end_matches('/') + ); + let body = serde_json::json!({ + "model": model_name, + "messages": [{ + "role": "user", + "content": [ + { + "type": "text", + "text": "Transcribe this audio verbatim. Return only the transcription text." 
+ }, + { + "type": "input_audio", + "input_audio": { + "data": base64_audio, + "format": format, + } + } + ] + }], + "temperature": 0 + }); + + let response = http + .post(&endpoint) + .header("authorization", format!("Bearer {}", provider.api_key)) + .header("content-type", "application/json") + .json(&body) + .send() + .await + .map_err(|e| SttError::Http(e.to_string()))?; + + let status = response.status(); + let response_body = response + .json::() + .await + .map_err(|e| SttError::Http(e.to_string()))?; + + if !status.is_success() { + let message = response_body["error"]["message"] + .as_str() + .unwrap_or("unknown error"); + return Err(SttError::Http(format!("{status}: {message}"))); + } + + let transcript = extract_transcript_text(&response_body); + if transcript.is_empty() { + return Err(SttError::EmptyResult); + } + + Ok(transcript) +} + +/// Infer the audio format string from a MIME type. +pub fn audio_format_for_mime(mime_type: &str) -> &'static str { + let mime = mime_type.to_lowercase(); + if mime.contains("mpeg") || mime.contains("mp3") { + return "mp3"; + } + if mime.contains("wav") { + return "wav"; + } + if mime.contains("flac") { + return "flac"; + } + if mime.contains("aac") { + return "aac"; + } + if mime.contains("ogg") { + return "ogg"; + } + if mime.contains("mp4") || mime.contains("m4a") { + return "m4a"; + } + "ogg" +} + +/// Extract the transcript text from an OpenAI-compatible chat completion response. 
+fn extract_transcript_text(body: &serde_json::Value) -> String { + if let Some(text) = body["choices"][0]["message"]["content"].as_str() { + return text.trim().to_string(); + } + + let Some(parts) = body["choices"][0]["message"]["content"].as_array() else { + return String::new(); + }; + + parts + .iter() + .filter_map(|part| { + if part["type"].as_str() == Some("text") { + part["text"].as_str().map(str::trim) + } else { + None + } + }) + .filter(|text| !text.is_empty()) + .collect::>() + .join("\n") +} + #[cfg(feature = "stt-whisper")] mod local { use std::sync::OnceLock; diff --git a/src/tools.rs b/src/tools.rs index efc37294a..562da9d40 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -51,6 +51,7 @@ pub mod spawn_worker; pub mod task_create; pub mod task_list; pub mod task_update; +pub mod transcribe_audio; pub mod web_search; pub mod worker_inspect; @@ -96,6 +97,9 @@ pub use spawn_worker::{SpawnWorkerArgs, SpawnWorkerError, SpawnWorkerOutput, Spa pub use task_create::{TaskCreateArgs, TaskCreateError, TaskCreateOutput, TaskCreateTool}; pub use task_list::{TaskListArgs, TaskListError, TaskListOutput, TaskListTool}; pub use task_update::{TaskUpdateArgs, TaskUpdateError, TaskUpdateOutput, TaskUpdateTool}; +pub use transcribe_audio::{ + TranscribeAudioArgs, TranscribeAudioError, TranscribeAudioOutput, TranscribeAudioTool, +}; pub use web_search::{SearchResult, WebSearchArgs, WebSearchError, WebSearchOutput, WebSearchTool}; pub use worker_inspect::{ WorkerInspectArgs, WorkerInspectError, WorkerInspectOutput, WorkerInspectTool, @@ -103,6 +107,7 @@ pub use worker_inspect::{ use crate::agent::channel::ChannelState; use crate::config::{BrowserConfig, RuntimeConfig}; +use crate::llm::manager::LlmManager; use crate::memory::MemorySearch; use crate::sandbox::Sandbox; use crate::tasks::TaskStore; @@ -400,6 +405,9 @@ pub fn create_worker_tool_server( sandbox: Arc, mcp_tools: Vec, runtime_config: Arc, + voice_model: String, + llm_manager: Arc, + http: reqwest::Client, ) -> 
ToolServerHandle { let mut server = ToolServer::new() .tool(ShellTool::new(workspace.clone(), sandbox.clone())) @@ -423,6 +431,14 @@ pub fn create_worker_tool_server( server = server.tool(SecretSetTool::new(store.clone())); } + if !voice_model.is_empty() { + server = server.tool(TranscribeAudioTool::new( + voice_model, + llm_manager, + http, + )); + } + if browser_config.enabled { server = server.tool(BrowserTool::new(browser_config, screenshot_dir)); } diff --git a/src/tools/transcribe_audio.rs b/src/tools/transcribe_audio.rs new file mode 100644 index 000000000..f9b1194a8 --- /dev/null +++ b/src/tools/transcribe_audio.rs @@ -0,0 +1,117 @@ +//! Transcribe audio tool for workers. +//! +//! Allows workers to transcribe audio files using whatever STT backend is +//! configured in `routing.voice` — local Whisper or an HTTP provider. + +use std::sync::Arc; + +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::llm::manager::LlmManager; + +/// Tool for transcribing audio files to text. +#[derive(Clone)] +pub struct TranscribeAudioTool { + /// The configured voice model spec (full `routing.voice` value). + voice_model: String, + llm_manager: Arc, + http: reqwest::Client, +} + +impl TranscribeAudioTool { + /// Create a new transcribe audio tool. + pub fn new( + voice_model: impl Into, + llm_manager: Arc, + http: reqwest::Client, + ) -> Self { + Self { + voice_model: voice_model.into(), + llm_manager, + http, + } + } +} + +/// Error type for transcribe audio tool. +#[derive(Debug, thiserror::Error)] +#[error("Audio transcription failed: {0}")] +pub struct TranscribeAudioError(String); + +/// Arguments for transcribe audio tool. +#[derive(Debug, Deserialize, JsonSchema)] +pub struct TranscribeAudioArgs { + /// Path to the audio file to transcribe (absolute or relative to the workspace). + /// Supports ogg, opus, mp3, flac, wav, m4a. 
+ pub path: String, +} + +/// Output from transcribe audio tool. +#[derive(Debug, Serialize)] +pub struct TranscribeAudioOutput { + /// The transcribed text. + pub transcript: String, +} + +impl Tool for TranscribeAudioTool { + const NAME: &'static str = "transcribe_audio"; + + type Error = TranscribeAudioError; + type Args = TranscribeAudioArgs; + type Output = TranscribeAudioOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: crate::prompts::text::get("tools/transcribe_audio").to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Path to the audio file to transcribe (absolute or relative to the workspace). Supports ogg, opus, mp3, flac, wav, m4a." + } + }, + "required": ["path"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let audio = tokio::fs::read(&args.path) + .await + .map_err(|e| TranscribeAudioError(format!("failed to read {}: {}", args.path, e)))?; + + // Infer mime type from file extension for the HTTP provider path. + let mime_type = mime_from_path(&args.path); + + let transcript = + crate::stt::transcribe_bytes(&self.voice_model, &audio, mime_type, &self.llm_manager, &self.http) + .await + .map_err(|e| TranscribeAudioError(e.to_string()))?; + + Ok(TranscribeAudioOutput { transcript }) + } +} + +/// Infer a MIME type string from a file path extension. +fn mime_from_path(path: &str) -> &'static str { + match path + .rsplit('.') + .next() + .unwrap_or_default() + .to_lowercase() + .as_str() + { + "mp3" => "audio/mpeg", + "wav" => "audio/wav", + "flac" => "audio/flac", + "aac" => "audio/aac", + "m4a" | "mp4" => "audio/mp4", + "opus" => "audio/opus", + _ => "audio/ogg", + } +}