diff --git a/Cargo.lock b/Cargo.lock index f0afc0227e..9e490d4313 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5847,9 +5847,9 @@ dependencies = [ [[package]] name = "reqwest-eventsource" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f03f570355882dd8d15acc3a313841e6e90eddbc76a93c748fd82cc13ba9f51" +checksum = "f529a5ff327743addc322af460761dff5b50e0c826b9e6ac44c3195c50bb2026" dependencies = [ "eventsource-stream", "futures-core", diff --git a/server/bleep/Cargo.toml b/server/bleep/Cargo.toml index 274bd06d2d..df378ae4cd 100644 --- a/server/bleep/Cargo.toml +++ b/server/bleep/Cargo.toml @@ -97,7 +97,7 @@ tower-http = { version = "0.4.4", features = ["auth", "cors", "catch-panic", "fs # api integrations octocrab = { version = "0.25.1", features = ["rustls", "rustls-webpki-tokio"] } reqwest = { version = "0.11.20", features = ["rustls-tls-webpki-roots", "cookies", "gzip"], default-features = false } -reqwest-eventsource = "0.4.0" +reqwest-eventsource = "0.5.0" secrecy = { version = "0.8.0", features = ["serde"] } # file processing diff --git a/server/bleep/src/llm_gateway.rs b/server/bleep/src/llm_gateway.rs index 4cfc0c04ae..e60c20d794 100644 --- a/server/bleep/src/llm_gateway.rs +++ b/server/bleep/src/llm_gateway.rs @@ -187,8 +187,8 @@ impl From<&api::Message> for tiktoken_rs::ChatCompletionRequestMessage { } enum ChatError { - BadRequest, - TooManyRequests, + BadRequest(String), + TooManyRequests(String), Other(anyhow::Error), } @@ -329,23 +329,23 @@ impl Client { let mut delay = INITIAL_DELAY; for _ in 0..self.max_retries { match self.chat_stream_oneshot(messages, functions).await { - Err(ChatError::TooManyRequests) => { + Err(ChatError::TooManyRequests(_)) => { warn!(?delay, "too many LLM requests, retrying with delay..."); tokio::time::sleep(delay).await; delay = Duration::from_millis((delay.as_millis() as f32 * SCALE_FACTOR) as u64); } - Err(ChatError::BadRequest) => { + Err(ChatError::BadRequest(body)) => { // We log the messages in a separate `debug!` statement so that they can be // filtered out, due to their verbosity. debug!("LLM message list: {messages:?}"); - error!("LLM request failed, request not eligible for retry"); - bail!("request not eligible for retry"); + error!("LLM request failed, request not eligible for retry: {body}"); + bail!("request failed (not eligible for retry): {body}"); } Err(ChatError::Other(e)) => { // We log the messages in a separate `debug!` statement so that they can be // filtered out, due to their verbosity. debug!("LLM message list: {messages:?}"); - error!("LLM request failed due to unknown reason: {e}"); + error!("LLM request failed due to unknown reason: {e:?}"); return Err(e); } Ok(stream) => return Ok(stream), @@ -399,20 +399,30 @@ impl Client { match event_source.next().await { Some(Ok(reqwest_eventsource::Event::Open)) => {} - Some(Err(reqwest_eventsource::Error::InvalidStatusCode(status))) + Some(Err(reqwest_eventsource::Error::InvalidStatusCode(status, response))) if status == StatusCode::BAD_REQUEST => { - warn!("bad request to LLM"); - return Err(ChatError::BadRequest); + let body = response + .text() + .await + .map_err(|e| ChatError::Other(anyhow!(e)))?; + warn!("bad request to LLM: {body}"); + return Err(ChatError::BadRequest(body)); } - Some(Err(reqwest_eventsource::Error::InvalidStatusCode(status))) + Some(Err(reqwest_eventsource::Error::InvalidStatusCode(status, response))) if status == StatusCode::TOO_MANY_REQUESTS => { - warn!("too many requests to LLM"); - return Err(ChatError::TooManyRequests); + let body = response + .text() + .await + .map_err(|e| ChatError::Other(anyhow!(e)))?; + warn!("too many requests to LLM: {body}"); + return Err(ChatError::TooManyRequests(body)); } Some(Err(e)) => { - return Err(ChatError::Other(anyhow!("event source error: {:?}", e))); + return Err(ChatError::Other(anyhow!( + "failed to make event source request to answer API: {e}", + ))); } _ => { return Err(ChatError::Other(anyhow!("event source failed to open"))); diff --git a/server/bleep/src/webserver/quota.rs b/server/bleep/src/webserver/quota.rs index 5c0c317a1e..b6e9677a0a 100644 --- a/server/bleep/src/webserver/quota.rs +++ b/server/bleep/src/webserver/quota.rs @@ -50,7 +50,13 @@ async fn get_request Deserialize<'a>>( .map_err(Error::internal)?; if response.status().is_success() { - response.json().await.map_err(Error::internal).map(Json) + let body = response.text().await.map_err(Error::internal)?; + match serde_json::from_str::(&body) { + Ok(t) => Ok(Json(t)), + Err(_) => Err(Error::internal(format!( + "quota call return invalid JSON: {body}" + ))), + } } else { let status = response.status(); match response.text().await {