Skip to content
This repository was archived by the owner on Jan 2, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion server/bleep/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ tower-http = { version = "0.4.4", features = ["auth", "cors", "catch-panic", "fs
# api integrations
octocrab = { version = "0.25.1", features = ["rustls", "rustls-webpki-tokio"] }
reqwest = { version = "0.11.20", features = ["rustls-tls-webpki-roots", "cookies", "gzip"], default-features = false }
reqwest-eventsource = "0.4.0"
reqwest-eventsource = "0.5.0"
secrecy = { version = "0.8.0", features = ["serde"] }

# file processing
Expand Down
38 changes: 24 additions & 14 deletions server/bleep/src/llm_gateway.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ impl From<&api::Message> for tiktoken_rs::ChatCompletionRequestMessage {
}

enum ChatError {
BadRequest,
TooManyRequests,
BadRequest(String),
TooManyRequests(String),
Other(anyhow::Error),
}

Expand Down Expand Up @@ -329,23 +329,23 @@ impl Client {
let mut delay = INITIAL_DELAY;
for _ in 0..self.max_retries {
match self.chat_stream_oneshot(messages, functions).await {
Err(ChatError::TooManyRequests) => {
Err(ChatError::TooManyRequests(_)) => {
warn!(?delay, "too many LLM requests, retrying with delay...");
tokio::time::sleep(delay).await;
delay = Duration::from_millis((delay.as_millis() as f32 * SCALE_FACTOR) as u64);
}
Err(ChatError::BadRequest) => {
Err(ChatError::BadRequest(body)) => {
// We log the messages in a separate `debug!` statement so that they can be
// filtered out, due to their verbosity.
debug!("LLM message list: {messages:?}");
error!("LLM request failed, request not eligible for retry");
bail!("request not eligible for retry");
error!("LLM request failed, request not eligible for retry: {body}");
bail!("request failed (not eligible for retry): {body}");
}
Err(ChatError::Other(e)) => {
// We log the messages in a separate `debug!` statement so that they can be
// filtered out, due to their verbosity.
debug!("LLM message list: {messages:?}");
error!("LLM request failed due to unknown reason: {e}");
error!("LLM request failed due to unknown reason: {e:?}");
return Err(e);
}
Ok(stream) => return Ok(stream),
Expand Down Expand Up @@ -399,20 +399,30 @@ impl Client {

match event_source.next().await {
Some(Ok(reqwest_eventsource::Event::Open)) => {}
Some(Err(reqwest_eventsource::Error::InvalidStatusCode(status)))
Some(Err(reqwest_eventsource::Error::InvalidStatusCode(status, response)))
if status == StatusCode::BAD_REQUEST =>
{
warn!("bad request to LLM");
return Err(ChatError::BadRequest);
let body = response
.text()
.await
.map_err(|e| ChatError::Other(anyhow!(e)))?;
warn!("bad request to LLM: {body}");
return Err(ChatError::BadRequest(body));
}
Some(Err(reqwest_eventsource::Error::InvalidStatusCode(status)))
Some(Err(reqwest_eventsource::Error::InvalidStatusCode(status, response)))
if status == StatusCode::TOO_MANY_REQUESTS =>
{
warn!("too many requests to LLM");
return Err(ChatError::TooManyRequests);
let body = response
.text()
.await
.map_err(|e| ChatError::Other(anyhow!(e)))?;
warn!("too many requests to LLM: {body}");
return Err(ChatError::TooManyRequests(body));
}
Some(Err(e)) => {
return Err(ChatError::Other(anyhow!("event source error: {:?}", e)));
return Err(ChatError::Other(anyhow!(
"failed to make event source request to answer API: {e}",
)));
}
_ => {
return Err(ChatError::Other(anyhow!("event source failed to open")));
Expand Down
8 changes: 7 additions & 1 deletion server/bleep/src/webserver/quota.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,13 @@ async fn get_request<T: for<'a> Deserialize<'a>>(
.map_err(Error::internal)?;

if response.status().is_success() {
response.json().await.map_err(Error::internal).map(Json)
let body = response.text().await.map_err(Error::internal)?;
match serde_json::from_str::<T>(&body) {
Ok(t) => Ok(Json(t)),
Err(_) => Err(Error::internal(format!(
"quota call return invalid JSON: {body}"
))),
}
} else {
let status = response.status();
match response.text().await {
Expand Down