Merged
6 changes: 6 additions & 0 deletions .github/next-release/changeset-service-tier-response.md
@@ -0,0 +1,6 @@
---
"livekit-agents": patch
"livekit-plugins-openai": patch
---

Expose service_tier in CompletionUsage from OpenAI Responses API and Chat Completions
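
For orientation, a minimal consumer-side sketch of the new field. The import path is assumed from the file layout below, and the handler itself is a hypothetical stand-in, not an API this PR adds:

from livekit.agents import llm

def log_usage(usage: llm.CompletionUsage) -> None:
    # service_tier is None for providers that don't report a tier.
    tier = usage.service_tier or "unreported"
    print(f"tier={tier} total={usage.total_tokens} cached={usage.prompt_cached_tokens}")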
1 change: 1 addition & 0 deletions livekit-agents/livekit/agents/inference/llm.py
@@ -375,6 +375,7 @@ async def _run(self) -> None:
prompt_tokens=chunk.usage.prompt_tokens,
prompt_cached_tokens=cached_tokens or 0,
total_tokens=chunk.usage.total_tokens,
service_tier=getattr(chunk, "service_tier", None),
),
)
self._event_ch.send_nowait(usage_chunk)
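
The getattr(chunk, "service_tier", None) read above is deliberately defensive: older provider SDK chunk objects may not carry the attribute at all. A self-contained sketch of the same pattern, using a hypothetical stand-in for the SDK chunk type:

from types import SimpleNamespace

def tier_of(chunk: object) -> str | None:
    # Falls back to None instead of raising AttributeError when the
    # SDK object predates the field entirely.
    return getattr(chunk, "service_tier", None)

print(tier_of(SimpleNamespace(service_tier="flex")))  # flex
print(tier_of(SimpleNamespace()))                     # None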
3 changes: 3 additions & 0 deletions livekit-agents/livekit/agents/llm/llm.py
@@ -45,6 +45,9 @@ class CompletionUsage(BaseModel):
"""The number of tokens read from the cache."""
total_tokens: int
"""The total number of tokens used (completion + prompt tokens)."""
service_tier: str | None = None
"""The service tier used for processing the request (e.g. 'default', 'priority', 'flex').
Returned by providers that support tiered processing (e.g. OpenAI)."""


class FunctionToolCall(BaseModel):
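
Because CompletionUsage is a Pydantic model, the new field defaults to None, so existing call sites that don't pass it keep validating. A sketch of both cases; the constructor fields here are taken from this diff and its context (completion_tokens is implied by the total_tokens docstring), so treat the exact signature as assumed:

from livekit.agents import llm

usage = llm.CompletionUsage(
    completion_tokens=128,
    prompt_tokens=512,
    prompt_cached_tokens=256,
    total_tokens=640,
)  # service_tier omitted -> None, backward compatible
assert usage.service_tier is None

priority = usage.model_copy(update={"service_tier": "priority"})
print(priority.service_tier)  # priority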
@@ -521,6 +521,7 @@ def _handle_response_completed(self, event: ResponseCompletedEvent) -> llm.ChatChunk:
if usage.input_tokens_details
else 0,
total_tokens=usage.total_tokens,
service_tier=getattr(event.response, "service_tier", None),
),
)
return chunk
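
This hunk belongs to the livekit-plugins-openai package named in the changeset (the exact file path isn't shown above). For end-to-end context: OpenAI echoes the tier actually used on the response object, which is what getattr(event.response, "service_tier", None) picks up. A hedged sketch against the OpenAI Python SDK; the model and tier values are illustrative, and supported combinations vary by model:

from openai import OpenAI

client = OpenAI()
resp = client.responses.create(
    model="o4-mini",
    input="ping",
    service_tier="flex",  # request a tier; the server may substitute another
)
# The echoed tier is the value the handler above forwards into
# CompletionUsage.service_tier.
print(resp.service_tier)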