-
Notifications
You must be signed in to change notification settings - Fork 399
feat: display llm usage data #784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8c26e03
38072b2
08f675c
5b44aac
2e4b575
79b4a23
71eb413
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| import { Zap } from "lucide-react"; | ||
| import type { TokenUsage as TokenUsageType } from "../_types/types"; | ||
|
|
||
/** Props accepted by the TokenUsage component. */
interface TokenUsageProps {
  /** Token usage figures to display. */
  usage: TokenUsageType;
}

/**
 * Renders a compact "<input> in / <output> out" token summary next to a Zap
 * icon, followed by a "(<n> cached)" note when a non-zero cached token count
 * is present under `input_tokens_details`.
 *
 * Returns `null` (renders nothing) when `usage` is missing or when either
 * `input_tokens` or `output_tokens` is not a number.
 */
export function TokenUsage({ usage }: TokenUsageProps) {
  // Guard against missing or partial/malformed usage data. The `!usage` check
  // comes first so a null/undefined value renders nothing instead of throwing
  // on property access.
  if (
    !usage ||
    typeof usage.input_tokens !== "number" ||
    typeof usage.output_tokens !== "number"
  ) {
    return null;
  }

  // Optional: absent or zero means there is no cached-token note to show.
  const cachedTokens = usage.input_tokens_details?.cached_tokens;

  return (
    <div className="flex items-center gap-2 mt-2 text-xs text-muted-foreground">
      <Zap className="h-3 w-3" />
      <span>
        {usage.input_tokens.toLocaleString()} in / {usage.output_tokens.toLocaleString()} out
        {cachedTokens ? (
          <span className="text-green-500 ml-1">
            ({cachedTokens.toLocaleString()} cached)
          </span>
        ) : null}
      </span>
    </div>
  );
}
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -197,6 +197,18 @@ async def async_response_stream( | |||||||||
| sample_data=str(potential_tool_fields)[:500] | ||||||||||
| ) | ||||||||||
|
|
||||||||||
| # Detect response.completed event and log usage | ||||||||||
| if isinstance(chunk_data, dict) and chunk_data.get("type") == "response.completed": | ||||||||||
| response_data = chunk_data.get("response", {}) | ||||||||||
| usage = response_data.get("usage") | ||||||||||
| if usage: | ||||||||||
| logger.info( | ||||||||||
| "Stream usage data", | ||||||||||
| input_tokens=usage.get("input_tokens"), | ||||||||||
| output_tokens=usage.get("output_tokens"), | ||||||||||
| total_tokens=usage.get("total_tokens"), | ||||||||||
| ) | ||||||||||
|
Comment on lines
+200
to
+210
|
||||||||||
|
|
||||||||||
| # Middleware: Detect implicit tool calls and inject standardized events | ||||||||||
| # This helps Granite 3.3 8b and other models that don't emit standard markers | ||||||||||
| if isinstance(chunk_data, dict) and not detected_tool_call: | ||||||||||
|
|
@@ -487,6 +499,7 @@ async def async_chat_stream( | |||||||||
|
|
||||||||||
| full_response = "" | ||||||||||
| response_id = None | ||||||||||
| usage_data = None | ||||||||||
| async for chunk in async_stream( | ||||||||||
| async_client, | ||||||||||
| prompt, | ||||||||||
|
|
@@ -506,6 +519,10 @@ async def async_chat_stream( | |||||||||
| response_id = chunk_data["id"] | ||||||||||
| elif "response_id" in chunk_data: | ||||||||||
| response_id = chunk_data["response_id"] | ||||||||||
| # Capture usage from response.completed event | ||||||||||
| if chunk_data.get("type") == "response.completed": | ||||||||||
| response_obj = chunk_data.get("response", {}) | ||||||||||
| usage_data = response_obj.get("usage") | ||||||||||
| except: | ||||||||||
| pass | ||||||||||
|
Comment on lines
+522
to
527
|
||||||||||
| yield chunk | ||||||||||
|
|
@@ -518,6 +535,9 @@ async def async_chat_stream( | |||||||||
| "response_id": response_id, | ||||||||||
| "timestamp": datetime.now(), | ||||||||||
| } | ||||||||||
| # Store usage data if available (from response.completed event) | ||||||||||
| if usage_data: | ||||||||||
| assistant_message["response_data"] = {"usage": usage_data} | ||||||||||
| conversation_state["messages"].append(assistant_message) | ||||||||||
|
|
||||||||||
| # Store the conversation thread with its response_id | ||||||||||
|
|
@@ -676,6 +696,7 @@ async def async_langflow_chat_stream( | |||||||||
|
|
||||||||||
| full_response = "" | ||||||||||
| response_id = None | ||||||||||
| usage_data = None | ||||||||||
| collected_chunks = [] # Store all chunks for function call data | ||||||||||
|
|
||||||||||
| async for chunk in async_stream( | ||||||||||
|
|
@@ -700,6 +721,10 @@ async def async_langflow_chat_stream( | |||||||||
| response_id = chunk_data["id"] | ||||||||||
| elif "response_id" in chunk_data: | ||||||||||
| response_id = chunk_data["response_id"] | ||||||||||
| # Capture usage from response.completed event | ||||||||||
| if chunk_data.get("type") == "response.completed": | ||||||||||
| response_obj = chunk_data.get("response", {}) | ||||||||||
| usage_data = response_obj.get("usage") | ||||||||||
| except: | ||||||||||
| pass | ||||||||||
|
Comment on lines
728
to
729
|
||||||||||
| except: | |
| pass | |
| except Exception as e: | |
| logger.warning(f"Failed to parse langflow chunk: {e}") |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -239,11 +239,16 @@ async def chat_get_endpoint(request: Request, chat_service, session_manager): | |||||||||||||||||||
| # Transform to public API format | ||||||||||||||||||||
| messages = [] | ||||||||||||||||||||
| for msg in conversation.get("messages", []): | ||||||||||||||||||||
| messages.append({ | ||||||||||||||||||||
| message_data = { | ||||||||||||||||||||
| "role": msg.get("role"), | ||||||||||||||||||||
| "content": msg.get("content"), | ||||||||||||||||||||
| "timestamp": msg.get("timestamp"), | ||||||||||||||||||||
| }) | ||||||||||||||||||||
| } | ||||||||||||||||||||
| # Include token usage if available (from Responses API) | ||||||||||||||||||||
| usage = msg.get("response_data", {}).get("usage") if isinstance(msg.get("response_data"), dict) else None | ||||||||||||||||||||
|
||||||||||||||||||||
| usage = msg.get("response_data", {}).get("usage") if isinstance(msg.get("response_data"), dict) else None | |
| response_data = msg.get("response_data") | |
| if isinstance(response_data, str): | |
| try: | |
| response_data = json.loads(response_data) | |
| except Exception: | |
| # If parsing fails, leave response_data as-is (usage will be omitted) | |
| response_data = None | |
| usage = response_data.get("usage") if isinstance(response_data, dict) else None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The current guard `if (msg.response_data && typeof msg.response_data === "object")` makes the subsequent `typeof msg.response_data === "string" ? JSON.parse(...)` branch unreachable, so usage will never be extracted when `response_data` is actually a string. Also, `JSON.parse` here can throw and break conversation loading if `response_data` is non-JSON. Consider widening the guard to accept `string | object` and wrapping parsing in a try/catch (or a small safe-parse helper) before reading `.usage`.