Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion frontend/app/chat/_components/assistant-message.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ import { motion } from "motion/react";
import DogIcon from "@/components/icons/dog-icon";
import { MarkdownRenderer } from "@/components/markdown-renderer";
import { cn } from "@/lib/utils";
import type { FunctionCall } from "../_types/types";
import type { FunctionCall, TokenUsage as TokenUsageType } from "../_types/types";
import { FunctionCalls } from "./function-calls";
import { Message } from "./message";
import { TokenUsage } from "./token-usage";

interface AssistantMessageProps {
content: string;
Expand All @@ -21,6 +22,7 @@ interface AssistantMessageProps {
animate?: boolean;
delay?: number;
isInitialGreeting?: boolean;
usage?: TokenUsageType;
}

export function AssistantMessage({
Expand All @@ -37,6 +39,7 @@ export function AssistantMessage({
animate = true,
delay = 0.2,
isInitialGreeting = false,
usage,
}: AssistantMessageProps) {
return (
<motion.div
Expand Down Expand Up @@ -135,6 +138,7 @@ export function AssistantMessage({
: content
}
/>
{usage && !isStreaming && <TokenUsage usage={usage} />}
</motion.div>
</div>
</Message>
Expand Down
27 changes: 27 additions & 0 deletions frontend/app/chat/_components/token-usage.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { Zap } from "lucide-react";
import type { TokenUsage as TokenUsageType } from "../_types/types";

/** Props for the {@link TokenUsage} component. */
interface TokenUsageProps {
  /** Token counts to summarise for one message. */
  usage: TokenUsageType;
}

/**
 * Renders a one-line token summary of the form "<input> in / <output> out",
 * preceded by a small Zap icon.
 *
 * A green "(<n> cached)" suffix is appended only when
 * `usage.input_tokens_details.cached_tokens` is present and non-zero.
 *
 * @param usage - Token counts to display.
 * @returns The summary element, or `null` when `input_tokens` or
 *   `output_tokens` is not a number (partial or malformed usage data).
 */
export function TokenUsage({ usage }: TokenUsageProps) {
  const { input_tokens: inputCount, output_tokens: outputCount } = usage;

  // Partial or malformed usage payloads render nothing at all.
  const hasBothCounts =
    typeof inputCount === "number" && typeof outputCount === "number";
  if (!hasBothCounts) {
    return null;
  }

  // A ternary (not `&&`) is used so that a cached count of 0 yields `null`
  // instead of React rendering a literal "0".
  const cachedCount = usage.input_tokens_details?.cached_tokens;
  const cachedSuffix = cachedCount ? (
    <span className="text-green-500 ml-1">
      ({cachedCount.toLocaleString()} cached)
    </span>
  ) : null;

  return (
    <div className="flex items-center gap-2 mt-2 text-xs text-muted-foreground">
      <Zap className="h-3 w-3" />
      <span>
        {inputCount.toLocaleString()} in / {outputCount.toLocaleString()} out
        {cachedSuffix}
      </span>
    </div>
  );
}
13 changes: 13 additions & 0 deletions frontend/app/chat/_types/types.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,23 @@
/**
 * Token accounting attached to an assistant message.
 *
 * NOTE(review): the field names appear to mirror the `usage` object carried
 * by a `response.completed` event — confirm against the backend payload.
 */
export interface TokenUsage {
  /** Number of tokens counted on the input side. */
  input_tokens: number;
  /** Number of tokens counted on the output side. */
  output_tokens: number;
  /** Reported total; presumably input + output — verify against the producer. */
  total_tokens: number;
  /** Optional breakdown of the input tokens. */
  input_tokens_details?: { cached_tokens?: number };
  /** Optional breakdown of the output tokens. */
  output_tokens_details?: { reasoning_tokens?: number };
}

/** A single entry in the chat transcript. */
export interface Message {
  /** Who authored the message. */
  role: "user" | "assistant";
  /** Message text. */
  content: string;
  /** When the message was created. */
  timestamp: Date;
  /** Function/tool calls associated with this message, if any. */
  functionCalls?: FunctionCall[];
  /** Streaming flag; finalized messages are created with `false`. */
  isStreaming?: boolean;
  /** Origin of the message; NOTE(review): presumably the backend that produced it — confirm. */
  source?: "langflow" | "chat";
  /** Token usage reported for this message, when available. */
  usage?: TokenUsage;
}

export interface FunctionCall {
Expand Down
13 changes: 13 additions & 0 deletions frontend/app/chat/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,17 @@ function ChatPage() {
} else {
console.log("No function calls found in message");
}

// Extract usage data from response_data. The payload may be an
// already-parsed object or a JSON-encoded string, so the guard must accept
// both (the previous `typeof === "object"` guard made the string branch
// unreachable and usage was never extracted from string payloads).
if (msg.response_data) {
  let responseData = msg.response_data;
  if (typeof responseData === "string") {
    try {
      responseData = JSON.parse(responseData);
    } catch {
      // Non-JSON response_data must not break conversation loading;
      // treat it as "no usage available".
      responseData = null;
    }
  }
  // JSON.parse can legally return primitives or null, so re-check the shape
  // before reading `.usage`.
  if (responseData && typeof responseData === "object" && responseData.usage) {
    message.usage = responseData.usage;
  }
}
Comment on lines +507 to +515
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current guard if (msg.response_data && typeof msg.response_data === "object") makes the subsequent typeof msg.response_data === "string" ? JSON.parse(...) branch unreachable, so usage will never be extracted when response_data is actually a string. Also, JSON.parse here can throw and break conversation loading if response_data is non-JSON. Consider widening the guard to accept string | object and wrapping parsing in a try/catch (or a small safe-parse helper) before reading .usage.

Copilot uses AI. Check for mistakes.
}

return message;
Expand Down Expand Up @@ -837,6 +848,7 @@ function ChatPage() {
role: "assistant",
content: result.response,
timestamp: new Date(),
usage: result.usage,
};
setMessages((prev) => [...prev, assistantMessage]);
if (result.response_id) {
Expand Down Expand Up @@ -1152,6 +1164,7 @@ function ChatPage() {
messages.length === 1 &&
message.content === "How can I assist?"
}
usage={message.usage}
/>
</div>
),
Expand Down
7 changes: 7 additions & 0 deletions frontend/hooks/useChatStreaming.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type {
FunctionCall,
Message,
SelectedFilters,
TokenUsage,
} from "@/app/chat/_types/types";
import { useChat } from "@/contexts/chat-context";

Expand Down Expand Up @@ -130,6 +131,7 @@ export function useChatStreaming({
let currentContent = "";
const currentFunctionCalls: FunctionCall[] = [];
let newResponseId: string | null = null;
let usageData: TokenUsage | undefined;

// Initialize streaming message
if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
Expand Down Expand Up @@ -448,6 +450,10 @@ export function useChatStreaming({
else if (chunk.type === "response.output_text.delta") {
currentContent += chunk.delta || "";
}
// Handle response.completed event - capture usage
else if (chunk.type === "response.completed" && chunk.response?.usage) {
usageData = chunk.response.usage;
}
// Handle OpenRAG backend format
else if (chunk.output_text) {
currentContent += chunk.output_text;
Expand Down Expand Up @@ -567,6 +573,7 @@ export function useChatStreaming({
currentFunctionCalls.length > 0 ? currentFunctionCalls : undefined,
timestamp: new Date(),
isStreaming: false,
usage: usageData,
};

if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
Expand Down
28 changes: 28 additions & 0 deletions src/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,18 @@ async def async_response_stream(
sample_data=str(potential_tool_fields)[:500]
)

# Detect response.completed event and log usage
if isinstance(chunk_data, dict) and chunk_data.get("type") == "response.completed":
response_data = chunk_data.get("response", {})
usage = response_data.get("usage")
if usage:
logger.info(
"Stream usage data",
input_tokens=usage.get("input_tokens"),
output_tokens=usage.get("output_tokens"),
total_tokens=usage.get("total_tokens"),
)
Comment on lines +200 to +210
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Logging token usage at info level for every streamed response.completed event can generate a lot of log volume in production and may be inconsistent with nearby per-chunk logging (which is debug). Consider lowering this to debug (or gating behind a feature flag / sampling) to reduce operational noise while still allowing investigation when needed.

Copilot uses AI. Check for mistakes.

# Middleware: Detect implicit tool calls and inject standardized events
# This helps Granite 3.3 8b and other models that don't emit standard markers
if isinstance(chunk_data, dict) and not detected_tool_call:
Expand Down Expand Up @@ -487,6 +499,7 @@ async def async_chat_stream(

full_response = ""
response_id = None
usage_data = None
async for chunk in async_stream(
async_client,
prompt,
Expand All @@ -506,6 +519,10 @@ async def async_chat_stream(
response_id = chunk_data["id"]
elif "response_id" in chunk_data:
response_id = chunk_data["response_id"]
# Capture usage from response.completed event
if chunk_data.get("type") == "response.completed":
response_obj = chunk_data.get("response", {})
usage_data = response_obj.get("usage")
except:
pass
Comment on lines +522 to 527
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bare except: pass here will also swallow asyncio.CancelledError and any unexpected decoding/parsing errors, which can break cooperative cancellation and make stream issues extremely hard to diagnose. Prefer except Exception as e with at least a debug log, and let CancelledError propagate.

Copilot uses AI. Check for mistakes.
yield chunk
Expand All @@ -518,6 +535,9 @@ async def async_chat_stream(
"response_id": response_id,
"timestamp": datetime.now(),
}
# Store usage data if available (from response.completed event)
if usage_data:
assistant_message["response_data"] = {"usage": usage_data}
conversation_state["messages"].append(assistant_message)

# Store the conversation thread with its response_id
Expand Down Expand Up @@ -676,6 +696,7 @@ async def async_langflow_chat_stream(

full_response = ""
response_id = None
usage_data = None
collected_chunks = [] # Store all chunks for function call data

async for chunk in async_stream(
Expand All @@ -700,6 +721,10 @@ async def async_langflow_chat_stream(
response_id = chunk_data["id"]
elif "response_id" in chunk_data:
response_id = chunk_data["response_id"]
# Capture usage from response.completed event
if chunk_data.get("type") == "response.completed":
response_obj = chunk_data.get("response", {})
usage_data = response_obj.get("usage")
except:
pass
Comment on lines 728 to 729
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as the other stream: except: pass will swallow asyncio.CancelledError and hide JSON decoding problems, which can lead to stuck/cancel-ignoring requests and makes debugging difficult. Prefer catching Exception (and logging) while allowing cancellation to propagate.

Suggested change
except:
pass
except Exception as e:
logger.warning(f"Failed to parse langflow chunk: {e}")

Copilot uses AI. Check for mistakes.
yield chunk
Expand All @@ -713,6 +738,9 @@ async def async_langflow_chat_stream(
"timestamp": datetime.now(),
"chunks": collected_chunks, # Store complete chunk data for function calls
}
# Store usage data if available (from response.completed event)
if usage_data:
assistant_message["response_data"] = {"usage": usage_data}
conversation_state["messages"].append(assistant_message)

# Store the conversation thread with its response_id
Expand Down
9 changes: 7 additions & 2 deletions src/api/v1/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,16 @@ async def chat_get_endpoint(request: Request, chat_service, session_manager):
# Transform to public API format
messages = []
for msg in conversation.get("messages", []):
messages.append({
message_data = {
"role": msg.get("role"),
"content": msg.get("content"),
"timestamp": msg.get("timestamp"),
})
}
# Include token usage if available (from Responses API)
usage = msg.get("response_data", {}).get("usage") if isinstance(msg.get("response_data"), dict) else None
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

response_data from Langflow/history may be serialized as a JSON string (the frontend already treats it as possibly-string). This code only extracts usage when response_data is a dict, so usage will be silently omitted for string payloads. Consider normalizing response_data once (e.g., parse JSON strings when possible) and then reading usage from the normalized object; also avoid calling msg.get("response_data") multiple times in the same expression for clarity.

Suggested change
usage = msg.get("response_data", {}).get("usage") if isinstance(msg.get("response_data"), dict) else None
response_data = msg.get("response_data")
if isinstance(response_data, str):
try:
response_data = json.loads(response_data)
except Exception:
# If parsing fails, leave response_data as-is (usage will be omitted)
response_data = None
usage = response_data.get("usage") if isinstance(response_data, dict) else None

Copilot uses AI. Check for mistakes.
if usage:
message_data["usage"] = usage
messages.append(message_data)

response_data = {
"chat_id": conversation.get("response_id"),
Expand Down
Loading