Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 14 additions & 29 deletions src/agentevals/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

from .extraction import (
extract_agent_response_from_attrs,
extract_tool_call_from_span,
extract_tool_result_from_span,
extract_user_text_from_attrs,
get_extractor,
parse_json,
Expand All @@ -30,11 +32,7 @@
ADK_LLM_REQUEST,
ADK_LLM_RESPONSE,
ADK_SCOPE_VALUE,
ADK_TOOL_CALL_ARGS,
ADK_TOOL_RESPONSE,
OTEL_GENAI_AGENT_NAME,
OTEL_GENAI_TOOL_CALL_ID,
OTEL_GENAI_TOOL_NAME,
OTEL_SCOPE,
)

Expand Down Expand Up @@ -223,37 +221,24 @@ def _extract_tool_trajectory(
def _extract_from_tool_span(
tool_span: Span,
) -> tuple[genai_types.FunctionCall | None, genai_types.FunctionResponse | None]:
tool_name = tool_span.get_tag(OTEL_GENAI_TOOL_NAME)
tool_call_id = tool_span.get_tag(OTEL_GENAI_TOOL_CALL_ID)

if not tool_name:
# Fallback: parse tool name from operationName "execute_tool <name>"
op = tool_span.operation_name
if op.startswith("execute_tool "):
tool_name = op[len("execute_tool ") :]
else:
logger.warning("execute_tool span %s: no tool name found", tool_span.span_id)
return None, None

args_raw = tool_span.get_tag(ADK_TOOL_CALL_ARGS, "{}")
args = parse_json(args_raw)
tool_call = extract_tool_call_from_span(tool_span)
if tool_call is None:
logger.warning("execute_tool span %s: no tool name found", tool_span.span_id)
return None, None

fc = genai_types.FunctionCall(
name=tool_name,
args=args if args else {},
id=tool_call_id,
name=tool_call["name"],
args=tool_call["args"],
id=tool_call["id"],
)

response_raw = tool_span.get_tag(ADK_TOOL_RESPONSE)
tool_result = extract_tool_result_from_span(tool_span)
fr = None
if response_raw:
response_data = parse_json(response_raw)
# Response format varies: MCP uses {"content": [...], "isError": false},
# other tools return flat dicts. We pass through as-is.
if tool_result:
fr = genai_types.FunctionResponse(
name=tool_name,
response=response_data if response_data else {},
id=tool_call_id,
name=tool_call["name"],
response=tool_result["response"],
id=tool_call["id"],
)
Comment on lines +235 to 242
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`extract_tool_result_from_span()` can currently return `None` for an empty-but-present tool response (e.g. raw attribute value `"{}"`), because `extract_tool_result_from_attrs()` treats an empty parsed dict as falsy. This means `_extract_from_tool_span()` may omit a `FunctionResponse` that would previously have been emitted whenever `gcp.vertex.agent.tool_response` was present. Consider fixing the shared extractor (preferred) or handling the empty-dict case so ADK conversions don’t silently drop tool responses.

Copilot uses AI. Check for mistakes.

return fc, fr
Expand Down
25 changes: 24 additions & 1 deletion src/agentevals/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def extract_tool_result_from_attrs(attrs: dict[str, Any]) -> dict[str, Any] | No

if raw:
parsed = parse_tool_response_content(raw)
if parsed:
if isinstance(parsed, dict):
is_error = bool(parsed.get("isError", False))
return {"response": parsed, "isError": is_error}

Expand Down Expand Up @@ -235,6 +235,29 @@ def extract_tool_result_from_attrs(attrs: dict[str, Any]) -> dict[str, Any] | No
return None


# ---------------------------------------------------------------------------
# Span-level convenience wrappers
# ---------------------------------------------------------------------------


def extract_tool_call_from_span(span: Span) -> dict[str, Any] | None:
    """Return tool call info for a Span object.

    Thin adapter around extract_tool_call_from_attrs: the span's tags,
    operation name, and span ID are forwarded positionally, and the result
    ({"id", "name", "args"} or None) is returned unchanged.
    """
    attrs = span.tags
    operation_name = span.operation_name
    span_id = span.span_id
    return extract_tool_call_from_attrs(attrs, operation_name, span_id)


def extract_tool_result_from_span(span: Span) -> dict[str, Any] | None:
    """Return the tool result for a Span object.

    Thin adapter around extract_tool_result_from_attrs: only the span's tags
    are consulted, and the result ({"response": dict, "isError": bool} or
    None) is returned unchanged.
    """
    attrs = span.tags
    return extract_tool_result_from_attrs(attrs)


# ---------------------------------------------------------------------------
# Span classification helpers
# ---------------------------------------------------------------------------
Expand Down
77 changes: 19 additions & 58 deletions src/agentevals/genai_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,21 @@
from .extraction import (
GenAIExtractor,
extract_agent_response_from_attrs,
extract_tool_call_from_span,
extract_tool_result_from_span,
extract_user_text_from_attrs,
is_invocation_span,
is_llm_span,
parse_tool_response_content,
)
from .loader.base import Span, Trace
from .trace_attrs import (
OTEL_GENAI_INPUT_MESSAGES,
OTEL_GENAI_OUTPUT_MESSAGES,
OTEL_GENAI_TOOL_CALL_ARGUMENTS,
OTEL_GENAI_TOOL_CALL_ID,
OTEL_GENAI_TOOL_CALL_RESULT,
OTEL_GENAI_TOOL_NAME,
)
from .utils.genai_messages import (
ASSISTANT_ROLES,
USER_ROLES,
extract_text_from_message,
extract_tool_call_args_from_messages,
extract_tool_calls_from_message,
parse_json_attr,
)
Expand Down Expand Up @@ -385,68 +381,33 @@ def _extract_tool_calls(
tool_responses: list[_ToolResponse] = []

for tool_span in tool_spans:
tool_name = tool_span.get_tag(OTEL_GENAI_TOOL_NAME)
if not tool_name:
logger.warning(f"Tool span missing gen_ai.tool.name: {tool_span.operation_name}")
tool_call = extract_tool_call_from_span(tool_span)
if tool_call is None:
logger.warning(f"Tool span missing tool name: {tool_span.operation_name}")
continue

tool_call_id = tool_span.get_tag(OTEL_GENAI_TOOL_CALL_ID)
# The shared function returns span_id or "unknown" as fallback IDs.
# For dedup against LLM-message tool calls, only real IDs are usable.
tc_id = tool_call["id"]
is_placeholder = tc_id in ("unknown", tool_span.span_id)
dedup_id = None if is_placeholder else tc_id

args_raw = tool_span.get_tag(OTEL_GENAI_TOOL_CALL_ARGUMENTS, "{}")
args = parse_json_attr(args_raw, "gen_ai.tool.call.arguments")
if not isinstance(args, dict):
args = {}

if not args:
input_msgs_raw = tool_span.get_tag(OTEL_GENAI_INPUT_MESSAGES)
if input_msgs_raw:
args, _ = extract_tool_call_args_from_messages(input_msgs_raw, tool_name)

tc = _ToolCall(name=tool_name, args=args, id=tool_call_id)
if tool_call_id:
tool_calls_by_id[tool_call_id] = tc
tc = _ToolCall(name=tool_call["name"], args=tool_call["args"], id=dedup_id)
if dedup_id:
tool_calls_by_id[dedup_id] = tc
else:
tool_calls_no_id.append(tc)

result_raw = tool_span.get_tag(OTEL_GENAI_TOOL_CALL_RESULT)
if result_raw:
result_data = parse_tool_response_content(result_raw)
logger.debug(f"Tool {tool_name} result: {str(result_data)[:100]}")
tool_result = extract_tool_result_from_span(tool_span)
if tool_result:
logger.debug(f"Tool {tool_call['name']} result: {str(tool_result['response'])[:100]}")
tool_responses.append(
_ToolResponse(
name=tool_name,
response=result_data,
id=tool_call_id,
name=tool_call["name"],
response=tool_result["response"],
id=dedup_id,
)
)
Comment on lines +401 to 410
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switching to `extract_tool_result_from_span()` changes behavior for empty JSON tool results. `extract_tool_result_from_attrs()` currently returns `None` when the parsed response is an empty dict (because it checks `if parsed:`), so a tool span with `gen_ai.tool.call.result == "{}"` (or an empty dict response) will now produce no `_ToolResponse`, whereas the previous code would still record an empty response. Consider adjusting the shared extractor to treat the presence of the raw attribute as a result even if the parsed dict is empty, or otherwise ensure empty-but-present results aren’t dropped here.

Copilot uses AI. Check for mistakes.
else:
output_msgs_raw = tool_span.get_tag(OTEL_GENAI_OUTPUT_MESSAGES)
if output_msgs_raw:
output_msgs = parse_json_attr(output_msgs_raw, "gen_ai.output.messages")
if isinstance(output_msgs, list):
for msg in output_msgs:
if not isinstance(msg, dict):
continue
for part in msg.get("parts", []):
if not isinstance(part, dict):
continue
if part.get("type") == "tool_call_response" and "response" in part:
resp = part["response"]
if isinstance(resp, list):
texts = [t.get("text", "") for t in resp if isinstance(t, dict) and "text" in t]
result_data = parse_tool_response_content(" ".join(texts))
elif isinstance(resp, dict):
result_data = resp
else:
result_data = {"result": str(resp)}
tool_responses.append(
_ToolResponse(
name=tool_name,
response=result_data,
id=tool_call_id,
)
)
break

if llm_spans:
for llm_span in llm_spans:
Expand Down