Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions src/agentevals/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,43 @@

FORMAT_DETECTION_SPAN_LIMIT = 10


def _parse_legacy_indexed_attrs(attrs: dict[str, Any], prefix: str) -> list[dict]:
"""Parse flat gen_ai.{prefix}.N.* attributes into a message list."""
messages: dict[int, dict] = {}
for key, value in attrs.items():
if not key.startswith(prefix):
continue
rest = key[len(prefix):]
parts = rest.split(".", 1)
if not parts[0].isdigit():
continue
idx = int(parts[0])
msg = messages.setdefault(idx, {})
if len(parts) < 2:
continue
field = parts[1]
if field == "role":
msg["role"] = value
elif field == "content":
msg["content"] = value
elif field.startswith("tool_calls."):
tc_rest = field[len("tool_calls."):]
tc_parts = tc_rest.split(".", 1)
if not tc_parts[0].isdigit() or len(tc_parts) < 2:
continue
tc_map = msg.setdefault("_tc", {})
tc_map.setdefault(int(tc_parts[0]), {})[tc_parts[1]] = value
result = []
for idx in sorted(messages):
msg = messages[idx].copy()
tc_map = msg.pop("_tc", {})
if tc_map:
msg["tool_calls"] = [tc_map[i] for i in sorted(tc_map)]
result.append(msg)
return result


# ---------------------------------------------------------------------------
# Pure extraction functions (operate on flat attribute dicts)
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -92,6 +129,12 @@ def extract_user_text_from_attrs(attrs: dict[str, Any]) -> str | None:
if text:
return text

for msg in reversed(_parse_legacy_indexed_attrs(attrs, "gen_ai.prompt.")):
if msg.get("role") in USER_ROLES:
text = extract_text_from_message(msg)
if text:
return text

return None


Expand All @@ -118,6 +161,12 @@ def extract_agent_response_from_attrs(attrs: dict[str, Any]) -> str | None:
if text:
return text

for msg in reversed(_parse_legacy_indexed_attrs(attrs, "gen_ai.completion.")):
if msg.get("role") in ASSISTANT_ROLES:
text = extract_text_from_message(msg)
if text:
return text

return None


Expand Down
50 changes: 50 additions & 0 deletions tests/test_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,3 +802,53 @@ def test_absent_type_and_description(self):
result = extract_tool_call_from_attrs(attrs)
assert "type" not in result
assert "description" not in result


# ---------------------------------------------------------------------------
# Legacy gen_ai.prompt.* / gen_ai.completion.* attributes (Ollama style)
# ---------------------------------------------------------------------------


class TestLegacyGenAIAttributes:
    """Extraction from flat ``gen_ai.prompt.N.*`` / ``gen_ai.completion.N.*`` keys."""

    def test_user_text_from_legacy_prompt(self):
        """A single legacy prompt message yields its content as the user text."""
        expected = "Hi! Can you help me?"
        attrs = {
            "gen_ai.prompt.0.role": "user",
            "gen_ai.prompt.0.content": expected,
            "gen_ai.request.model": "llama3.2:3b",
        }
        user_text = extract_user_text_from_attrs(attrs)
        assert user_text == expected

    def test_user_text_prefers_last_user_in_legacy_prompt(self):
        """With several user turns, the highest-indexed one is returned."""
        turns = [
            ("user", "First message"),
            ("assistant", "Response"),
            ("user", "Follow-up"),
        ]
        attrs = {}
        for position, (role, content) in enumerate(turns):
            attrs[f"gen_ai.prompt.{position}.role"] = role
            attrs[f"gen_ai.prompt.{position}.content"] = content
        user_text = extract_user_text_from_attrs(attrs)
        assert user_text == "Follow-up"

    def test_agent_response_from_legacy_completion(self):
        """A single legacy completion message yields the agent response."""
        expected = "You rolled a 4 on a 6-sided die."
        attrs = {
            "gen_ai.completion.0.role": "assistant",
            "gen_ai.completion.0.content": expected,
            "gen_ai.request.model": "llama3.2:3b",
        }
        response = extract_agent_response_from_attrs(attrs)
        assert response == expected

    def test_legacy_prompt_ignored_when_standard_attr_present(self):
        """The standard input-messages attribute takes priority over legacy keys."""
        standard_payload = json.dumps([{"role": "user", "content": "Standard wins"}])
        attrs = {
            OTEL_GENAI_INPUT_MESSAGES: standard_payload,
            "gen_ai.prompt.0.role": "user",
            "gen_ai.prompt.0.content": "Legacy loses",
        }
        user_text = extract_user_text_from_attrs(attrs)
        assert user_text == "Standard wins"

    def test_legacy_completion_ignored_when_standard_attr_present(self):
        """The standard output-messages attribute takes priority over legacy keys."""
        standard_payload = json.dumps([{"role": "assistant", "content": "Standard wins"}])
        attrs = {
            OTEL_GENAI_OUTPUT_MESSAGES: standard_payload,
            "gen_ai.completion.0.role": "assistant",
            "gen_ai.completion.0.content": "Legacy loses",
        }
        response = extract_agent_response_from_attrs(attrs)
        assert response == "Standard wins"