Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ agentevals accepts OTLP/HTTP on port 4318 (`http/protobuf` and `http/json`) and
| [zero-code-examples/ollama/](./zero-code-examples/ollama/) | LangChain | Ollama |
| [zero-code-examples/strands/](./zero-code-examples/strands/) | Strands | OpenAI |
| [zero-code-examples/adk/](./zero-code-examples/adk/) | Google ADK | Gemini |
| [zero-code-examples/pydantic-ai/](./zero-code-examples/pydantic-ai/) | Pydantic AI | OpenAI |

This approach works with any framework that has OTel instrumentation: LangChain, Strands, Google ADK, etc. If your framework already emits OTel spans, you only need to add `OTLPSpanExporter` (and `OTLPLogExporter` if it uses GenAI log-based content delivery).

Expand Down Expand Up @@ -103,6 +104,7 @@ Detection checks for `gen_ai.request.model` / `gen_ai.input.messages` (GenAI sem
| [zero-code-examples/ollama/](./zero-code-examples/ollama/) | LangChain | Ollama | GenAI semconv (logs) | Standard OTLP export |
| [zero-code-examples/strands/](./zero-code-examples/strands/) | Strands | OpenAI | GenAI semconv (events*) | Standard OTLP export |
| [zero-code-examples/adk/](./zero-code-examples/adk/) | Google ADK | Gemini | ADK built-in | Standard OTLP export |
| [zero-code-examples/pydantic-ai/](./zero-code-examples/pydantic-ai/) | Pydantic AI | OpenAI | GenAI semconv (span attrs) | Standard OTLP export |
| [langchain_agent](./langchain_agent/) | LangChain | OpenAI | GenAI semconv (logs) | SDK WebSocket |
| [strands_agent](./strands_agent/) | Strands | OpenAI | GenAI semconv (events*) | SDK WebSocket |
| [dice_agent](./dice_agent/) | Google ADK | Gemini | ADK built-in | SDK WebSocket |
Expand Down Expand Up @@ -217,6 +219,7 @@ python examples/zero-code-examples/langchain/run.py
python examples/zero-code-examples/ollama/run.py
python examples/zero-code-examples/strands/run.py
python examples/zero-code-examples/adk/run.py
python examples/zero-code-examples/pydantic-ai/run.py

# SDK examples:
python examples/sdk_example/context_manager_example.py
Expand All @@ -232,7 +235,7 @@ python examples/strands_agent/main.py
Traces stream to the dev server in real-time. Evaluation runs automatically when the session completes.

See each example's README for prerequisites and detailed instructions:
- [zero-code-examples/](./zero-code-examples/) (LangChain + Strands, standard OTLP)
- [zero-code-examples/](./zero-code-examples/) (LangChain, Strands, ADK, OpenAI Agents, Pydantic AI — standard OTLP)
- [dice_agent/README.md](./dice_agent/README.md) (Google ADK + Gemini)
- [langchain_agent/README.md](./langchain_agent/README.md) (LangChain + OpenAI, SDK)
- [strands_agent/](./strands_agent/) (Strands + OpenAI, SDK)
Expand Down
5 changes: 5 additions & 0 deletions examples/zero-code-examples/pydantic-ai/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pydantic-ai>=1.81.0

opentelemetry-sdk>=1.36.0
opentelemetry-exporter-otlp-proto-http>=1.36.0
python-dotenv>=1.0.0
105 changes: 105 additions & 0 deletions examples/zero-code-examples/pydantic-ai/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""Run a dice-rolling Pydantic AI agent with OTLP export — no agentevals SDK.

Demonstrates zero-code integration: any OTel-instrumented agent streams
traces to agentevals by pointing the OTLP exporter at the receiver.

Pydantic AI has built-in OTel support via Agent.instrument_all(). By default
it uses version 2 of the GenAI semconv format, storing message content in span
attributes — so only a TracerProvider is required, with no separate
instrumentation library.

Prerequisites:
1. pip install -r requirements.txt
2. agentevals serve --dev
3. export OPENAI_API_KEY="your-key-here"

Usage:
python examples/zero-code-examples/pydantic-ai/run.py
"""

import os
import random

from dotenv import load_dotenv
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from pydantic_ai import Agent

load_dotenv(override=True)


def roll_die(sides: int) -> int:
"""Roll a die with the given number of sides and return the result."""
return random.randint(1, sides)


def check_prime(number: int) -> bool:
"""Return True if the number is prime, False otherwise."""
if number < 2:
return False
for i in range(2, int(number**0.5) + 1):
if number % i == 0:
return False
return True


def main():
if not os.getenv("OPENAI_API_KEY"):
print("OPENAI_API_KEY not set.")
return

endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
print(f"OTLP endpoint: {endpoint}")

os.environ.setdefault(
"OTEL_RESOURCE_ATTRIBUTES",
"agentevals.eval_set_id=pydantic_ai_eval,agentevals.session_name=pydantic-ai-zero-code",
)

resource = Resource.create()

tracer_provider = TracerProvider(resource=resource)
tracer_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(), schedule_delay_millis=1000))
trace.set_tracer_provider(tracer_provider)

# Enable Pydantic AI's built-in OTel instrumentation. This one call
# wires up all agents globally — no framework-specific instrumentor
# library (like opentelemetry-instrumentation-openai-v2) is needed.
Agent.instrument_all()

agent = Agent(
"openai:gpt-4o-mini",
instructions="You are a helpful assistant. You can roll dice and check if numbers are prime.",
)
agent.tool_plain(roll_die)
agent.tool_plain(check_prime)

test_queries = [
"Hi! Can you help me?",
"Roll a 20-sided die for me",
"Is the number you rolled prime?",
]

message_history = []

try:
for i, query in enumerate(test_queries, 1):
print(f"\n[{i}/{len(test_queries)}] User: {query}")

result = agent.run_sync(query, message_history=message_history)

print(f" Agent: {result.output}")

# Pass the full message history forward for multi-turn conversation.
message_history = result.all_messages()
finally:
print()
tracer_provider.force_flush()
print("All traces flushed to OTLP receiver.")


if __name__ == "__main__":
main()
60 changes: 60 additions & 0 deletions tests/integration/test_live_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,66 @@ def test_session_visible_via_api(self, live_servers):
assert session_name in session_ids


@_skip_no_openai
class TestPydanticAIZeroCode:
"""Run the Pydantic AI zero-code OTLP example and verify session grouping."""

def test_session_created_with_spans(self, live_servers):
main_port, otlp_http_port, mgr = live_servers
session_name = "e2e-pydantic-ai"

result = _run_agent(
"examples/zero-code-examples/pydantic-ai/run.py",
otlp_http_port,
session_name,
)
assert result.returncode == 0, f"Agent failed:\nstdout: {result.stdout}\nstderr: {result.stderr}"

wait_for_session_complete_sync(mgr, session_name, timeout=30)
session = mgr.sessions[session_name]

assert session.is_complete
assert session.source == "otlp"
assert len(session.spans) > 0, "Expected spans from LLM calls"

def test_invocations_extracted_with_content(self, live_servers):
main_port, otlp_http_port, mgr = live_servers
session_name = "e2e-pydantic-ai-inv"

result = _run_agent(
"examples/zero-code-examples/pydantic-ai/run.py",
otlp_http_port,
session_name,
)
assert result.returncode == 0, f"Agent failed:\nstdout: {result.stdout}\nstderr: {result.stderr}"

wait_for_session_complete_sync(mgr, session_name, timeout=30)
session = mgr.sessions[session_name]

assert len(session.invocations) > 0, "Expected extracted invocations"
for inv in session.invocations:
has_content = inv.get("userText") or inv.get("agentResponse")
assert has_content, f"Invocation {inv.get('invocationId', '?')} has no content"

def test_session_visible_via_api(self, live_servers):
main_port, otlp_http_port, mgr = live_servers
session_name = "e2e-pydantic-ai-api"

result = _run_agent(
"examples/zero-code-examples/pydantic-ai/run.py",
otlp_http_port,
session_name,
)
assert result.returncode == 0

wait_for_session_complete_sync(mgr, session_name, timeout=30)

resp = httpx.get(f"http://127.0.0.1:{main_port}/api/streaming/sessions")
assert resp.status_code == 200
session_ids = [s["sessionId"] for s in resp.json()["data"]]
assert session_name in session_ids


@_skip_no_openai
class TestAgentRerun:
"""Verify that re-running an agent with the same session_name creates
Expand Down