Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ agentevals accepts OTLP/HTTP on port 4318 (`http/protobuf` and `http/json`) and
| [zero-code-examples/strands/](./zero-code-examples/strands/) | Strands | OpenAI |
| [zero-code-examples/adk/](./zero-code-examples/adk/) | Google ADK | Gemini |
| [zero-code-examples/pydantic-ai/](./zero-code-examples/pydantic-ai/) | Pydantic AI | OpenAI |
| [zero-code-examples/agentcore/](./zero-code-examples/agentcore/) | AWS AgentCore | Amazon Bedrock |

This approach works with any framework that has OTel instrumentation: LangChain, Strands, Google ADK, etc. If your framework already emits OTel spans, you only need to add `OTLPSpanExporter` (and `OTLPLogExporter` if it uses GenAI log-based content delivery).

Expand Down Expand Up @@ -105,6 +106,7 @@ Detection checks for `gen_ai.request.model` / `gen_ai.input.messages` (GenAI sem
| [zero-code-examples/strands/](./zero-code-examples/strands/) | Strands | OpenAI | GenAI semconv (events*) | Standard OTLP export |
| [zero-code-examples/adk/](./zero-code-examples/adk/) | Google ADK | Gemini | ADK built-in | Standard OTLP export |
| [zero-code-examples/pydantic-ai/](./zero-code-examples/pydantic-ai/) | Pydantic AI | OpenAI | GenAI semconv (span attrs) | Standard OTLP export |
| [zero-code-examples/agentcore/](./zero-code-examples/agentcore/) | AWS AgentCore | Amazon Bedrock | GenAI semconv (events*) | Standard OTLP export |
| [langchain_agent](./langchain_agent/) | LangChain | OpenAI | GenAI semconv (logs) | SDK WebSocket |
| [strands_agent](./strands_agent/) | Strands | OpenAI | GenAI semconv (events*) | SDK WebSocket |
| [dice_agent](./dice_agent/) | Google ADK | Gemini | ADK built-in | SDK WebSocket |
Expand Down Expand Up @@ -221,6 +223,10 @@ python examples/zero-code-examples/strands/run.py
python examples/zero-code-examples/adk/run.py
python examples/zero-code-examples/pydantic-ai/run.py

# AgentCore starts a server (AWS credentials required):
python examples/zero-code-examples/agentcore/run.py &
curl http://localhost:8080/invocations -d '{"prompt": "Roll a 20-sided die for me"}'

# SDK examples:
python examples/sdk_example/context_manager_example.py
python examples/sdk_example/decorator_example.py
Expand All @@ -235,7 +241,7 @@ python examples/strands_agent/main.py
Traces stream to the dev server in real-time. Evaluation runs automatically when the session completes.

See each example's README for prerequisites and detailed instructions:
- [zero-code-examples/](./zero-code-examples/) (LangChain, Strands, ADK, OpenAI Agents, Pydantic AI standard OTLP)
- [zero-code-examples/](./zero-code-examples/) (LangChain, Strands, ADK, OpenAI Agents, Pydantic AI, AWS AgentCore — standard OTLP)
- [dice_agent/README.md](./dice_agent/README.md) (Google ADK + Gemini)
- [langchain_agent/README.md](./langchain_agent/README.md) (LangChain + OpenAI, SDK)
- [strands_agent/](./strands_agent/) (Strands + OpenAI, SDK)
Expand Down
6 changes: 6 additions & 0 deletions examples/zero-code-examples/agentcore/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
bedrock-agentcore>=1.8.0
strands-agents>=1.35.0
boto3>=1.38.0
opentelemetry-sdk>=1.36.0
opentelemetry-exporter-otlp-proto-http>=1.36.0
python-dotenv>=1.0.0
62 changes: 62 additions & 0 deletions examples/zero-code-examples/agentcore/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""AWS AgentCore zero-code OTLP example -- no agentevals SDK.

Setup:
pip install -r examples/zero-code-examples/agentcore/requirements.txt
export AWS_DEFAULT_REGION=us-east-1
agentevals serve --dev

Run:
python examples/zero-code-examples/agentcore/run.py
curl http://localhost:8080/invocations -d '{"prompt": "Roll a 20-sided die"}'
agentcore dev # alternative: npm install -g @aws/agentcore
"""

import math
import os
import random

from bedrock_agentcore import BedrockAgentCoreApp
from dotenv import load_dotenv
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from strands import Agent, tool
from strands.models import BedrockModel
from strands.telemetry import StrandsTelemetry

# Load .env first; override=True means values from .env win over variables
# already present in the process environment.
load_dotenv(override=True)
# setdefault: these are fallbacks only — anything exported by the caller
# (e.g. the integration tests) takes precedence.
# Opt in to the experimental GenAI semantic conventions so spans carry
# gen_ai.* attributes the collector detects.
os.environ.setdefault("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental")
# Resource attributes group this run's spans under a named eval set /
# session on the agentevals side.
os.environ.setdefault("OTEL_RESOURCE_ATTRIBUTES",
                      "agentevals.eval_set_id=agentcore_eval,agentevals.session_name=agentcore-zero-code")

# Export Strands spans over OTLP/HTTP. The 1s schedule delay makes the batch
# processor flush quickly, which matters for this short-lived demo process.
_telemetry = StrandsTelemetry()
_telemetry.tracer_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(), schedule_delay_millis=1000))

# AgentCore app; serves POST /invocations and GET /ping when run.
app = BedrockAgentCoreApp()


@tool
def roll_die(sides: int = 6) -> dict:
    """Simulate one roll of a die with *sides* faces and describe the outcome."""
    outcome = random.randint(1, sides)
    return {
        "sides": sides,
        "result": outcome,
        "message": f"Rolled a {sides}-sided die and got {outcome}",
    }


@tool
def check_prime(n: int) -> bool:
    """Return True if *n* is prime.

    Trial division up to the integer square root. Uses ``math.isqrt`` rather
    than ``int(n ** 0.5)``: the float form can round incorrectly for very
    large ``n`` and silently misclassify numbers near a perfect square.

    Args:
        n: Integer to test. Values below 2 are never prime.
    """
    if n < 2:
        return False
    # n % d is truthy (non-zero) iff d does not divide n.
    return all(n % d for d in range(2, math.isqrt(n) + 1))


@app.entrypoint
async def handler(payload):
    """Handle one /invocations request by streaming agent events back.

    A fresh Agent is built per request; the prompt comes from the JSON
    payload's "prompt" key (defaulting to "Hello!" when absent).
    """
    user_prompt = payload.get("prompt", "Hello!")
    dice_agent = Agent(
        model=BedrockModel(model_id="us.amazon.nova-pro-v1:0"),
        tools=[roll_die, check_prime],
        system_prompt="Use roll_die when asked to roll dice. Use check_prime when asked about prime numbers.",
        name="dice_agent",
    )
    async for chunk in dice_agent.stream_async(user_prompt):
        yield chunk


app.run()
65 changes: 64 additions & 1 deletion tests/integration/test_live_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@

from __future__ import annotations

import contextlib
import os
import subprocess
import sys
import time

import httpx
import pytest
Expand All @@ -38,7 +40,6 @@
reason="GOOGLE_API_KEY not set",
)


def _run_agent(
script: str,
otlp_http_port: int,
Expand All @@ -64,6 +65,40 @@ def _run_agent(
)


# Extra env forwarded to the AgentCore example so it emits the experimental
# GenAI semantic conventions (mirrors the setdefault inside run.py).
_AGENTCORE_ENV = {"OTEL_SEMCONV_STABILITY_OPT_IN": "gen_ai_latest_experimental"}
# Path of the example script, relative to the repository root.
_AGENTCORE_SCRIPT = "examples/zero-code-examples/agentcore/run.py"


@contextlib.contextmanager
def _agentcore_server(otlp_http_port: int, session_name: str, extra_env: dict | None = None):
    """Run the AgentCore example as a subprocess; yield once it answers /ping.

    Args:
        otlp_http_port: Port of the test OTLP/HTTP collector the child should
            export spans to.
        session_name: Injected via OTEL_RESOURCE_ATTRIBUTES so the collector
            groups this run's spans under a known session id.
        extra_env: Additional environment overrides (applied last, so they
            win over the defaults set here).

    Raises:
        RuntimeError: If the child exits early or /ping never answers
            within ~10s.
    """
    env = {**os.environ,
           "OTEL_EXPORTER_OTLP_ENDPOINT": f"http://127.0.0.1:{otlp_http_port}",
           "OTEL_RESOURCE_ATTRIBUTES": f"agentevals.eval_set_id=e2e-test,agentevals.session_name={session_name}",
           **(extra_env or {})}
    proc = subprocess.Popen([sys.executable, os.path.join(REPO_ROOT, _AGENTCORE_SCRIPT)], env=env, cwd=REPO_ROOT)
    try:
        # Poll /ping for up to ~10s (20 attempts x 0.5s). Any response at all
        # counts as "up"; connection errors mean the server isn't listening yet.
        for _ in range(20):
            if proc.poll() is not None:
                raise RuntimeError(f"agentcore server exited early (code {proc.returncode})")
            try:
                httpx.get("http://127.0.0.1:8080/ping", timeout=1)
                break
            except Exception:
                time.sleep(0.5)
        else:
            # for-else: loop exhausted without break — startup timed out.
            proc.kill()
            raise RuntimeError("agentcore server did not start within 10s")
        yield proc
    finally:
        # Grace period before terminating — presumably to let the child's
        # BatchSpanProcessor (1s schedule delay) flush its last spans to the
        # collector; confirm against run.py's exporter config.
        time.sleep(2)
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()


@_skip_no_openai
class TestLangchainZeroCode:
"""Run the LangChain zero-code OTLP example and verify session grouping."""
Expand Down Expand Up @@ -365,6 +400,34 @@ def test_session_visible_via_api(self, live_servers):
assert session_name in session_ids


class TestAgentCoreZeroCode:
    """Run the AgentCore zero-code OTLP example end to end.

    Each test boots the example server, sends one prompt to /invocations,
    and waits for the resulting OTLP session to complete before asserting.
    """

    def _invoke(self, otlp_http_port: int, session_name: str, prompt: str, mgr) -> None:
        # Shared driver for all three tests: start the server, POST one
        # prompt, and block until the session is marked complete (the server
        # stays up during the wait so late spans can still arrive).
        with _agentcore_server(otlp_http_port, session_name, extra_env=_AGENTCORE_ENV):
            httpx.post("http://127.0.0.1:8080/invocations", json={"prompt": prompt}, timeout=60)
            wait_for_session_complete_sync(mgr, session_name, timeout=60)

    def test_session_created_spans_only(self, live_servers):
        """Spans arrive via plain OTLP export and form a complete session."""
        main_port, otlp_http_port, mgr = live_servers
        session_name = "e2e-agentcore"
        self._invoke(otlp_http_port, session_name, "Roll a 20-sided die", mgr)
        s = mgr.sessions[session_name]
        assert s.is_complete and s.source == "otlp" and len(s.spans) > 0

    def test_invocations_extracted(self, live_servers):
        """At least one GenAI invocation is extracted from the span stream."""
        main_port, otlp_http_port, mgr = live_servers
        session_name = "e2e-agentcore-inv"
        self._invoke(otlp_http_port, session_name, "Is 17 prime?", mgr)
        assert len(mgr.sessions[session_name].invocations) > 0

    def test_session_visible_via_api(self, live_servers):
        """The completed session is listed by the streaming sessions API."""
        main_port, otlp_http_port, mgr = live_servers
        session_name = "e2e-agentcore-api"
        self._invoke(otlp_http_port, session_name, "Hello!", mgr)
        data = httpx.get(f"http://127.0.0.1:{main_port}/api/streaming/sessions").json()["data"]
        assert session_name in [s["sessionId"] for s in data]


@_skip_no_openai
class TestAgentRerun:
"""Verify that re-running an agent with the same session_name creates
Expand Down