From 00093a06bf69d7bb79da3079d678c685e47ae2ac Mon Sep 17 00:00:00 2001 From: Julio Menendez Gonzalez Date: Tue, 28 Apr 2026 16:44:21 -0600 Subject: [PATCH 1/9] docs(samples): add Azure Monitor + Agent 365 observability sample Lean greenfield CLI sample showing how to add the Agent 365 SDK to an app that already has Azure Monitor OpenTelemetry initialized. Uses the OpenAI Agents SDK with auto-instrumentation via microsoft-agents-a365-observability-extensions-openai. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../.env.template | 13 +++ .../.gitignore | 6 ++ .../README.md | 62 +++++++++++ .../observability-with-azure-monitor/main.py | 100 ++++++++++++++++++ .../pyproject.toml | 20 ++++ 5 files changed, 201 insertions(+) create mode 100644 python/observability-with-azure-monitor/.env.template create mode 100644 python/observability-with-azure-monitor/.gitignore create mode 100644 python/observability-with-azure-monitor/README.md create mode 100644 python/observability-with-azure-monitor/main.py create mode 100644 python/observability-with-azure-monitor/pyproject.toml diff --git a/python/observability-with-azure-monitor/.env.template b/python/observability-with-azure-monitor/.env.template new file mode 100644 index 00000000..695b1eab --- /dev/null +++ b/python/observability-with-azure-monitor/.env.template @@ -0,0 +1,13 @@ +# Azure Monitor / Application Insights +APPLICATIONINSIGHTS_CONNECTION_STRING=InstrumentationKey=00000000-0000-0000-0000-000000000000;IngestionEndpoint=https://... + +# OpenAI / Azure OpenAI (the OpenAI Agents SDK reads these) +# For OpenAI: +OPENAI_API_KEY=sk-... 
+# For Azure OpenAI (set these instead of OPENAI_API_KEY): +# AZURE_OPENAI_API_KEY= +# AZURE_OPENAI_ENDPOINT= +# OPENAI_API_VERSION=2024-08-01-preview + +# Service identification (optional; used as service.name attribute) +AGENT_SERVICE_NAME=sample-agent-azure-monitor diff --git a/python/observability-with-azure-monitor/.gitignore b/python/observability-with-azure-monitor/.gitignore new file mode 100644 index 00000000..cd71e8cc --- /dev/null +++ b/python/observability-with-azure-monitor/.gitignore @@ -0,0 +1,6 @@ +.env +.venv/ +__pycache__/ +*.pyc +dist/ +*.egg-info/ diff --git a/python/observability-with-azure-monitor/README.md b/python/observability-with-azure-monitor/README.md new file mode 100644 index 00000000..1a6f273b --- /dev/null +++ b/python/observability-with-azure-monitor/README.md @@ -0,0 +1,62 @@ +# Observability — Agent 365 SDK alongside Azure Monitor + +This sample shows how to add the [Microsoft Agent 365 Python SDK](https://github.com/microsoft/Agent365-python) to an app that **already** uses [Azure Monitor / Application Insights OpenTelemetry](https://learn.microsoft.com/azure/azure-monitor/app/opentelemetry-overview). After running this, both Azure Monitor and the Agent 365 backend receive your agent's spans. + +> This is **not** a from-scratch tracing setup. For a full agent host with Microsoft 365 Agents SDK, see the [`python/openai/sample-agent`](../openai/sample-agent) sample. + +## Prerequisites + +- Python 3.11+ +- An OpenAI or Azure OpenAI key +- An Application Insights resource (connection string) + +## Setup + +1. Copy the env template and fill in your values: + + ```bash + cp .env.template .env + ``` + +2. Create a virtualenv and install: + + ```bash + python -m venv .venv + source .venv/bin/activate # Windows: .venv\Scripts\activate + pip install -e . + ``` + +## Run + +```bash +python main.py +``` + +Expected stdout: a one-line weather answer for Seattle. 
+ +## What to look for + +In Azure Portal → your Application Insights resource → **Transaction search**, filter by: + +- Operation name `invoke_agent` — the agent invocation span +- Operation name `inference` — one or more LLM call spans +- Operation name `execute_tool` — the `get_weather` tool span + +If you see those spans in App Insights, the integration is working. The Agent 365 backend receives the same spans (configured via the stub token resolver — replace with a real one for production). + +## Where the integration happens + +`main.py` is organized into four numbered sections: + +1. **Step 1 — Azure Monitor.** `configure_azure_monitor(...)` installs an OTel TracerProvider and the Azure Monitor exporter. This is the part of the file you'd already have in your real app. +2. **Step 2 — Agent 365 `configure()`.** Detects the TracerProvider set by Step 1 and adds its processors to it. Both backends now receive spans. Replace `_stub_token_resolver` with your production token resolver. +3. **Step 2b — `OpenAIAgentsTraceInstrumentor`.** Must run after `configure()`; the instrumentor raises `RuntimeError` otherwise. After this call, OpenAI Agents SDK spans flow through Agent 365's scope classes automatically. +4. **Step 3 — Build the agent.** Standard OpenAI Agents SDK code; no observability code needed (the instrumentor handles it). +5. **Step 4 — Run + flush.** `force_flush()` is critical — without it, batched spans may not export before the process exits. + +To diff against your own app: copy Steps 1, 2, and 2b into the file where your app currently initializes Azure Monitor. 
+ +## Going further + +- Integration patterns and pitfalls: [Integrating with existing OpenTelemetry](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md) (in the SDK repo) +- Manual instrumentation example (no agent framework): [`python/observability-with-otlp`](../observability-with-otlp) diff --git a/python/observability-with-azure-monitor/main.py b/python/observability-with-azure-monitor/main.py new file mode 100644 index 00000000..f9689db6 --- /dev/null +++ b/python/observability-with-azure-monitor/main.py @@ -0,0 +1,100 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Sample: Agent 365 SDK alongside an existing Azure Monitor OpenTelemetry setup. + +Demonstrates the recommended initialization order: + + 1. Initialize your existing OTel stack first (Azure Monitor here). + 2. Then call Agent 365 `configure()` — it detects the existing TracerProvider + and adds its processors to it. Both backends receive spans. + 3. Then install the OpenAI Agents SDK instrumentor (it requires A365 to be + configured first). It auto-instruments your agent — no manual span code. + +Run with: ``python main.py`` +""" + +import json +import os + +from dotenv import load_dotenv + +load_dotenv() + +# --------------------------------------------------------------------------- +# Step 1 — Existing OTel setup (Azure Monitor / Application Insights). +# This is what an app already has in production today. +# --------------------------------------------------------------------------- +from azure.monitor.opentelemetry import configure_azure_monitor + +configure_azure_monitor( + connection_string=os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"], +) + +# --------------------------------------------------------------------------- +# Step 2 — Agent 365 SDK `configure()`. +# Detects the TracerProvider set in Step 1 and adds its processors to it. 
+# Both Azure Monitor and the Agent 365 exporter now receive spans. +# --------------------------------------------------------------------------- +from microsoft_agents_a365.observability.core import configure + + +def _stub_token_resolver(agent_id: str, tenant_id: str) -> str | None: + # In a real app, return a bearer token for the Agent 365 backend. + # See the observability-core docs for the production pattern. + return "stub-token" + + +configure( + service_name=os.environ.get("AGENT_SERVICE_NAME", "sample-agent-azure-monitor"), + service_namespace="agent365-samples", + token_resolver=_stub_token_resolver, +) + +# --------------------------------------------------------------------------- +# Step 2b — Install the OpenAI Agents SDK instrumentor. +# Must run AFTER `configure()` — the instrumentor raises RuntimeError otherwise. +# --------------------------------------------------------------------------- +from microsoft_agents_a365.observability.extensions.openai import ( + OpenAIAgentsTraceInstrumentor, +) + +OpenAIAgentsTraceInstrumentor().instrument() + +# --------------------------------------------------------------------------- +# Step 3 — Build the tool-calling agent (auto-instrumented). +# --------------------------------------------------------------------------- +from agents import Agent, Runner, function_tool + + +@function_tool +def get_weather(city: str) -> str: + """Return the current weather for ``city`` as a JSON string.""" + return json.dumps({"city": city, "temperature_f": 72, "conditions": "sunny"}) + + +agent = Agent( + name="WeatherAgent", + instructions=( + "You are a helpful assistant that answers weather questions " + "using the get_weather tool." + ), + tools=[get_weather], +) + +# --------------------------------------------------------------------------- +# Step 4 — Run a single turn and exit, flushing spans on the way out. 
+# --------------------------------------------------------------------------- +from opentelemetry import trace + + +def main() -> None: + result = Runner.run_sync(agent, "What's the weather in Seattle?") + print(result.final_output) + # Force span flush so both Azure Monitor and Agent 365 exporters drain + # before the process exits. + trace.get_tracer_provider().force_flush() + + +if __name__ == "__main__": + main() diff --git a/python/observability-with-azure-monitor/pyproject.toml b/python/observability-with-azure-monitor/pyproject.toml new file mode 100644 index 00000000..aed27eb2 --- /dev/null +++ b/python/observability-with-azure-monitor/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "observability-with-azure-monitor" +version = "0.1.0" +description = "Sample: Agent 365 SDK with existing Azure Monitor OpenTelemetry" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "openai-agents>=0.2.6", + "azure-monitor-opentelemetry>=1.6.0", + "microsoft-agents-a365-observability-core", + "microsoft-agents-a365-observability-extensions-openai", + "python-dotenv>=1.0.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["."] From 082edd4a0ad38a05ff5541ef4cb074d13ec2ca1a Mon Sep 17 00:00:00 2001 From: Julio Menendez Gonzalez Date: Tue, 28 Apr 2026 16:46:00 -0600 Subject: [PATCH 2/9] docs(samples): fix section-count phrasing in azure-monitor README Said "four numbered sections" while listing five (Step 1, 2, 2b, 3, 4). Reword to acknowledge Step 2b as a sub-step. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- python/observability-with-azure-monitor/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/observability-with-azure-monitor/README.md b/python/observability-with-azure-monitor/README.md index 1a6f273b..6dec6b0e 100644 --- a/python/observability-with-azure-monitor/README.md +++ b/python/observability-with-azure-monitor/README.md @@ -46,7 +46,7 @@ If you see those spans in App Insights, the integration is working. The Agent 36 ## Where the integration happens -`main.py` is organized into four numbered sections: +`main.py` is organized into the following sections (Step 2b is a sub-step that must run after Step 2): 1. **Step 1 — Azure Monitor.** `configure_azure_monitor(...)` installs an OTel TracerProvider and the Azure Monitor exporter. This is the part of the file you'd already have in your real app. 2. **Step 2 — Agent 365 `configure()`.** Detects the TracerProvider set by Step 1 and adds its processors to it. Both backends now receive spans. Replace `_stub_token_resolver` with your production token resolver. From 538cf7fe6403b06cf0a5985b666d0f849dd13bfe Mon Sep 17 00:00:00 2001 From: Julio Menendez Gonzalez Date: Tue, 28 Apr 2026 16:52:53 -0600 Subject: [PATCH 3/9] docs(samples): correct App Insights filter guidance in azure-monitor sample MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous wording told users to filter Transaction search by "Operation name invoke_agent" — but invoke_agent / inference / execute_tool are values of the gen_ai.operation.name *attribute*, not the App Insights operation name. Reword to point at the right field. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- python/observability-with-azure-monitor/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/observability-with-azure-monitor/README.md b/python/observability-with-azure-monitor/README.md index 6dec6b0e..1338cb4f 100644 --- a/python/observability-with-azure-monitor/README.md +++ b/python/observability-with-azure-monitor/README.md @@ -36,13 +36,13 @@ Expected stdout: a one-line weather answer for Seattle. ## What to look for -In Azure Portal → your Application Insights resource → **Transaction search**, filter by: +In Azure Portal → your Application Insights resource → **Transaction search**, look for spans with the custom-dimension attribute `gen_ai.operation.name`. You should see three values across one turn: -- Operation name `invoke_agent` — the agent invocation span -- Operation name `inference` — one or more LLM call spans -- Operation name `execute_tool` — the `get_weather` tool span +- `invoke_agent` — the agent invocation (root span; its display name is `invoke_agent WeatherAgent`) +- `inference` — one or more LLM call spans +- `execute_tool` — the `get_weather` tool span -If you see those spans in App Insights, the integration is working. The Agent 365 backend receives the same spans (configured via the stub token resolver — replace with a real one for production). +If you see those three operation values, the integration is working. The Agent 365 backend receives the same spans (configured via the stub token resolver — replace with a real one for production). 
## Where the integration happens From 79999e36d79259e8a6286e7def45f3f0855cdd90 Mon Sep 17 00:00:00 2001 From: Julio Menendez Gonzalez Date: Tue, 28 Apr 2026 16:59:53 -0600 Subject: [PATCH 4/9] docs(samples): add OTLP + manual-instrumentation Agent 365 sample Lean greenfield CLI sample showing how to add the Agent 365 SDK to an app that already has the OpenTelemetry SDK initialized, demonstrating manual instrumentation with InvokeAgentScope / InferenceScope / ExecuteToolScope around a raw OpenAI client (no agent framework). Defaults to ConsoleSpanExporter for zero-setup; a commented OTLPSpanExporter block shows the swap-in for real backends. Co-Authored-By: Claude Opus 4.7 (1M context) --- python/observability-with-otlp/.env.template | 14 ++ python/observability-with-otlp/.gitignore | 6 + python/observability-with-otlp/README.md | 78 +++++++ python/observability-with-otlp/main.py | 208 ++++++++++++++++++ python/observability-with-otlp/pyproject.toml | 20 ++ 5 files changed, 326 insertions(+) create mode 100644 python/observability-with-otlp/.env.template create mode 100644 python/observability-with-otlp/.gitignore create mode 100644 python/observability-with-otlp/README.md create mode 100644 python/observability-with-otlp/main.py create mode 100644 python/observability-with-otlp/pyproject.toml diff --git a/python/observability-with-otlp/.env.template b/python/observability-with-otlp/.env.template new file mode 100644 index 00000000..bb3e06b7 --- /dev/null +++ b/python/observability-with-otlp/.env.template @@ -0,0 +1,14 @@ +# OpenAI / Azure OpenAI (the raw openai client reads these) +OPENAI_API_KEY=sk-... 
+# OPENAI_MODEL=gpt-4o-mini +# For Azure OpenAI, use AzureOpenAI() in main.py and set: +# AZURE_OPENAI_API_KEY= +# AZURE_OPENAI_ENDPOINT= +# OPENAI_API_VERSION=2024-08-01-preview + +# OpenTelemetry service identification +OTEL_SERVICE_NAME=sample-agent-otlp +AGENT_SERVICE_NAME=sample-agent-otlp + +# Optional: only used if you swap ConsoleSpanExporter for OTLPSpanExporter in main.py +# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318/v1/traces diff --git a/python/observability-with-otlp/.gitignore b/python/observability-with-otlp/.gitignore new file mode 100644 index 00000000..cd71e8cc --- /dev/null +++ b/python/observability-with-otlp/.gitignore @@ -0,0 +1,6 @@ +.env +.venv/ +__pycache__/ +*.pyc +dist/ +*.egg-info/ diff --git a/python/observability-with-otlp/README.md b/python/observability-with-otlp/README.md new file mode 100644 index 00000000..5128445d --- /dev/null +++ b/python/observability-with-otlp/README.md @@ -0,0 +1,78 @@ +# Observability — Agent 365 SDK with manual OTel + manual instrumentation + +This sample shows two patterns at once: + +1. Adding the [Microsoft Agent 365 Python SDK](https://github.com/microsoft/Agent365-python) to an app with an **already-configured OpenTelemetry SDK** (vendor-neutral / OTLP). +2. **Manual instrumentation** using `InvokeAgentScope`, `InferenceScope`, and `ExecuteToolScope` — useful when you don't use an agent framework, or when you want explicit control over which calls produce spans. + +> This is **not** a from-scratch tracing setup. For a full agent host with Microsoft 365 Agents SDK, see the [`python/openai/sample-agent`](../openai/sample-agent) sample. + +## Prerequisites + +- Python 3.11+ +- An OpenAI or Azure OpenAI key + +No collector or external service is required — the sample defaults to `ConsoleSpanExporter` so spans print to stdout. + +## Setup + +1. Copy the env template and fill in your values: + + ```bash + cp .env.template .env + ``` + +2. 
Create a virtualenv and install: + + ```bash + python -m venv .venv + source .venv/bin/activate # Windows: .venv\Scripts\activate + pip install -e . + ``` + +## Run + +```bash +python main.py +``` + +Expected output (truncated): + +- A one-line weather answer for Seattle. +- Multiple JSON span dumps printed by `ConsoleSpanExporter`. Look for spans named `invoke_agent`, `inference`, and `execute_tool`. + +## Swap to a real OTLP endpoint + +In `main.py`, comment out the `ConsoleSpanExporter` lines and uncomment the `OTLPSpanExporter` block. Set `OTEL_EXPORTER_OTLP_ENDPOINT` in `.env` (e.g. `http://localhost:4318/v1/traces` for a local collector, or your vendor's endpoint). + +## What to look for + +The console output (or your OTLP backend) should contain a span tree: + +- `invoke_agent WeatherAgent` (the outer span — one per user turn) + - `inference` (first LLM call: the model decides to call the tool) + - `execute_tool get_weather` (the tool runs) + - `inference` (second LLM call: the model summarizes the tool result) + +If those four spans show up, the integration is working. + +## Where the integration happens + +`main.py` is organized into the following sections: + +1. **Step 1 — OTel SDK setup.** Build a `TracerProvider`, attach a `BatchSpanProcessor` with the exporter, call `trace.set_tracer_provider(...)`. This is the part of the file you'd already have in your real app. +2. **Step 2 — Agent 365 `configure()`.** Detects the TracerProvider set by Step 1 and adds its processors to it. Both your existing exporter and the Agent 365 exporter receive spans. +3. **Step 3 — Agent setup.** Raw OpenAI client and a `get_weather` Python function. No framework — the loop is in `run_one_turn`. +4. **Step 4 — `run_one_turn`.** Wraps each SDK call: + - The whole turn → `InvokeAgentScope`. + - Each `client.chat.completions.create(...)` call → `InferenceScope`. + - The `get_weather` invocation → `ExecuteToolScope`. 
+ + `record_response(...)` on each scope records the output as a span attribute. + +To diff against your own app: copy Step 1 (replace with whatever exporter / resource you use) and Step 2 into your bootstrapping. Apply the Step 4 wrapping pattern around your own SDK calls. + +## Going further + +- Integration patterns and pitfalls: [Integrating with existing OpenTelemetry](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md) (in the SDK repo) +- Auto-instrumented OpenAI Agents SDK example: [`python/observability-with-azure-monitor`](../observability-with-azure-monitor) diff --git a/python/observability-with-otlp/main.py b/python/observability-with-otlp/main.py new file mode 100644 index 00000000..8459b980 --- /dev/null +++ b/python/observability-with-otlp/main.py @@ -0,0 +1,208 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Sample: Agent 365 SDK alongside a manual OTel SDK setup, with manual span scopes. + +This sample demonstrates two things at once: + +1. The recommended init order: existing OTel first, then Agent 365 `configure()`. +2. Manual instrumentation using `InvokeAgentScope` / `InferenceScope` / + `ExecuteToolScope` — useful when you don't use an agent framework, or when + you want explicit control over which calls produce which spans. + +The default exporter is `ConsoleSpanExporter` so you can run this with zero +external setup. To export to a real OTLP endpoint, swap it for +`OTLPSpanExporter` (commented block below). + +Run with: ``python main.py`` +""" + +import json +import os + +from dotenv import load_dotenv + +load_dotenv() + +# --------------------------------------------------------------------------- +# Step 1 — Existing OTel setup (manual, vendor-neutral). +# +# Default: ConsoleSpanExporter. Spans print to stdout — no extra setup needed. 
+# +# To export to a real OTLP collector / Jaeger / Honeycomb / etc., uncomment +# the OTLP block and comment out the Console block. +# --------------------------------------------------------------------------- +from opentelemetry import trace +from opentelemetry.sdk.resources import SERVICE_NAME, Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter + +# from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +# exporter = OTLPSpanExporter(endpoint=os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"]) + +exporter = ConsoleSpanExporter() +provider = TracerProvider( + resource=Resource.create( + {SERVICE_NAME: os.environ.get("OTEL_SERVICE_NAME", "sample-agent-otlp")} + ) +) +provider.add_span_processor(BatchSpanProcessor(exporter)) +trace.set_tracer_provider(provider) + +# --------------------------------------------------------------------------- +# Step 2 — Agent 365 SDK `configure()`. +# Detects the TracerProvider set in Step 1 and adds its processors to it. +# --------------------------------------------------------------------------- +from microsoft_agents_a365.observability.core import ( + AgentDetails, + ExecuteToolScope, + InferenceCallDetails, + InferenceOperationType, + InferenceScope, + InvokeAgentScope, + InvokeAgentScopeDetails, + Request, + ToolCallDetails, + configure, +) + + +def _stub_token_resolver(agent_id: str, tenant_id: str) -> str | None: + # In a real app, return a bearer token for the Agent 365 backend. + return "stub-token" + + +configure( + service_name=os.environ.get("AGENT_SERVICE_NAME", "sample-agent-otlp"), + service_namespace="agent365-samples", + token_resolver=_stub_token_resolver, +) + +# --------------------------------------------------------------------------- +# Step 3 — Agent setup: raw OpenAI client + a fake tool. 
+# No agent framework — we drive the loop manually so each SDK call is wrapped +# in the corresponding Agent 365 scope. +# --------------------------------------------------------------------------- +from openai import OpenAI + +client = OpenAI() +MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini") + + +def get_weather(city: str) -> str: + """Return the current weather for ``city`` as a JSON string.""" + return json.dumps({"city": city, "temperature_f": 72, "conditions": "sunny"}) + + +WEATHER_TOOL_SCHEMA = { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a city.", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, +} + +AGENT = AgentDetails(agent_id="sample-agent", agent_name="WeatherAgent") + + +# --------------------------------------------------------------------------- +# Step 4 — Run one turn manually, wrapping each SDK call in a scope. +# --------------------------------------------------------------------------- +def run_one_turn(user_message: str) -> str: + with InvokeAgentScope.start( + request=Request(content=[user_message]), + scope_details=InvokeAgentScopeDetails(endpoint=None), + agent_details=AGENT, + ) as invoke_scope: + messages = [ + { + "role": "system", + "content": "You answer weather questions using the get_weather tool.", + }, + {"role": "user", "content": user_message}, + ] + + # First inference: model decides to call the tool. 
+ with InferenceScope.start( + request=Request(content=[user_message]), + details=InferenceCallDetails( + operationName=InferenceOperationType.CHAT, + model=MODEL, + providerName="openai", + ), + agent_details=AGENT, + ) as inf_scope: + first = client.chat.completions.create( + model=MODEL, + messages=messages, + tools=[WEATHER_TOOL_SCHEMA], + ) + assistant_msg = first.choices[0].message + inf_scope.record_response(assistant_msg.content or "") + + tool_call = assistant_msg.tool_calls[0] + args = json.loads(tool_call.function.arguments) + + # Tool execution. + with ExecuteToolScope.start( + request=Request(content=[tool_call.function.arguments]), + details=ToolCallDetails( + tool_name=tool_call.function.name, + arguments=args, + ), + agent_details=AGENT, + ) as tool_scope: + tool_result = get_weather(**args) + tool_scope.record_response(tool_result) + + # Build messages for the follow-up call. + messages.append( + { + "role": "assistant", + "content": assistant_msg.content, + "tool_calls": [ + { + "id": tool_call.id, + "type": "function", + "function": { + "name": tool_call.function.name, + "arguments": tool_call.function.arguments, + }, + } + ], + } + ) + messages.append( + {"role": "tool", "tool_call_id": tool_call.id, "content": tool_result} + ) + + # Second inference: model summarizes the tool result. 
+ with InferenceScope.start( + request=Request(content=[tool_result]), + details=InferenceCallDetails( + operationName=InferenceOperationType.CHAT, + model=MODEL, + providerName="openai", + ), + agent_details=AGENT, + ) as inf_scope: + second = client.chat.completions.create(model=MODEL, messages=messages) + final = second.choices[0].message.content or "" + inf_scope.record_response(final) + + invoke_scope.record_response(final) + return final + + +def main() -> None: + print(run_one_turn("What's the weather in Seattle?")) + trace.get_tracer_provider().force_flush() + + +if __name__ == "__main__": + main() diff --git a/python/observability-with-otlp/pyproject.toml b/python/observability-with-otlp/pyproject.toml new file mode 100644 index 00000000..2f727f52 --- /dev/null +++ b/python/observability-with-otlp/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "observability-with-otlp" +version = "0.1.0" +description = "Sample: Agent 365 SDK with existing OTel SDK + manual instrumentation" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "openai>=1.40.0", + "opentelemetry-sdk>=1.27.0", + "opentelemetry-exporter-otlp-proto-http>=1.27.0", + "microsoft-agents-a365-observability-core", + "python-dotenv>=1.0.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["."] From c3775c482b14aa857411aef1bfd6d1422e19fcde Mon Sep 17 00:00:00 2001 From: Julio Menendez Gonzalez Date: Tue, 28 Apr 2026 17:10:43 -0600 Subject: [PATCH 5/9] docs(samples): correct inference span/operation name in sample READMEs InferenceScope uses InferenceOperationType.value (e.g. "Chat") for both the gen_ai.operation.name attribute and the span name; "inference" is not what users will actually see in their backend. Update both sample READMEs to reflect the real SDK behavior and clarify where the value comes from. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- python/observability-with-azure-monitor/README.md | 10 +++++----- python/observability-with-otlp/README.md | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/observability-with-azure-monitor/README.md b/python/observability-with-azure-monitor/README.md index 1338cb4f..c00f672a 100644 --- a/python/observability-with-azure-monitor/README.md +++ b/python/observability-with-azure-monitor/README.md @@ -36,13 +36,13 @@ Expected stdout: a one-line weather answer for Seattle. ## What to look for -In Azure Portal → your Application Insights resource → **Transaction search**, look for spans with the custom-dimension attribute `gen_ai.operation.name`. You should see three values across one turn: +In Azure Portal → your Application Insights resource → **Transaction search**, look for spans with the custom-dimension attribute `gen_ai.operation.name`. You should see three operation types across one turn: -- `invoke_agent` — the agent invocation (root span; its display name is `invoke_agent WeatherAgent`) -- `inference` — one or more LLM call spans -- `execute_tool` — the `get_weather` tool span +- `invoke_agent` — the agent invocation (root span; display name `invoke_agent WeatherAgent`) +- `Chat` — one or more LLM call spans (display name e.g. `Chat gpt-4o-mini`; the value comes from the configured `InferenceOperationType` — Auto-instrumentation defaults to `Chat`) +- `execute_tool` — the `get_weather` tool span (display name `execute_tool get_weather`) -If you see those three operation values, the integration is working. The Agent 365 backend receives the same spans (configured via the stub token resolver — replace with a real one for production). +If you see those three operation types, the integration is working. The Agent 365 backend receives the same spans (configured via the stub token resolver — replace with a real one for production). 
## Where the integration happens diff --git a/python/observability-with-otlp/README.md b/python/observability-with-otlp/README.md index 5128445d..37115316 100644 --- a/python/observability-with-otlp/README.md +++ b/python/observability-with-otlp/README.md @@ -39,7 +39,7 @@ python main.py Expected output (truncated): - A one-line weather answer for Seattle. -- Multiple JSON span dumps printed by `ConsoleSpanExporter`. Look for spans named `invoke_agent`, `inference`, and `execute_tool`. +- Multiple JSON span dumps printed by `ConsoleSpanExporter`. Look for spans named `invoke_agent WeatherAgent`, `Chat gpt-4o-mini` (twice), and `execute_tool get_weather`. ## Swap to a real OTLP endpoint @@ -50,11 +50,11 @@ In `main.py`, comment out the `ConsoleSpanExporter` lines and uncomment the `OTL The console output (or your OTLP backend) should contain a span tree: - `invoke_agent WeatherAgent` (the outer span — one per user turn) - - `inference` (first LLM call: the model decides to call the tool) - - `execute_tool get_weather` (the tool runs) - - `inference` (second LLM call: the model summarizes the tool result) + - `Chat <model>` — first LLM call (the model decides to call the tool); `gen_ai.operation.name` attribute is `Chat` + - `execute_tool get_weather` — the tool runs + - `Chat <model>` — second LLM call (the model summarizes the tool result) -If those four spans show up, the integration is working. +If those four spans show up, the integration is working. The `Chat` span name and `gen_ai.operation.name` attribute come from the `InferenceOperationType` passed to `InferenceScope.start(...)` (this sample uses `InferenceOperationType.CHAT`). ## Where the integration happens From 03b998f3a5010e56d96132f68d516b63489919ef Mon Sep 17 00:00:00 2001 From: Julio Menendez Gonzalez Date: Wed, 29 Apr 2026 07:18:28 -0600 Subject: [PATCH 6/9] docs(samples): address PR #288 review feedback - Replace OPENAI_API_KEY=sk-... placeholder with a non-secret form so secret scanners don't false-positive.
- Capture and check configure() return status in both samples; exit cleanly with a clear message when configuration fails. - Validate APPLICATIONINSIGHTS_CONNECTION_STRING up front in the AM sample with a friendly error pointing at .env.template. - Guard tool_calls indexing in the OTLP sample so the manual-loop doesn't crash when the model answers without calling the tool. - Add "Demonstrates" and "Troubleshooting" sections to both sample READMEs to make setup and failure-mode diagnosis easier. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../.env.template | 2 +- .../observability-with-azure-monitor/README.md | 13 +++++++++++++ python/observability-with-azure-monitor/main.py | 17 +++++++++++++---- python/observability-with-otlp/.env.template | 2 +- python/observability-with-otlp/README.md | 13 +++++++++++++ python/observability-with-otlp/main.py | 12 +++++++++++- 6 files changed, 52 insertions(+), 7 deletions(-) diff --git a/python/observability-with-azure-monitor/.env.template b/python/observability-with-azure-monitor/.env.template index 695b1eab..401964ef 100644 --- a/python/observability-with-azure-monitor/.env.template +++ b/python/observability-with-azure-monitor/.env.template @@ -3,7 +3,7 @@ APPLICATIONINSIGHTS_CONNECTION_STRING=InstrumentationKey=00000000-0000-0000-0000 # OpenAI / Azure OpenAI (the OpenAI Agents SDK reads these) # For OpenAI: -OPENAI_API_KEY=sk-... +OPENAI_API_KEY=<<YOUR_OPENAI_API_KEY>> # For Azure OpenAI (set these instead of OPENAI_API_KEY): # AZURE_OPENAI_API_KEY= # AZURE_OPENAI_ENDPOINT= diff --git a/python/observability-with-azure-monitor/README.md b/python/observability-with-azure-monitor/README.md index c00f672a..8c3563c2 100644 --- a/python/observability-with-azure-monitor/README.md +++ b/python/observability-with-azure-monitor/README.md @@ -4,6 +4,12 @@ This sample shows how to add the [Microsoft Agent 365 Python SDK](https://github > This is **not** a from-scratch tracing setup.
For a full agent host with Microsoft 365 Agents SDK, see the [`python/openai/sample-agent`](../openai/sample-agent) sample. +## Demonstrates + +- The recommended init order: existing OTel → Agent 365 `configure()` → OpenAI Agents SDK instrumentor. +- Auto-instrumentation via `microsoft-agents-a365-observability-extensions-openai` — no manual span code in the agent body. +- Both Azure Monitor and the Agent 365 backend receive every span produced by the agent. + ## Prerequisites - Python 3.11+ @@ -60,3 +66,10 @@ To diff against your own app: copy Steps 1, 2, and 2b into the file where your a - Integration patterns and pitfalls: [Integrating with existing OpenTelemetry](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md) (in the SDK repo) - Manual instrumentation example (no agent framework): [`python/observability-with-otlp`](../observability-with-otlp) + +## Troubleshooting + +- **`SystemExit: APPLICATIONINSIGHTS_CONNECTION_STRING is not set`** — set the env var via `.env`. The connection string is on your App Insights resource → **Overview** → **Connection String**. +- **No spans visible in App Insights** — wait 1–2 minutes for ingestion; confirm the connection string targets the right resource. If the agent ran successfully but spans never appear, temporarily add a `ConsoleSpanExporter` (see [the integration guide's verify recipe](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md#verifying-the-integration)) to prove the SDK is producing them. +- **`SystemExit: Agent 365 observability configuration failed`** — check logs for the failing step (most often a missing or unreachable token resolver in production; the sample uses a stub). +- **OpenAI auth errors** — verify `OPENAI_API_KEY` (or `AZURE_OPENAI_*` variables) in `.env`. The OpenAI Agents SDK reads these directly. 
diff --git a/python/observability-with-azure-monitor/main.py b/python/observability-with-azure-monitor/main.py index f9689db6..bd075864 100644 --- a/python/observability-with-azure-monitor/main.py +++ b/python/observability-with-azure-monitor/main.py @@ -27,9 +27,14 @@ # --------------------------------------------------------------------------- from azure.monitor.opentelemetry import configure_azure_monitor -configure_azure_monitor( - connection_string=os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"], -) +_app_insights_conn = os.environ.get("APPLICATIONINSIGHTS_CONNECTION_STRING") +if not _app_insights_conn: + raise SystemExit( + "APPLICATIONINSIGHTS_CONNECTION_STRING is not set. " + "Copy .env.template to .env and fill in your Application Insights " + "connection string. See README.md for setup steps." + ) +configure_azure_monitor(connection_string=_app_insights_conn) # --------------------------------------------------------------------------- # Step 2 — Agent 365 SDK `configure()`. @@ -45,11 +50,15 @@ def _stub_token_resolver(agent_id: str, tenant_id: str) -> str | None: return "stub-token" -configure( +_configure_ok = configure( service_name=os.environ.get("AGENT_SERVICE_NAME", "sample-agent-azure-monitor"), service_namespace="agent365-samples", token_resolver=_stub_token_resolver, ) +if not _configure_ok: + raise SystemExit( + "Agent 365 observability configuration failed. See logs for details." + ) # --------------------------------------------------------------------------- # Step 2b — Install the OpenAI Agents SDK instrumentor. diff --git a/python/observability-with-otlp/.env.template b/python/observability-with-otlp/.env.template index bb3e06b7..19358107 100644 --- a/python/observability-with-otlp/.env.template +++ b/python/observability-with-otlp/.env.template @@ -1,5 +1,5 @@ # OpenAI / Azure OpenAI (the raw openai client reads these) -OPENAI_API_KEY=sk-... 
+OPENAI_API_KEY=<> # OPENAI_MODEL=gpt-4o-mini # For Azure OpenAI, use AzureOpenAI() in main.py and set: # AZURE_OPENAI_API_KEY= diff --git a/python/observability-with-otlp/README.md b/python/observability-with-otlp/README.md index 37115316..22566432 100644 --- a/python/observability-with-otlp/README.md +++ b/python/observability-with-otlp/README.md @@ -7,6 +7,12 @@ This sample shows two patterns at once: > This is **not** a from-scratch tracing setup. For a full agent host with Microsoft 365 Agents SDK, see the [`python/openai/sample-agent`](../openai/sample-agent) sample. +## Demonstrates + +- The recommended init order: existing OTel SDK → Agent 365 `configure()`. +- Manual instrumentation using `InvokeAgentScope`, `InferenceScope`, and `ExecuteToolScope` around a hand-rolled OpenAI tool-calling loop. +- Default `ConsoleSpanExporter` for zero external setup, with a one-line swap to `OTLPSpanExporter` for real backends. + ## Prerequisites - Python 3.11+ @@ -76,3 +82,10 @@ To diff against your own app: copy Step 1 (replace with whatever exporter / reso - Integration patterns and pitfalls: [Integrating with existing OpenTelemetry](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md) (in the SDK repo) - Auto-instrumented OpenAI Agents SDK example: [`python/observability-with-azure-monitor`](../observability-with-azure-monitor) + +## Troubleshooting + +- **No spans printed to stdout** — `BatchSpanProcessor` may not have flushed; the sample calls `force_flush()` on exit, so make sure the script ran to completion. If the model answered without calling the tool, the sample skips the `execute_tool` span and returns the model's text directly. +- **`KeyError` or auth error from OpenAI** — verify `OPENAI_API_KEY` (or `AZURE_OPENAI_*` variables) in `.env`. The raw `openai` client reads these directly. 
+- **Spans missing from your OTLP backend (after swap)** — temporarily fall back to `ConsoleSpanExporter` to confirm the SDK is producing spans. If they appear on stdout but not in your backend, the issue is in the exporter / collector / network. See [the integration guide's verify recipe](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md#verifying-the-integration). +- **`SystemExit: Agent 365 observability configuration failed`** — check logs for the failing step. diff --git a/python/observability-with-otlp/main.py b/python/observability-with-otlp/main.py index 8459b980..bc82a0f4 100644 --- a/python/observability-with-otlp/main.py +++ b/python/observability-with-otlp/main.py @@ -72,11 +72,15 @@ def _stub_token_resolver(agent_id: str, tenant_id: str) -> str | None: return "stub-token" -configure( +_configure_ok = configure( service_name=os.environ.get("AGENT_SERVICE_NAME", "sample-agent-otlp"), service_namespace="agent365-samples", token_resolver=_stub_token_resolver, ) +if not _configure_ok: + raise SystemExit( + "Agent 365 observability configuration failed. See logs for details." + ) # --------------------------------------------------------------------------- # Step 3 — Agent setup: raw OpenAI client + a fake tool. @@ -145,6 +149,12 @@ def run_one_turn(user_message: str) -> str: assistant_msg = first.choices[0].message inf_scope.record_response(assistant_msg.content or "") + # If the model answered without calling the tool, return its text. 
+ if not assistant_msg.tool_calls: + final = assistant_msg.content or "" + invoke_scope.record_response(final) + return final + tool_call = assistant_msg.tool_calls[0] args = json.loads(tool_call.function.arguments) From 6139138e010a9c76ba767a42fc0a00879290bf2b Mon Sep 17 00:00:00 2001 From: Julio Menendez Gonzalez Date: Wed, 29 Apr 2026 11:17:41 -0600 Subject: [PATCH 7/9] docs(samples): document ENABLE_OBSERVABILITY env var requirement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Agent 365 SDK gates span creation behind ENABLE_OBSERVABILITY (or ENABLE_A365_OBSERVABILITY) env var — without one set to a truthy value, scopes silently produce zero spans. Both .env.template files now include ENABLE_OBSERVABILITY=true, and the READMEs surface the requirement in Setup plus a Troubleshooting bullet at the top of each sample's troubleshooting list (it's the most common silent-failure mode). Co-Authored-By: Claude Opus 4.7 (1M context) --- python/observability-with-azure-monitor/.env.template | 5 +++++ python/observability-with-azure-monitor/README.md | 3 +++ python/observability-with-otlp/.env.template | 5 +++++ python/observability-with-otlp/README.md | 3 +++ 4 files changed, 16 insertions(+) diff --git a/python/observability-with-azure-monitor/.env.template b/python/observability-with-azure-monitor/.env.template index 401964ef..a5be3c41 100644 --- a/python/observability-with-azure-monitor/.env.template +++ b/python/observability-with-azure-monitor/.env.template @@ -11,3 +11,8 @@ OPENAI_API_KEY=<> # Service identification (optional; used as service.name attribute) AGENT_SERVICE_NAME=sample-agent-azure-monitor + +# REQUIRED: enable Agent 365 span emission. The SDK gates scope creation +# behind one of these flags; without either set to a truthy value, the +# scopes produce zero spans (silent failure mode). 
+ENABLE_OBSERVABILITY=true diff --git a/python/observability-with-azure-monitor/README.md b/python/observability-with-azure-monitor/README.md index 8c3563c2..78d058fa 100644 --- a/python/observability-with-azure-monitor/README.md +++ b/python/observability-with-azure-monitor/README.md @@ -24,6 +24,8 @@ This sample shows how to add the [Microsoft Agent 365 Python SDK](https://github cp .env.template .env ``` + The template includes `ENABLE_OBSERVABILITY=true` — leave this as-is. Without it, the SDK silently emits zero spans. + 2. Create a virtualenv and install: ```bash @@ -69,6 +71,7 @@ To diff against your own app: copy Steps 1, 2, and 2b into the file where your a ## Troubleshooting +- **Sample runs without errors but no spans appear** — most commonly `ENABLE_OBSERVABILITY` is not set to a truthy value. The SDK gates span creation behind this env var and produces zero spans silently when it's missing. The sample's `.env.template` includes it; if you assembled `.env` manually, add `ENABLE_OBSERVABILITY=true`. - **`SystemExit: APPLICATIONINSIGHTS_CONNECTION_STRING is not set`** — set the env var via `.env`. The connection string is on your App Insights resource → **Overview** → **Connection String**. - **No spans visible in App Insights** — wait 1–2 minutes for ingestion; confirm the connection string targets the right resource. If the agent ran successfully but spans never appear, temporarily add a `ConsoleSpanExporter` (see [the integration guide's verify recipe](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md#verifying-the-integration)) to prove the SDK is producing them. - **`SystemExit: Agent 365 observability configuration failed`** — check logs for the failing step (most often a missing or unreachable token resolver in production; the sample uses a stub). 
diff --git a/python/observability-with-otlp/.env.template b/python/observability-with-otlp/.env.template index 19358107..db65ac57 100644 --- a/python/observability-with-otlp/.env.template +++ b/python/observability-with-otlp/.env.template @@ -12,3 +12,8 @@ AGENT_SERVICE_NAME=sample-agent-otlp # Optional: only used if you swap ConsoleSpanExporter for OTLPSpanExporter in main.py # OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318/v1/traces + +# REQUIRED: enable Agent 365 span emission. The SDK gates scope creation +# behind one of these flags; without either set to a truthy value, the +# scopes produce zero spans (silent failure mode). +ENABLE_OBSERVABILITY=true diff --git a/python/observability-with-otlp/README.md b/python/observability-with-otlp/README.md index 22566432..84ef3d15 100644 --- a/python/observability-with-otlp/README.md +++ b/python/observability-with-otlp/README.md @@ -28,6 +28,8 @@ No collector or external service is required — the sample defaults to `Console cp .env.template .env ``` + The template includes `ENABLE_OBSERVABILITY=true` — leave this as-is. Without it, the SDK silently emits zero spans. + 2. Create a virtualenv and install: ```bash @@ -85,6 +87,7 @@ To diff against your own app: copy Step 1 (replace with whatever exporter / reso ## Troubleshooting +- **Sample runs without errors but no spans appear** — most commonly `ENABLE_OBSERVABILITY` is not set to a truthy value. The SDK gates span creation behind this env var and produces zero spans silently when it's missing. The sample's `.env.template` includes it; if you assembled `.env` manually, add `ENABLE_OBSERVABILITY=true`. - **No spans printed to stdout** — `BatchSpanProcessor` may not have flushed; the sample calls `force_flush()` on exit, so make sure the script ran to completion. If the model answered without calling the tool, the sample skips the `execute_tool` span and returns the model's text directly. 
- **`KeyError` or auth error from OpenAI** — verify `OPENAI_API_KEY` (or `AZURE_OPENAI_*` variables) in `.env`. The raw `openai` client reads these directly. - **Spans missing from your OTLP backend (after swap)** — temporarily fall back to `ConsoleSpanExporter` to confirm the SDK is producing spans. If they appear on stdout but not in your backend, the issue is in the exporter / collector / network. See [the integration guide's verify recipe](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md#verifying-the-integration). From 59898ecf6d09fdccd58e6c0bb6059ae8af475c7e Mon Sep 17 00:00:00 2001 From: Julio Menendez Gonzalez Date: Wed, 29 Apr 2026 11:31:57 -0600 Subject: [PATCH 8/9] docs(samples): correct chat operation name casing in AM sample README Auto-instrumentation via the openai extension produces gen_ai.operation .name="chat" (lowercase) per the OTel GenAI semantic conventions, not "Chat" as previously documented. The OTLP sample's manual instrumentation correctly produces "Chat" (the InferenceOperationType.CHAT.value); the AM sample uses auto-instrumentation and emits the spec-conformant lowercase form. Fixing the AM README to reflect what users will actually see in App Insights. Co-Authored-By: Claude Opus 4.7 (1M context) --- python/observability-with-azure-monitor/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/observability-with-azure-monitor/README.md b/python/observability-with-azure-monitor/README.md index 78d058fa..3f79cc1d 100644 --- a/python/observability-with-azure-monitor/README.md +++ b/python/observability-with-azure-monitor/README.md @@ -47,7 +47,7 @@ Expected stdout: a one-line weather answer for Seattle. In Azure Portal → your Application Insights resource → **Transaction search**, look for spans with the custom-dimension attribute `gen_ai.operation.name`. 
You should see three operation types across one turn: - `invoke_agent` — the agent invocation (root span; display name `invoke_agent WeatherAgent`) -- `Chat` — one or more LLM call spans (display name e.g. `Chat gpt-4o-mini`; the value comes from the configured `InferenceOperationType` — Auto-instrumentation defaults to `Chat`) +- `chat` — one or more LLM call spans (display name e.g. `chat gpt-4.1`; the auto-instrumentation extension uses lowercase `chat` per the [OpenTelemetry GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/)) - `execute_tool` — the `get_weather` tool span (display name `execute_tool get_weather`) If you see those three operation types, the integration is working. The Agent 365 backend receives the same spans (configured via the stub token resolver — replace with a real one for production). From 5a5bf8ad53541796e8e8877b18fecd3a41d34726 Mon Sep 17 00:00:00 2001 From: Julio Menendez Gonzalez Date: Wed, 29 Apr 2026 14:36:53 -0600 Subject: [PATCH 9/9] docs(samples): add LangGraph + Agent 365 observability sample Mirrors Google Cloud's LangGraph + OpenTelemetry reference sample, adapted for the Agent 365 SDK: existing OTel TracerProvider first, then `configure()`, then `CustomLangChainInstrumentor` for auto-instrumented LLM/tool spans, with `InvokeAgentScope` wrapping `agent.invoke(...)` for the top-level span. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../.env.template | 22 +++ .../observability-with-langgraph/.gitignore | 6 + python/observability-with-langgraph/README.md | 95 +++++++++++ python/observability-with-langgraph/main.py | 155 ++++++++++++++++++ .../pyproject.toml | 26 +++ 5 files changed, 304 insertions(+) create mode 100644 python/observability-with-langgraph/.env.template create mode 100644 python/observability-with-langgraph/.gitignore create mode 100644 python/observability-with-langgraph/README.md create mode 100644 python/observability-with-langgraph/main.py create mode 100644 python/observability-with-langgraph/pyproject.toml diff --git a/python/observability-with-langgraph/.env.template b/python/observability-with-langgraph/.env.template new file mode 100644 index 00000000..5008a6f2 --- /dev/null +++ b/python/observability-with-langgraph/.env.template @@ -0,0 +1,22 @@ +# OpenAI / Azure OpenAI (langchain-openai reads these) +OPENAI_API_KEY=<> +# OPENAI_MODEL=gpt-4o-mini +# For Azure OpenAI, use AzureChatOpenAI() in main.py and set: +# AZURE_OPENAI_API_KEY= +# AZURE_OPENAI_ENDPOINT= +# OPENAI_API_VERSION=2024-08-01-preview + +# OpenTelemetry service identification +OTEL_SERVICE_NAME=sample-agent-langgraph +AGENT_SERVICE_NAME=sample-agent-langgraph + +# Optional: only used if you swap ConsoleSpanExporter for OTLPSpanExporter in main.py. +# Examples: +# - Local OTLP/gRPC collector: http://localhost:4317 +# - Google Cloud Trace (per the reference guide): https://telemetry.googleapis.com:443/v1/traces +# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 + +# REQUIRED: enable Agent 365 span emission. The SDK gates scope creation +# behind one of these flags; without either set to a truthy value, the +# scopes produce zero spans (silent failure mode). 
+ENABLE_OBSERVABILITY=true diff --git a/python/observability-with-langgraph/.gitignore b/python/observability-with-langgraph/.gitignore new file mode 100644 index 00000000..cd71e8cc --- /dev/null +++ b/python/observability-with-langgraph/.gitignore @@ -0,0 +1,6 @@ +.env +.venv/ +__pycache__/ +*.pyc +dist/ +*.egg-info/ diff --git a/python/observability-with-langgraph/README.md b/python/observability-with-langgraph/README.md new file mode 100644 index 00000000..837f21f8 --- /dev/null +++ b/python/observability-with-langgraph/README.md @@ -0,0 +1,95 @@ +# Observability — Agent 365 SDK with LangGraph + +This sample shows how to add the [Microsoft Agent 365 Python SDK](https://github.com/microsoft/Agent365-python) to a [LangGraph](https://langchain-ai.github.io/langgraph/) agent that **already** uses OpenTelemetry. After running this, both your existing exporter and the Agent 365 backend receive every span produced by the agent. + +The structure mirrors Google Cloud's [LangGraph + OpenTelemetry reference sample](https://docs.cloud.google.com/stackdriver/docs/instrumentation/ai-agent-langgraph): an OTel TracerProvider is set up first, an auto-instrumentor handles the LLM and tool spans, and a manual top-level span wraps `agent.invoke(...)` so the trace tree has a clear "agent run" root. + +> This is **not** a from-scratch tracing setup. For a full agent host with Microsoft 365 Agents SDK, see the [`python/openai/sample-agent`](../openai/sample-agent) sample. + +## Demonstrates + +- The recommended init order: existing OTel → Agent 365 `configure()` → LangChain instrumentor. +- Auto-instrumentation via `microsoft-agents-a365-observability-extensions-langchain` — every LangChain LLM and tool callback emits an OTel span automatically; no per-call wrapping in the agent body. +- A manual `InvokeAgentScope` around `agent.invoke(...)` for the top-level `invoke_agent <agent_name>` span (the Agent 365 equivalent of Google's `tracer.start_as_current_span("invoke agent")`).
+- Default `ConsoleSpanExporter` for zero external setup, with a one-line swap to OTLP/gRPC for real backends (including Google Cloud Trace). + +## Prerequisites + +- Python 3.11+ +- An OpenAI or Azure OpenAI key + +No collector or external service is required — the sample defaults to `ConsoleSpanExporter` so spans print to stdout. + +## Setup + +1. Copy the env template and fill in your values: + + ```bash + cp .env.template .env + ``` + + The template includes `ENABLE_OBSERVABILITY=true` — leave this as-is. Without it, the SDK silently emits zero spans. + +2. Create a virtualenv and install: + + ```bash + python -m venv .venv + source .venv/bin/activate # Windows: .venv\Scripts\activate + pip install -e . + ``` + +## Run + +```bash +python main.py +``` + +Expected output (truncated): + +- A one-line weather answer for Seattle. +- Multiple JSON span dumps printed by `ConsoleSpanExporter`. Look for spans named `invoke_agent WeatherAgent`, `chat ChatOpenAI` (typically twice — one per ReAct cycle), and `execute_tool get_weather`. + +## Swap to a real OTLP endpoint + +In `main.py`, comment out the `ConsoleSpanExporter` lines and uncomment the `OTLPSpanExporter` block. Set `OTEL_EXPORTER_OTLP_ENDPOINT` in `.env` (e.g. `http://localhost:4317` for a local OTLP/gRPC collector, or `https://telemetry.googleapis.com:443/v1/traces` for Google Cloud Trace per the [reference guide](https://docs.cloud.google.com/stackdriver/docs/instrumentation/ai-agent-langgraph)). + +For Cloud Trace specifically, the reference guide also configures gRPC channel credentials with Google ADC; copy that block into Step 1 if you target Google Cloud. + +## What to look for + +The console output (or your OTLP backend) should contain a span tree rooted at `invoke_agent WeatherAgent`. 
Inside it you'll see the LangGraph ReAct loop nested under the `agent` and `tools` graph nodes, with three spans that matter for Agent 365 telemetry: + +- `invoke_agent WeatherAgent` (the outer span — one per user turn; emitted by `InvokeAgentScope`) +- `chat ChatOpenAI` — one per LLM call (twice for a tool-using turn); the LangChain instrumentor renames LLM runs to `chat <name>` when the underlying response carries a chat-completion id, matching the [OpenTelemetry GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/) +- `execute_tool get_weather` — the tool runs (renamed by the same instrumentor; carries `gen_ai.operation.name=execute_tool` and `gen_ai.tool.name=get_weather`) + +The instrumentor also emits internal LangGraph spans (`LangGraph`, `agent`, `tools`, `call_model`, `should_continue`, `RunnableSequence`, `Prompt`) — those are normal and reflect the underlying graph execution. The Agent 365 backend receives the same spans (configured via the stub token resolver — replace with a real one for production). + +## Where the integration happens + +`main.py` is organized into the following sections (Step 2b is a sub-step that must run after Step 2): + +1. **Step 1 — OTel SDK setup.** Build a `TracerProvider`, attach a `BatchSpanProcessor` with the exporter, call `trace.set_tracer_provider(...)`. This is the part of the file you'd already have in your real app. +2. **Step 2 — Agent 365 `configure()`.** Detects the TracerProvider set by Step 1 and adds its processors to it. Both your existing exporter and the Agent 365 exporter receive spans. Replace `_stub_token_resolver` with your production token resolver. +3. **Step 2b — `CustomLangChainInstrumentor`.** Must run after `configure()`; the constructor raises `RuntimeError` otherwise. Construction auto-calls `.instrument()`. After this, every LangChain LLM and tool callback flows through Agent 365's tracer. +4.
**Step 3 — Build the agent.** Standard `langgraph.prebuilt.create_react_agent(...)` with a `langchain-openai` model and a `@tool`-decorated `get_weather` function. No observability code needed (the instrumentor handles it). +5. **Step 4 — Run + flush.** `InvokeAgentScope` wraps `agent.invoke(...)` so the run gets a top-level `invoke_agent <agent_name>` span; `force_flush()` is critical — without it, batched spans may not export before the process exits. + +To diff against your own app: copy Steps 1, 2, and 2b into the file where your app currently initializes its TracerProvider, and apply the Step 4 `InvokeAgentScope` wrapping pattern around your `agent.invoke(...)` calls. + +## Going further + +- Integration patterns and pitfalls: [Integrating with existing OpenTelemetry](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md) (in the SDK repo) +- Manual instrumentation example (no agent framework): [`python/observability-with-otlp`](../observability-with-otlp) +- Auto-instrumented OpenAI Agents SDK example: [`python/observability-with-azure-monitor`](../observability-with-azure-monitor) +- Google Cloud reference this sample mirrors: [LangGraph + OpenTelemetry on Stackdriver](https://docs.cloud.google.com/stackdriver/docs/instrumentation/ai-agent-langgraph) + +## Troubleshooting + +- **Sample runs without errors but no spans appear** — most commonly `ENABLE_OBSERVABILITY` is not set to a truthy value. The SDK gates span creation behind this env var and produces zero spans silently when it's missing. The sample's `.env.template` includes it; if you assembled `.env` manually, add `ENABLE_OBSERVABILITY=true`. +- **No spans printed to stdout** — `BatchSpanProcessor` may not have flushed; the sample calls `force_flush()` on exit, so make sure the script ran to completion. +- **`KeyError` or auth error from OpenAI** — verify `OPENAI_API_KEY` (or `AZURE_OPENAI_*` variables) in `.env`. `langchain-openai` reads these directly.
+- **Spans missing from your OTLP backend (after swap)** — temporarily fall back to `ConsoleSpanExporter` to confirm the SDK is producing spans. If they appear on stdout but not in your backend, the issue is in the exporter / collector / network. See [the integration guide's verify recipe](https://github.com/microsoft/Agent365-python/blob/main/docs/integrating-with-existing-opentelemetry.md#verifying-the-integration). +- **`SystemExit: Agent 365 observability configuration failed`** — check logs for the failing step (most often a missing or unreachable token resolver in production; the sample uses a stub). +- **`RuntimeError: Tracing SDK is not configured`** — `CustomLangChainInstrumentor()` ran before `configure()`. Make sure Step 2 (`configure(...)`) executes successfully before Step 2b. +- **`TypeError: wrap_function_wrapper() got an unexpected keyword argument 'module'`** — the LangChain extension uses `wrapt`'s legacy keyword-argument call style, which `wrapt 2.x` removed. `pyproject.toml` pins `wrapt<2` to keep the extension working; if you assemble dependencies manually, do the same until the SDK ships a fix. diff --git a/python/observability-with-langgraph/main.py b/python/observability-with-langgraph/main.py new file mode 100644 index 00000000..67b33130 --- /dev/null +++ b/python/observability-with-langgraph/main.py @@ -0,0 +1,155 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Sample: Agent 365 SDK alongside an existing OTel SDK + a LangGraph ReAct agent. + +Mirrors the pattern from Google Cloud's "LangGraph + OpenTelemetry" reference +sample (https://docs.cloud.google.com/stackdriver/docs/instrumentation/ai-agent-langgraph), +adapted for the Agent 365 Python SDK: + + 1. Initialize your existing OTel stack first (vendor-neutral here, with a + commented OTLP/gRPC swap that matches the Google Cloud reference). + 2. 
Then call Agent 365 ``configure()`` — it detects the existing + TracerProvider and adds its processors to it. Both backends receive spans. + 3. Then install ``CustomLangChainInstrumentor`` — auto-instruments LangChain + LLM and tool callbacks. Like Google's guide, the agent invocation itself + is wrapped in a manual top-level span (``InvokeAgentScope`` here, which + mirrors Google's ``tracer.start_as_current_span("invoke agent")``). + +The default exporter is ``ConsoleSpanExporter`` so you can run this with zero +external setup. To export to a real backend (including Google Cloud Trace, per +the reference guide), uncomment the OTLP block. + +Run with: ``python main.py`` +""" + +import json +import os + +from dotenv import load_dotenv + +load_dotenv() + +# --------------------------------------------------------------------------- +# Step 1 — Existing OTel setup (manual, vendor-neutral). +# +# Default: ConsoleSpanExporter. Spans print to stdout — no extra setup needed. +# +# To export to a real backend (Google Cloud Trace, an OTLP collector, Jaeger, +# Honeycomb, etc.), uncomment the OTLP/gRPC block and comment out the Console +# block. The gRPC exporter mirrors the Google Cloud LangGraph reference. 
+# --------------------------------------------------------------------------- +from opentelemetry import trace +from opentelemetry.sdk.resources import SERVICE_NAME, Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter + +# from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +# exporter = OTLPSpanExporter(endpoint=os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"]) + +exporter = ConsoleSpanExporter() +provider = TracerProvider( + resource=Resource.create( + {SERVICE_NAME: os.environ.get("OTEL_SERVICE_NAME", "sample-agent-langgraph")} + ) +) +provider.add_span_processor(BatchSpanProcessor(exporter)) +trace.set_tracer_provider(provider) + +# --------------------------------------------------------------------------- +# Step 2 — Agent 365 SDK `configure()`. +# Detects the TracerProvider set in Step 1 and adds its processors to it. +# Both your existing exporter and the Agent 365 exporter now receive spans. +# --------------------------------------------------------------------------- +from microsoft_agents_a365.observability.core import ( + AgentDetails, + ExecutionType, + InvokeAgentDetails, + InvokeAgentScope, + Request, + TenantDetails, + configure, +) + + +def _stub_token_resolver(agent_id: str, tenant_id: str) -> str | None: + # In a real app, return a bearer token for the Agent 365 backend. + return "stub-token" + + +_configure_ok = configure( + service_name=os.environ.get("AGENT_SERVICE_NAME", "sample-agent-langgraph"), + service_namespace="agent365-samples", + token_resolver=_stub_token_resolver, +) +if not _configure_ok: + raise SystemExit( + "Agent 365 observability configuration failed. See logs for details." + ) + +# --------------------------------------------------------------------------- +# Step 2b — Install the LangChain instrumentor. +# Must run AFTER `configure()` — the instrumentor raises RuntimeError otherwise. 
+# Construction auto-calls ``.instrument()``; after this, every LangChain run +# (LLM call, tool call, chain) emits an OpenTelemetry span via Agent 365's +# tracer. +# --------------------------------------------------------------------------- +from microsoft_agents_a365.observability.extensions.langchain import ( + CustomLangChainInstrumentor, +) + +CustomLangChainInstrumentor() + +# --------------------------------------------------------------------------- +# Step 3 — Build the LangGraph prebuilt ReAct agent (auto-instrumented). +# --------------------------------------------------------------------------- +from langchain_core.tools import tool +from langchain_openai import ChatOpenAI +from langgraph.prebuilt import create_react_agent + + +@tool +def get_weather(city: str) -> str: + """Return the current weather for ``city`` as a JSON string.""" + return json.dumps({"city": city, "temperature_f": 72, "conditions": "sunny"}) + + +MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini") +llm = ChatOpenAI(model=MODEL) +agent = create_react_agent(model=llm, tools=[get_weather]) + +AGENT = AgentDetails(agent_id="sample-agent", agent_name="WeatherAgent") +TENANT = TenantDetails(tenant_id=os.environ.get("TENANT_ID", "sample-tenant")) + +# --------------------------------------------------------------------------- +# Step 4 — Run a single turn, wrapping the LangGraph invocation in a manual +# `InvokeAgentScope`. Google's reference guide does the equivalent with a +# generic `tracer.start_as_current_span("invoke agent")`; using the Agent 365 +# scope gives the standard `invoke_agent <agent_name>` span name and request / +# response attributes for free. +# --------------------------------------------------------------------------- +def main() -> None: + user_message = "What's the weather in Seattle?"
+ + with InvokeAgentScope.start( + invoke_agent_details=InvokeAgentDetails(details=AGENT), + tenant_details=TENANT, + request=Request( + content=user_message, + execution_type=ExecutionType.HUMAN_TO_AGENT, + ), + ) as invoke_scope: + result = agent.invoke( + {"messages": [{"role": "user", "content": user_message}]} + ) + final = result["messages"][-1].content + invoke_scope.record_response(final) + print(final) + + # Force span flush so both your existing exporter and the Agent 365 + # exporter drain before the process exits. + trace.get_tracer_provider().force_flush() + + +if __name__ == "__main__": + main() diff --git a/python/observability-with-langgraph/pyproject.toml b/python/observability-with-langgraph/pyproject.toml new file mode 100644 index 00000000..fb1a02a2 --- /dev/null +++ b/python/observability-with-langgraph/pyproject.toml @@ -0,0 +1,26 @@ +[project] +name = "observability-with-langgraph" +version = "0.1.0" +description = "Sample: Agent 365 SDK with existing OTel SDK + LangGraph ReAct agent" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "langgraph>=0.3.0", + "langchain-openai>=0.3.0", + "langchain-core>=0.3.0", + "opentelemetry-sdk>=1.27.0", + "opentelemetry-exporter-otlp-proto-grpc>=1.27.0", + "microsoft-agents-a365-observability-core", + "microsoft-agents-a365-observability-extensions-langchain", + # The langchain extension uses `wrap_function_wrapper(module=..., name=...)` + # which the wrapt 2.x release removed. Pin until the SDK ships a fix. + "wrapt<2", + "python-dotenv>=1.0.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["."]