From 38deb32d1e8b97e42e199b4b880405a8e5e5dddc Mon Sep 17 00:00:00 2001 From: Lester Sanchez Date: Tue, 21 Apr 2026 16:14:48 +0100 Subject: [PATCH 1/2] feat(providers): add claude-agent-sdk provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new provider that uses the claude-agent-sdk package to delegate the agentic loop, tool execution, and structured output extraction to the Claude Code CLI. Unlike the raw claude provider, this provider does not manage its own retry logic, MCP servers, or tool wiring — these are handled by the SDK runtime. - New provider: ClaudeAgentSdkProvider with execute(), validate_connection(), close() - Message dispatch via type(message).__name__ matching real SDK class names - Tool result pairing: tracks pending tool_use IDs to emit agent_tool_complete with actual results from ToolResultBlock (not fake nulls) - Structured output via ClaudeAgentOptions.output_format (json_schema) - Event callback parity: agent_turn_start, agent_message, agent_reasoning, agent_tool_start, agent_tool_complete - 22 tests using real SDK types (AssistantMessage, TextBlock, etc.) 
- Updated schema, factory, __init__, docs, and example workflow Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/skills/conductor/SKILL.md | 2 +- .../conductor/references/yaml-schema.md | 14 +- AGENTS.md | 11 +- README.md | 29 +- docs/cli-reference.md | 2 +- docs/configuration.md | 2 +- docs/providers/comparison.md | 84 ++- examples/test-claude-agent-sdk.yaml | 53 ++ pyproject.toml | 6 + src/conductor/config/schema.py | 4 +- src/conductor/providers/__init__.py | 2 + src/conductor/providers/claude_agent_sdk.py | 307 +++++++++++ src/conductor/providers/factory.py | 19 +- tests/test_providers/test_claude_agent_sdk.py | 493 ++++++++++++++++++ uv.lock | 30 +- 15 files changed, 1018 insertions(+), 40 deletions(-) create mode 100644 examples/test-claude-agent-sdk.yaml create mode 100644 src/conductor/providers/claude_agent_sdk.py create mode 100644 tests/test_providers/test_claude_agent_sdk.py diff --git a/.claude/skills/conductor/SKILL.md b/.claude/skills/conductor/SKILL.md index c54094f..ca2ee7a 100644 --- a/.claude/skills/conductor/SKILL.md +++ b/.claude/skills/conductor/SKILL.md @@ -5,7 +5,7 @@ description: Validate, run, and execute workflows; creating new workflows when e # Conductor -CLI tool for defining and running multi-agent workflows with the GitHub Copilot SDK or Anthropic Claude. +CLI tool for defining and running multi-agent workflows with the GitHub Copilot SDK, Anthropic Claude, or Claude Agent SDK. > **DO NOT create new workflow files unless the user explicitly asks you to create one.** Default to running, validating, or debugging existing workflows. If the user's request is ambiguous, assume they want to run or modify an existing workflow rather than create a new one. 
diff --git a/.claude/skills/conductor/references/yaml-schema.md b/.claude/skills/conductor/references/yaml-schema.md index aca9cfa..d698f5d 100644 --- a/.claude/skills/conductor/references/yaml-schema.md +++ b/.claude/skills/conductor/references/yaml-schema.md @@ -6,7 +6,7 @@ Complete reference for all YAML configuration options. Derived from the Pydantic ```yaml workflow: WorkflowDef # Required: workflow configuration -tools: [string] # Optional: workflow-level tool names +tools: [string] # Optional: workflow-level tool names (ignored by claude-agent-sdk — uses CLI config) agents: [AgentDef] # Required: agent definitions parallel: [ParallelGroup] # Optional: static parallel groups for_each: [ForEachDef] # Optional: dynamic parallel groups @@ -27,14 +27,14 @@ workflow: # Runtime configuration runtime: - provider: string # "copilot" (default) or "claude" + provider: string # "copilot" (default), "claude", or "claude-agent-sdk" default_model: string # Default model for all agents - temperature: float # 0.0-1.0, controls randomness (optional) - max_tokens: integer # Max OUTPUT tokens per response, 1-200000 (optional) - timeout: float # Per-request timeout in seconds (optional, default: 600) + temperature: float # 0.0-1.0, controls randomness (optional, copilot/claude only) + max_tokens: integer # Max OUTPUT tokens per response, 1-200000 (optional, copilot/claude only) + timeout: float # Per-request timeout in seconds (optional, default: 600, copilot/claude only) max_agent_iterations: integer # Max tool-use roundtrips per agent (1-500, optional) max_session_seconds: float # Wall-clock timeout per agent session in seconds (optional) - mcp_servers: # MCP server configurations + mcp_servers: # MCP server configurations (ignored by claude-agent-sdk — uses CLI config) : type: string # "stdio" (default), "http", or "sse" command: string # Command to run (required for stdio) @@ -93,7 +93,7 @@ agents: type: string # "agent" (default), "human_gate", "script", or "workflow" 
description: string # What this agent does model: string # Override default_model - provider: string # Per-agent provider override ("copilot" or "claude") + provider: string # Per-agent provider override ("copilot", "claude", or "claude-agent-sdk") # Input specification (for explicit context mode) input: diff --git a/AGENTS.md b/AGENTS.md index d9618ec..242dee4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -88,6 +88,7 @@ make validate-examples # validate all examples - `base.py` - `AgentProvider` ABC defining `execute()`, `validate_connection()`, `close()` - `copilot.py` - GitHub Copilot SDK implementation - `claude.py` - Anthropic Claude API implementation + - `claude_agent_sdk.py` - Claude Agent SDK implementation (uses `claude-agent-sdk` package) - `factory.py` - Provider instantiation - **gates/**: Human-in-the-loop support @@ -146,7 +147,7 @@ Use `pytest.mark.performance` for performance tests (exclude with `-m "not perfo ### Provider Parity -All providers (`copilot.py`, `claude.py`) must maintain feature parity. Any change to one provider's behavior, contract, or capabilities must be applied to all providers. This includes: +All providers must maintain feature parity where applicable. Any change to one provider's behavior, contract, or capabilities must be applied to all providers. This includes: - **Event callbacks**: Same event types emitted at the same semantic points - `agent_turn_start` with `{"turn": "awaiting_model"}` — immediately before each API call @@ -160,3 +161,11 @@ All providers (`copilot.py`, `claude.py`) must maintain feature parity. Any chan - **Session management**: Same lifecycle (`validate_connection()`, `execute()`, `close()`) When modifying any provider, check all other providers for the same change. The dashboard, JSONL logger, console subscriber, and workflow engine all depend on consistent behavior across providers. 
+ +#### `claude_agent_sdk.py` parity notes + +The Claude Agent SDK provider (`claude_agent_sdk.py`) delegates the agentic loop to the `claude` CLI via the `claude-agent-sdk` package. This achieves **event and output parity** but the following are managed by the SDK rather than Conductor: + +- **Retry and error handling**: The SDK handles retries, backoff, and parse recovery internally. The provider wraps SDK errors in `ProviderError` but does not implement its own retry logic. +- **Tool execution**: Tools and MCP servers are managed by the `claude` CLI's own configuration. Workflow-level `tools` and `runtime.mcp_servers` fields are ignored. +- **Runtime config**: `temperature`, `max_tokens`, and `timeout` are not configurable per-workflow — they are controlled by the CLI. diff --git a/README.md b/README.md index 97d430a..214d827 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Conductor provides the patterns that work: evaluator-optimizer loops for iterati ## Features - **YAML-based workflows** - Define multi-agent workflows in readable YAML -- **Multiple providers** - GitHub Copilot or Anthropic Claude with seamless switching +- **Multiple providers** - GitHub Copilot, Anthropic Claude, or Claude Agent SDK with seamless switching - **Parallel execution** - Run agents concurrently (static groups or dynamic for-each) - **Script steps** - Run shell commands and route on exit code without an AI agent - **Conditional routing** - Route between agents based on output conditions @@ -155,13 +155,13 @@ conductor stop Conductor supports multiple AI providers. 
Choose based on your needs: -| Feature | Copilot | Claude | -|---------|---------|--------| -| **Pricing** | Subscription ($10-39/mo) | Pay-per-token | -| **Context Window** | 8K-128K tokens | 200K tokens | -| **Tool Support (MCP)** | Yes | Planned | -| **Streaming** | Yes | Planned | -| **Best For** | Heavy usage, tools | Large context, pay-per-use | +| Feature | Copilot | Claude | Claude Agent SDK | +|---------|---------|--------|------------------| +| **Pricing** | Subscription ($10-39/mo) | Pay-per-token | Via Claude Code CLI | +| **Context Window** | 8K-128K tokens | 200K tokens | 200K tokens | +| **Tool Support (MCP)** | Yes | Planned | Yes (built-in) | +| **Streaming** | Yes | Planned | Yes | +| **Best For** | Heavy usage, tools | Large context, pay-per-use | Full Claude Code toolset | ### Using Claude @@ -174,6 +174,19 @@ workflow: Set your API key: `export ANTHROPIC_API_KEY=sk-ant-...` +### Using Claude Agent SDK + +```yaml +workflow: + runtime: + provider: claude-agent-sdk + default_model: claude-sonnet-4-6 +``` + +Requires the `claude` CLI to be installed and authenticated. Install the SDK: `uv add claude-agent-sdk` + +> **Note:** The `claude-agent-sdk` provider delegates tool and MCP server management to the `claude` CLI. Workflow-level `tools` and `runtime.mcp_servers` fields are ignored — configure these through your Claude Code settings instead. 
+ **See also:** [Claude Documentation](docs/providers/claude.md) | [Provider Comparison](docs/providers/comparison.md) | [Migration Guide](docs/providers/migration.md) ## CLI Reference diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 238deee..5118bef 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -23,7 +23,7 @@ conductor run [OPTIONS] |--------|-------|-------------| | `--input NAME=VALUE` | `-i` | Workflow input (repeatable) | | `--input.NAME=VALUE` | | Alternative input syntax | -| `--provider PROVIDER` | `-p` | Override provider (copilot, claude) | +| `--provider PROVIDER` | `-p` | Override provider (copilot, claude, claude-agent-sdk) | | `--dry-run` | | Show execution plan without running | | `--skip-gates` | | Auto-select first option at human gates | | `--quiet` | `-q` | Minimal output (agent lifecycle and routing only) | diff --git a/docs/configuration.md b/docs/configuration.md index c83499d..3691483 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -11,7 +11,7 @@ The `runtime` section of your workflow defines provider settings and global defa ```yaml workflow: runtime: - provider: copilot # or 'claude' + provider: copilot # or 'claude' or 'claude-agent-sdk' default_model: gpt-5.2 # Provider-specific settings... ``` diff --git a/docs/providers/comparison.md b/docs/providers/comparison.md index 80e633d..40f7606 100644 --- a/docs/providers/comparison.md +++ b/docs/providers/comparison.md @@ -1,21 +1,21 @@ -# Provider Comparison: Copilot vs Claude +# Provider Comparison: Copilot vs Claude vs Claude Agent SDK -This guide helps you choose between GitHub Copilot and Anthropic Claude providers for your workflows. +This guide helps you choose between GitHub Copilot, Anthropic Claude, and Claude Agent SDK providers for your workflows. 
## Quick Comparison -| Feature | Copilot | Claude | Winner | -|---------|---------|--------|--------| -| **Context Window** | 8K-128K | 200K (all models) | Claude | -| **Pricing Model** | Subscription ($10-39/mo) | Pay-per-token | Depends | -| **Setup** | GitHub auth | API key | Copilot (easier) | -| **Model Selection** | GPT-5.2, o1 | Haiku, Sonnet, Opus | Tie | -| **Streaming** | Yes | No (Phase 1) | Copilot | -| **Tool Support** | Yes (MCP) | No (Phase 1) | Copilot | -| **Speed** | Fast | Fast | Tie | -| **Output Quality** | Excellent | Excellent | Tie | -| **Cost Predictability** | High (flat rate) | Variable (usage-based) | Copilot | -| **Multi-provider** | No | Yes (via Conductor) | Claude | +| Feature | Copilot | Claude | Claude Agent SDK | +|---------|---------|--------|------------------| +| **Context Window** | 8K-128K | 200K (all models) | 200K | +| **Pricing Model** | Subscription ($10-39/mo) | Pay-per-token | Via Claude Code CLI | +| **Setup** | GitHub auth | API key | `claude` CLI auth | +| **Model Selection** | GPT-5.2, o1 | Haiku, Sonnet, Opus | Haiku, Sonnet, Opus | +| **Streaming** | Yes | No (Phase 1) | Yes | +| **Tool Support** | Yes (MCP) | No (Phase 1) | Yes (built-in) | +| **Speed** | Fast | Fast | Fast | +| **Output Quality** | Excellent | Excellent | Excellent | +| **Cost Predictability** | High (flat rate) | Variable (usage-based) | Variable | +| **Agentic Loop** | SDK-managed | Manual (provider code) | SDK-managed | ## When to Use Copilot @@ -111,6 +111,51 @@ agents: prompt: "Analyze the following document ({{ document | length }} chars)" ``` +## When to Use Claude Agent SDK + +### ✅ Choose Claude Agent SDK if: + +1. **You want built-in tool support with Claude models** + - WebSearch, WebFetch, Bash, file operations out of the box + - No MCP server configuration needed for common tools + - Full Claude Code toolset available + +2. 
**You already use the `claude` CLI** + - Authentication handled by the CLI + - No separate API key management + - Settings inherited from your Claude Code environment + +3. **You want the SDK to manage the agentic loop** + - Retry logic, tool execution, and structured output handled by the SDK + - Less provider code to maintain + - Native interrupt support + +4. **You need streaming with Claude models** + - Real-time message streaming (unlike the raw Claude provider) + - Typed message objects for each event + +### Important: Tools and MCP Servers + +The `claude-agent-sdk` provider delegates tool and MCP server management entirely to the `claude` CLI. This means: + +- Workflow-level `tools` and `runtime.mcp_servers` fields are **ignored** — configure tools and MCP servers through your Claude Code settings instead +- The full Claude Code toolset (WebSearch, Bash, Read, Write, etc.) is available automatically +- `temperature`, `max_tokens`, and `timeout` are also managed by the CLI and not configurable per-workflow + +### Example Claude Agent SDK Workflow + +```yaml +workflow: + name: sdk-workflow + runtime: + provider: claude-agent-sdk + default_model: claude-sonnet-4-6 + +agents: + - name: researcher + prompt: "Research {{ topic }} using web search" +``` + ## Cost Comparison ### Scenario 1: Light Usage (10 hours/month) @@ -298,6 +343,8 @@ Use this matrix to decide: | Process long documents | **Claude** | | Complex reasoning tasks | **Claude** (Opus) | | Simple high-volume tasks | **Claude** (Haiku 4.5) | +| Already use `claude` CLI | **Claude Agent SDK** | +| Want streaming with Claude | **Claude Agent SDK** | ## Multi-Provider Strategy @@ -347,4 +394,11 @@ workflow: - ✅ Long document processing - ✅ Cost optimization (Haiku) -**Bottom line**: Both are excellent. Choose based on your usage patterns, budget, and feature requirements. Conductor makes it easy to switch between them or use both strategically. 
+**Choose Claude Agent SDK** for: +- ✅ Built-in tools (WebSearch, Bash, etc.) +- ✅ Streaming with Claude models +- ✅ SDK-managed agentic loop +- ✅ Existing `claude` CLI users +- ✅ No API key management + +**Bottom line**: All three are excellent. Choose based on your usage patterns, budget, and feature requirements. Conductor makes it easy to switch between them or use all three strategically. diff --git a/examples/test-claude-agent-sdk.yaml b/examples/test-claude-agent-sdk.yaml new file mode 100644 index 0000000..4a934df --- /dev/null +++ b/examples/test-claude-agent-sdk.yaml @@ -0,0 +1,53 @@ +# Simple Question-Answering Workflow using Claude Agent SDK +# +# This example demonstrates a basic linear workflow with a single agent +# that answers questions. It shows: +# - Basic workflow structure +# - Input parameters +# - Output schema validation +# - Simple routing to $end +# +# Usage: +# conductor run examples/test-claude-agent-sdk.yaml --input question="What is Microsoft Conductor?" +# +# Note: Requires Claude Code CLI installed and configured. Adjust the runtime model as needed based on your Claude Code setup. + +workflow: + name: simple-qa + description: A simple question-answering workflow with a single agent + version: "1.0.0" + entry_point: answerer + + runtime: + provider: claude-agent-sdk + default_model: claude-haiku-4-5@20251001 # Vertex AI naming; the model ID format depends on the backend your Claude Code setup uses. Adjust as needed. + + input: + question: + type: string + required: true + description: The question to answer + +agents: + - name: answerer + description: Answers the user's question clearly and concisely + prompt: | + You are a helpful assistant and researcher. Please answer the following question + clearly and concisely: + + Question: {{ workflow.input.question }} + + Provide a direct answer without unnecessary preamble. 
+ output: + answer: + type: string + description: The answer to the question + confidence: + type: string + description: Confidence level (high, medium, low) + routes: + - to: $end + +output: + answer: "{{ answerer.output.answer }}" + confidence: "{{ answerer.output.confidence }}" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index cc40455..3db672c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,12 @@ dependencies = [ "uvicorn>=0.30.0", "websockets>=12.0", "httpx>=0.27.0", + "claude-agent-sdk>=0.1.64", +] + +[project.optional-dependencies] +claude-agent-sdk = [ + "claude-agent-sdk>=0.1.0", ] [project.urls] diff --git a/src/conductor/config/schema.py b/src/conductor/config/schema.py index 527c50b..a937200 100644 --- a/src/conductor/config/schema.py +++ b/src/conductor/config/schema.py @@ -399,7 +399,7 @@ class AgentDef(BaseModel): type: Literal["agent", "human_gate", "script", "workflow"] | None = None """Agent type. Defaults to 'agent' if not specified.""" - provider: Literal["copilot", "claude"] | None = None + provider: Literal["copilot", "claude", "claude-agent-sdk"] | None = None """Provider override for this agent. If None (default), the agent uses the workflow.runtime.provider. 
@@ -630,7 +630,7 @@ def validate_type_requirements(self) -> MCPServerDef: class RuntimeConfig(BaseModel): """Provider and runtime configuration.""" - provider: Literal["copilot", "openai-agents", "claude"] = "copilot" + provider: Literal["copilot", "openai-agents", "claude", "claude-agent-sdk"] = "copilot" """SDK provider to use for agent execution.""" default_model: str | None = None diff --git a/src/conductor/providers/__init__.py b/src/conductor/providers/__init__.py index 4912e0a..96a010b 100644 --- a/src/conductor/providers/__init__.py +++ b/src/conductor/providers/__init__.py @@ -6,12 +6,14 @@ from conductor.providers.base import AgentOutput, AgentProvider from conductor.providers.claude import ClaudeProvider +from conductor.providers.claude_agent_sdk import ClaudeAgentSdkProvider from conductor.providers.copilot import CopilotProvider from conductor.providers.factory import create_provider __all__ = [ "AgentOutput", "AgentProvider", + "ClaudeAgentSdkProvider", "ClaudeProvider", "CopilotProvider", "create_provider", diff --git a/src/conductor/providers/claude_agent_sdk.py b/src/conductor/providers/claude_agent_sdk.py new file mode 100644 index 0000000..6e077f8 --- /dev/null +++ b/src/conductor/providers/claude_agent_sdk.py @@ -0,0 +1,307 @@ +"""Claude Agent SDK provider — delegates agentic loop to the claude-agent-sdk package.""" + +from __future__ import annotations + +import asyncio +import json +import logging +from typing import TYPE_CHECKING, Any + +from conductor.exceptions import ProviderError +from conductor.providers.base import AgentOutput, AgentProvider, EventCallback + +if TYPE_CHECKING: + from conductor.config.schema import AgentDef, OutputField + +try: + from claude_agent_sdk import ClaudeAgentOptions, query + + CLAUDE_AGENT_SDK_AVAILABLE = True +except ImportError: + CLAUDE_AGENT_SDK_AVAILABLE = False + query = None + ClaudeAgentOptions = None + +logger = logging.getLogger(__name__) + + +def _build_field_schema(field: OutputField, depth: int = 
0) -> dict[str, Any]: + if depth > 10: + raise ProviderError("Output schema nesting exceeds 10 levels") + + schema: dict[str, Any] = {"type": field.type} + if field.description: + schema["description"] = field.description + if field.type == "object" and field.properties: + schema["properties"] = _build_properties(field.properties, depth + 1) + schema["required"] = list(field.properties.keys()) + if field.type == "array" and field.items: + schema["items"] = _build_field_schema(field.items, depth + 1) + return schema + + +def _build_properties(fields: dict[str, OutputField], depth: int = 0) -> dict[str, Any]: + return {name: _build_field_schema(field, depth) for name, field in fields.items()} + + +def _build_output_format(output: dict[str, OutputField]) -> dict[str, Any]: + return { + "type": "json_schema", + "schema": { + "type": "object", + "properties": _build_properties(output), + "required": list(output.keys()), + }, + } + + +class ClaudeAgentSdkProvider(AgentProvider): + """Claude Agent SDK provider. + + Uses the claude-agent-sdk package (async iterator API) to execute agents. + The SDK manages the agentic loop, tool execution, and structured output + extraction internally. 
+ """ + + def __init__( + self, + model: str | None = None, + max_turns: int | None = None, + max_session_seconds: float | None = None, + ) -> None: + if not CLAUDE_AGENT_SDK_AVAILABLE: + raise ProviderError( + "Claude Agent SDK not installed", + suggestion="Install with: uv add 'claude-agent-sdk>=0.1.0'", + ) + + self._default_model = model or "claude-sonnet-4-6" + self._default_max_turns = max_turns if max_turns is not None else 50 + self._max_session_seconds = max_session_seconds + + async def execute( + self, + agent: AgentDef, + context: dict[str, Any], + rendered_prompt: str, + tools: list[str] | None = None, + interrupt_signal: asyncio.Event | None = None, + event_callback: EventCallback | None = None, + ) -> AgentOutput: + if query is None or ClaudeAgentOptions is None: + raise ProviderError("Claude Agent SDK not available") + + model = agent.model or self._default_model + max_turns = ( + agent.max_agent_iterations + if agent.max_agent_iterations is not None + else self._default_max_turns + ) + + options = ClaudeAgentOptions( + model=model, + system_prompt=agent.system_prompt, + output_format=_build_output_format(agent.output) if agent.output else None, + max_turns=max_turns, + permission_mode="bypassPermissions", + tools={"type": "preset", "preset": "claude_code"}, + ) + + content_parts: list[str] = [] + structured_output: Any = None + total_input_tokens = 0 + total_output_tokens = 0 + result_model: str | None = model + turn_count = 0 + # Track pending tool_use IDs so we can pair them with ToolResultBlocks + pending_tools: dict[str, str] = {} + + try: + async for message in query(prompt=rendered_prompt, options=options): + if interrupt_signal is not None and interrupt_signal.is_set(): + return self._build_output( + content_parts, + structured_output, + agent, + result_model, + total_input_tokens, + total_output_tokens, + partial=True, + ) + + msg_type = type(message).__name__ + + if msg_type == "AssistantMessage": + blocks = getattr(message, "content", 
None) + if blocks: + if event_callback: + _safe_callback( + event_callback, + "agent_turn_start", + {"turn": "awaiting_model"}, + ) + self._process_assistant_blocks( + blocks, content_parts, pending_tools, event_callback + ) + + if hasattr(message, "model") and message.model: + result_model = message.model + if hasattr(message, "usage") and message.usage: + total_input_tokens += message.usage.get("input_tokens", 0) + total_output_tokens += message.usage.get("output_tokens", 0) + turn_count += 1 + if event_callback: + _safe_callback( + event_callback, + "agent_turn_start", + {"turn": turn_count}, + ) + + elif msg_type == "UserMessage": + msg_content = getattr(message, "content", None) + if msg_content and event_callback: + self._process_tool_results(msg_content, pending_tools, event_callback) + + elif msg_type == "ResultMessage": + if getattr(message, "structured_output", None) is not None: + structured_output = message.structured_output + elif getattr(message, "result", None) and not content_parts: + content_parts.append(message.result) + if hasattr(message, "usage") and message.usage: + total_input_tokens += message.usage.get("input_tokens", 0) + total_output_tokens += message.usage.get("output_tokens", 0) + if getattr(message, "is_error", False): + raise ProviderError( + f"Claude Agent SDK execution failed: " + f"{getattr(message, 'result', 'Unknown error')}" + ) + + except ProviderError: + raise + except Exception as e: + raise ProviderError( + f"Claude Agent SDK execution error: {e}", + suggestion="Check that the claude CLI is installed and accessible", + ) from e + + return self._build_output( + content_parts, + structured_output, + agent, + result_model, + total_input_tokens, + total_output_tokens, + ) + + async def validate_connection(self) -> bool: + return CLAUDE_AGENT_SDK_AVAILABLE + + async def close(self) -> None: + pass + + @staticmethod + def _process_assistant_blocks( + blocks: list[Any], + content_parts: list[str], + pending_tools: dict[str, str], + 
event_callback: EventCallback | None, + ) -> None: + for block in blocks: + block_type = getattr(block, "type", None) or type(block).__name__ + + if block_type in ("text", "TextBlock"): + text = getattr(block, "text", "") + if text: + content_parts.append(text) + if event_callback: + _safe_callback(event_callback, "agent_message", {"content": text}) + + elif block_type in ("thinking", "ThinkingBlock"): + thinking = getattr(block, "thinking", "") + if thinking and event_callback: + _safe_callback( + event_callback, + "agent_reasoning", + {"content": thinking}, + ) + + elif block_type in ("tool_use", "ToolUseBlock"): + tool_name = getattr(block, "name", "unknown") + tool_id = getattr(block, "id", "") + tool_input = getattr(block, "input", {}) + pending_tools[tool_id] = tool_name + if event_callback: + _safe_callback( + event_callback, + "agent_tool_start", + {"tool_name": tool_name, "arguments": tool_input}, + ) + + @staticmethod + def _process_tool_results( + blocks: list[Any], + pending_tools: dict[str, str], + event_callback: EventCallback, + ) -> None: + for block in blocks: + block_type = getattr(block, "type", None) or type(block).__name__ + if block_type not in ("tool_result", "ToolResultBlock"): + continue + + tool_use_id = getattr(block, "tool_use_id", "") + tool_name = pending_tools.pop(tool_use_id, "unknown") + content = getattr(block, "content", "") + result_str = str(content)[:500] if content else None + + _safe_callback( + event_callback, + "agent_tool_complete", + {"tool_name": tool_name, "result": result_str}, + ) + + @staticmethod + def _build_output( + content_parts: list[str], + structured_output: Any, + agent: AgentDef, + model: str | None, + input_tokens: int, + output_tokens: int, + partial: bool = False, + ) -> AgentOutput: + if structured_output is not None: + if isinstance(structured_output, dict): + content = structured_output + elif isinstance(structured_output, str): + try: + content = json.loads(structured_output) + except 
json.JSONDecodeError: + content = {"response": structured_output} + else: + content = {"response": str(structured_output)} + elif agent.output: + combined = "\n".join(content_parts) + try: + content = json.loads(combined) + except json.JSONDecodeError: + content = {"response": combined} + else: + content = {"response": "\n".join(content_parts)} + + total = input_tokens + output_tokens + return AgentOutput( + content=content, + raw_response=structured_output or "\n".join(content_parts), + tokens_used=total if total else None, + input_tokens=input_tokens or None, + output_tokens=output_tokens or None, + model=model, + partial=partial, + ) + + +def _safe_callback(callback: EventCallback, event_type: str, data: dict[str, Any]) -> None: + try: + callback(event_type, data) + except Exception: + logger.debug("Error in event_callback for %s", event_type, exc_info=True) diff --git a/src/conductor/providers/factory.py b/src/conductor/providers/factory.py index 324c1c7..9bf3ff9 100644 --- a/src/conductor/providers/factory.py +++ b/src/conductor/providers/factory.py @@ -11,11 +11,15 @@ from conductor.exceptions import ProviderError from conductor.providers.base import AgentProvider from conductor.providers.claude import ANTHROPIC_SDK_AVAILABLE, ClaudeProvider +from conductor.providers.claude_agent_sdk import ( + CLAUDE_AGENT_SDK_AVAILABLE, + ClaudeAgentSdkProvider, +) from conductor.providers.copilot import CopilotProvider, IdleRecoveryConfig async def create_provider( - provider_type: Literal["copilot", "openai-agents", "claude"] = "copilot", + provider_type: Literal["copilot", "openai-agents", "claude", "claude-agent-sdk"] = "copilot", validate: bool = True, mcp_servers: dict[str, Any] | None = None, default_model: str | None = None, @@ -90,10 +94,21 @@ async def create_provider( max_agent_iterations=max_agent_iterations, max_session_seconds=max_session_seconds, ) + case "claude-agent-sdk": + if not CLAUDE_AGENT_SDK_AVAILABLE: + raise ProviderError( + "Claude Agent SDK 
provider requires claude-agent-sdk package", + suggestion="Install with: uv add 'claude-agent-sdk>=0.1.0'", + ) + provider = ClaudeAgentSdkProvider( + model=default_model, + max_turns=max_agent_iterations, + max_session_seconds=max_session_seconds, + ) case _: raise ProviderError( f"Unknown provider: {provider_type}", - suggestion="Valid providers are: copilot, openai-agents, claude", + suggestion="Valid providers are: copilot, openai-agents, claude, claude-agent-sdk", ) if validate and not await provider.validate_connection(): diff --git a/tests/test_providers/test_claude_agent_sdk.py b/tests/test_providers/test_claude_agent_sdk.py new file mode 100644 index 0000000..4a3da21 --- /dev/null +++ b/tests/test_providers/test_claude_agent_sdk.py @@ -0,0 +1,493 @@ +"""Unit tests for the ClaudeAgentSdkProvider implementation.""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass, field +from unittest.mock import Mock, patch + +import pytest +from claude_agent_sdk import ( + AssistantMessage, + ResultMessage, + TextBlock, + ThinkingBlock, + ToolResultBlock, + ToolUseBlock, + UserMessage, +) + +from conductor.config.schema import AgentDef, OutputField +from conductor.exceptions import ProviderError +from conductor.providers.claude_agent_sdk import ClaudeAgentSdkProvider + + +def _assistant( + content: list, + model: str = "claude-sonnet-4-6", + usage: dict | None = None, +) -> AssistantMessage: + return AssistantMessage(content=content, model=model, usage=usage) + + +def _result( + result: str | None = None, + structured_output: object | None = None, + usage: dict | None = None, + is_error: bool = False, +) -> ResultMessage: + return ResultMessage( + subtype="result", + duration_ms=1000, + duration_api_ms=900, + is_error=is_error, + num_turns=1, + session_id="test-session", + usage=usage, + result=result, + structured_output=structured_output, + ) + + +class TestClaudeAgentSdkProviderInitialization: + 
@patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", False) + def test_init_raises_when_sdk_not_installed(self) -> None: + with pytest.raises(ProviderError, match="Claude Agent SDK not installed"): + ClaudeAgentSdkProvider() + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.query", lambda **kwargs: None) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + def test_init_with_defaults(self) -> None: + provider = ClaudeAgentSdkProvider() + assert provider._default_model == "claude-sonnet-4-6" + assert provider._default_max_turns == 50 + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.query", lambda **kwargs: None) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + def test_init_with_custom_params(self) -> None: + provider = ClaudeAgentSdkProvider( + model="claude-opus-4-20250514", + max_turns=10, + ) + assert provider._default_model == "claude-opus-4-20250514" + assert provider._default_max_turns == 10 + + +class TestValidateConnection: + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.query", lambda **kwargs: None) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_validate_connection_returns_true(self) -> None: + provider = ClaudeAgentSdkProvider() + assert await provider.validate_connection() is True + + +class TestExecute: + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_execute_text_only(self) -> None: + async def fake_query(**kwargs): + yield _assistant( + content=[TextBlock(text="The answer is 42")], + usage={"input_tokens": 100, "output_tokens": 50}, + ) + yield _result( + result="The answer is 
42", + usage={"input_tokens": 0, "output_tokens": 0}, + ) + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef(name="test_agent", prompt="What is the answer?") + output = await provider.execute( + agent=agent, + context={}, + rendered_prompt="What is the answer?", + ) + + assert output.content == {"response": "The answer is 42"} + assert output.input_tokens == 100 + assert output.output_tokens == 50 + assert output.partial is False + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_execute_structured_output(self) -> None: + async def fake_query(**kwargs): + yield _assistant( + content=[TextBlock(text="thinking...")], + usage={"input_tokens": 100, "output_tokens": 50}, + ) + yield _result( + structured_output={"answer": "42", "confidence": 0.95}, + usage={"input_tokens": 0, "output_tokens": 0}, + ) + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef( + name="test_agent", + prompt="What is the answer?", + output={ + "answer": OutputField(type="string"), + "confidence": OutputField(type="number"), + }, + ) + output = await provider.execute( + agent=agent, + context={}, + rendered_prompt="What is the answer?", + ) + + assert output.content == {"answer": "42", "confidence": 0.95} + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_execute_emits_event_callbacks(self) -> None: + async def fake_query(**kwargs): + yield _assistant( + content=[ + TextBlock(text="Hello"), + ToolUseBlock(id="t1", name="search", input={"q": "test"}), + ThinkingBlock(thinking="Hmm", signature="sig"), + ], + usage={"input_tokens": 50, "output_tokens": 25}, + ) + yield 
UserMessage(content=[ToolResultBlock(tool_use_id="t1", content="search results")]) + yield _result() + + events: list[tuple[str, dict]] = [] + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef(name="test", prompt="hi") + await provider.execute( + agent=agent, + context={}, + rendered_prompt="hi", + event_callback=lambda t, d: events.append((t, d)), + ) + + event_types = [e[0] for e in events] + assert "agent_turn_start" in event_types + assert "agent_message" in event_types + assert "agent_tool_start" in event_types + assert "agent_tool_complete" in event_types + assert "agent_reasoning" in event_types + + tool_complete = next(e for e in events if e[0] == "agent_tool_complete") + assert tool_complete[1]["tool_name"] == "search" + assert "search results" in tool_complete[1]["result"] + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_execute_interrupt_signal(self) -> None: + interrupt = asyncio.Event() + interrupt.set() + + async def fake_query(**kwargs): + yield _assistant( + content=[TextBlock(text="partial")], + usage={"input_tokens": 10, "output_tokens": 5}, + ) + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef(name="test", prompt="hi") + output = await provider.execute( + agent=agent, + context={}, + rendered_prompt="hi", + interrupt_signal=interrupt, + ) + + assert output.partial is True + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_execute_error_result(self) -> None: + async def fake_query(**kwargs): + yield _result(is_error=True, result="API key invalid") + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = 
ClaudeAgentSdkProvider() + agent = AgentDef(name="test", prompt="hi") + with pytest.raises(ProviderError, match="API key invalid"): + await provider.execute( + agent=agent, + context={}, + rendered_prompt="hi", + ) + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_execute_wraps_unexpected_errors(self) -> None: + async def failing_query(**kwargs): + raise RuntimeError("connection refused") + yield # noqa: F401 - make it an async generator + + with patch("conductor.providers.claude_agent_sdk.query", failing_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef(name="test", prompt="hi") + with pytest.raises(ProviderError, match="connection refused"): + await provider.execute( + agent=agent, + context={}, + rendered_prompt="hi", + ) + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_execute_token_accumulation(self) -> None: + async def fake_query(**kwargs): + yield _assistant( + content=[TextBlock(text="part1")], + usage={"input_tokens": 100, "output_tokens": 50}, + ) + yield _assistant( + content=[TextBlock(text="part2")], + usage={"input_tokens": 80, "output_tokens": 40}, + ) + yield _result(usage={"input_tokens": 10, "output_tokens": 5}) + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef(name="test", prompt="hi") + output = await provider.execute( + agent=agent, + context={}, + rendered_prompt="hi", + ) + + assert output.input_tokens == 190 + assert output.output_tokens == 95 + assert output.tokens_used == 285 + + +class TestOutputFormatConstruction: + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def 
test_output_format_built_from_output_fields(self) -> None: + original_options = Mock() + + async def capture_query(**kwargs): + yield _result(structured_output={"name": "test", "score": 5}) + + with ( + patch("conductor.providers.claude_agent_sdk.query", capture_query), + patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", original_options), + ): + provider = ClaudeAgentSdkProvider() + agent = AgentDef( + name="test", + prompt="hi", + output={ + "name": OutputField(type="string", description="The name"), + "score": OutputField(type="number"), + }, + ) + await provider.execute(agent=agent, context={}, rendered_prompt="hi") + + call_kwargs = original_options.call_args[1] + output_format = call_kwargs["output_format"] + assert output_format["type"] == "json_schema" + schema = output_format["schema"] + assert schema["type"] == "object" + assert "name" in schema["properties"] + assert "score" in schema["properties"] + assert schema["properties"]["name"]["description"] == "The name" + assert set(schema["required"]) == {"name", "score"} + + +class TestSchemaBuilding: + def test_nested_object_schema(self) -> None: + from conductor.providers.claude_agent_sdk import _build_output_format + + output = { + "person": OutputField( + type="object", + properties={ + "name": OutputField(type="string", description="Full name"), + "age": OutputField(type="number"), + }, + ), + } + result = _build_output_format(output) + person = result["schema"]["properties"]["person"] + assert person["type"] == "object" + assert "name" in person["properties"] + assert person["properties"]["name"]["description"] == "Full name" + assert person["required"] == ["name", "age"] + + def test_array_schema(self) -> None: + from conductor.providers.claude_agent_sdk import _build_output_format + + output = { + "tags": OutputField( + type="array", + items=OutputField(type="string"), + ), + } + result = _build_output_format(output) + tags = result["schema"]["properties"]["tags"] + assert tags["type"] 
== "array" + assert tags["items"]["type"] == "string" + + def test_array_of_objects_schema(self) -> None: + from conductor.providers.claude_agent_sdk import _build_output_format + + output = { + "items": OutputField( + type="array", + items=OutputField( + type="object", + properties={ + "id": OutputField(type="number"), + "label": OutputField(type="string"), + }, + ), + ), + } + result = _build_output_format(output) + items_schema = result["schema"]["properties"]["items"]["items"] + assert items_schema["type"] == "object" + assert set(items_schema["required"]) == {"id", "label"} + + def test_depth_limit_raises(self) -> None: + from conductor.providers.claude_agent_sdk import _build_field_schema + + field_def = OutputField(type="string") + for _ in range(12): + field_def = OutputField(type="object", properties={"nested": field_def}) + + with pytest.raises(ProviderError, match="nesting exceeds 10"): + _build_field_schema(field_def) + + +class TestBuildOutput: + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_string_structured_output_parsed_as_json(self) -> None: + async def fake_query(**kwargs): + yield _result(structured_output='{"answer": "yes"}') + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef( + name="test", + prompt="hi", + output={"answer": OutputField(type="string")}, + ) + output = await provider.execute(agent=agent, context={}, rendered_prompt="hi") + + assert output.content == {"answer": "yes"} + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_non_dict_structured_output_wrapped(self) -> None: + async def fake_query(**kwargs): + yield _result(structured_output=42) + + with patch("conductor.providers.claude_agent_sdk.query", 
fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef(name="test", prompt="hi") + output = await provider.execute(agent=agent, context={}, rendered_prompt="hi") + + assert output.content == {"response": "42"} + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_output_schema_with_non_json_text_falls_back(self) -> None: + async def fake_query(**kwargs): + yield _assistant( + content=[TextBlock(text="not valid json")], + usage={"input_tokens": 10, "output_tokens": 5}, + ) + yield _result() + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef( + name="test", + prompt="hi", + output={"answer": OutputField(type="string")}, + ) + output = await provider.execute(agent=agent, context={}, rendered_prompt="hi") + + assert output.content == {"response": "not valid json"} + + +class TestMessageDispatch: + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_unknown_message_types_ignored(self) -> None: + @dataclass + class FakeSystemMessage: + subtype: str = "init" + data: dict = field(default_factory=dict) + + @dataclass + class FakeStreamEvent: + event: str = "keepalive" + + async def fake_query(**kwargs): + yield FakeSystemMessage() + yield FakeStreamEvent() + yield _result(result="done") + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef(name="test", prompt="hi") + output = await provider.execute(agent=agent, context={}, rendered_prompt="hi") + + assert output.content == {"response": "done"} + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def 
test_tool_result_with_no_matching_pending_tool(self) -> None: + events: list[tuple[str, dict]] = [] + + async def fake_query(**kwargs): + yield UserMessage( + content=[ToolResultBlock(tool_use_id="orphan_id", content="orphan result")] + ) + yield _result(result="done") + + with patch("conductor.providers.claude_agent_sdk.query", fake_query): + provider = ClaudeAgentSdkProvider() + agent = AgentDef(name="test", prompt="hi") + await provider.execute( + agent=agent, + context={}, + rendered_prompt="hi", + event_callback=lambda t, d: events.append((t, d)), + ) + + tool_complete = [e for e in events if e[0] == "agent_tool_complete"] + assert len(tool_complete) == 1 + assert tool_complete[0][1]["tool_name"] == "unknown" + + @patch("conductor.providers.claude_agent_sdk.CLAUDE_AGENT_SDK_AVAILABLE", True) + @patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", Mock) + async def test_system_prompt_passed_to_options(self) -> None: + options_mock = Mock() + + async def fake_query(**kwargs): + yield _result(result="done") + + with ( + patch("conductor.providers.claude_agent_sdk.query", fake_query), + patch("conductor.providers.claude_agent_sdk.ClaudeAgentOptions", options_mock), + ): + provider = ClaudeAgentSdkProvider() + agent = AgentDef( + name="test", + prompt="hi", + system_prompt="You are a helpful assistant", + ) + await provider.execute(agent=agent, context={}, rendered_prompt="hi") + + call_kwargs = options_mock.call_args[1] + assert call_kwargs["system_prompt"] == "You are a helpful assistant" diff --git a/uv.lock b/uv.lock index ac918e3..0ed94a5 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.12" [[package]] @@ -127,6 +127,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, 
upload-time = "2025-09-08T23:23:43.004Z" }, ] +[[package]] +name = "claude-agent-sdk" +version = "0.1.64" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "mcp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/31/0b/900fcdd70384da09d717ec7eea595dbe241e93aca92505483351b3a31d52/claude_agent_sdk-0.1.64.tar.gz", hash = "sha256:147e513cb45095b57c37d74b8d01dd41b5f3ec7f70e408edce43a6590159c27d", size = 213492, upload-time = "2026-04-20T22:29:56.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/3b/3acc290014ca3ff75e90bf02f444d4e245091717178e61ad4bce23eb5d08/claude_agent_sdk-0.1.64-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4cf47a9e40c0a683a05afff4fac1e3d5ea7965b1e9f72a8e266c8d2efbf65904", size = 60642119, upload-time = "2026-04-20T22:30:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e7/4e01c53350d7851b1ec1b12873ada29bbfc4bac7e4b75e6d7cbd95dd338e/claude_agent_sdk-0.1.64-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:7fe765c6482c74bc6b0b4491ad3bddd1349c25f4cdf4483191c68ea9c1336825", size = 62473618, upload-time = "2026-04-20T22:30:09.988Z" }, + { url = "https://files.pythonhosted.org/packages/82/75/59b9df9bafe6df4e2286d086a9aaad950b79e0286f78da6e0d645b60bdce/claude_agent_sdk-0.1.64-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:605eebf46e7590e4f878572c2743954fba3f3530dfd99e10ff3b8b41a9fee757", size = 73918731, upload-time = "2026-04-20T22:30:17.972Z" }, + { url = "https://files.pythonhosted.org/packages/78/77/6d7b064224b59bc7b411636aaa0e4dff745bdcec32f2c1b15558914ac814/claude_agent_sdk-0.1.64-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:bbb1373ee0b4494e2db24aa10d312d22b86895b4b8f18eb5b58f99f14d827237", size = 74019300, upload-time = "2026-04-20T22:30:25.603Z" }, + { url = "https://files.pythonhosted.org/packages/c6/e6/e475efffa4eb13f3c268de2266f2c1027b4ee0e1fb8037789d804b0c74cf/claude_agent_sdk-0.1.64-py3-none-win_amd64.whl", hash = 
"sha256:453fa251e2a4aeed580c72d4c7b2cb98fc8d8d26012798126f5cb11a9829cd71", size = 76184108, upload-time = "2026-04-20T22:30:33.129Z" }, +] + [[package]] name = "click" version = "8.3.1" @@ -150,10 +167,11 @@ wheels = [ [[package]] name = "conductor-cli" -version = "0.1.8" +version = "0.1.9" source = { editable = "." } dependencies = [ { name = "anthropic" }, + { name = "claude-agent-sdk" }, { name = "fastapi" }, { name = "github-copilot-sdk" }, { name = "httpx" }, @@ -168,6 +186,11 @@ dependencies = [ { name = "websockets" }, ] +[package.optional-dependencies] +claude-agent-sdk = [ + { name = "claude-agent-sdk" }, +] + [package.dev-dependencies] dev = [ { name = "pytest" }, @@ -180,6 +203,8 @@ dev = [ [package.metadata] requires-dist = [ { name = "anthropic", specifier = ">=0.77.0,<1.0.0" }, + { name = "claude-agent-sdk", specifier = ">=0.1.64" }, + { name = "claude-agent-sdk", marker = "extra == 'claude-agent-sdk'", specifier = ">=0.1.0" }, { name = "fastapi", specifier = ">=0.115.0" }, { name = "github-copilot-sdk", specifier = ">=0.2.2" }, { name = "httpx", specifier = ">=0.27.0" }, @@ -193,6 +218,7 @@ requires-dist = [ { name = "uvicorn", specifier = ">=0.30.0" }, { name = "websockets", specifier = ">=12.0" }, ] +provides-extras = ["claude-agent-sdk"] [package.metadata.requires-dev] dev = [ From a2c6d5cf386bd7397b47cc94d5d6bc4202452d55 Mon Sep 17 00:00:00 2001 From: Lester Sanchez Date: Tue, 21 Apr 2026 18:14:16 +0100 Subject: [PATCH 2/2] feat(claude-agent-sdk): enhance logging with verbose and full mode options --- src/conductor/providers/claude_agent_sdk.py | 154 +++++++++++++++++--- 1 file changed, 132 insertions(+), 22 deletions(-) diff --git a/src/conductor/providers/claude_agent_sdk.py b/src/conductor/providers/claude_agent_sdk.py index 6e077f8..81d456a 100644 --- a/src/conductor/providers/claude_agent_sdk.py +++ b/src/conductor/providers/claude_agent_sdk.py @@ -91,6 +91,11 @@ async def execute( if query is None or ClaudeAgentOptions is None: raise 
ProviderError("Claude Agent SDK not available") + from conductor.cli.app import is_full, is_verbose + + verbose_enabled = is_verbose() + full_enabled = is_full() + model = agent.model or self._default_model max_turns = ( agent.max_agent_iterations @@ -141,7 +146,12 @@ async def execute( {"turn": "awaiting_model"}, ) self._process_assistant_blocks( - blocks, content_parts, pending_tools, event_callback + blocks, + content_parts, + pending_tools, + event_callback, + verbose_enabled, + full_enabled, ) if hasattr(message, "model") and message.model: @@ -159,8 +169,14 @@ async def execute( elif msg_type == "UserMessage": msg_content = getattr(message, "content", None) - if msg_content and event_callback: - self._process_tool_results(msg_content, pending_tools, event_callback) + if msg_content: + self._process_tool_results( + msg_content, + pending_tools, + event_callback, + verbose_enabled, + full_enabled, + ) elif msg_type == "ResultMessage": if getattr(message, "structured_output", None) is not None: @@ -172,8 +188,8 @@ async def execute( total_output_tokens += message.usage.get("output_tokens", 0) if getattr(message, "is_error", False): raise ProviderError( - f"Claude Agent SDK execution failed: " - f"{getattr(message, 'result', 'Unknown error')}" + self._build_error_message(message), + is_retryable=False, ) except ProviderError: @@ -205,6 +221,8 @@ def _process_assistant_blocks( content_parts: list[str], pending_tools: dict[str, str], event_callback: EventCallback | None, + verbose: bool = False, + full_mode: bool = False, ) -> None: for block in blocks: block_type = getattr(block, "type", None) or type(block).__name__ @@ -218,30 +236,34 @@ def _process_assistant_blocks( elif block_type in ("thinking", "ThinkingBlock"): thinking = getattr(block, "thinking", "") - if thinking and event_callback: - _safe_callback( - event_callback, - "agent_reasoning", - {"content": thinking}, - ) + if thinking: + if event_callback: + _safe_callback( + event_callback, + 
"agent_reasoning", + {"content": thinking}, + ) + if verbose: + _log_event_verbose("agent_reasoning", {"content": thinking}, full_mode) elif block_type in ("tool_use", "ToolUseBlock"): tool_name = getattr(block, "name", "unknown") tool_id = getattr(block, "id", "") tool_input = getattr(block, "input", {}) pending_tools[tool_id] = tool_name + data = {"tool_name": tool_name, "arguments": tool_input} if event_callback: - _safe_callback( - event_callback, - "agent_tool_start", - {"tool_name": tool_name, "arguments": tool_input}, - ) + _safe_callback(event_callback, "agent_tool_start", data) + if verbose: + _log_event_verbose("agent_tool_start", data, full_mode) @staticmethod def _process_tool_results( blocks: list[Any], pending_tools: dict[str, str], - event_callback: EventCallback, + event_callback: EventCallback | None, + verbose: bool = False, + full_mode: bool = False, ) -> None: for block in blocks: block_type = getattr(block, "type", None) or type(block).__name__ @@ -252,12 +274,36 @@ def _process_tool_results( tool_name = pending_tools.pop(tool_use_id, "unknown") content = getattr(block, "content", "") result_str = str(content)[:500] if content else None + data = {"tool_name": tool_name, "result": result_str} - _safe_callback( - event_callback, - "agent_tool_complete", - {"tool_name": tool_name, "result": result_str}, - ) + if event_callback: + _safe_callback(event_callback, "agent_tool_complete", data) + if verbose: + _log_event_verbose("agent_tool_complete", data, full_mode) + + @staticmethod + def _build_error_message(message: Any) -> str: + parts: list[str] = [] + + errors = getattr(message, "errors", None) + if errors: + parts.append("; ".join(str(e) for e in errors)) + + result = getattr(message, "result", None) + if result: + parts.append(str(result)) + + stop_reason = getattr(message, "stop_reason", None) + if stop_reason: + parts.append(f"stop_reason={stop_reason}") + + num_turns = getattr(message, "num_turns", None) + if num_turns is not None: + 
parts.append(f"after {num_turns} turns") + + if parts: + return f"Claude Agent SDK execution failed: {', '.join(parts)}" + return "Claude Agent SDK execution failed (no details available)" @staticmethod def _build_output( @@ -300,6 +346,70 @@ def _build_output( ) +def _log_event_verbose(event_type: str, data: dict[str, Any], full_mode: bool) -> None: + from rich.console import Console + from rich.text import Text + + from conductor.cli.run import _file_console + + console = Console(stderr=True, highlight=False) + + def _print(renderable: Any) -> None: + console.print(renderable) + if _file_console is not None: + _file_console.print(renderable) + + if event_type == "agent_tool_start": + tool_name = data.get("tool_name", "unknown") + text = Text() + text.append(" ├─ ", style="dim") + text.append("🔧 ", style="") + text.append(str(tool_name), style="cyan bold") + _print(text) + + if full_mode: + args = data.get("arguments") + if args: + args_str = str(args) + args_preview = args_str[:200] + "..." if len(args_str) > 200 else args_str + arg_text = Text() + arg_text.append(" │ ", style="dim") + arg_text.append("args: ", style="dim italic") + arg_text.append(args_preview, style="dim") + _print(arg_text) + + elif event_type == "agent_tool_complete": + tool_name = data.get("tool_name") + if tool_name: + text = Text() + text.append(" │ ", style="dim") + text.append("✓ ", style="green") + text.append(str(tool_name), style="dim") + _print(text) + + if full_mode: + result = data.get("result") + if result: + result_str = str(result) + result_preview = result_str[:200] + "..." if len(result_str) > 200 else result_str + result_text = Text() + result_text.append(" │ ", style="dim") + result_text.append("result: ", style="dim italic") + result_text.append(result_preview, style="dim") + _print(result_text) + + elif event_type == "agent_reasoning": + if full_mode: + reasoning = data.get("content", "") + if reasoning: + display = reasoning[:150] + "..." 
if len(reasoning) > 150 else reasoning + text = Text() + text.append(" │ ", style="dim") + text.append("💭 ", style="") + text.append(display.replace("\n", " "), style="italic dim") + _print(text) + + def _safe_callback(callback: EventCallback, event_type: str, data: dict[str, Any]) -> None: try: callback(event_type, data)