diff --git a/src/conductor/providers/claude.py b/src/conductor/providers/claude.py index b812703..d96eafd 100644 --- a/src/conductor/providers/claude.py +++ b/src/conductor/providers/claude.py @@ -360,7 +360,7 @@ def _convert_mcp_tools_to_claude( claude_tools: list[dict[str, Any]] = [] for tool in self._mcp_manager.get_all_tools(): # Apply filter if specified - if tool_filter is not None and tool["name"] not in tool_filter: + if tool_filter and tool["name"] not in tool_filter: continue claude_tools.append( @@ -1170,6 +1170,11 @@ async def _execute_with_parse_recovery( logger.debug("Successfully extracted structured output from tool_use block") return response + # Check for MCP tool calls — return to agentic loop for execution + if self._has_mcp_tool_use(response): + logger.debug("Response contains MCP tool calls, returning to agentic loop") + return response + # Check if we can extract JSON from text (fallback success path) json_content = self._extract_json_fallback(response) if json_content is not None: @@ -1225,6 +1230,13 @@ async def _execute_with_parse_recovery( logger.info(f"Parse recovery succeeded on attempt {attempt} (tool_use)") return response + # Check for MCP tool calls — return to agentic loop for execution + if self._has_mcp_tool_use(response): + logger.debug( + f"Recovery attempt {attempt} returned MCP tool calls, returning to agentic loop" + ) + return response + # Check if recovery succeeded (JSON fallback) json_content = self._extract_json_fallback(response) if json_content is not None: @@ -1574,6 +1586,20 @@ def _extract_structured_output(self, response: Any) -> dict[str, Any] | None: return dict(block.input) return None + def _has_mcp_tool_use(self, response: Any) -> bool: + """Check if response contains non-emit_output tool_use blocks (MCP tool calls). + + Args: + response: Claude API response. + + Returns: + True if response contains MCP tool calls that the agentic loop should handle. + """ + return any( + hasattr(block, "type") and block.type == "tool_use" and block.name != "emit_output" + for block in response.content + ) + def _extract_json_fallback(self, response: Any) -> dict[str, Any] | None: """Fallback: parse JSON from text content. diff --git a/tests/test_integration/test_claude_mcp_tool_filter.py b/tests/test_integration/test_claude_mcp_tool_filter.py new file mode 100644 index 0000000..6c93e68 --- /dev/null +++ b/tests/test_integration/test_claude_mcp_tool_filter.py @@ -0,0 +1,332 @@ +"""Integration test for MCP tool filtering bug in a Claude workflow. + +Regression test for https://github.com/microsoft/conductor/issues/37: +When a Claude workflow has mcp_servers configured but no workflow-level +``tools:`` section, MCP tools are silently excluded from API requests. + +This test exercises the full path: + YAML workflow (mcp_servers, no tools:) → WorkflowEngine → AgentExecutor + → ClaudeProvider._execute_with_retry → _convert_mcp_tools_to_claude(tools=[]) + +The bug causes ``tool_filter=[]`` to be treated as "include nothing" instead +of "no filter — include all MCP tools". +""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from conductor.config.loader import load_workflow +from conductor.config.schema import ( + AgentDef, + InputDef, + OutputField, + RouteDef, + RuntimeConfig, + WorkflowConfig, + WorkflowDef, +) +from conductor.engine.workflow import WorkflowEngine +from conductor.providers.claude import ClaudeProvider + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +FAKE_MCP_TOOLS: list[dict[str, Any]] = [ + { + "name": "filesystem__read_file", + "description": "Read a file from the filesystem", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}}, + "server": "filesystem", + "original_name": "read_file", + }, + { + "name": "filesystem__write_file", + "description": "Write a file to the filesystem", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}}, + "server": "filesystem", + "original_name": "write_file", + }, + { + "name": "web_search__search", + "description": "Search the web", + "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}}, + "server": "web_search", + "original_name": "search", + }, +] + + +def _make_provider_with_mcp() -> ClaudeProvider: + """Create a ClaudeProvider pre-wired with a mock MCP manager. + + The provider is constructed via ``__new__`` to bypass real SDK + initialisation, then populated with the minimum attributes needed + for ``execute()`` → ``_execute_with_retry()`` to reach the MCP + tool-building code path. + """ + provider = ClaudeProvider.__new__(ClaudeProvider) + provider._client = MagicMock() + provider._default_model = "claude-3-5-sonnet-latest" + provider._default_temperature = None + provider._default_max_tokens = 8192 + provider._retry_config = MagicMock() + provider._retry_config.max_attempts = 1 + provider._retry_config.base_delay = 1.0 + provider._retry_config.max_delay = 30.0 + provider._retry_config.jitter = 0.0 + provider._retry_history = [] + provider._max_parse_recovery_attempts = 2 + provider._max_schema_depth = 10 + + # Pre-wire a mock MCP manager so _ensure_mcp_connected is a no-op + mock_mcp = MagicMock() + mock_mcp.get_all_tools.return_value = FAKE_MCP_TOOLS + mock_mcp.has_servers.return_value = True + provider._mcp_manager = mock_mcp + provider._mcp_servers_config = { + "filesystem": {"command": "npx", "args": ["-y", "@modelcontextprotocol/server-filesystem"]}, + } + + return provider + + +def _make_emit_output_response(output: dict[str, Any]) -> MagicMock: + """Create a mock Claude API response that calls emit_output.""" + tool_block = MagicMock() + tool_block.type = "tool_use" + tool_block.name = "emit_output" + tool_block.id = "tool_call_1" + tool_block.input = output + + response = MagicMock() + response.content = [tool_block] + response.usage = MagicMock() + response.usage.input_tokens = 100 + response.usage.output_tokens = 50 + response.usage.cache_read_input_tokens = None + response.usage.cache_creation_input_tokens = None + response.stop_reason = "end_turn" + return response + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestMcpToolsReachApiInWorkflow: + """Verify MCP tools survive the full workflow → provider pipeline. + + The common scenario: a workflow declares ``mcp_servers`` but has no + ``tools:`` key. WorkflowConfig.tools defaults to ``[]``, which flows + through AgentExecutor → resolve_agent_tools → ClaudeProvider.execute + as ``tools=[]``. + + The bug: ``_convert_mcp_tools_to_claude(tool_filter=[])`` treated + ``[]`` as "include nothing", silently dropping every MCP tool. + """ + + @pytest.mark.asyncio + async def test_mcp_tools_included_when_workflow_has_no_tools_section(self) -> None: + """MCP tools must appear in the API request even without a tools: section.""" + provider = _make_provider_with_mcp() + + # Capture the tools kwarg passed to _execute_agentic_loop + captured: dict[str, Any] = {} + + async def spy_agentic_loop(**kwargs: Any) -> Any: + captured["tools"] = kwargs.get("tools") + # Return a canned emit_output response so execute completes + response = _make_emit_output_response({"content": "file contents here"}) + return (response, 150, False) + + provider._execute_agentic_loop = AsyncMock(side_effect=spy_agentic_loop) + + agent = AgentDef( + name="reader", + model="claude-3-5-sonnet-latest", + prompt="Read the file at /tmp/test.txt", + output={"content": OutputField(type="string", description="File contents")}, + routes=[RouteDef(to="$end")], + ) + + # Simulate what the engine does: tools=[] (no workflow tools defined) + await provider.execute( + agent=agent, + context={}, + rendered_prompt="Read the file at /tmp/test.txt", + tools=[], + ) + + # The tools list sent to the agentic loop must contain the MCP tools + # plus emit_output. Before the fix, only emit_output would appear. + api_tools = captured["tools"] + assert api_tools is not None, "tools should not be None" + + tool_names = {t["name"] for t in api_tools} + assert "emit_output" in tool_names, "emit_output should always be present" + + # THE KEY ASSERTION: MCP tools must be included + assert "filesystem__read_file" in tool_names, ( + "MCP tool 'filesystem__read_file' was filtered out — this is the bug from issue #37" + ) + assert "filesystem__write_file" in tool_names + assert "web_search__search" in tool_names + assert len(tool_names) == 4 # 3 MCP + 1 emit_output + + @pytest.mark.asyncio + async def test_mcp_tools_included_in_full_workflow_engine_run(self, tmp_path) -> None: + """End-to-end: WorkflowEngine → AgentExecutor → ClaudeProvider with MCP. + + Wires up a real WorkflowConfig (no ``tools:`` section) and verifies + MCP tools reach the agentic loop through the full call chain. + """ + provider = _make_provider_with_mcp() + + # Spy on _execute_agentic_loop to capture the tools list + captured: dict[str, Any] = {} + + async def spy_agentic_loop(**kwargs: Any) -> Any: + captured["tools"] = kwargs.get("tools") + response = _make_emit_output_response({"content": "hello world"}) + return (response, 150, False) + + provider._execute_agentic_loop = AsyncMock(side_effect=spy_agentic_loop) + + # Build a workflow config — note: no `tools` key, so defaults to [] + config = WorkflowConfig( + workflow=WorkflowDef( + name="mcp-test", + description="Test MCP tool filtering", + entry_point="reader", + runtime=RuntimeConfig( + provider="claude", + mcp_servers={ + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"], + "tools": ["*"], + }, + }, + ), + input={ + "path": InputDef(type="string", required=True), + }, + ), + agents=[ + AgentDef( + name="reader", + model="claude-3-5-sonnet-latest", + prompt="Use read_file to read {{ workflow.input.path }}", + output={"content": OutputField(type="string", description="File contents")}, + routes=[RouteDef(to="$end")], + ), + ], + output={"content": "{{ reader.output.content }}"}, + ) + + # Sanity: the workflow has no explicit tools + assert config.tools == [] + + engine = WorkflowEngine(config, provider) + result = await engine.run({"path": "/tmp/test.txt"}) + + # Verify the workflow completed + assert result["content"] == "hello world" + + # Verify MCP tools were included in the API call + api_tools = captured["tools"] + assert api_tools is not None + + tool_names = {t["name"] for t in api_tools} + assert "filesystem__read_file" in tool_names, ( + "MCP tool missing from API request — issue #37 regression" + ) + assert "filesystem__write_file" in tool_names + assert "web_search__search" in tool_names + + @pytest.mark.asyncio + async def test_explicit_tool_filter_still_works(self) -> None: + """When workflow defines specific tools, only those MCP tools are included.""" + provider = _make_provider_with_mcp() + + captured: dict[str, Any] = {} + + async def spy_agentic_loop(**kwargs: Any) -> Any: + captured["tools"] = kwargs.get("tools") + response = _make_emit_output_response({"content": "filtered"}) + return (response, 150, False) + + provider._execute_agentic_loop = AsyncMock(side_effect=spy_agentic_loop) + + agent = AgentDef( + name="reader", + model="claude-3-5-sonnet-latest", + prompt="Read the file", + output={"content": OutputField(type="string")}, + routes=[RouteDef(to="$end")], + ) + + # Pass a specific tool filter — only filesystem__read_file + await provider.execute( + agent=agent, + context={}, + rendered_prompt="Read the file", + tools=["filesystem__read_file"], + ) + + api_tools = captured["tools"] + tool_names = {t["name"] for t in api_tools} + + # Only the explicitly listed MCP tool + emit_output + assert "filesystem__read_file" in tool_names + assert "filesystem__write_file" not in tool_names + assert "web_search__search" not in tool_names + assert "emit_output" in tool_names + + +class TestWorkflowYamlWithMcpServers: + """Test loading and validating a workflow YAML that mirrors the issue repro.""" + + def test_load_mcp_workflow_has_empty_tools(self, tmp_path) -> None: + """A workflow with mcp_servers but no tools: section has config.tools == [].""" + workflow_yaml = tmp_path / "mcp_workflow.yaml" + workflow_yaml.write_text("""\ +workflow: + name: mcp-test + entry_point: reader + runtime: + provider: claude + mcp_servers: + filesystem: + command: npx + args: ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"] + tools: ["*"] + input: + path: { type: string, required: true } + +agents: + - name: reader + model: claude-sonnet-4.6 + prompt: "Use read_file to read {{ workflow.input.path }}" + output: + content: { type: string } + routes: + - to: $end +""") + + config = load_workflow(str(workflow_yaml)) + + # The workflow has no `tools:` key → defaults to [] + assert config.tools == [] + + # But mcp_servers IS configured + assert "filesystem" in config.workflow.runtime.mcp_servers + + # The agent has no explicit tools → agent.tools is None (meaning "all") + assert config.agents[0].tools is None diff --git a/tests/test_providers/test_claude_mcp_tool_filter.py b/tests/test_providers/test_claude_mcp_tool_filter.py new file mode 100644 index 0000000..bdf1235 --- /dev/null +++ b/tests/test_providers/test_claude_mcp_tool_filter.py @@ -0,0 +1,304 @@ +"""Tests for MCP tool filtering in the Claude provider. + +Regression tests for: +- https://github.com/microsoft/conductor/issues/37: + An empty tool_filter ([]) silently excludes all MCP tools because + `[] is not None` evaluates to True, causing every tool to be skipped. +- https://github.com/microsoft/conductor/issues/38: + _execute_with_parse_recovery blocks MCP tool calls — it enters parse + recovery instead of returning to the agentic loop when the API response + contains non-emit_output tool_use blocks. +""" + +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from conductor.config.schema import OutputField +from conductor.exceptions import ProviderError +from conductor.providers.claude import ClaudeProvider + + +def _make_provider_with_mcp_tools(tools: list[dict[str, Any]]) -> ClaudeProvider: + """Create a ClaudeProvider with a mock MCP manager that returns the given tools.""" + provider = ClaudeProvider.__new__(ClaudeProvider) + provider._client = MagicMock() + provider._mcp_servers_config = None + provider._default_model = "claude-3-5-sonnet-latest" + provider._default_temperature = None + provider._default_max_tokens = 8192 + provider._retry_config = MagicMock() + provider._retry_config.max_attempts = 1 + provider._retry_history = [] + provider._max_parse_recovery_attempts = 2 + provider._max_schema_depth = 10 + + # Set up a mock MCP manager with tools + mock_mcp_manager = MagicMock() + mock_mcp_manager.get_all_tools.return_value = tools + mock_mcp_manager.has_servers.return_value = True + provider._mcp_manager = mock_mcp_manager + + return provider + + +FAKE_MCP_TOOLS: list[dict[str, Any]] = [ + { + "name": "filesystem__read_file", + "description": "Read a file from the filesystem", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}}, + }, + { + "name": "filesystem__write_file", + "description": "Write a file to the filesystem", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}}, + }, + { + "name": "web_search__search", + "description": "Search the web", + "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}}, + }, +] + + +class TestConvertMcpToolsFilter: + """Tests for _convert_mcp_tools_to_claude tool filtering.""" + + def test_none_filter_includes_all_tools(self) -> None: + """tool_filter=None should include all MCP tools (no filtering).""" + provider = _make_provider_with_mcp_tools(FAKE_MCP_TOOLS) + result = provider._convert_mcp_tools_to_claude(tool_filter=None) + assert len(result) == 3 + + def test_empty_list_filter_includes_all_tools(self) -> None: + """tool_filter=[] should include all MCP tools (no filtering). + + Regression test for issue #37: empty list was treated as + 'filter to nothing' instead of 'no filter applied'. + """ + provider = _make_provider_with_mcp_tools(FAKE_MCP_TOOLS) + result = provider._convert_mcp_tools_to_claude(tool_filter=[]) + assert len(result) == 3 + + def test_specific_filter_includes_only_matching_tools(self) -> None: + """tool_filter=['name'] should include only matching tools.""" + provider = _make_provider_with_mcp_tools(FAKE_MCP_TOOLS) + result = provider._convert_mcp_tools_to_claude(tool_filter=["filesystem__read_file"]) + assert len(result) == 1 + assert result[0]["name"] == "filesystem__read_file" + + def test_filter_with_multiple_tools(self) -> None: + """tool_filter with multiple names should include all matching tools.""" + provider = _make_provider_with_mcp_tools(FAKE_MCP_TOOLS) + result = provider._convert_mcp_tools_to_claude( + tool_filter=["filesystem__read_file", "web_search__search"] + ) + assert len(result) == 2 + names = {t["name"] for t in result} + assert names == {"filesystem__read_file", "web_search__search"} + + def test_filter_with_nonexistent_tool_excludes_it(self) -> None: + """tool_filter with names not in MCP tools should return no matches for those.""" + provider = _make_provider_with_mcp_tools(FAKE_MCP_TOOLS) + result = provider._convert_mcp_tools_to_claude(tool_filter=["nonexistent_tool"]) + assert len(result) == 0 + + def test_no_mcp_manager_returns_empty(self) -> None: + """No MCP manager should return empty list regardless of filter.""" + provider = ClaudeProvider.__new__(ClaudeProvider) + provider._mcp_manager = None + result = provider._convert_mcp_tools_to_claude(tool_filter=[]) + assert result == [] + result = provider._convert_mcp_tools_to_claude(tool_filter=None) + assert result == [] + + def test_tool_format_preserved(self) -> None: + """Converted tools should have name, description, and input_schema.""" + provider = _make_provider_with_mcp_tools(FAKE_MCP_TOOLS) + result = provider._convert_mcp_tools_to_claude(tool_filter=None) + for tool in result: + assert "name" in tool + assert "description" in tool + assert "input_schema" in tool + + +# --------------------------------------------------------------------------- +# Helpers for _has_mcp_tool_use / _execute_with_parse_recovery tests +# --------------------------------------------------------------------------- + + +def _make_tool_use_block(name: str, input_data: dict[str, Any] | None = None) -> MagicMock: + """Create a mock tool_use content block.""" + block = MagicMock() + block.type = "tool_use" + block.name = name + block.id = f"call_{name}" + block.input = input_data or {} + return block + + +def _make_text_block(text: str) -> MagicMock: + """Create a mock text content block.""" + block = MagicMock() + block.type = "text" + block.text = text + return block + + +def _make_response(blocks: list[MagicMock]) -> MagicMock: + """Create a mock Claude API response with the given content blocks.""" + resp = MagicMock() + resp.content = blocks + resp.usage = MagicMock() + resp.usage.input_tokens = 100 + resp.usage.output_tokens = 50 + resp.usage.cache_read_input_tokens = None + resp.usage.cache_creation_input_tokens = None + return resp + + +def _make_bare_provider() -> ClaudeProvider: + """Create a minimal ClaudeProvider for unit-testing helper methods.""" + provider = ClaudeProvider.__new__(ClaudeProvider) + provider._client = MagicMock() + provider._mcp_manager = None + provider._mcp_servers_config = None + provider._default_model = "claude-3-5-sonnet-latest" + provider._default_temperature = None + provider._default_max_tokens = 8192 + provider._retry_config = MagicMock() + provider._retry_config.max_attempts = 1 + provider._retry_history = [] + provider._max_parse_recovery_attempts = 2 + provider._max_schema_depth = 10 + return provider + + +# --------------------------------------------------------------------------- +# Issue #38: _has_mcp_tool_use +# --------------------------------------------------------------------------- + + +class TestHasMcpToolUse: + """Tests for the _has_mcp_tool_use helper.""" + + def test_detects_mcp_tool_use(self) -> None: + """Response with a non-emit_output tool_use should return True.""" + provider = _make_bare_provider() + response = _make_response([_make_tool_use_block("filesystem__read_file")]) + assert provider._has_mcp_tool_use(response) is True + + def test_ignores_emit_output(self) -> None: + """Response with only emit_output should return False.""" + provider = _make_bare_provider() + response = _make_response([_make_tool_use_block("emit_output", {"result": "hi"})]) + assert provider._has_mcp_tool_use(response) is False + + def test_mixed_emit_and_mcp(self) -> None: + """Response with both emit_output and MCP tool_use should return True.""" + provider = _make_bare_provider() + response = _make_response( + [ + _make_tool_use_block("emit_output", {"result": "hi"}), + _make_tool_use_block("filesystem__read_file"), + ] + ) + assert provider._has_mcp_tool_use(response) is True + + def test_text_only_response(self) -> None: + """Response with only text blocks should return False.""" + provider = _make_bare_provider() + response = _make_response([_make_text_block("Hello world")]) + assert provider._has_mcp_tool_use(response) is False + + def test_empty_response(self) -> None: + """Response with no content blocks should return False.""" + provider = _make_bare_provider() + response = _make_response([]) + assert provider._has_mcp_tool_use(response) is False + + +# --------------------------------------------------------------------------- +# Issue #38: _execute_with_parse_recovery returns MCP tool calls +# --------------------------------------------------------------------------- + + +class TestParseRecoveryMcpPassthrough: + """Verify _execute_with_parse_recovery returns MCP tool_use responses + immediately instead of entering parse recovery. + + Regression tests for issue #38. + """ + + @pytest.mark.asyncio + async def test_mcp_tool_use_returned_without_recovery(self) -> None: + """Initial API response with MCP tool_use should be returned directly.""" + provider = _make_bare_provider() + + mcp_response = _make_response( + [ + _make_tool_use_block("filesystem__read_file", {"path": "/tmp/test.txt"}), + ] + ) + provider._execute_api_call = AsyncMock(return_value=mcp_response) + + result = await provider._execute_with_parse_recovery( + messages=[{"role": "user", "content": "read the file"}], + model="claude-3-5-sonnet-latest", + temperature=None, + max_tokens=8192, + tools=[{"name": "emit_output", "description": "d", "input_schema": {}}], + output_schema={"content": OutputField(type="string")}, + ) + + # Should return the MCP response, not enter recovery + assert result is mcp_response + # API should have been called exactly once (no recovery retries) + assert provider._execute_api_call.call_count == 1 + + @pytest.mark.asyncio + async def test_emit_output_still_returned_directly(self) -> None: + """emit_output response should still be returned on the fast path.""" + provider = _make_bare_provider() + + emit_response = _make_response( + [ + _make_tool_use_block("emit_output", {"content": "result"}), + ] + ) + provider._execute_api_call = AsyncMock(return_value=emit_response) + + result = await provider._execute_with_parse_recovery( + messages=[{"role": "user", "content": "test"}], + model="claude-3-5-sonnet-latest", + temperature=None, + max_tokens=8192, + tools=[{"name": "emit_output", "description": "d", "input_schema": {}}], + output_schema={"content": OutputField(type="string")}, + ) + + assert result is emit_response + assert provider._execute_api_call.call_count == 1 + + @pytest.mark.asyncio + async def test_text_response_still_triggers_recovery(self) -> None: + """Plain text response (no tool_use) should still enter parse recovery.""" + provider = _make_bare_provider() + + text_response = _make_response([_make_text_block("I cannot use tools")]) + provider._execute_api_call = AsyncMock(return_value=text_response) + + with pytest.raises(ProviderError): + # Should exhaust recovery attempts and raise + await provider._execute_with_parse_recovery( + messages=[{"role": "user", "content": "test"}], + model="claude-3-5-sonnet-latest", + temperature=None, + max_tokens=8192, + tools=[{"name": "emit_output", "description": "d", "input_schema": {}}], + output_schema={"content": OutputField(type="string")}, + ) + + # Should have called API 1 (initial) + 2 (recovery attempts) = 3 times + assert provider._execute_api_call.call_count == 3