From ad9989504eb1713ac53a582a3cbdc63ec7fa8170 Mon Sep 17 00:00:00 2001 From: Stefan Broenner Date: Thu, 19 Feb 2026 11:56:25 +0100 Subject: [PATCH 1/3] fix: replace sys.modules patching with proper patch() in optimizer tests - Rewrite test_optimizer.py using patch('...PydanticAgent', ...) cleanly - Add TestAzureEntraModel class (3 tests covering default gpt-5.2-chat deployment) - Update test_optimizer_integration.py to use azure_entra_model() (gpt-5.2-chat) instead of OPENAI_API_KEY skip guard -- all 3 now pass against real Azure - Verified full test->optimize->test loop end-to-end: 3/3 passed in 64s Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/pytest_codingagents/__init__.py | 7 +- src/pytest_codingagents/copilot/optimizer.py | 98 +++++-- tests/test_optimizer_integration.py | 28 +- tests/unit/test_optimizer.py | 262 +++++++++---------- 4 files changed, 228 insertions(+), 167 deletions(-) diff --git a/src/pytest_codingagents/__init__.py b/src/pytest_codingagents/__init__.py index 2d93271..8110341 100644 --- a/src/pytest_codingagents/__init__.py +++ b/src/pytest_codingagents/__init__.py @@ -4,13 +4,18 @@ from pytest_codingagents.copilot.agent import CopilotAgent from pytest_codingagents.copilot.agents import load_custom_agent, load_custom_agents -from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction +from pytest_codingagents.copilot.optimizer import ( + InstructionSuggestion, + azure_entra_model, + optimize_instruction, +) from pytest_codingagents.copilot.result import CopilotResult __all__ = [ "CopilotAgent", "CopilotResult", "InstructionSuggestion", + "azure_entra_model", "load_custom_agent", "load_custom_agents", "optimize_instruction", diff --git a/src/pytest_codingagents/copilot/optimizer.py b/src/pytest_codingagents/copilot/optimizer.py index 1d2fb99..c97c6c4 100644 --- a/src/pytest_codingagents/copilot/optimizer.py +++ b/src/pytest_codingagents/copilot/optimizer.py @@ -4,22 +4,84 @@ between a current agent instruction and the observed behavior, and suggests a concrete improvement. -Requires ``pydantic-ai``: - - uv add pydantic-ai +Use :func:`azure_entra_model` to build a pre-configured pydantic-ai model +from Azure Entra ID (no API key required): + + model = azure_entra_model() # defaults to gpt-5.2-chat + suggestion = await optimize_instruction( + agent.instructions or "", + result, + "Agent should add docstrings.", + model=model, + ) """ from __future__ import annotations +import os from dataclasses import dataclass from typing import TYPE_CHECKING from pydantic import BaseModel +from pydantic_ai import Agent as PydanticAgent +from pydantic_ai.models import Model if TYPE_CHECKING: from pytest_codingagents.copilot.result import CopilotResult -__all__ = ["InstructionSuggestion", "optimize_instruction"] +__all__ = ["InstructionSuggestion", "azure_entra_model", "optimize_instruction"] + +# Most capable model available on Azure OpenAI +_AZURE_DEFAULT_MODEL = "gpt-5.2-chat" + + +def azure_entra_model( + deployment: str = _AZURE_DEFAULT_MODEL, + *, + endpoint: str | None = None, + api_version: str = "2024-12-01-preview", +) -> Model: + """Build a pydantic-ai Model using Azure Entra ID authentication. + + No API key required — uses ``DefaultAzureCredential`` (works with + ``az login`` locally and managed identity in CI). + + Args: + deployment: Azure OpenAI deployment name. Defaults to + ``"gpt-5.2-chat"`` — the most capable model available. + endpoint: Azure OpenAI endpoint URL. 
Defaults to the + ``AZURE_OPENAI_ENDPOINT`` environment variable. + api_version: Azure OpenAI API version string. + + Returns: + A pydantic-ai ``Model`` ready to pass to ``optimize_instruction()``. + + Example:: + + model = azure_entra_model() + suggestion = await optimize_instruction( + agent.instructions or "", + result, + "Agent should add docstrings.", + model=model, + ) + """ + from azure.identity import DefaultAzureCredential, get_bearer_token_provider + from openai import AsyncAzureOpenAI + from pydantic_ai.models.openai import OpenAIChatModel + from pydantic_ai.providers.openai import OpenAIProvider + + azure_endpoint = endpoint or os.environ["AZURE_OPENAI_ENDPOINT"] + token_provider = get_bearer_token_provider( + DefaultAzureCredential(), + "https://cognitiveservices.azure.com/.default", + ) + client = AsyncAzureOpenAI( + azure_endpoint=azure_endpoint, + azure_ad_token_provider=token_provider, + api_version=api_version, + ) + return OpenAIChatModel(deployment, provider=OpenAIProvider(openai_client=client)) @dataclass @@ -40,6 +102,7 @@ class InstructionSuggestion: agent.instructions, result, "Agent should add docstrings to all functions.", + model=azure_entra_model(), ) pytest.fail(f"No docstrings found.\\n\\n{suggestion}") """ @@ -70,7 +133,7 @@ async def optimize_instruction( result: CopilotResult, criterion: str, *, - model: str = "openai:gpt-4o-mini", + model: str | Model = "openai:gpt-4o-mini", ) -> InstructionSuggestion: """Analyze a result and suggest an improved instruction. @@ -79,16 +142,22 @@ async def optimize_instruction( concrete, actionable improvement. Designed to drop into ``pytest.fail()`` so the failure message - contains a ready-to-use fix: + contains a ready-to-use fix. + + For Azure OpenAI with Entra ID auth (recommended), use + :func:`azure_entra_model` to build the model: Example:: + from pytest_codingagents import optimize_instruction, azure_entra_model + result = await copilot_run(agent, task) if '\"\"\"' not in result.file("main.py"): suggestion = await optimize_instruction( agent.instructions or "", result, "Agent should add docstrings to all functions.", + model=azure_entra_model(), # gpt-5.2-chat via Entra ID ) pytest.fail(f"No docstrings found.\\n\\n{suggestion}") @@ -97,24 +166,13 @@ async def optimize_instruction( result: The ``CopilotResult`` from the (failed) run. criterion: What the agent *should* have done — the test expectation in plain English (e.g. ``"Always write docstrings"``). - model: LiteLLM-style model string (e.g. ``"openai:gpt-4o-mini"`` - or ``"anthropic:claude-3-haiku-20240307"``). + model: LiteLLM-style model string (e.g. ``"openai:gpt-4o-mini"``) + **or** a pre-configured pydantic-ai ``Model`` object built with + :func:`azure_entra_model` or any other provider. Returns: An :class:`InstructionSuggestion` with the improved instruction. - - Raises: - ImportError: If pydantic-ai is not installed. """ - try: - from pydantic_ai import Agent as PydanticAgent - except ImportError as exc: - msg = ( - "pydantic-ai is required for optimize_instruction(). 
" - "Install it with: uv add pydantic-ai" - ) - raise ImportError(msg) from exc - final_output = result.final_response or "(no response)" tool_calls = ", ".join(sorted(result.tool_names_called)) or "none" diff --git a/tests/test_optimizer_integration.py b/tests/test_optimizer_integration.py index b1b6ea7..2fb2f1e 100644 --- a/tests/test_optimizer_integration.py +++ b/tests/test_optimizer_integration.py @@ -2,9 +2,9 @@ These tests require: - GitHub Copilot credentials (for copilot_run to produce a real result) -- An LLM API key for the optimizer (OPENAI_API_KEY or configure a different model) +- AZURE_OPENAI_ENDPOINT env var set (for the optimizer LLM via Azure Entra ID) -Skipped automatically when the required API key is absent. +Skipped automatically when AZURE_OPENAI_ENDPOINT is absent. """ from __future__ import annotations @@ -14,18 +14,27 @@ import pytest from pytest_codingagents.copilot.agent import CopilotAgent -from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction +from pytest_codingagents.copilot.optimizer import ( + InstructionSuggestion, + azure_entra_model, + optimize_instruction, +) + + +def _model(): + """Build Azure Entra ID model for optimizer tests.""" + return azure_entra_model() # defaults to gpt-5.2-chat @pytest.mark.copilot class TestOptimizeInstructionIntegration: - """Integration tests for optimize_instruction() with real LLM calls.""" + """Integration tests for optimize_instruction() with real Azure LLM calls.""" @pytest.fixture(autouse=True) - def require_openai_key(self): - """Skip entire class when OPENAI_API_KEY is not set.""" - if not os.environ.get("OPENAI_API_KEY"): - pytest.skip("OPENAI_API_KEY not set — skipping optimizer integration tests") + def require_azure_endpoint(self): + """Skip entire class when AZURE_OPENAI_ENDPOINT is not set.""" + if not os.environ.get("AZURE_OPENAI_ENDPOINT"): + pytest.skip("AZURE_OPENAI_ENDPOINT not set — skipping optimizer integration tests") async def test_returns_valid_suggestion(self, copilot_run, tmp_path): """optimize_instruction returns an InstructionSuggestion with non-empty fields.""" @@ -44,6 +53,7 @@ async def test_returns_valid_suggestion(self, copilot_run, tmp_path): agent.instructions or "", result, "Every function must have a Google-style docstring.", + model=_model(), ) assert isinstance(suggestion, InstructionSuggestion) @@ -66,6 +76,7 @@ async def test_suggestion_str_is_human_readable(self, copilot_run, tmp_path): agent.instructions or "", result, "Add type hints to all function parameters and return values.", + model=_model(), ) text = str(suggestion) @@ -91,6 +102,7 @@ async def test_suggestion_is_relevant_to_criterion(self, copilot_run, tmp_path): agent.instructions or "", result, criterion, + model=_model(), ) # The suggestion instruction should mention docstrings somehow diff --git a/tests/unit/test_optimizer.py b/tests/unit/test_optimizer.py index 81e797c..1d2871f 100644 --- a/tests/unit/test_optimizer.py +++ b/tests/unit/test_optimizer.py @@ -1,11 +1,8 @@ -"""Unit tests for optimize_instruction() and InstructionSuggestion.""" +"""Unit tests for optimize_instruction(), azure_entra_model(), and InstructionSuggestion.""" from __future__ import annotations -import sys -from unittest.mock import AsyncMock, MagicMock - -import pytest +from unittest.mock import AsyncMock, MagicMock, patch from pytest_codingagents.copilot.optimizer import InstructionSuggestion, optimize_instruction from pytest_codingagents.copilot.result import CopilotResult, ToolCall, Turn @@ -20,27 
+17,21 @@ def _make_result( tool_calls = [ToolCall(name=t, arguments={}) for t in (tools or [])] return CopilotResult( success=success, - turns=[ - Turn(role="assistant", content=final_response, tool_calls=tool_calls), - ], + turns=[Turn(role="assistant", content=final_response, tool_calls=tool_calls)], ) def _make_agent_mock(instruction: str, reasoning: str, changes: str) -> MagicMock: - """Build a pydantic-ai Agent mock that returns a structured suggestion.""" - output = MagicMock() - output.instruction = instruction - output.reasoning = reasoning - output.changes = changes - - run_result = MagicMock() - run_result.output = output - + """Return a MagicMock that behaves like pydantic-ai Agent class.""" + output = MagicMock(instruction=instruction, reasoning=reasoning, changes=changes) + run_result = MagicMock(output=output) agent_instance = MagicMock() agent_instance.run = AsyncMock(return_value=run_result) + return MagicMock(return_value=agent_instance) + - agent_class = MagicMock(return_value=agent_instance) - return agent_class +# Patch target: PydanticAgent as imported in the optimizer module +_AGENT_PATCH = "pytest_codingagents.copilot.optimizer.PydanticAgent" class TestInstructionSuggestion: @@ -55,27 +46,17 @@ def test_str_contains_instruction(self): assert "Always add docstrings." in str(s) def test_str_contains_reasoning(self): - s = InstructionSuggestion( - instruction="inst", - reasoning="because reasons", - changes="changed x", - ) + s = InstructionSuggestion(instruction="inst", reasoning="because reasons", changes="x") assert "because reasons" in str(s) def test_str_contains_changes(self): s = InstructionSuggestion( - instruction="inst", - reasoning="reason", - changes="Added docstring mandate.", + instruction="inst", reasoning="reason", changes="Added docstring mandate." ) assert "Added docstring mandate." in str(s) def test_fields_accessible(self): - s = InstructionSuggestion( - instruction="inst", - reasoning="reason", - changes="changes", - ) + s = InstructionSuggestion(instruction="inst", reasoning="reason", changes="changes") assert s.instruction == "inst" assert s.reasoning == "reason" assert s.changes == "changes" @@ -85,22 +66,15 @@ class TestOptimizeInstruction: """Tests for optimize_instruction().""" async def test_returns_instruction_suggestion(self): - """optimize_instruction returns an InstructionSuggestion.""" agent_class = _make_agent_mock( instruction="Always add Google-style docstrings.", reasoning="The original instruction omits documentation.", changes="Added docstring mandate.", ) - - # patch pydantic_ai.Agent in the module where it's imported - sys.modules["pydantic_ai"].Agent = agent_class # type: ignore[attr-defined] - - result = await optimize_instruction( - "Write Python code.", - _make_result(), - "Agent should add docstrings.", - ) - + with patch(_AGENT_PATCH, agent_class): + result = await optimize_instruction( + "Write Python code.", _make_result(), "Agent should add docstrings." + ) assert isinstance(result, InstructionSuggestion) assert result.instruction == "Always add Google-style docstrings." assert result.reasoning == "The original instruction omits documentation." 
@@ -109,127 +83,139 @@ async def test_returns_instruction_suggestion(self): async def test_uses_default_model(self): """optimize_instruction defaults to openai:gpt-4o-mini.""" agent_class = _make_agent_mock("inst", "reason", "changes") - sys.modules["pydantic_ai"].Agent = agent_class # type: ignore[attr-defined] - - await optimize_instruction("inst", _make_result(), "criterion") - - agent_class.assert_called_once() + with patch(_AGENT_PATCH, agent_class): + await optimize_instruction("inst", _make_result(), "criterion") assert agent_class.call_args[0][0] == "openai:gpt-4o-mini" - async def test_accepts_custom_model(self): + async def test_accepts_custom_model_string(self): """optimize_instruction accepts a custom model string.""" agent_class = _make_agent_mock("inst", "reason", "changes") - sys.modules["pydantic_ai"].Agent = agent_class # type: ignore[attr-defined] - - await optimize_instruction( - "inst", - _make_result(), - "criterion", - model="anthropic:claude-3-haiku-20240307", - ) - + with patch(_AGENT_PATCH, agent_class): + await optimize_instruction( + "inst", + _make_result(), + "criterion", + model="anthropic:claude-3-haiku-20240307", + ) assert agent_class.call_args[0][0] == "anthropic:claude-3-haiku-20240307" + async def test_accepts_model_object(self): + """optimize_instruction accepts a pre-built Model object (e.g. azure_entra_model()).""" + agent_class = _make_agent_mock("inst", "reason", "changes") + fake_model = MagicMock() + with patch(_AGENT_PATCH, agent_class): + await optimize_instruction("inst", _make_result(), "criterion", model=fake_model) + assert agent_class.call_args[0][0] is fake_model + async def test_includes_criterion_in_prompt(self): - """The LLM prompt includes the criterion text.""" agent_class = _make_agent_mock("improved", "reason", "change") agent_instance = agent_class.return_value - sys.modules["pydantic_ai"].Agent = agent_class # type: ignore[attr-defined] - - await optimize_instruction( - "Write code.", - _make_result(), - "Agent must use type hints on all functions.", - ) - - prompt = agent_instance.run.call_args[0][0] - assert "type hints" in prompt + with patch(_AGENT_PATCH, agent_class): + await optimize_instruction( + "Write code.", _make_result(), "Agent must use type hints on all functions." 
+ ) + assert "type hints" in agent_instance.run.call_args[0][0] async def test_includes_current_instruction_in_prompt(self): - """The LLM prompt contains the current instruction.""" agent_class = _make_agent_mock("inst", "reason", "changes") agent_instance = agent_class.return_value - sys.modules["pydantic_ai"].Agent = agent_class # type: ignore[attr-defined] - - await optimize_instruction( - "Always use FastAPI for web APIs.", - _make_result(), - "criterion", - ) - - prompt = agent_instance.run.call_args[0][0] - assert "FastAPI" in prompt + with patch(_AGENT_PATCH, agent_class): + await optimize_instruction( + "Always use FastAPI for web APIs.", _make_result(), "criterion" + ) + assert "FastAPI" in agent_instance.run.call_args[0][0] async def test_includes_agent_output_in_prompt(self): - """The LLM prompt contains the agent's final response.""" agent_class = _make_agent_mock("inst", "reason", "changes") agent_instance = agent_class.return_value - sys.modules["pydantic_ai"].Agent = agent_class # type: ignore[attr-defined] - - result = _make_result(final_response="def add(a, b): return a + b") - await optimize_instruction("inst", result, "criterion") - - prompt = agent_instance.run.call_args[0][0] - assert "def add" in prompt + with patch(_AGENT_PATCH, agent_class): + await optimize_instruction( + "inst", _make_result(final_response="def add(a, b): return a + b"), "criterion" + ) + assert "def add" in agent_instance.run.call_args[0][0] async def test_handles_no_final_response(self): - """optimize_instruction handles results with no turns gracefully.""" agent_class = _make_agent_mock("inst", "reason", "changes") - sys.modules["pydantic_ai"].Agent = agent_class # type: ignore[attr-defined] - - empty_result = CopilotResult(success=False, turns=[]) - result = await optimize_instruction("inst", empty_result, "criterion") - + with patch(_AGENT_PATCH, agent_class): + result = await optimize_instruction( + "inst", CopilotResult(success=False, turns=[]), "criterion" + ) assert isinstance(result, InstructionSuggestion) async def test_handles_empty_instruction(self): - """optimize_instruction handles empty current instruction.""" agent_class = _make_agent_mock("new inst", "reason", "changes") - sys.modules["pydantic_ai"].Agent = agent_class # type: ignore[attr-defined] - - result = await optimize_instruction("", _make_result(), "criterion") + with patch(_AGENT_PATCH, agent_class): + result = await optimize_instruction("", _make_result(), "criterion") assert isinstance(result, InstructionSuggestion) async def test_includes_tool_calls_in_prompt(self): - """The LLM prompt includes tool call information.""" agent_class = _make_agent_mock("inst", "reason", "changes") agent_instance = agent_class.return_value - sys.modules["pydantic_ai"].Agent = agent_class # type: ignore[attr-defined] - - result = _make_result(tools=["create_file", "read_file"]) - await optimize_instruction("inst", result, "criterion") - - prompt = agent_instance.run.call_args[0][0] - assert "create_file" in prompt - - -class TestOptimizeInstructionImportError: - """Test ImportError when pydantic-ai is not installed.""" - - async def test_raises_import_error_when_pydantic_ai_missing(self): - """optimize_instruction raises ImportError if pydantic-ai not installed.""" - saved = sys.modules.get("pydantic_ai") - try: - sys.modules["pydantic_ai"] = None # type: ignore - - with pytest.raises(ImportError, match="pydantic-ai"): - await optimize_instruction("inst", _make_result(), "criterion") - finally: - if saved is not None: - 
sys.modules["pydantic_ai"] = saved - else: - del sys.modules["pydantic_ai"] - - async def test_import_error_includes_install_hint(self): - """ImportError message includes the uv add install hint.""" - saved = sys.modules.get("pydantic_ai") - try: - sys.modules["pydantic_ai"] = None # type: ignore - - with pytest.raises(ImportError, match="uv add pydantic-ai"): - await optimize_instruction("inst", _make_result(), "criterion") - finally: - if saved is not None: - sys.modules["pydantic_ai"] = saved - else: - del sys.modules["pydantic_ai"] + with patch(_AGENT_PATCH, agent_class): + await optimize_instruction( + "inst", _make_result(tools=["create_file", "read_file"]), "criterion" + ) + assert "create_file" in agent_instance.run.call_args[0][0] + + +class TestAzureEntraModel: + """Tests for azure_entra_model().""" + + # Patch targets: lazy imports inside the function body live in their home modules + _PATCHES = [ + ("azure.identity.DefaultAzureCredential", MagicMock()), + ("azure.identity.get_bearer_token_provider", MagicMock()), + ("openai.AsyncAzureOpenAI", MagicMock()), + ("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()), + ] + + def test_returns_model_object(self): + """azure_entra_model() returns a pydantic-ai Model-compatible object.""" + from pytest_codingagents.copilot.optimizer import azure_entra_model + + fake_model = MagicMock() + with ( + patch("azure.identity.DefaultAzureCredential", MagicMock()), + patch("azure.identity.get_bearer_token_provider", MagicMock()), + patch("openai.AsyncAzureOpenAI", MagicMock()), + patch("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()), + patch("pydantic_ai.models.openai.OpenAIChatModel", return_value=fake_model), + ): + result = azure_entra_model(endpoint="https://test.openai.azure.com/") + assert result is fake_model + + def test_default_deployment_is_gpt52(self): + """azure_entra_model() defaults to gpt-5.2-chat.""" + from pytest_codingagents.copilot.optimizer import azure_entra_model + + captured: list[str] = [] + with ( + patch("azure.identity.DefaultAzureCredential", MagicMock()), + patch("azure.identity.get_bearer_token_provider", MagicMock()), + patch("openai.AsyncAzureOpenAI", MagicMock()), + patch("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()), + patch( + "pydantic_ai.models.openai.OpenAIChatModel", + side_effect=lambda name, **kw: captured.append(name) or MagicMock(), + ), + ): + azure_entra_model(endpoint="https://test.openai.azure.com/") + assert captured == ["gpt-5.2-chat"] + + def test_custom_deployment_name(self): + """azure_entra_model() uses the provided deployment name.""" + from pytest_codingagents.copilot.optimizer import azure_entra_model + + captured: list[str] = [] + with ( + patch("azure.identity.DefaultAzureCredential", MagicMock()), + patch("azure.identity.get_bearer_token_provider", MagicMock()), + patch("openai.AsyncAzureOpenAI", MagicMock()), + patch("pydantic_ai.providers.openai.OpenAIProvider", MagicMock()), + patch( + "pydantic_ai.models.openai.OpenAIChatModel", + side_effect=lambda name, **kw: captured.append(name) or MagicMock(), + ), + ): + azure_entra_model("gpt-4.1", endpoint="https://test.openai.azure.com/") + assert captured == ["gpt-4.1"] From febc007cb12468ec5f0a2d79764d056410b2ed87 Mon Sep 17 00:00:00 2001 From: Stefan Broenner Date: Thu, 19 Feb 2026 15:46:11 +0100 Subject: [PATCH 2/3] feat: add IDE personas for VS Code, Claude Code, and Copilot CLI Introduces a Persona concept that simulates each IDE's native tool environment during testing: - VSCodePersona (default): 
polyfills runSubagent + auto-loads .github/copilot-instructions.md from working_directory - CopilotCLIPersona: task+skill already native; auto-loads .github/copilot-instructions.md - ClaudeCodePersona: polyfills task-dispatch + auto-loads CLAUDE.md - HeadlessPersona: raw SDK baseline, no polyfills, no file loading Also: - from_copilot_config() now discovers agents recursively (rglob) so agents under subagents/ subdirectories are found automatically - EventMapper gains record_subagent_start/complete/failed public methods - runSubagent injection moved from runner.py into personas.py Bumps version to 0.2.1. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 2 +- docs/how-to/copilot-config.md | 5 +- docs/how-to/ide-personas.md | 108 +++++ docs/how-to/index.md | 1 + docs/reference/api.md | 22 + docs/reference/configuration.md | 1 + pyproject.toml | 2 +- src/pytest_codingagents/__init__.py | 12 + src/pytest_codingagents/copilot/agent.py | 26 +- src/pytest_codingagents/copilot/events.py | 25 ++ src/pytest_codingagents/copilot/personas.py | 440 ++++++++++++++++++++ src/pytest_codingagents/copilot/runner.py | 6 +- 12 files changed, 643 insertions(+), 7 deletions(-) create mode 100644 docs/how-to/ide-personas.md create mode 100644 src/pytest_codingagents/copilot/personas.py diff --git a/README.md b/README.md index fdff77f..c6572d2 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ pytest-codingagents gives you a complete **test→optimize→test loop** for Git 4. **A/B confirm** — use `ab_run` to prove the change actually helps 5. **Ship it** — you now have evidence, not vibes -Currently supports **GitHub Copilot** via [copilot-sdk](https://www.npmjs.com/package/github-copilot-sdk). More agents (Claude Code, etc.) coming soon. +Currently supports **GitHub Copilot** via [copilot-sdk](https://www.npmjs.com/package/github-copilot-sdk) with **IDE personas** for VS Code, Claude Code, and Copilot CLI environments. ```python from pytest_codingagents import CopilotAgent, optimize_instruction diff --git a/docs/how-to/copilot-config.md b/docs/how-to/copilot-config.md index 0ed74ae..52f9d5f 100644 --- a/docs/how-to/copilot-config.md +++ b/docs/how-to/copilot-config.md @@ -9,7 +9,9 @@ test fixture project, a shared team config repo, or anything else. | Source | Path (relative to the root you point at) | Maps to | |--------|------------------------------------------|---------| | Instructions | `.github/copilot-instructions.md` | `instructions` | -| Custom agents | `.github/agents/*.agent.md` | `custom_agents` | +| Custom agents | `.github/agents/**/*.agent.md` (recursive) | `custom_agents` | + +Agent files are discovered recursively — agents in `subagents/` subdirectories (e.g. `.github/agents/hve-core/subagents/`) are included automatically. ## Basic usage @@ -102,6 +104,7 @@ The Markdown body becomes the agent's prompt. 
## See also +- [IDE Personas Guide](ide-personas.md) — Simulate VS Code, Claude Code, or Copilot CLI environments - [A/B Testing Guide](ab-testing.md) - [GitHub Copilot custom agents docs](https://docs.github.com/en/copilot/how-tos/copilot-cli/customize-copilot/create-custom-agents-for-cli) - [Custom agents configuration reference](https://docs.github.com/en/copilot/reference/custom-agents-configuration) diff --git a/docs/how-to/ide-personas.md b/docs/how-to/ide-personas.md new file mode 100644 index 0000000..6185bef --- /dev/null +++ b/docs/how-to/ide-personas.md @@ -0,0 +1,108 @@ +# IDE Personas + +Agents written for VS Code, Claude Code, or the Copilot CLI each expect a +different native tool set. A `Persona` tells `pytest-codingagents` which +runtime environment to simulate so your tests run the agent the same way +the IDE would. + +## The problem + +An agent like `rpi-agent` is written for VS Code, where `runSubagent` is a +native tool. In the Copilot SDK headless mode `runSubagent` does not exist, +so the agent silently falls back to direct implementation — the RPI pipeline +never fires, and the test proves nothing. + +A persona solves this by: + +1. **Injecting polyfill tools** — e.g. a Python-side `runSubagent` that + dispatches registered custom agents as nested SDK runs. +2. **Auto-loading custom instructions** — VS Code and Copilot CLI read + `.github/copilot-instructions.md`; Claude Code reads `CLAUDE.md`. The + persona does the same, prepending the file to the session's system + message when `working_directory` is set. +3. **Setting IDE context** — adds a system-message fragment so the model + knows which environment it is in. + +## Built-in personas + +| Persona | Auto-loaded file | Polyfilled tools | Use for | +|---|---|---|---| +| `VSCodePersona` *(default)* | `.github/copilot-instructions.md` | `runSubagent` | VS Code Copilot agents | +| `CopilotCLIPersona` | `.github/copilot-instructions.md` | none — `task` + `skill` are native | Copilot terminal agents | +| `ClaudeCodePersona` | `CLAUDE.md` | `task`-dispatch | Claude Code agents | +| `HeadlessPersona` | nothing | none | Raw SDK baseline | + +## Usage + +```python +from pytest_codingagents import CopilotAgent, VSCodePersona, CopilotCLIPersona, ClaudeCodePersona, HeadlessPersona + +# VS Code agent — auto-loads .github/copilot-instructions.md, polyfills runSubagent +agent = CopilotAgent( + persona=VSCodePersona(), + working_directory=str(workspace), + custom_agents=my_agents, +) + +# Default — VSCodePersona is used automatically +agent = CopilotAgent(custom_agents=my_agents) + +# Copilot CLI — same instructions file; task+skill already native, no polyfill needed +agent = CopilotAgent(persona=CopilotCLIPersona(), working_directory=str(workspace)) + +# Claude Code — loads CLAUDE.md, polyfills task-dispatch +agent = CopilotAgent( + persona=ClaudeCodePersona(), + working_directory=str(workspace), + custom_agents=my_agents, +) + +# Headless baseline — no IDE context, no file loaded, no polyfills +agent = CopilotAgent(persona=HeadlessPersona()) +``` + +## Custom instructions loading + +Custom instruction loading is **automatic and additive**: + +- Fires only when `agent.working_directory` is set +- Fires only when the target file exists in that directory +- Prepends the file content to the session system message (before any + `instructions` you set on the agent) +- If the file is absent, the persona works exactly as without it + +This means the same test works against a workspace that has +`.github/copilot-instructions.md` and 
one that does not — the persona +adapts silently. + +## `runSubagent` polyfill + +`VSCodePersona` injects `runSubagent` as a Python-side tool when +`agent.custom_agents` is non-empty. The tool dispatches the named agent +as a nested `run_copilot` call, so the model's sub-agent invocations +produce real results — not stub responses. + +The polyfill is a no-op when `custom_agents` is empty. + +## Extending personas + +Subclass `Persona` and override `apply()`: + +```python +from pytest_codingagents import Persona, CopilotAgent + +class MyPersona(Persona): + def apply(self, agent, session_config, mapper): + # Add your tool polyfills or system message additions here + session_config.setdefault("system_message", {})["content"] = ( + "Custom context. " + + session_config.get("system_message", {}).get("content", "") + ) + +agent = CopilotAgent(persona=MyPersona()) +``` + +## See also + +- [Load from Copilot Config](copilot-config.md) +- [Tool Control](tool-control.md) diff --git a/docs/how-to/index.md b/docs/how-to/index.md index b1840ea..2ad9ce9 100644 --- a/docs/how-to/index.md +++ b/docs/how-to/index.md @@ -6,6 +6,7 @@ Practical guides for common tasks. - [Optimize Instructions](optimize.md) — Use AI to turn test failures into actionable instruction improvements - [Assertions](assertions.md) — File helpers and semantic assertions with `llm_assert` - [Load from Copilot Config](copilot-config.md) — Build a `CopilotAgent` from your real `.github/` config files +- [IDE Personas](ide-personas.md) — Simulate VS Code, Claude Code, or Copilot CLI tool environments - [Skill Testing](skills.md) — Measure the impact of domain knowledge - [MCP Server Testing](mcp-servers.md) — Test that the agent uses your custom tools - [CLI Tool Testing](cli-tools.md) — Verify the agent operates CLI tools correctly diff --git a/docs/reference/api.md b/docs/reference/api.md index 8f16a53..8e1f381 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -15,3 +15,25 @@ ::: pytest_codingagents.InstructionSuggestion options: show_source: false + +## IDE Personas + +::: pytest_codingagents.Persona + options: + show_source: false + +::: pytest_codingagents.VSCodePersona + options: + show_source: false + +::: pytest_codingagents.CopilotCLIPersona + options: + show_source: false + +::: pytest_codingagents.ClaudeCodePersona + options: + show_source: false + +::: pytest_codingagents.HeadlessPersona + options: + show_source: false diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index d103819..32f8f2f 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -4,6 +4,7 @@ | Field | Type | Default | Description | |-------|------|---------|-------------| +| `persona` | `Persona` | `VSCodePersona()` | IDE runtime persona — controls polyfill tools and auto-loads IDE-specific custom instructions. 
See [IDE Personas](../how-to/ide-personas.md) | | `name` | `str` | `"copilot"` | Agent identifier for reports | | `model` | `str \| None` | `None` | Model to use (e.g., `claude-sonnet-4`) | | `instructions` | `str \| None` | `None` | Instructions for the agent | diff --git a/pyproject.toml b/pyproject.toml index d7f4de9..2e53803 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "pytest-codingagents" -version = "0.2.0" +version = "0.2.1" description = "Pytest plugin for testing real coding agents via their SDK" readme = "README.md" license = { text = "MIT" } diff --git a/src/pytest_codingagents/__init__.py b/src/pytest_codingagents/__init__.py index 182169f..828c111 100644 --- a/src/pytest_codingagents/__init__.py +++ b/src/pytest_codingagents/__init__.py @@ -8,12 +8,24 @@ InstructionSuggestion, optimize_instruction, ) +from pytest_codingagents.copilot.personas import ( + ClaudeCodePersona, + CopilotCLIPersona, + HeadlessPersona, + Persona, + VSCodePersona, +) from pytest_codingagents.copilot.result import CopilotResult __all__ = [ "CopilotAgent", "CopilotResult", "InstructionSuggestion", + "ClaudeCodePersona", + "CopilotCLIPersona", + "HeadlessPersona", + "Persona", + "VSCodePersona", "load_custom_agent", "load_custom_agents", "optimize_instruction", diff --git a/src/pytest_codingagents/copilot/agent.py b/src/pytest_codingagents/copilot/agent.py index 9521c24..306c8c9 100644 --- a/src/pytest_codingagents/copilot/agent.py +++ b/src/pytest_codingagents/copilot/agent.py @@ -4,10 +4,13 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import yaml +if TYPE_CHECKING: + from pytest_codingagents.copilot.personas import Persona + def _parse_agent_file(path: Path) -> dict[str, Any]: """Parse a ``.agent.md`` file into a ``CustomAgentConfig`` dict. @@ -130,6 +133,12 @@ class CopilotAgent: # SDK passthrough for unmapped fields extra_config: dict[str, Any] = field(default_factory=dict) + # IDE persona — controls which polyfill tools are injected to simulate + # the target runtime environment (VS Code, Claude Code, Copilot CLI, etc.) + # VSCodePersona is the default: it polyfills runSubagent when custom_agents + # are present, matching VS Code's native behaviour. + persona: "Persona" = field(default_factory=lambda: _default_persona()) + def build_session_config(self) -> dict[str, Any]: """Build a SessionConfig dict for the Copilot SDK. @@ -243,11 +252,11 @@ def from_copilot_config( if instructions_file.exists(): instructions = instructions_file.read_text(encoding="utf-8").strip() or None - # Load custom agents + # Load custom agents — recursive so subagents/ subdirectories are included agents: list[dict[str, Any]] = [] agents_dir = github_dir / "agents" if agents_dir.exists(): - for agent_file in sorted(agents_dir.glob("*.agent.md")): + for agent_file in sorted(agents_dir.rglob("*.agent.md")): agents.append(_parse_agent_file(agent_file)) config: dict[str, Any] = { @@ -256,3 +265,14 @@ def from_copilot_config( } config.update(overrides) return cls(**config) + + +def _default_persona() -> "Persona": + """Return the default persona (VSCodePersona). + + Defined as a function to avoid a circular-import at module level: + ``personas.py`` imports ``agent.py``, so we defer the import. 
+ """ + from pytest_codingagents.copilot.personas import VSCodePersona # noqa: PLC0415 + + return VSCodePersona() diff --git a/src/pytest_codingagents/copilot/events.py b/src/pytest_codingagents/copilot/events.py index 9298c52..514d381 100644 --- a/src/pytest_codingagents/copilot/events.py +++ b/src/pytest_codingagents/copilot/events.py @@ -274,6 +274,31 @@ def _handle_tool_execution_complete(self, event: SessionEvent) -> None: result_text = tc.result if tc else str(result_data) self._turns.append(Turn(role="tool", content=f"[{tool_name}] {result_text or ''}")) + # ── Subagent recording (used by runSubagent tool handler) ── + + def record_subagent_start(self, name: str) -> None: + """Record a subagent invocation dispatched via the runSubagent tool.""" + self._subagent_start_times[name] = time.monotonic() + self._subagents.append(SubagentInvocation(name=name, status="started")) + + def record_subagent_complete(self, name: str) -> None: + """Mark a previously started subagent invocation as completed.""" + start = self._subagent_start_times.pop(name, None) + duration = (time.monotonic() - start) * 1000 if start else None + for sa in self._subagents: + if sa.name == name and sa.status == "started": + sa.status = "completed" + sa.duration_ms = duration + return + + def record_subagent_failed(self, name: str) -> None: + """Mark a previously started subagent invocation as failed.""" + self._subagent_start_times.pop(name, None) + for sa in self._subagents: + if sa.name == name and sa.status == "started": + sa.status = "failed" + return + # ── Subagent events ── def _handle_subagent_selected(self, event: SessionEvent) -> None: diff --git a/src/pytest_codingagents/copilot/personas.py b/src/pytest_codingagents/copilot/personas.py new file mode 100644 index 0000000..501142c --- /dev/null +++ b/src/pytest_codingagents/copilot/personas.py @@ -0,0 +1,440 @@ +"""IDE Personas for pytest-codingagents. + +A ``Persona`` defines the runtime environment in which an agent under test +is expected to run. Each persona ensures the agent has the correct tool set +for its target IDE by injecting polyfill tools and adding a system-message +fragment that sets context. + +Built-in personas +----------------- +``VSCodePersona`` (default) + Simulates the VS Code Copilot extension. Polyfills ``runSubagent`` so + that agents written for VS Code dispatch sub-agents correctly. + +``ClaudeCodePersona`` + Simulates Claude Code. Polyfills a ``task``-dispatch tool (same + mechanism as ``runSubagent``, named ``task`` to match Claude Code's + native API). + +``CopilotCLIPersona`` + Simulates the GitHub Copilot terminal agent. No polyfills are needed — + ``task`` and ``skill`` are already in the SDK's native 16-tool set. + Adds a system-message fragment so the model knows its environment. + +``HeadlessPersona`` + Raw SDK headless mode — no polyfills, no extra system message. Use + when you want to test exactly what the SDK exposes with no IDE context. 
+ +Usage:: + + from pytest_codingagents import CopilotAgent, VSCodePersona, ClaudeCodePersona + + # Explicit — recommended for clarity + agent = CopilotAgent(persona=VSCodePersona(), custom_agents=[...]) + + # Default — VSCodePersona is used automatically + agent = CopilotAgent(custom_agents=[...]) + + # Headless — no IDE context, no polyfills + agent = CopilotAgent(persona=HeadlessPersona()) +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from copilot.types import Tool, ToolInvocation, ToolResult + + from pytest_codingagents.copilot.agent import CopilotAgent + from pytest_codingagents.copilot.events import EventMapper + + +# --------------------------------------------------------------------------- +# Base class +# --------------------------------------------------------------------------- + + +class Persona: + """Base class for IDE runtime personas. + + Override ``apply()`` to inject polyfill tools and system-message + additions that match your target IDE's native tool set. + + The ``apply()`` method is called by the runner *after* + ``agent.build_session_config()`` and *before* the session is created, + so modifications to ``session_config`` take effect immediately. + + Phase-2 extension point: override ``create_client()`` to swap the + underlying SDK backend (e.g. Anthropic SDK for Claude Code). + """ + + def apply( + self, + agent: "CopilotAgent", + session_config: dict[str, Any], + mapper: "EventMapper", + ) -> None: + """Modify *session_config* in-place to match this persona's environment. + + Args: + agent: The ``CopilotAgent`` being executed (read-only). + session_config: The session config dict built from ``agent``. + Mutate this to inject tools, update system_message, etc. + mapper: The ``EventMapper`` for the current run. Pass to + tool handlers that need to record subagent events. + """ + + # ------------------------------------------------------------------ + # Phase-2 extension point (not yet used) + # ------------------------------------------------------------------ + + # async def create_client(self, agent: CopilotAgent) -> CopilotClient: + # """Override to swap the SDK backend for this persona.""" + # from copilot import CopilotClient + # return CopilotClient(...) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}()" + + +# --------------------------------------------------------------------------- +# Headless (raw SDK baseline) +# --------------------------------------------------------------------------- + + +class HeadlessPersona(Persona): + """Raw SDK headless mode — no polyfills, no IDE system message. + + Use this when you want to test exactly what the Copilot SDK exposes + with no runtime context added. This is the minimal baseline. + """ + + +# --------------------------------------------------------------------------- +# GitHub Copilot CLI +# --------------------------------------------------------------------------- + + +class CopilotCLIPersona(Persona): + """GitHub Copilot terminal agent persona. + + ``task`` and ``skill`` are already in the SDK's native 16-tool set, so + no polyfills are needed. This persona only adds a system-message + fragment so the model knows it is running inside the Copilot CLI and + can use ``task`` for sub-task dispatch. + """ + + _SYSTEM_MSG = "You are running inside GitHub Copilot CLI." 
+ _INSTRUCTIONS_FILE = Path(".github") / "copilot-instructions.md" + + def apply( + self, + agent: "CopilotAgent", + session_config: dict[str, Any], + mapper: "EventMapper", + ) -> None: + _prepend_system_message(session_config, self._SYSTEM_MSG) + if agent.working_directory: + custom = _load_custom_instructions_file( + Path(agent.working_directory) / self._INSTRUCTIONS_FILE + ) + if custom: + _prepend_system_message(session_config, custom) + + +# --------------------------------------------------------------------------- +# VS Code +# --------------------------------------------------------------------------- + + +class VSCodePersona(Persona): + """VS Code Copilot extension persona. + + Polyfills ``runSubagent`` so agents written for VS Code (where + ``runSubagent`` is a native tool) can dispatch custom sub-agents + correctly during testing. + + The polyfill is only injected when ``agent.custom_agents`` is non-empty, + so using this persona with a plain agent has no side-effects. + """ + + _SYSTEM_MSG = "You are running inside VS Code." + _INSTRUCTIONS_FILE = Path(".github") / "copilot-instructions.md" + + def apply( + self, + agent: "CopilotAgent", + session_config: dict[str, Any], + mapper: "EventMapper", + ) -> None: + _prepend_system_message(session_config, self._SYSTEM_MSG) + if agent.working_directory: + custom = _load_custom_instructions_file( + Path(agent.working_directory) / self._INSTRUCTIONS_FILE + ) + if custom: + _prepend_system_message(session_config, custom) + if agent.custom_agents: + tool = _make_runsubagent_tool(agent, agent.custom_agents, mapper) + _inject_tool(session_config, tool) + + +# --------------------------------------------------------------------------- +# Claude Code +# --------------------------------------------------------------------------- + + +class ClaudeCodePersona(Persona): + """Claude Code persona. + + Polyfills a ``task``-dispatch tool (same dispatch mechanism as + ``runSubagent``, named ``task`` to match Claude Code's native API) so + agents written for Claude Code can dispatch sub-agents during testing. + + The polyfill is only injected when ``agent.custom_agents`` is non-empty. + """ + + _SYSTEM_MSG = "You are running inside Claude Code." + _INSTRUCTIONS_FILE = Path("CLAUDE.md") + + def apply( + self, + agent: "CopilotAgent", + session_config: dict[str, Any], + mapper: "EventMapper", + ) -> None: + _prepend_system_message(session_config, self._SYSTEM_MSG) + if agent.working_directory: + custom = _load_custom_instructions_file( + Path(agent.working_directory) / self._INSTRUCTIONS_FILE + ) + if custom: + _prepend_system_message(session_config, custom) + if agent.custom_agents: + tool = _make_task_tool(agent, agent.custom_agents, mapper) + _inject_tool(session_config, tool) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _load_custom_instructions_file(file_path: Path) -> str | None: + """Read a custom instructions file and return its content, or None if absent.""" + if file_path.exists(): + content = file_path.read_text(encoding="utf-8").strip() + return content or None + return None + + +def _prepend_system_message(session_config: dict[str, Any], message: str) -> None: + """Prepend *message* to the system_message in *session_config*. + + If no system_message is set, creates one in "append" mode so it is + added to the CLI's built-in system message rather than replacing it. 
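+
+    Example of the combining behaviour (the new message goes first and the
+    existing mode is kept)::
+
+        cfg = {"system_message": {"mode": "append", "content": "Existing."}}
+        _prepend_system_message(cfg, "New context.")
+        # content is now "New context.", a blank line, then "Existing."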
+ """ + existing = session_config.get("system_message") or {} + existing_content: str = existing.get("content") or "" + mode: str = existing.get("mode") or "append" + combined = f"{message}\n\n{existing_content}".strip() + session_config["system_message"] = {"mode": mode, "content": combined} + + +def _inject_tool(session_config: dict[str, Any], tool: "Tool") -> None: + """Append *tool* to the tools list in *session_config*.""" + existing: list[Any] = list(session_config.get("tools") or []) + session_config["tools"] = existing + [tool] + + +def _make_runsubagent_tool( + parent_agent: "CopilotAgent", + custom_agents: list[dict[str, Any]], + mapper: "EventMapper", +) -> "Tool": + """Build a ``runSubagent`` polyfill tool for the VS Code persona. + + The Copilot CLI does not natively expose ``runSubagent`` in SDK headless + mode. This factory creates a Python-side ``Tool`` that dispatches + registered custom agents as nested ``run_copilot`` calls. + """ + from copilot.types import Tool, ToolResult + + from pytest_codingagents.copilot.agent import CopilotAgent as _CopilotAgent + from pytest_codingagents.copilot.runner import run_copilot + + agent_map: dict[str, dict[str, Any]] = {a["name"]: a for a in custom_agents} + + async def _handler(invocation: "ToolInvocation") -> "ToolResult": + args: dict[str, Any] = invocation.get("arguments") or {} # type: ignore[assignment] + + agent_name: str | None = ( + args.get("agent_name") or args.get("agent") or args.get("agentName") + ) + prompt_text: str = args.get("prompt") or args.get("message") or args.get("task") or "" + + if not agent_name: + available = sorted(agent_map) + return ToolResult( + textResultForLlm=(f"Error: agent_name is required. Available agents: {available}"), + resultType="failure", + ) + + agent_cfg = agent_map.get(agent_name) + if agent_cfg is None: + available = sorted(agent_map) + return ToolResult( + textResultForLlm=(f"Error: agent '{agent_name}' not found. Available: {available}"), + resultType="failure", + ) + + mapper.record_subagent_start(agent_name) + + sub_agent = _CopilotAgent( + name=agent_name, + model=parent_agent.model, + instructions=agent_cfg.get("prompt"), + working_directory=parent_agent.working_directory, + timeout_s=min(parent_agent.timeout_s, 600.0), + max_turns=min(parent_agent.max_turns, 30), + auto_confirm=True, + ) + + sub_result = await run_copilot(sub_agent, prompt_text) + + if sub_result.success: + mapper.record_subagent_complete(agent_name) + return ToolResult( + textResultForLlm=sub_result.final_response or "Sub-agent completed.", + resultType="success", + ) + + mapper.record_subagent_failed(agent_name) + return ToolResult( + textResultForLlm=f"Sub-agent '{agent_name}' failed: {sub_result.error}", + resultType="failure", + ) + + return Tool( + name="runSubagent", + description=( + "Dispatch a named custom agent to perform a task. " + "The agent runs with its own instructions and returns its " + "final response. " + f"Available agents: {sorted(agent_map)}" + ), + handler=_handler, + parameters={ + "type": "object", + "properties": { + "agent_name": { + "type": "string", + "description": "Name of the agent to dispatch.", + "enum": sorted(agent_map), + }, + "prompt": { + "type": "string", + "description": "Task or message to send to the agent.", + }, + }, + "required": ["agent_name", "prompt"], + }, + ) + + +def _make_task_tool( + parent_agent: "CopilotAgent", + custom_agents: list[dict[str, Any]], + mapper: "EventMapper", +) -> "Tool": + """Build a ``task`` polyfill tool for the Claude Code persona. 
+ + Identical dispatch mechanism to ``_make_runsubagent_tool`` but named + ``task`` to match Claude Code's native sub-agent dispatch API. + """ + from copilot.types import Tool, ToolResult + + from pytest_codingagents.copilot.agent import CopilotAgent as _CopilotAgent + from pytest_codingagents.copilot.runner import run_copilot + + agent_map: dict[str, dict[str, Any]] = {a["name"]: a for a in custom_agents} + + async def _handler(invocation: "ToolInvocation") -> "ToolResult": + args: dict[str, Any] = invocation.get("arguments") or {} # type: ignore[assignment] + + agent_name: str | None = ( + args.get("agent_name") or args.get("agent") or args.get("agentName") + ) + prompt_text: str = ( + args.get("prompt") or args.get("message") or args.get("description") or "" + ) + + if not agent_name: + available = sorted(agent_map) + return ToolResult( + textResultForLlm=(f"Error: agent_name is required. Available agents: {available}"), + resultType="failure", + ) + + agent_cfg = agent_map.get(agent_name) + if agent_cfg is None: + available = sorted(agent_map) + return ToolResult( + textResultForLlm=(f"Error: agent '{agent_name}' not found. Available: {available}"), + resultType="failure", + ) + + mapper.record_subagent_start(agent_name) + + sub_agent = _CopilotAgent( + name=agent_name, + model=parent_agent.model, + instructions=agent_cfg.get("prompt"), + working_directory=parent_agent.working_directory, + timeout_s=min(parent_agent.timeout_s, 600.0), + max_turns=min(parent_agent.max_turns, 30), + auto_confirm=True, + ) + + sub_result = await run_copilot(sub_agent, prompt_text) + + if sub_result.success: + mapper.record_subagent_complete(agent_name) + return ToolResult( + textResultForLlm=sub_result.final_response or "Sub-agent completed.", + resultType="success", + ) + + mapper.record_subagent_failed(agent_name) + return ToolResult( + textResultForLlm=f"Sub-agent '{agent_name}' failed: {sub_result.error}", + resultType="failure", + ) + + return Tool( + name="task", + description=( + "Dispatch a named agent to perform a task. " + "The agent runs with its own instructions and returns its " + "final response. " + f"Available agents: {sorted(agent_map)}" + ), + handler=_handler, + parameters={ + "type": "object", + "properties": { + "agent_name": { + "type": "string", + "description": "Name of the agent to dispatch.", + "enum": sorted(agent_map), + }, + "prompt": { + "type": "string", + "description": "Task or message to send to the agent.", + }, + }, + "required": ["agent_name", "prompt"], + }, + ) diff --git a/src/pytest_codingagents/copilot/runner.py b/src/pytest_codingagents/copilot/runner.py index 813205a..6eafe99 100644 --- a/src/pytest_codingagents/copilot/runner.py +++ b/src/pytest_codingagents/copilot/runner.py @@ -100,7 +100,7 @@ def _is_transient_error(error: str | None) -> bool: return any(pattern in error for pattern in _TRANSIENT_PATTERNS) -async def _run_copilot_once(agent: CopilotAgent, prompt: str) -> CopilotResult: +async def _run_copilot_once(agent: "CopilotAgent", prompt: str) -> "CopilotResult": """Execute a single attempt of a prompt against GitHub Copilot.""" client_options: dict[str, Any] = { "cwd": agent.working_directory or ".", @@ -127,6 +127,10 @@ async def _run_copilot_once(agent: CopilotAgent, prompt: str) -> CopilotResult: # Build session config from agent session_config = agent.build_session_config() + # Apply the persona: injects polyfill tools and system-message + # additions that match the target IDE environment. 
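+    # With the default VSCodePersona this prepends the IDE context fragment,
+    # loads .github/copilot-instructions.md from working_directory when present,
+    # and injects the runSubagent polyfill if custom_agents are configured.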
+ agent.persona.apply(agent, session_config, mapper) + # Install permission handler if auto_confirm is enabled if agent.auto_confirm: session_config["on_permission_request"] = _auto_approve_handler From 150aa968d9da210207e9af807b3edfbdcd7c3463 Mon Sep 17 00:00:00 2001 From: Stefan Broenner Date: Thu, 19 Feb 2026 18:46:59 +0100 Subject: [PATCH 3/3] feat: inject XML block into system prompt to mirror VS Code orchestrator behavior When an agent has custom_agents configured, VSCodePersona and ClaudeCodePersona now inject an XML block listing available subagents by name/description. This mirrors the behavior of computeAutomaticInstructions.ts in microsoft/vscode, which guides the model to use runSubagent/task for delegation rather than implementing directly. Bump version to 0.2.2. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyproject.toml | 2 +- src/pytest_codingagents/copilot/personas.py | 42 +++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2e53803..4296774 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "pytest-codingagents" -version = "0.2.1" +version = "0.2.2" description = "Pytest plugin for testing real coding agents via their SDK" readme = "README.md" license = { text = "MIT" } diff --git a/src/pytest_codingagents/copilot/personas.py b/src/pytest_codingagents/copilot/personas.py index 501142c..92632e2 100644 --- a/src/pytest_codingagents/copilot/personas.py +++ b/src/pytest_codingagents/copilot/personas.py @@ -179,6 +179,8 @@ def apply( if agent.custom_agents: tool = _make_runsubagent_tool(agent, agent.custom_agents, mapper) _inject_tool(session_config, tool) + agents_block = _build_agents_block(agent.custom_agents, tool_name="runSubagent") + _prepend_system_message(session_config, agents_block) # --------------------------------------------------------------------------- @@ -215,6 +217,8 @@ def apply( if agent.custom_agents: tool = _make_task_tool(agent, agent.custom_agents, mapper) _inject_tool(session_config, tool) + agents_block = _build_agents_block(agent.custom_agents, tool_name="task") + _prepend_system_message(session_config, agents_block) # --------------------------------------------------------------------------- @@ -249,6 +253,44 @@ def _inject_tool(session_config: dict[str, Any], tool: "Tool") -> None: session_config["tools"] = existing + [tool] +def _build_agents_block(custom_agents: list[dict[str, Any]], tool_name: str = "runSubagent") -> str: + """Build the XML block that VS Code injects into the system prompt. + + Mirrors ``computeAutomaticInstructions.ts`` in ``microsoft/vscode``: + lists available subagents by name and description so the model knows + which agents to dispatch and how to call them. + + Args: + custom_agents: List of custom agent config dicts (each with at least + a ``name`` key, optionally ``description`` and ``argument_hint``). + tool_name: Name of the dispatch tool (``runSubagent`` for VS Code, + ``task`` for Claude Code). + + Returns: + The ```` XML string to prepend to the system message. + """ + lines: list[str] = [ + "", + "Here is a list of agents that can be used when running a subagent.", + ( + "Each agent has optionally a description with the agent's purpose " + "and expertise. When asked to run a subagent, choose the most " + "appropriate agent from this list." 
+ ), + f"Use the {tool_name} tool with the agent name to run the subagent.", + ] + for a in custom_agents: + lines.append("") + lines.append(f"{a['name']}") + if desc := a.get("description"): + lines.append(f"{desc}") + if hint := a.get("argument_hint") or a.get("argumentHint"): + lines.append(f"{hint}") + lines.append("") + lines.append("") + return "\n".join(lines) + + def _make_runsubagent_tool( parent_agent: "CopilotAgent", custom_agents: list[dict[str, Any]],
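To make the third patch concrete, here is a minimal sketch of rendering the injected block for one hypothetical agent entry. The preamble sentences and the name/description/argument-hint handling come from `_build_agents_block()` itself; the literal wrapper tag names shown in the expected output are assumptions, since the patch only states that an XML block mirroring VS Code's `computeAutomaticInstructions.ts` is produced.

```python
# Sketch only: _build_agents_block is a private helper added by this patch series.
# The agent entry below is hypothetical; wrapper tag names in the comment are assumed.
from pytest_codingagents.copilot.personas import _build_agents_block

block = _build_agents_block(
    [{"name": "rpi-agent", "description": "Plans and implements changes via the RPI pipeline."}],
    tool_name="runSubagent",
)
print(block)
# Intended shape, roughly:
#   <agents>
#   Here is a list of agents that can be used when running a subagent.
#   Each agent has optionally a description ... choose the most appropriate agent from this list.
#   Use the runSubagent tool with the agent name to run the subagent.
#   <agent>
#   <name>rpi-agent</name>
#   <description>Plans and implements changes via the RPI pipeline.</description>
#   </agent>
#   </agents>
```

VSCodePersona prepends this block to the session's system message (alongside the IDE context fragment and any `.github/copilot-instructions.md` content), while ClaudeCodePersona emits the same block with `tool_name="task"`.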