From a0275feaee194cba79484fa731a71f0354e3c790 Mon Sep 17 00:00:00 2001
From: arcticfly <davidlcorbitt@gmail.com>
Date: Tue, 26 Aug 2025 12:35:47 -0700
Subject: [PATCH 1/5] Add art.mcp package

---
 examples/mcp-rl/test_scenario_generation.py | 364 ++++++++++++++++++++
 examples/mcp-rl/uv.lock                     |  35 +-
 src/art/mcp/__init__.py                     |  19 +
 src/art/mcp/default_tools.py                |  16 +
 src/art/mcp/generate_scenarios.py           | 213 ++++++++++++
 src/art/mcp/types.py                        | 201 +++++++++++
 src/art/utils/logging.py                    |  44 +++
 7 files changed, 880 insertions(+), 12 deletions(-)
 create mode 100644 examples/mcp-rl/test_scenario_generation.py
 create mode 100644 src/art/mcp/__init__.py
 create mode 100644 src/art/mcp/default_tools.py
 create mode 100644 src/art/mcp/generate_scenarios.py
 create mode 100644 src/art/mcp/types.py
 create mode 100644 src/art/utils/logging.py

diff --git a/examples/mcp-rl/test_scenario_generation.py b/examples/mcp-rl/test_scenario_generation.py
new file mode 100644
index 000000000..40b826ceb
--- /dev/null
+++ b/examples/mcp-rl/test_scenario_generation.py
@@ -0,0 +1,364 @@
+#!/usr/bin/env python3
+"""Test scenario generation functionality."""
+
+import asyncio
+import os
+from typing import List
+
+from dotenv import load_dotenv
+
+from art.mcp import MCPResource, MCPTool, generate_scenarios
+
+load_dotenv()
+
+
+def create_sample_tools() -> List[MCPTool]:
+    """Build the four sample MCPTool definitions shared by the tests below."""
+    return [
+        MCPTool(
+            name="search_files",
+            description="Search for files by name or content pattern",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "query": {"type": "string", "description": "Search query"},
+                    "file_type": {
+                        "type": "string",
+                        "enum": ["txt", "py", "json"],
+                        "description": "File type filter",
+                    },
+                },
+                "required": ["query"],
+            },
+        ),
+        MCPTool(
+            name="read_file",
+            description="Read the contents of a specific file",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "file_path": {
+                        "type": "string",
+                        "description": "Path to the file to read",
+                    }
+                },
+                "required": ["file_path"],
+            },
+        ),
+        MCPTool(
+            name="analyze_code",
+            description="Analyze code quality and suggest improvements",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "code": {"type": "string", "description": "Code to analyze"},
+                    "language": {
+                        "type": "string",
+                        "description": "Programming language",
+                    },
+                },
+                "required": ["code"],
+            },
+        ),
+        MCPTool(
+            name="execute_command",
+            description="Execute a shell command and return the output",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "command": {
+                        "type": "string",
+                        "description": "Shell command to execute",
+                    },
+                    "timeout": {
+                        "type": "integer",
+                        "description": "Timeout in seconds",
+                        "default": 30,
+                    },
+                },
+                "required": ["command"],
+            },
+        ),
+    ]
+
+
+def create_sample_resources() -> List[MCPResource]:
+    """Build three sample MCPResource entries (markdown, Python and JSON files)."""
+    return [
+        MCPResource(
+            uri="file://docs/api.md",
+            name="API Documentation",
+            description="Complete API documentation with examples",
+            mime_type="text/markdown",
+        ),
+        MCPResource(
+            uri="file://src/main.py",
+            name="Main Application",
+            description="Primary application entry point",
+            mime_type="text/x-python",
+        ),
+        MCPResource(
+            uri="file://config.json",
+            name="Configuration File",
+            description="Application configuration settings",
+            mime_type="application/json",
+        ),
+    ]
+
+
+async def test_basic_scenario_generation():
+    """Generate 5 scenarios from tools only, then exercise the collection helpers; True on success."""
+    print("[TEST] Testing basic scenario generation...")
+
+    tools = create_sample_tools()
+
+    try:
+        scenarios = await generate_scenarios(
+            tools=tools,
+            num_scenarios=5,
+            show_preview=True,
+            generator_model="openai/gpt-4o-mini",  # Use a cheaper model for testing
+        )
+
+        print(f"[PASS] Generated {len(scenarios)} scenarios successfully")
+        print(f"[INFO] Summary: {scenarios.get_summary()}")
+
+        # Exercise the collection helpers; results are printed, not asserted
+        print("\n[TEST] Testing collection methods...")
+
+        # Difficulty filtering: keep scenarios rated 2 or lower
+        easy_scenarios = scenarios.filter_by_difficulty(max_difficulty=2)
+        print(f"[INFO] Easy scenarios (<=2): {len(easy_scenarios)}")
+
+        # Shuffling and splitting (split needs at least train_size=3 scenarios)
+        shuffled = scenarios.shuffle()
+        if len(scenarios) >= 3:
+            train, val = shuffled.split(train_size=3)
+            print(f"[INFO] Train/Val split: {len(train)}/{len(val)}")
+
+        # JSON serialization
+        json_str = scenarios.to_json(indent=2)
+        print(f"[INFO] JSON export: {len(json_str)} characters")
+
+        return True
+
+    except Exception as e:
+        print(f"[FAIL] Basic test failed: {e}")
+        return False
+
+
+async def test_scenario_generation_with_resources():
+    """Generate 3 scenarios from tools plus resources with custom instructions; True on success."""
+    print("\n[TEST] Testing scenario generation with resources...")
+
+    tools = create_sample_tools()
+    resources = create_sample_resources()
+
+    try:
+        scenarios = await generate_scenarios(
+            tools=tools,
+            resources=resources,
+            num_scenarios=3,
+            show_preview=True,
+            custom_instructions="Focus on file management and code analysis tasks.",
+            generator_model="openai/gpt-4o-mini",
+        )
+
+        print(f"[PASS] Generated {len(scenarios)} scenarios with resources")
+
+        # Print each scenario so tool/resource usage can be checked by eye (nothing is asserted)
+        for i, scenario in enumerate(scenarios):
+            print(
+                f"[INFO] Scenario {i + 1} (Difficulty {scenario.difficulty}): {scenario.preview(80)}"
+            )
+
+        return True
+
+    except Exception as e:
+        print(f"[FAIL] Resources test failed: {e}")
+        return False
+
+
+async def test_dict_input_compatibility():
+    """Check that generate_scenarios accepts plain dicts for tools and resources; True on success."""
+    print("\n[TEST] Testing dictionary input compatibility...")
+
+    tools_dict = [
+        {
+            "name": "get_weather",
+            "description": "Get current weather for a location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {"type": "string", "description": "City name"}
+                },
+                "required": ["location"],
+            },
+        },
+        {
+            "name": "send_email",
+            "description": "Send an email message",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "to": {"type": "string", "description": "Recipient email"},
+                    "subject": {"type": "string", "description": "Email subject"},
+                    "body": {"type": "string", "description": "Email body"},
+                },
+                "required": ["to", "subject", "body"],
+            },
+        },
+    ]
+
+    resources_dict = [
+        {
+            "uri": "database://users",
+            "name": "User Database",
+            "description": "User account information",
+            "mimeType": "application/sql",
+        }
+    ]
+
+    try:
+        scenarios = await generate_scenarios(
+            tools=tools_dict,
+            resources=resources_dict,
+            num_scenarios=3,
+            show_preview=False,  # Don't show preview to keep output clean
+            generator_model="openai/gpt-4o-mini",
+        )
+
+        print(f"[PASS] Dictionary input test passed: {len(scenarios)} scenarios")
+        return True
+
+    except Exception as e:
+        print(f"[FAIL] Dictionary input test failed: {e}")
+        return False
+
+
+async def test_error_handling():
+    """Check that an empty tools list and an invalid API key each make generate_scenarios raise."""
+    print("\n[TEST] Testing error handling...")
+
+    # Empty tools list (and no resources): any exception counts as a pass
+    try:
+        await generate_scenarios(
+            tools=[],
+            num_scenarios=1,
+            show_preview=False,
+            generator_model="openai/gpt-4o-mini",
+        )
+        print("[FAIL] Should have failed with empty tools list")
+        return False
+    except Exception as e:
+        print(f"[PASS] Correctly handled empty tools: {type(e).__name__}")
+
+    # Invalid API key: any exception counts as a pass
+    tools = create_sample_tools()[:1]  # Just one tool for speed
+
+    try:
+        await generate_scenarios(
+            tools=tools,
+            num_scenarios=1,
+            show_preview=False,
+            generator_model="openai/gpt-4o-mini",
+            generator_api_key="invalid_key",
+        )
+        print("[FAIL] Should have failed with invalid API key")
+        return False
+    except Exception as e:
+        print(f"[PASS] Correctly handled invalid API key: {type(e).__name__}")
+
+    return True
+
+
+def test_tool_resource_classes():
+    """Check MCPTool / MCPResource dict round-tripping without any API call; True on success."""
+    print("\n[TEST] Testing Tool and Resource classes...")
+
+    try:
+        # MCPTool: from_dict followed by to_dict must give back the same dict
+        tool_dict = {
+            "name": "test_tool",
+            "description": "A test tool",
+            "parameters": {"type": "object", "properties": {}},
+        }
+
+        tool = MCPTool.from_dict(tool_dict)
+        assert tool.name == "test_tool"
+        assert tool.to_dict() == tool_dict
+        print("[PASS] MCPTool class tests passed")
+
+        # MCPResource: the camelCase "mimeType" key must populate mime_type
+        resource_dict = {
+            "uri": "file://test.txt",
+            "name": "Test File",
+            "description": "A test file",
+            "mimeType": "text/plain",
+        }
+
+        resource = MCPResource.from_dict(resource_dict)
+        assert resource.uri == "file://test.txt"
+        assert resource.mime_type == "text/plain"
+
+        # The snake_case "mime_type" key must be accepted as well
+        resource_dict2 = resource_dict.copy()
+        resource_dict2["mime_type"] = resource_dict2.pop("mimeType")
+        resource2 = MCPResource.from_dict(resource_dict2)
+        assert resource2.mime_type == "text/plain"
+
+        print("[PASS] MCPResource class tests passed")
+        return True
+
+    except Exception as e:
+        print(f"[FAIL] Class tests failed: {e}")
+        return False
+
+
+async def main():
+    """Run the class tests, plus the API-backed tests when a key is set; return 0 if all run tests passed, else 1."""
+    print("Starting MCP scenario generation tests...\n")
+
+    # Check for API key
+    if not os.getenv("OPENROUTER_API_KEY"):
+        print("[WARN] OPENROUTER_API_KEY not set. Some tests may fail.")
+        print("       Set your API key: export OPENROUTER_API_KEY='your_key_here'")
+        print()
+
+    test_results = []
+
+    # Run class tests (synchronous)
+    test_results.append(test_tool_resource_classes())
+
+    # Run async tests
+    if os.getenv("OPENROUTER_API_KEY"):
+        test_results.extend(
+            await asyncio.gather(
+                test_basic_scenario_generation(),
+                test_scenario_generation_with_resources(),
+                test_dict_input_compatibility(),
+                test_error_handling(),
+                return_exceptions=True,
+            )
+        )
+    else:
+        print("[SKIP] Skipping API-dependent tests (no API key)")
+        # Skipped tests are not recorded, so they can never be reported as passes
+
+    # Summary (exceptions returned by gather are not True, so they count as failures)
+    passed = sum(1 for result in test_results if result is True)
+    total = len(test_results)
+
+    print(f"\n[SUMMARY] Test Results: {passed}/{total} tests passed")
+
+    if passed == total:
+        print("[SUCCESS] All tests passed!")
+        return 0
+    else:
+        print("[FAILURE] Some tests failed")
+        return 1
+
+
+if __name__ == "__main__":
+    exit_code = asyncio.run(main())
+    raise SystemExit(exit_code)  # exit() is a site helper for interactive use; SystemExit is always available
diff --git a/examples/mcp-rl/uv.lock b/examples/mcp-rl/uv.lock
index 52ccf318b..6e2ba6e66 100644
--- a/examples/mcp-rl/uv.lock
+++ b/examples/mcp-rl/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.10"
 resolution-markers = [
     "python_full_version >= '3.13' and sys_platform == 'linux'",
@@ -1890,7 +1890,7 @@ wheels = [
 
 [[package]]
 name = "litellm"
-version = "1.74.4"
+version = "1.74.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiohttp" },
@@ -1905,9 +1905,9 @@ dependencies = [
     { name = "tiktoken" },
     { name = "tokenizers" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/52/49/32f0e7052309f2757885737e7eb7ce6f5ea5b48fad455b10dfd21720f04e/litellm-1.74.4.tar.gz", hash = "sha256:ace3dd8c052b57b728a2dbd38e7061cf95e3506b13a58c61da39902f6ee4a6be", size = 9405133, upload-time = "2025-07-17T02:46:11.015Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c9/25/8253bbc904d69b61806fc76e6c9c11509b4270ac201eeff6e5f95a5f2d01/litellm-1.74.1.tar.gz", hash = "sha256:0e0c83356c33885dce379cd86d38a728e870dbaaf43ae50e9d0153e29c207a85", size = 9215296, upload-time = "2025-07-10T15:31:13.968Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/21/0c/88df53727c28c006b2fb36616f93a036cde7fb9e37f016f60f02422f52ae/litellm-1.74.4-py3-none-any.whl", hash = "sha256:28de09c9d4cdbe322402f94236ec8dbac9edc5356e2f3b628b9bab0fb39284e4", size = 8639543, upload-time = "2025-07-17T02:46:08.052Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/3e/440c4ea5088c2c251ea711930e7bb4b1021b091fb3cbf512ca426af16f1e/litellm-1.74.1-py3-none-any.whl", hash = "sha256:72fe93ad7310db872543b51cc3ec4b13d4b0e1d7e636f20cd3940544ce2fb020", size = 8564714, upload-time = "2025-07-10T15:31:11.106Z" },
 ]
 
 [[package]]
@@ -2350,7 +2350,7 @@ wheels = [
 
 [[package]]
 name = "openpipe-art"
-version = "0.4.4"
+version = "0.4.9"
 source = { editable = "../../" }
 dependencies = [
     { name = "litellm" },
@@ -2371,11 +2371,17 @@ requires-dist = [
     { name = "awscli", marker = "extra == 'backend'", specifier = ">=1.38.1" },
     { name = "bitsandbytes", marker = "extra == 'backend'", specifier = ">=0.45.2" },
     { name = "hf-xet", marker = "extra == 'backend'", specifier = ">=1.1.0" },
-    { name = "litellm", specifier = ">=1.63.0" },
+    { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.51" },
+    { name = "langchain-openai", marker = "extra == 'langgraph'", specifier = ">=0.3.27" },
+    { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.2" },
+    { name = "litellm", specifier = "==1.74.1" },
     { name = "matplotlib", marker = "extra == 'plotting'", specifier = ">=3.10.1" },
-    { name = "openai", specifier = ">=1.65.5" },
+    { name = "nbclient", marker = "extra == 'backend'", specifier = ">=0.10.1" },
+    { name = "nbmake", marker = "extra == 'backend'", specifier = ">=1.5.5" },
+    { name = "openai", specifier = ">=1.65.5,<=1.99.1" },
     { name = "peft", marker = "extra == 'backend'", specifier = ">=0.14.0" },
     { name = "polars", marker = "extra == 'backend'", specifier = ">=1.26.0" },
+    { name = "pytest", marker = "extra == 'backend'", specifier = ">=8.4.1" },
     { name = "seaborn", marker = "extra == 'plotting'", specifier = ">=0.13.2" },
     { name = "semver", marker = "extra == 'skypilot'", specifier = ">=3.0.4" },
     { name = "setproctitle", marker = "extra == 'backend'", specifier = ">=1.3.6" },
@@ -2389,13 +2395,13 @@ requires-dist = [
     { name = "trl", marker = "extra == 'backend'", specifier = "==0.20.0" },
     { name = "trl", marker = "extra == 'backend'", specifier = ">=0.19.0" },
     { name = "typer", specifier = ">=0.15.2" },
-    { name = "unsloth", marker = "extra == 'backend'", specifier = "==2025.8.1" },
-    { name = "unsloth-zoo", marker = "extra == 'backend'", git = "https://github.com/bradhilton/unsloth-zoo" },
-    { name = "vllm", marker = "extra == 'backend'", specifier = "==0.9.1" },
-    { name = "wandb", marker = "extra == 'backend'", specifier = ">=0.19.8" },
+    { name = "unsloth", marker = "extra == 'backend'", specifier = "==2025.8.6" },
+    { name = "unsloth-zoo", marker = "extra == 'backend'", specifier = "==2025.8.5" },
+    { name = "vllm", marker = "extra == 'backend'", specifier = ">=0.9.2,<=0.10.0" },
+    { name = "wandb", marker = "extra == 'backend'", specifier = "==0.21.0" },
     { name = "weave", specifier = ">=0.51.51" },
 ]
-provides-extras = ["plotting", "backend", "skypilot"]
+provides-extras = ["plotting", "backend", "skypilot", "langgraph"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -2403,7 +2409,12 @@ dev = [
     { name = "hatch", specifier = ">=1.14.1" },
     { name = "ipykernel", specifier = ">=6.29.5" },
     { name = "ipywidgets", specifier = ">=8.1.5" },
+    { name = "nbval", specifier = ">=0.11.0" },
     { name = "openpipe", specifier = ">=4.49.0" },
+    { name = "pyright", extras = ["nodejs"], specifier = ">=1.1.403" },
+    { name = "pytest", specifier = ">=8.4.1" },
+    { name = "pytest-asyncio", specifier = ">=1.1.0" },
+    { name = "pytest-xdist", specifier = ">=3.8.0" },
     { name = "ruff", specifier = ">=0.12.1" },
 ]
 
diff --git a/src/art/mcp/__init__.py b/src/art/mcp/__init__.py
new file mode 100644
index 000000000..cb6021ec3
--- /dev/null
+++ b/src/art/mcp/__init__.py
@@ -0,0 +1,19 @@
+"""MCP utilities for Agent Reinforcement Training."""
+
+from .default_tools import complete_task_tool
+from .generate_scenarios import generate_scenarios
+from .types import (
+    GeneratedScenario,
+    GeneratedScenarioCollection,
+    MCPResource,
+    MCPTool,
+)
+
+__all__ = [
+    "MCPResource",
+    "MCPTool",
+    "GeneratedScenario",
+    "GeneratedScenarioCollection",
+    "complete_task_tool",
+    "generate_scenarios",
+]
diff --git a/src/art/mcp/default_tools.py b/src/art/mcp/default_tools.py
new file mode 100644
index 000000000..9f11e3ee1
--- /dev/null
+++ b/src/art/mcp/default_tools.py
@@ -0,0 +1,16 @@
+from art.mcp.types import MCPTool
+
+complete_task_tool = MCPTool(  # ready-made "complete_task" tool taking one required "summary" string
+    name="complete_task",
+    description="Complete a task",
+    parameters={
+        "type": "object",
+        "properties": {
+            "summary": {
+                "type": "string",
+                "description": "Summary of accomplishments",
+            }
+        },
+        "required": ["summary"],
+    },
+)
diff --git a/src/art/mcp/generate_scenarios.py b/src/art/mcp/generate_scenarios.py
new file mode 100644
index 000000000..0a5233aa4
--- /dev/null
+++ b/src/art/mcp/generate_scenarios.py
@@ -0,0 +1,213 @@
+"""Scenario generation for MCP tools."""
+
+import json
+import time
+from typing import Any, Dict, List, Optional
+
+import openai
+
+from art.mcp.types import GeneratedScenarioCollection, MCPResource, MCPTool
+from art.utils.logging import _C, dim, err, info, ok, step
+
+
+def preview_scenarios(scenarios: List[Dict[str, Any]], n: int = 5):
+    """Print a dimmed, numbered preview of the first ``n`` scenario dicts (task cut to 120 chars)."""
+    n = min(n, len(scenarios))
+    for i in range(n):
+        s = scenarios[i]
+        task_preview = s["task"][:120].strip()
+        ellipsis = "…" if len(s["task"]) > 120 else ""  # mark only tasks that were actually cut
+        difficulty = s.get("difficulty", "N/A")
+        dim(
+            f"   {i + 1}. {task_preview}{ellipsis}  "
+            f"{_C.GRAY}(difficulty {difficulty}/5){_C.RESET}"
+        )
+
+
+async def generate_scenarios(
+    tools: List[MCPTool] | List[Dict[str, Any]],
+    resources: List[MCPResource] | List[Dict[str, Any]] | None = None,
+    num_scenarios: int = 24,
+    show_preview: bool = True,
+    custom_instructions: Optional[str] = None,
+    generator_model: str = "openai/gpt-4.1-mini",
+    generator_api_key: Optional[str] = None,
+    generator_base_url: str = "https://openrouter.ai/api/v1",
+) -> GeneratedScenarioCollection:
+    """
+    Generate scenarios for MCP tools by prompting an OpenAI-compatible chat model.
+
+    Args:
+        tools: List of Tool objects or list of tool dictionaries
+        resources: Optional list of Resource objects or resource dictionaries (None means no resources)
+        num_scenarios: Number of scenarios to generate (default: 24)
+        show_preview: Whether to show a preview of generated scenarios (default: True)
+        custom_instructions: Optional custom instructions for scenario generation
+        generator_model: Model to use for generation (default: "openai/gpt-4.1-mini")
+        generator_api_key: API key for the generator model. If None, will use OPENROUTER_API_KEY env var
+        generator_base_url: Base URL for the API (default: OpenRouter)
+
+    Returns:
+        GeneratedScenarioCollection of exactly num_scenarios scenarios (ValueError is raised otherwise)
+    """
+    import os
+
+    t0 = time.perf_counter()
+
+    # Handle API key
+    if generator_api_key is None:
+        generator_api_key = os.getenv("OPENROUTER_API_KEY")
+        if not generator_api_key:
+            raise ValueError(
+                "generator_api_key is required or OPENROUTER_API_KEY env var must be set"
+            )
+
+    # Validate that we have at least tools or resources
+    if not tools and not resources:
+        raise ValueError("At least one tool or resource must be provided")
+    
+    ok(f"Using model: {generator_model}")
+
+    # Convert tools to dictionaries
+    if isinstance(tools, list) and tools and isinstance(tools[0], MCPTool):
+        tools_info = [tool.to_dict() for tool in tools]
+    else:
+        # Assume it's already a list of dictionaries
+        tools_info = [
+            {
+                "name": tool.get("name", ""),
+                "description": tool.get("description", ""),
+                "parameters": tool.get("parameters", {}),
+            }
+            for tool in tools
+        ]
+
+    # Convert resources to dictionaries
+    if resources is None:
+        resources_info = []
+    elif (
+        isinstance(resources, list)
+        and resources
+        and isinstance(resources[0], MCPResource)
+    ):
+        resources_info = [resource.to_dict() for resource in resources]
+    else:
+        # Assume it's already a list of dictionaries
+        resources_info = resources or []
+
+    info(f"Available: {len(tools_info)} tool(s), {len(resources_info)} resource(s).")
+
+    step("Preparing prompt & JSON schema …")
+    tools_description = json.dumps(tools_info, indent=2)
+    resources_description = (
+        json.dumps(resources_info, indent=2)
+        if resources_info
+        else "No resources available"
+    )
+
+    prompt = f"""You are an expert at creating realistic scenarios for testing AI agents that interact with MCP (Model Context Protocol) servers.
+
+Given the following available tools and resources from an MCP server, generate {num_scenarios} diverse, realistic scenarios that a user might want to accomplish using these tools.
+
+AVAILABLE TOOLS:
+{tools_description}
+
+AVAILABLE RESOURCES:
+{resources_description}
+
+Requirements for scenarios:
+1. Each scenario should be a task that can be accomplished using the available tools
+2. Scenarios should vary in complexity - some simple (1-2 tool calls), some complex (multiple tool calls)
+3. Scenarios should cover different use cases and tool combinations (though the task should not specify which tools to use)
+4. Each scenario should be realistic - something a real user might actually want to do
+5. Assign a difficulty rating from 1 (easy, single tool call) to 5 (hard, complex multi-step analysis)
+6. The task should always include generating a summary of the work done and a thorough analysis and report of the results
+
+You must respond with a JSON object containing a "scenarios" array of exactly {num_scenarios} objects. Each object must have:
+- "task": string describing the scenario
+- "difficulty": integer from 1-5 representing complexity
+"""
+
+    if custom_instructions:
+        prompt += f"\n\nPay close attention to the following instructions when generating scenarios:\n\n{custom_instructions}"
+
+    response_schema = {
+        "type": "object",
+        "properties": {
+            "scenarios": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "task": {"type": "string"},
+                        "difficulty": {"type": "integer", "minimum": 1, "maximum": 5},
+                    },
+                    "required": ["task", "difficulty"],
+                    "additionalProperties": False,
+                },
+                "minItems": num_scenarios,
+                "maxItems": num_scenarios,
+            }
+        },
+        "required": ["scenarios"],
+        "additionalProperties": False,
+    }
+
+    step(f"Calling model: {_C.BOLD}{generator_model}{_C.RESET} …")
+    client_openai = openai.AsyncOpenAI(
+        api_key=generator_api_key,
+        base_url=generator_base_url,
+    )
+
+    t1 = time.perf_counter()
+    response = await client_openai.chat.completions.create(
+        model=generator_model,
+        messages=[{"role": "user", "content": prompt}],
+        max_completion_tokens=8000,
+        response_format={
+            "type": "json_schema",
+            "json_schema": {"name": "scenario_list", "schema": response_schema},
+        },
+    )
+    dt = time.perf_counter() - t1
+    ok(f"Model responded in {dt:.2f}s.")
+
+    content = response.choices[0].message.content or ""  # content can be None; "" fails in the JSON step below
+    info(f"Raw content length: {len(content)} chars.")
+
+    # Parse JSON
+    try:
+        result = json.loads(content)
+    except Exception as e:
+        err("Failed to parse JSON from model response.")
+        dim(f"   Exception: {e}")
+        dim("   First 500 chars of response content:")
+        dim(content[:500])
+        raise
+
+    # Extract scenarios: prefer the "scenarios" key, else a bare list, else the first dict value
+    if "scenarios" in result:
+        scenarios = result["scenarios"]
+    else:
+        scenarios = result if isinstance(result, list) else list(result.values())[0]
+
+    # Validate count
+    if len(scenarios) != num_scenarios:
+        err(f"Expected {num_scenarios} scenarios, got {len(scenarios)}.")
+        raise ValueError(f"Expected {num_scenarios} scenarios, got {len(scenarios)}")
+
+    ok(f"Parsed {len(scenarios)} scenario(s) successfully.")
+
+    # Convert to ScenarioCollection
+    scenario_collection = GeneratedScenarioCollection.from_dicts(scenarios)
+
+    # Show difficulty distribution and preview using the collection methods
+    scenario_collection.print_difficulty_distribution()
+
+    if show_preview:
+        scenario_collection.preview(n=min(5, num_scenarios))
+
+    total_time = time.perf_counter() - t0
+    ok(f"Generated {len(scenario_collection)} scenarios in {total_time:.2f}s total.")
+
+    return scenario_collection
diff --git a/src/art/mcp/types.py b/src/art/mcp/types.py
new file mode 100644
index 000000000..d30ad5f74
--- /dev/null
+++ b/src/art/mcp/types.py
@@ -0,0 +1,201 @@
+import json
+import random
+from collections import Counter
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+from openai.types.chat.chat_completion_tool import ChatCompletionTool
+
+from art.utils.logging import _C, dim, info
+
+
+@dataclass
+class MCPTool:
+    """An MCP tool definition: a name, a description and a parameters dict."""
+
+    name: str
+    description: str
+    parameters: Dict[str, Any]
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "MCPTool":
+        """Build an MCPTool from a dict; missing keys fall back to an empty string or empty dict."""
+        return cls(
+            name=data.get("name", ""),
+            description=data.get("description", ""),
+            parameters=data.get("parameters", {}),
+        )
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the tool as a dict with "name", "description" and "parameters" keys."""
+        return {
+            "name": self.name,
+            "description": self.description,
+            "parameters": self.parameters,
+        }
+
+
+@dataclass
+class MCPResource:
+    """An MCP resource: URI, name, description and an optional MIME type."""
+
+    uri: str
+    name: str
+    description: str
+    mime_type: Optional[str] = None
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "MCPResource":
+        """Build an MCPResource from a dict, reading the MIME type from "mimeType" or else "mime_type"."""
+        return cls(
+            uri=data.get("uri", ""),
+            name=data.get("name", ""),
+            description=data.get("description", ""),
+            mime_type=data.get("mimeType") or data.get("mime_type"),
+        )
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the resource as a dict; "mimeType" is included only when mime_type is truthy."""
+        result = {"uri": self.uri, "name": self.name, "description": self.description}
+        if self.mime_type:
+            result["mimeType"] = self.mime_type
+        return result
+
+
+@dataclass
+class GeneratedScenario:
+    """A generated task description plus an integer difficulty from 1 to 5."""
+
+    task: str
+    difficulty: int
+
+    def __post_init__(self):  # rejects a non-int or out-of-range difficulty with ValueError
+        if not isinstance(self.difficulty, int) or not 1 <= self.difficulty <= 5:
+            raise ValueError("Difficulty must be an integer between 1 and 5")
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GeneratedScenario":
+        """Build a GeneratedScenario from a dict; both "task" and "difficulty" keys are required."""
+        return cls(task=data["task"], difficulty=data["difficulty"])
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the scenario as a dict with "task" and "difficulty" keys."""
+        return {"task": self.task, "difficulty": self.difficulty}
+
+    def preview(self, max_length: int = 120) -> str:
+        """Return the task if it fits in max_length, else its stripped first max_length chars plus an ellipsis."""
+        if len(self.task) <= max_length:
+            return self.task
+        return self.task[:max_length].strip() + "…"
+
+
+class GeneratedScenarioCollection:
+    """An ordered list of GeneratedScenario objects with split/filter/preview/summary helpers."""
+
+    def __init__(self, scenarios: List[GeneratedScenario]):
+        self.scenarios = scenarios
+
+    @classmethod
+    def from_dicts(cls, data: List[Dict[str, Any]]) -> "GeneratedScenarioCollection":
+        """Build a collection from a list of {"task", "difficulty"} dictionaries."""
+        scenarios = [GeneratedScenario.from_dict(item) for item in data]
+        return cls(scenarios)
+
+    @classmethod
+    def from_json(cls, json_str: str) -> "GeneratedScenarioCollection":
+        """Parse JSON that is {"scenarios": [...]}, a bare list, or a dict whose first value is the list."""
+        data = json.loads(json_str)
+        if "scenarios" in data:
+            scenarios_data = data["scenarios"]
+        else:
+            scenarios_data = data if isinstance(data, list) else list(data.values())[0]
+        return cls.from_dicts(scenarios_data)
+
+    def to_dicts(self) -> List[Dict[str, Any]]:
+        """Return every scenario as a dictionary, in order."""
+        return [scenario.to_dict() for scenario in self.scenarios]
+
+    def to_json(self, indent: Optional[int] = None) -> str:
+        """Serialize the collection as a JSON object with a single "scenarios" array."""
+        return json.dumps({"scenarios": self.to_dicts()}, indent=indent)
+
+    def __len__(self) -> int:
+        return len(self.scenarios)
+
+    def __iter__(self):
+        return iter(self.scenarios)
+
+    def __getitem__(self, index):
+        return self.scenarios[index]
+
+    def shuffle(self) -> "GeneratedScenarioCollection":
+        """Return a new, randomly ordered collection; this collection is left untouched."""
+        shuffled = self.scenarios.copy()
+        random.shuffle(shuffled)
+        return GeneratedScenarioCollection(shuffled)
+
+    def split(
+        self, train_size: int
+    ) -> tuple["GeneratedScenarioCollection", "GeneratedScenarioCollection"]:
+        """Split in order into (first train_size scenarios, the rest); ValueError if train_size is out of range."""
+        if not 0 <= train_size <= len(self.scenarios):
+            raise ValueError(
+                f"train_size ({train_size}) must be between 0 and total scenarios ({len(self.scenarios)})"
+            )
+
+        train_scenarios = self.scenarios[:train_size]
+        val_scenarios = self.scenarios[train_size:]
+
+        return GeneratedScenarioCollection(
+            train_scenarios
+        ), GeneratedScenarioCollection(val_scenarios)
+
+    def filter_by_difficulty(
+        self, min_difficulty: int = 1, max_difficulty: int = 5
+    ) -> "GeneratedScenarioCollection":
+        """Return a new collection of scenarios whose difficulty lies in the inclusive range."""
+        filtered = [
+            scenario
+            for scenario in self.scenarios
+            if min_difficulty <= scenario.difficulty <= max_difficulty
+        ]
+        return GeneratedScenarioCollection(filtered)
+
+    def get_difficulty_distribution(self) -> Counter:
+        """Return a Counter mapping each difficulty to its number of scenarios."""
+        return Counter(scenario.difficulty for scenario in self.scenarios)
+
+    def preview(self, n: int = 5, max_task_length: int = 120) -> None:
+        """Print the first n scenarios (dimmed), each task cut to max_task_length."""
+        n = min(n, len(self.scenarios))
+        for i in range(n):
+            scenario = self.scenarios[i]
+            preview_text = scenario.preview(max_task_length)
+            dim(
+                f"   {i + 1}. {preview_text}  "
+                f"{_C.GRAY}(difficulty {scenario.difficulty}/5){_C.RESET}"
+            )
+
+    def print_difficulty_distribution(self) -> None:
+        """Print one count-and-bar line per difficulty 1-5; bars are capped at 30 blocks."""
+        diff_counts = self.get_difficulty_distribution()
+        info("Difficulty distribution:")
+        for d in range(1, 6):
+            cnt = diff_counts.get(d, 0)
+            bar = "█" * min(cnt, 30)
+            dim(f"   {d}/5: {cnt:3d}  {bar}")
+
+    def get_summary(self) -> Dict[str, Any]:
+        """Return total count, difficulty distribution, and average difficulty / task length (0 when empty)."""
+        return {
+            "total_scenarios": len(self.scenarios),
+            "difficulty_distribution": dict(self.get_difficulty_distribution()),
+            "avg_difficulty": sum(s.difficulty for s in self.scenarios)
+            / len(self.scenarios)
+            if self.scenarios
+            else 0,
+            "avg_task_length": sum(len(s.task) for s in self.scenarios)
+            / len(self.scenarios)
+            if self.scenarios
+            else 0,
+        }
diff --git a/src/art/utils/logging.py b/src/art/utils/logging.py
new file mode 100644
index 000000000..2e84cdd28
--- /dev/null
+++ b/src/art/utils/logging.py
@@ -0,0 +1,44 @@
+import time
+
+
+# ---------- lightweight "nice print" helpers ----------
+class _C:
+    RESET = "\x1b[0m"
+    DIM = "\x1b[2m"
+    BOLD = "\x1b[1m"
+    ITAL = "\x1b[3m"
+    GRAY = "\x1b[90m"
+    BLUE = "\x1b[34m"
+    CYAN = "\x1b[36m"
+    GREEN = "\x1b[32m"
+    YELLOW = "\x1b[33m"
+    RED = "\x1b[31m"
+    MAGENTA = "\x1b[35m"
+
+
+def _ts():
+    return time.strftime("%H:%M:%S")
+
+
+def info(msg):
+    print(f"[{_ts()}] {_C.BLUE}INFO{_C.RESET}  {msg}")
+
+
+def step(msg):
+    print(f"[{_ts()}] {_C.CYAN}STEP{_C.RESET}  {msg}")
+
+
+def ok(msg):
+    print(f"[{_ts()}] {_C.GREEN}OK{_C.RESET}    {msg}")
+
+
+def warn(msg):
+    print(f"[{_ts()}] {_C.YELLOW}WARN{_C.RESET}  {msg}")
+
+
+def err(msg):
+    print(f"[{_ts()}] {_C.RED}ERR{_C.RESET}   {msg}")
+
+
+def dim(msg):
+    print(f"{_C.DIM}{msg}{_C.RESET}")

From cb91ee82491a94b3b7671dc59c19a6bd7b30bd66 Mon Sep 17 00:00:00 2001
From: arcticfly <davidlcorbitt@gmail.com>
Date: Tue, 26 Aug 2025 12:41:09 -0700
Subject: [PATCH 2/5] Add to_tool_schema conversion

---
 src/art/mcp/types.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/art/mcp/types.py b/src/art/mcp/types.py
index d30ad5f74..b78d0e9b9 100644
--- a/src/art/mcp/types.py
+++ b/src/art/mcp/types.py
@@ -34,6 +34,13 @@ def to_dict(self) -> Dict[str, Any]:
             "parameters": self.parameters,
         }
 
+    def to_tool_schema(self) -> Dict[str, Any]:
+        """Convert the tool to a tool schema."""
+        return {
+            "type": "function",
+            "function": self.to_dict(),
+        }
+
 
 @dataclass
 class MCPResource:

From e69d3baa408066419142865312897363cbbf3b7d Mon Sep 17 00:00:00 2001
From: arcticfly <davidlcorbitt@gmail.com>
Date: Tue, 26 Aug 2025 12:46:41 -0700
Subject: [PATCH 3/5] Fix lint

---
 dev/demo_logging.py               | 243 ++++++++++++++++++++++++++++++
 src/art/mcp/generate_scenarios.py |   2 +-
 2 files changed, 244 insertions(+), 1 deletion(-)
 create mode 100644 dev/demo_logging.py

diff --git a/dev/demo_logging.py b/dev/demo_logging.py
new file mode 100644
index 000000000..725dd2332
--- /dev/null
+++ b/dev/demo_logging.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python3
+"""Demo of all logging functionality from art.utils.logging."""
+
+import time
+
+from art.utils.logging import _C, _ts, dim, err, info, ok, step, warn
+
+
+def demo_basic_logging():
+    """Demonstrate the basic logging functions."""
+    print("=" * 60)
+    print("BASIC LOGGING FUNCTIONS")
+    print("=" * 60)
+
+    info("This is an informational message")
+    step("This indicates a step in a process")
+    ok("This indicates successful completion")
+    warn("This is a warning message")
+    err("This is an error message")
+    dim("This is dimmed/secondary text")
+
+    print()
+
+
+def demo_color_codes():
+    """Demonstrate the color code constants."""
+    print("=" * 60)
+    print("COLOR CODE CONSTANTS (_C class)")
+    print("=" * 60)
+
+    print("Available color constants:")
+    print(f"{_C.RESET}RESET{_C.RESET} - Reset all formatting")
+    print(f"{_C.DIM}DIM{_C.RESET} - Dimmed text")
+    print(f"{_C.BOLD}BOLD{_C.RESET} - Bold text")
+    print(f"{_C.ITAL}ITAL{_C.RESET} - Italic text")
+    print(f"{_C.GRAY}GRAY{_C.RESET} - Gray color")
+    print(f"{_C.BLUE}BLUE{_C.RESET} - Blue color")
+    print(f"{_C.CYAN}CYAN{_C.RESET} - Cyan color")
+    print(f"{_C.GREEN}GREEN{_C.RESET} - Green color")
+    print(f"{_C.YELLOW}YELLOW{_C.RESET} - Yellow color")
+    print(f"{_C.RED}RED{_C.RESET} - Red color")
+    print(f"{_C.MAGENTA}MAGENTA{_C.RESET} - Magenta color")
+
+    print("\nCustom formatted messages:")
+    print(f"{_C.BOLD}{_C.BLUE}Bold Blue Text{_C.RESET}")
+    print(f"{_C.ITAL}{_C.GREEN}Italic Green Text{_C.RESET}")
+    print(f"{_C.DIM}{_C.GRAY}Dimmed Gray Text{_C.RESET}")
+
+    print()
+
+
+def demo_timestamp():
+    """Demonstrate the timestamp function."""
+    print("=" * 60)
+    print("TIMESTAMP FUNCTION (_ts)")
+    print("=" * 60)
+
+    print(f"Current timestamp: {_ts()}")
+    print("Timestamp format: HH:MM:SS")
+    print(f"Example with custom message: [{_ts()}] Custom log message")
+
+    print()
+
+
+def demo_real_world_usage():
+    """Demonstrate real-world usage scenarios."""
+    print("=" * 60)
+    print("REAL-WORLD USAGE SCENARIOS")
+    print("=" * 60)
+
+    # Simulating a process with multiple steps
+    info("Starting data processing pipeline")
+
+    step("Loading configuration file")
+    time.sleep(0.5)  # Simulate work
+    ok("Configuration loaded successfully")
+
+    step("Connecting to database")
+    time.sleep(0.3)  # Simulate work
+    ok("Database connection established")
+
+    step("Processing 1000 records")
+    time.sleep(0.7)  # Simulate work
+    warn("Skipped 2 invalid records")
+    ok("Processed 998/1000 records successfully")
+
+    step("Generating report")
+    time.sleep(0.4)  # Simulate work
+    ok("Report generated successfully")
+
+    info("Pipeline completed")
+    dim("   Total time: 2.1 seconds")
+    dim("   Records processed: 998")
+    dim("   Records skipped: 2")
+
+    print()
+
+
+def demo_progress_tracking():
+    """Demonstrate progress tracking with logging."""
+    print("=" * 60)
+    print("PROGRESS TRACKING EXAMPLE")
+    print("=" * 60)
+
+    total_items = 5
+    info(f"Processing {total_items} items")
+
+    for i in range(1, total_items + 1):
+        step(f"Processing item {i}/{total_items}")
+        time.sleep(0.2)  # Simulate work
+
+        if i == 3:
+            warn(f"Item {i} required additional validation")
+
+        ok(f"Item {i} completed")
+        dim(f"   Progress: {i}/{total_items} ({i / total_items * 100:.0f}%)")
+
+    ok("All items processed successfully")
+
+    print()
+
+
+def demo_error_scenarios():
+    """Demonstrate error reporting scenarios."""
+    print("=" * 60)
+    print("ERROR REPORTING SCENARIOS")
+    print("=" * 60)
+
+    info("Testing error handling scenarios")
+
+    step("Attempting risky operation 1")
+    warn("Operation completed with warnings")
+    dim("   Warning: Deprecated API used")
+
+    step("Attempting risky operation 2")
+    err("Operation failed with error")
+    dim("   Error: File not found: /path/to/missing/file.txt")
+    dim("   Suggestion: Check file path and permissions")
+
+    step("Attempting recovery")
+    ok("Successfully recovered using fallback method")
+
+    print()
+
+
+def demo_formatting_combinations():
+    """Demonstrate various formatting combinations."""
+    print("=" * 60)
+    print("ADVANCED FORMATTING COMBINATIONS")
+    print("=" * 60)
+
+    # Combining colors and styles
+    print("Style combinations:")
+    print(f"{_C.BOLD}{_C.RED}Bold Red Error{_C.RESET}")
+    print(f"{_C.BOLD}{_C.GREEN}Bold Green Success{_C.RESET}")
+    print(f"{_C.BOLD}{_C.YELLOW}Bold Yellow Warning{_C.RESET}")
+    print(f"{_C.ITAL}{_C.BLUE}Italic Blue Info{_C.RESET}")
+    print(f"{_C.DIM}{_C.GRAY}Dimmed Gray Details{_C.RESET}")
+
+    print("\nNested formatting:")
+    print(
+        f"Regular text with {_C.BOLD}bold{_C.RESET} and {_C.ITAL}italic{_C.RESET} sections"
+    )
+    print(
+        f"{_C.BLUE}Blue text with {_C.BOLD}bold section{_C.RESET}{_C.BLUE} continuing in blue{_C.RESET}"
+    )
+
+    print("\nStatus indicators:")
+    print(f"[{_C.GREEN}✓{_C.RESET}] Success indicator")
+    print(f"[{_C.YELLOW}!{_C.RESET}] Warning indicator")
+    print(f"[{_C.RED}✗{_C.RESET}] Error indicator")
+    print(f"[{_C.BLUE}i{_C.RESET}] Info indicator")
+
+    print()
+
+
+def demo_log_levels():
+    """Demonstrate different log levels in action."""
+    print("=" * 60)
+    print("LOG LEVELS DEMONSTRATION")
+    print("=" * 60)
+
+    print("Simulating application startup:")
+    info("Application starting up")
+    step("Initializing modules")
+    ok("Core modules loaded")
+    step("Starting services")
+    warn("Service A started with reduced performance mode")
+    ok("Service B started normally")
+    err("Service C failed to start")
+    dim("   Fallback: Using Service D instead")
+    ok("Service D started successfully")
+    info("Application startup complete")
+
+    print("\nSimulating application shutdown:")
+    info("Shutting down application")
+    step("Stopping services")
+    ok("All services stopped cleanly")
+    step("Cleaning up resources")
+    ok("Resources cleaned up")
+    info("Application shutdown complete")
+
+    print()
+
+
+def main():
+    """Run all logging demonstrations."""
+    print(f"{_C.BOLD}{_C.CYAN}ART Logging System Demo{_C.RESET}")
+    print(f"Timestamp: {_ts()}")
+    print()
+
+    # Run all demonstrations
+    demo_basic_logging()
+    demo_color_codes()
+    demo_timestamp()
+    demo_real_world_usage()
+    demo_progress_tracking()
+    demo_error_scenarios()
+    demo_formatting_combinations()
+    demo_log_levels()
+
+    # Final summary
+    print("=" * 60)
+    print("DEMO COMPLETE")
+    print("=" * 60)
+    ok("All logging functionality demonstrated successfully")
+    info("Available functions: info(), step(), ok(), warn(), err(), dim()")
+    info("Available constants: _C class with color codes, _ts() for timestamps")
+    dim("   For more details, see: src/art/utils/logging.py")
+
+    print(f"\n{_C.BOLD}Usage Examples:{_C.RESET}")
+    print("from art.utils.logging import info, step, ok, warn, err, dim, _C")
+    print("info('Starting process')")
+    print("step('Processing data')")
+    print("ok('Process completed')")
+    print("warn('Performance degraded')")
+    print("err('Operation failed')")
+    print("dim('Additional details')")
+    print(f"print(f'{_C.BOLD}Bold text{_C.RESET}')")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/art/mcp/generate_scenarios.py b/src/art/mcp/generate_scenarios.py
index 0a5233aa4..65ac2959b 100644
--- a/src/art/mcp/generate_scenarios.py
+++ b/src/art/mcp/generate_scenarios.py
@@ -65,7 +65,7 @@ async def generate_scenarios(
     # Validate that we have at least tools or resources
     if not tools and not resources:
         raise ValueError("At least one tool or resource must be provided")
-    
+
     ok(f"Using model: {generator_model}")
 
     # Convert tools to dictionaries

From 79ea00ec00c7688e55a0318347fcda4ebff96075 Mon Sep 17 00:00:00 2001
From: arcticfly <davidlcorbitt@gmail.com>
Date: Tue, 26 Aug 2025 13:33:28 -0700
Subject: [PATCH 4/5] Fix types

---
 src/art/mcp/generate_scenarios.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/art/mcp/generate_scenarios.py b/src/art/mcp/generate_scenarios.py
index 65ac2959b..df92ea3c4 100644
--- a/src/art/mcp/generate_scenarios.py
+++ b/src/art/mcp/generate_scenarios.py
@@ -70,14 +70,20 @@ async def generate_scenarios(
 
     # Convert tools to dictionaries
     if isinstance(tools, list) and tools and isinstance(tools[0], MCPTool):
-        tools_info = [tool.to_dict() for tool in tools]
+        tools_info = [tool.to_dict() for tool in tools]  # type: ignore
     else:
         # Assume it's already a list of dictionaries
         tools_info = [
             {
-                "name": tool.get("name", ""),
-                "description": tool.get("description", ""),
-                "parameters": tool.get("parameters", {}),
+                "name": tool.get("name", "")
+                if isinstance(tool, dict)
+                else getattr(tool, "name", ""),
+                "description": tool.get("description", "")
+                if isinstance(tool, dict)
+                else getattr(tool, "description", ""),
+                "parameters": tool.get("parameters", {})
+                if isinstance(tool, dict)
+                else getattr(tool, "parameters", {}),
             }
             for tool in tools
         ]
@@ -90,7 +96,7 @@ async def generate_scenarios(
         and resources
         and isinstance(resources[0], MCPResource)
     ):
-        resources_info = [resource.to_dict() for resource in resources]
+        resources_info = [resource.to_dict() for resource in resources]  # type: ignore
     else:
         # Assume it's already a list of dictionaries
         resources_info = resources or []
@@ -173,6 +179,9 @@ async def generate_scenarios(
     ok(f"Model responded in {dt:.2f}s.")
 
     content = response.choices[0].message.content
+    if content is None:
+        err("Model response content is None.")
+        raise ValueError("Model response content is None")
     info(f"Raw content length: {len(content)} chars.")
 
     # Parse JSON
@@ -182,7 +191,7 @@ async def generate_scenarios(
         err("Failed to parse JSON from model response.")
         dim(f"   Exception: {e}")
         dim("   First 500 chars of response content:")
-        dim(content[:500])
+        dim(content[:500] if content else "No content")
         raise
 
     # Extract scenarios

From b3f7c60fd619b3b8bb972485bc60e63c56b7ebcf Mon Sep 17 00:00:00 2001
From: arcticfly <davidlcorbitt@gmail.com>
Date: Tue, 26 Aug 2025 17:19:05 -0700
Subject: [PATCH 5/5] Release 0.4.10

---
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b3dfad5d3..ba372594d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openpipe-art"
-version = "0.4.9"
+version = "0.4.10"
 description = "The OpenPipe Agent Reinforcement Training (ART) library"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/uv.lock b/uv.lock
index 2f30532a6..bdf0a25de 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4026,7 +4026,7 @@ wheels = [
 
 [[package]]
 name = "openpipe-art"
-version = "0.4.9"
+version = "0.4.10"
 source = { editable = "." }
 dependencies = [
     { name = "litellm" },