From a0275feaee194cba79484fa731a71f0354e3c790 Mon Sep 17 00:00:00 2001 From: arcticfly Date: Tue, 26 Aug 2025 12:35:47 -0700 Subject: [PATCH 1/5] Add art.mcp package --- examples/mcp-rl/test_scenario_generation.py | 364 ++++++++++++++++++++ examples/mcp-rl/uv.lock | 35 +- src/art/mcp/__init__.py | 19 + src/art/mcp/default_tools.py | 16 + src/art/mcp/generate_scenarios.py | 213 ++++++++++++ src/art/mcp/types.py | 201 +++++++++++ src/art/utils/logging.py | 44 +++ 7 files changed, 880 insertions(+), 12 deletions(-) create mode 100644 examples/mcp-rl/test_scenario_generation.py create mode 100644 src/art/mcp/__init__.py create mode 100644 src/art/mcp/default_tools.py create mode 100644 src/art/mcp/generate_scenarios.py create mode 100644 src/art/mcp/types.py create mode 100644 src/art/utils/logging.py diff --git a/examples/mcp-rl/test_scenario_generation.py b/examples/mcp-rl/test_scenario_generation.py new file mode 100644 index 000000000..40b826ceb --- /dev/null +++ b/examples/mcp-rl/test_scenario_generation.py @@ -0,0 +1,364 @@ +#!/usr/bin/env python3 +"""Test scenario generation functionality.""" + +import asyncio +import os +from typing import List + +from dotenv import load_dotenv + +from art.mcp import MCPResource, MCPTool, generate_scenarios + +load_dotenv() + + +def create_sample_tools() -> List[MCPTool]: + """Create sample tools for testing.""" + return [ + MCPTool( + name="search_files", + description="Search for files by name or content pattern", + parameters={ + "type": "object", + "properties": { + "query": {"type": "string", "description": "Search query"}, + "file_type": { + "type": "string", + "enum": ["txt", "py", "json"], + "description": "File type filter", + }, + }, + "required": ["query"], + }, + ), + MCPTool( + name="read_file", + description="Read the contents of a specific file", + parameters={ + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Path to the file to read", + } + }, + "required": ["file_path"], + }, + ), + MCPTool( + name="analyze_code", + description="Analyze code quality and suggest improvements", + parameters={ + "type": "object", + "properties": { + "code": {"type": "string", "description": "Code to analyze"}, + "language": { + "type": "string", + "description": "Programming language", + }, + }, + "required": ["code"], + }, + ), + MCPTool( + name="execute_command", + description="Execute a shell command and return the output", + parameters={ + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "Shell command to execute", + }, + "timeout": { + "type": "integer", + "description": "Timeout in seconds", + "default": 30, + }, + }, + "required": ["command"], + }, + ), + ] + + +def create_sample_resources() -> List[MCPResource]: + """Create sample resources for testing.""" + return [ + MCPResource( + uri="file://docs/api.md", + name="API Documentation", + description="Complete API documentation with examples", + mime_type="text/markdown", + ), + MCPResource( + uri="file://src/main.py", + name="Main Application", + description="Primary application entry point", + mime_type="text/x-python", + ), + MCPResource( + uri="file://config.json", + name="Configuration File", + description="Application configuration settings", + mime_type="application/json", + ), + ] + + +async def test_basic_scenario_generation(): + """Test basic scenario generation with tools only.""" + print("[TEST] Testing basic scenario generation...") + + tools = create_sample_tools() + + try: + scenarios = await generate_scenarios( + tools=tools, + num_scenarios=5, + show_preview=True, + generator_model="openai/gpt-4o-mini", # Use a cheaper model for testing + ) + + print(f"[PASS] Generated {len(scenarios)} scenarios successfully") + print(f"[INFO] Summary: {scenarios.get_summary()}") + + # Test collection methods + print("\n[TEST] Testing collection methods...") + + # Test difficulty filtering + easy_scenarios = scenarios.filter_by_difficulty(max_difficulty=2) + print(f"[INFO] Easy scenarios (<=2): {len(easy_scenarios)}") + + # Test shuffling and splitting + shuffled = scenarios.shuffle() + if len(scenarios) >= 3: + train, val = shuffled.split(train_size=3) + print(f"[INFO] Train/Val split: {len(train)}/{len(val)}") + + # Test JSON serialization + json_str = scenarios.to_json(indent=2) + print(f"[INFO] JSON export: {len(json_str)} characters") + + return True + + except Exception as e: + print(f"[FAIL] Basic test failed: {e}") + return False + + +async def test_scenario_generation_with_resources(): + """Test scenario generation with both tools and resources.""" + print("\n[TEST] Testing scenario generation with resources...") + + tools = create_sample_tools() + resources = create_sample_resources() + + try: + scenarios = await generate_scenarios( + tools=tools, + resources=resources, + num_scenarios=3, + show_preview=True, + custom_instructions="Focus on file management and code analysis tasks.", + generator_model="openai/gpt-4o-mini", + ) + + print(f"[PASS] Generated {len(scenarios)} scenarios with resources") + + # Verify scenarios reference the available tools/resources appropriately + for i, scenario in enumerate(scenarios): + print( + f"[INFO] Scenario {i + 1} (Difficulty {scenario.difficulty}): {scenario.preview(80)}" + ) + + return True + + except Exception as e: + print(f"[FAIL] Resources test failed: {e}") + return False + + +async def test_dict_input_compatibility(): + """Test backward compatibility with dictionary inputs.""" + print("\n[TEST] Testing dictionary input compatibility...") + + tools_dict = [ + { + "name": "get_weather", + "description": "Get current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City name"} + }, + "required": ["location"], + }, + }, + { + "name": "send_email", + "description": "Send an email message", + "parameters": { + "type": "object", + "properties": { + "to": {"type": "string", "description": "Recipient email"}, + "subject": {"type": "string", "description": "Email subject"}, + "body": {"type": "string", "description": "Email body"}, + }, + "required": ["to", "subject", "body"], + }, + }, + ] + + resources_dict = [ + { + "uri": "database://users", + "name": "User Database", + "description": "User account information", + "mimeType": "application/sql", + } + ] + + try: + scenarios = await generate_scenarios( + tools=tools_dict, + resources=resources_dict, + num_scenarios=3, + show_preview=False, # Don't show preview to keep output clean + generator_model="openai/gpt-4o-mini", + ) + + print(f"[PASS] Dictionary input test passed: {len(scenarios)} scenarios") + return True + + except Exception as e: + print(f"[FAIL] Dictionary input test failed: {e}") + return False + + +async def test_error_handling(): + """Test error handling scenarios.""" + print("\n[TEST] Testing error handling...") + + # Test with empty tools list + try: + await generate_scenarios( + tools=[], + num_scenarios=1, + show_preview=False, + generator_model="openai/gpt-4o-mini", + ) + print("[FAIL] Should have failed with empty tools list") + return False + except Exception as e: + print(f"[PASS] Correctly handled empty tools: {type(e).__name__}") + + # Test with invalid API key + tools = create_sample_tools()[:1] # Just one tool for speed + + try: + await generate_scenarios( + tools=tools, + num_scenarios=1, + show_preview=False, + generator_model="openai/gpt-4o-mini", + generator_api_key="invalid_key", + ) + print("[FAIL] Should have failed with invalid API key") + return False + except Exception as e: + print(f"[PASS] Correctly handled invalid API key: {type(e).__name__}") + + return True + + +def test_tool_resource_classes(): + """Test Tool and Resource class functionality.""" + print("\n[TEST] Testing Tool and Resource classes...") + + try: + # Test Tool class + tool_dict = { + "name": "test_tool", + "description": "A test tool", + "parameters": {"type": "object", "properties": {}}, + } + + tool = MCPTool.from_dict(tool_dict) + assert tool.name == "test_tool" + assert tool.to_dict() == tool_dict + print("[PASS] MCPTool class tests passed") + + # Test Resource class + resource_dict = { + "uri": "file://test.txt", + "name": "Test File", + "description": "A test file", + "mimeType": "text/plain", + } + + resource = MCPResource.from_dict(resource_dict) + assert resource.uri == "file://test.txt" + assert resource.mime_type == "text/plain" + + # Test alternative field name + resource_dict2 = resource_dict.copy() + resource_dict2["mime_type"] = resource_dict2.pop("mimeType") + resource2 = MCPResource.from_dict(resource_dict2) + assert resource2.mime_type == "text/plain" + + print("[PASS] MCPResource class tests passed") + return True + + except Exception as e: + print(f"[FAIL] Class tests failed: {e}") + return False + + +async def main(): + """Run all tests.""" + print("Starting MCP scenario generation tests...\n") + + # Check for API key + if not os.getenv("OPENROUTER_API_KEY"): + print("[WARN] OPENROUTER_API_KEY not set. Some tests may fail.") + print(" Set your API key: export OPENROUTER_API_KEY='your_key_here'") + print() + + test_results = [] + + # Run class tests (synchronous) + test_results.append(test_tool_resource_classes()) + + # Run async tests + if os.getenv("OPENROUTER_API_KEY"): + test_results.extend( + await asyncio.gather( + test_basic_scenario_generation(), + test_scenario_generation_with_resources(), + test_dict_input_compatibility(), + test_error_handling(), + return_exceptions=True, + ) + ) + else: + print("[SKIP] Skipping API-dependent tests (no API key)") + test_results.extend([True, True, True, True]) # Assume they would pass + + # Summary + passed = sum(1 for result in test_results if result is True) + total = len(test_results) + + print(f"\n[SUMMARY] Test Results: {passed}/{total} tests passed") + + if passed == total: + print("[SUCCESS] All tests passed!") + return 0 + else: + print("[FAILURE] Some tests failed") + return 1 + + +if __name__ == "__main__": + exit_code = asyncio.run(main()) + exit(exit_code) diff --git a/examples/mcp-rl/uv.lock b/examples/mcp-rl/uv.lock index 52ccf318b..6e2ba6e66 100644 --- a/examples/mcp-rl/uv.lock +++ b/examples/mcp-rl/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.13' and sys_platform == 'linux'", @@ -1890,7 +1890,7 @@ wheels = [ [[package]] name = "litellm" -version = "1.74.4" +version = "1.74.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1905,9 +1905,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/52/49/32f0e7052309f2757885737e7eb7ce6f5ea5b48fad455b10dfd21720f04e/litellm-1.74.4.tar.gz", hash = "sha256:ace3dd8c052b57b728a2dbd38e7061cf95e3506b13a58c61da39902f6ee4a6be", size = 9405133, upload-time = "2025-07-17T02:46:11.015Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/25/8253bbc904d69b61806fc76e6c9c11509b4270ac201eeff6e5f95a5f2d01/litellm-1.74.1.tar.gz", hash = "sha256:0e0c83356c33885dce379cd86d38a728e870dbaaf43ae50e9d0153e29c207a85", size = 9215296, upload-time = "2025-07-10T15:31:13.968Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/21/0c/88df53727c28c006b2fb36616f93a036cde7fb9e37f016f60f02422f52ae/litellm-1.74.4-py3-none-any.whl", hash = "sha256:28de09c9d4cdbe322402f94236ec8dbac9edc5356e2f3b628b9bab0fb39284e4", size = 8639543, upload-time = "2025-07-17T02:46:08.052Z" }, + { url = "https://files.pythonhosted.org/packages/b9/3e/440c4ea5088c2c251ea711930e7bb4b1021b091fb3cbf512ca426af16f1e/litellm-1.74.1-py3-none-any.whl", hash = "sha256:72fe93ad7310db872543b51cc3ec4b13d4b0e1d7e636f20cd3940544ce2fb020", size = 8564714, upload-time = "2025-07-10T15:31:11.106Z" }, ] [[package]] @@ -2350,7 +2350,7 @@ wheels = [ [[package]] name = "openpipe-art" -version = "0.4.4" +version = "0.4.9" source = { editable = "../../" } dependencies = [ { name = "litellm" }, @@ -2371,11 +2371,17 @@ requires-dist = [ { name = "awscli", marker = "extra == 'backend'", specifier = ">=1.38.1" }, { name = "bitsandbytes", marker = "extra == 'backend'", specifier = ">=0.45.2" }, { name = "hf-xet", marker = "extra == 'backend'", specifier = ">=1.1.0" }, - { name = "litellm", specifier = ">=1.63.0" }, + { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.51" }, + { name = "langchain-openai", marker = "extra == 'langgraph'", specifier = ">=0.3.27" }, + { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.2" }, + { name = "litellm", specifier = "==1.74.1" }, { name = "matplotlib", marker = "extra == 'plotting'", specifier = ">=3.10.1" }, - { name = "openai", specifier = ">=1.65.5" }, + { name = "nbclient", marker = "extra == 'backend'", specifier = ">=0.10.1" }, + { name = "nbmake", marker = "extra == 'backend'", specifier = ">=1.5.5" }, + { name = "openai", specifier = ">=1.65.5,<=1.99.1" }, { name = "peft", marker = "extra == 'backend'", specifier = ">=0.14.0" }, { name = "polars", marker = "extra == 'backend'", specifier = ">=1.26.0" }, + { name = "pytest", marker = "extra == 'backend'", specifier = ">=8.4.1" }, { name = "seaborn", marker = "extra == 'plotting'", specifier = ">=0.13.2" }, { name = "semver", marker = "extra == 'skypilot'", specifier = ">=3.0.4" }, { name = "setproctitle", marker = "extra == 'backend'", specifier = ">=1.3.6" }, @@ -2389,13 +2395,13 @@ requires-dist = [ { name = "trl", marker = "extra == 'backend'", specifier = "==0.20.0" }, { name = "trl", marker = "extra == 'backend'", specifier = ">=0.19.0" }, { name = "typer", specifier = ">=0.15.2" }, - { name = "unsloth", marker = "extra == 'backend'", specifier = "==2025.8.1" }, - { name = "unsloth-zoo", marker = "extra == 'backend'", git = "https://github.com/bradhilton/unsloth-zoo" }, - { name = "vllm", marker = "extra == 'backend'", specifier = "==0.9.1" }, - { name = "wandb", marker = "extra == 'backend'", specifier = ">=0.19.8" }, + { name = "unsloth", marker = "extra == 'backend'", specifier = "==2025.8.6" }, + { name = "unsloth-zoo", marker = "extra == 'backend'", specifier = "==2025.8.5" }, + { name = "vllm", marker = "extra == 'backend'", specifier = ">=0.9.2,<=0.10.0" }, + { name = "wandb", marker = "extra == 'backend'", specifier = "==0.21.0" }, { name = "weave", specifier = ">=0.51.51" }, ] -provides-extras = ["plotting", "backend", "skypilot"] +provides-extras = ["plotting", "backend", "skypilot", "langgraph"] [package.metadata.requires-dev] dev = [ @@ -2403,7 +2409,12 @@ dev = [ { name = "hatch", specifier = ">=1.14.1" }, { name = "ipykernel", specifier = ">=6.29.5" }, { name = "ipywidgets", specifier = ">=8.1.5" }, + { name = "nbval", specifier = ">=0.11.0" }, { name = "openpipe", specifier = ">=4.49.0" }, + { name = "pyright", extras = ["nodejs"], specifier = ">=1.1.403" }, + { name = "pytest", specifier = ">=8.4.1" }, + { name = "pytest-asyncio", specifier = ">=1.1.0" }, + { name = "pytest-xdist", specifier = ">=3.8.0" }, { name = "ruff", specifier = ">=0.12.1" }, ] diff --git a/src/art/mcp/__init__.py b/src/art/mcp/__init__.py new file mode 100644 index 000000000..cb6021ec3 --- /dev/null +++ b/src/art/mcp/__init__.py @@ -0,0 +1,19 @@ +"""MCP utilities for Agent Reinforcement Training.""" + +from .default_tools import complete_task_tool +from .generate_scenarios import generate_scenarios +from .types import ( + GeneratedScenario, + GeneratedScenarioCollection, + MCPResource, + MCPTool, +) + +__all__ = [ + "MCPResource", + "MCPTool", + "GeneratedScenario", + "GeneratedScenarioCollection", + "complete_task_tool", + "generate_scenarios", +] diff --git a/src/art/mcp/default_tools.py b/src/art/mcp/default_tools.py new file mode 100644 index 000000000..9f11e3ee1 --- /dev/null +++ b/src/art/mcp/default_tools.py @@ -0,0 +1,16 @@ +from art.mcp.types import MCPTool + +complete_task_tool = MCPTool( + name="complete_task", + description="Complete a task", + parameters={ + "type": "object", + "properties": { + "summary": { + "type": "string", + "description": "Summary of accomplishments", + } + }, + "required": ["summary"], + }, +) diff --git a/src/art/mcp/generate_scenarios.py b/src/art/mcp/generate_scenarios.py new file mode 100644 index 000000000..0a5233aa4 --- /dev/null +++ b/src/art/mcp/generate_scenarios.py @@ -0,0 +1,213 @@ +"""Scenario generation for MCP tools.""" + +import json +import time +from typing import Any, Dict, List, Optional + +import openai + +from art.mcp.types import GeneratedScenarioCollection, MCPResource, MCPTool +from art.utils.logging import _C, dim, err, info, ok, step + + +def preview_scenarios(scenarios: List[Dict[str, Any]], n: int = 5): + """Preview generated scenarios.""" + n = min(n, len(scenarios)) + for i in range(n): + s = scenarios[i] + task_preview = s["task"][:120].strip() + ellipsis = "&" if len(s["task"]) > 120 else "" + difficulty = s.get("difficulty", "N/A") + dim( + f" {i + 1}. {task_preview}{ellipsis} " + f"{_C.GRAY}(difficulty {difficulty}/5){_C.RESET}" + ) + + +async def generate_scenarios( + tools: List[MCPTool] | List[Dict[str, Any]], + resources: List[MCPResource] | List[Dict[str, Any]] = [], + num_scenarios: int = 24, + show_preview: bool = True, + custom_instructions: Optional[str] = None, + generator_model: str = "openai/gpt-4.1-mini", + generator_api_key: Optional[str] = None, + generator_base_url: str = "https://openrouter.ai/api/v1", +) -> GeneratedScenarioCollection: + """ + Generate scenarios for MCP tools. + + Args: + tools: List of Tool objects or list of tool dictionaries + resources: Optional list of Resource objects or list of resource dictionaries + num_scenarios: Number of scenarios to generate (default: 24) + show_preview: Whether to show a preview of generated scenarios (default: True) + custom_instructions: Optional custom instructions for scenario generation + generator_model: Model to use for generation (default: "openai/gpt-4.1-mini") + generator_api_key: API key for the generator model. If None, will use OPENROUTER_API_KEY env var + generator_base_url: Base URL for the API (default: OpenRouter) + + Returns: + GeneratedScenarioCollection containing the generated scenarios + """ + import os + + t0 = time.perf_counter() + + # Handle API key + if generator_api_key is None: + generator_api_key = os.getenv("OPENROUTER_API_KEY") + if not generator_api_key: + raise ValueError( + "generator_api_key is required or OPENROUTER_API_KEY env var must be set" + ) + + # Validate that we have at least tools or resources + if not tools and not resources: + raise ValueError("At least one tool or resource must be provided") + + ok(f"Using model: {generator_model}") + + # Convert tools to dictionaries + if isinstance(tools, list) and tools and isinstance(tools[0], MCPTool): + tools_info = [tool.to_dict() for tool in tools] + else: + # Assume it's already a list of dictionaries + tools_info = [ + { + "name": tool.get("name", ""), + "description": tool.get("description", ""), + "parameters": tool.get("parameters", {}), + } + for tool in tools + ] + + # Convert resources to dictionaries + if resources is None: + resources_info = [] + elif ( + isinstance(resources, list) + and resources + and isinstance(resources[0], MCPResource) + ): + resources_info = [resource.to_dict() for resource in resources] + else: + # Assume it's already a list of dictionaries + resources_info = resources or [] + + info(f"Available: {len(tools_info)} tool(s), {len(resources_info)} resource(s).") + + step("Preparing prompt & JSON schema &") + tools_description = json.dumps(tools_info, indent=2) + resources_description = ( + json.dumps(resources_info, indent=2) + if resources_info + else "No resources available" + ) + + prompt = f"""You are an expert at creating realistic scenarios for testing AI agents that interact with MCP (Model Context Protocol) servers. + +Given the following available tools and resources from an MCP server, generate {num_scenarios} diverse, realistic scenarios that a user might want to accomplish using these tools. + +AVAILABLE TOOLS: +{tools_description} + +AVAILABLE RESOURCES: +{resources_description} + +Requirements for scenarios: +1. Each scenario should be a task that can be accomplished using the available tools +2. Scenarios should vary in complexity - some simple (1-2 tool calls), some complex (multiple tool calls) +3. Scenarios should cover different use cases and tool combinations (though the task should not specify which tools to use) +4. Each scenario should be realistic - something a real user might actually want to do +5. Assign a difficulty rating from 1 (easy, single tool call) to 5 (hard, complex multi-step analysis) +6. The task should always include generating a summary of the work done and a thorough analysis and report of the results + +You must respond with a JSON object containing a "scenarios" array of exactly {num_scenarios} objects. Each object must have: +- "task": string describing the scenario +- "difficulty": integer from 1-5 representing complexity +""" + + if custom_instructions: + prompt += f"\n\nPay close attention to the following instructions when generating scenarios:\n\n{custom_instructions}" + + response_schema = { + "type": "object", + "properties": { + "scenarios": { + "type": "array", + "items": { + "type": "object", + "properties": { + "task": {"type": "string"}, + "difficulty": {"type": "integer", "minimum": 1, "maximum": 5}, + }, + "required": ["task", "difficulty"], + "additionalProperties": False, + }, + "minItems": num_scenarios, + "maxItems": num_scenarios, + } + }, + "required": ["scenarios"], + "additionalProperties": False, + } + + step(f"Calling model: {_C.BOLD}{generator_model}{_C.RESET} &") + client_openai = openai.OpenAI( + api_key=generator_api_key, + base_url=generator_base_url, + ) + + t1 = time.perf_counter() + response = client_openai.chat.completions.create( + model=generator_model, + messages=[{"role": "user", "content": prompt}], + max_completion_tokens=8000, + response_format={ + "type": "json_schema", + "json_schema": {"name": "scenario_list", "schema": response_schema}, + }, + ) + dt = time.perf_counter() - t1 + ok(f"Model responded in {dt:.2f}s.") + + content = response.choices[0].message.content + info(f"Raw content length: {len(content)} chars.") + + # Parse JSON + try: + result = json.loads(content) + except Exception as e: + err("Failed to parse JSON from model response.") + dim(f" Exception: {e}") + dim(" First 500 chars of response content:") + dim(content[:500]) + raise + + # Extract scenarios + if "scenarios" in result: + scenarios = result["scenarios"] + else: + scenarios = result if isinstance(result, list) else list(result.values())[0] + + # Validate count + if len(scenarios) != num_scenarios: + err(f"Expected {num_scenarios} scenarios, got {len(scenarios)}.") + raise ValueError(f"Expected {num_scenarios} scenarios, got {len(scenarios)}") + + ok(f"Parsed {len(scenarios)} scenario(s) successfully.") + + # Convert to ScenarioCollection + scenario_collection = GeneratedScenarioCollection.from_dicts(scenarios) + + # Show difficulty distribution and preview using the collection methods + scenario_collection.print_difficulty_distribution() + + if show_preview: + scenario_collection.preview(n=min(5, num_scenarios)) + + total_time = time.perf_counter() - t0 + ok(f"Generated {len(scenario_collection)} scenarios in {total_time:.2f}s total.") + + return scenario_collection diff --git a/src/art/mcp/types.py b/src/art/mcp/types.py new file mode 100644 index 000000000..d30ad5f74 --- /dev/null +++ b/src/art/mcp/types.py @@ -0,0 +1,201 @@ +import json +import random +from collections import Counter +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +from openai.types.chat.chat_completion_tool import ChatCompletionTool + +from art.utils.logging import _C, dim, info + + +@dataclass +class MCPTool: + """Represents an MCP tool.""" + + name: str + description: str + parameters: Dict[str, Any] + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "MCPTool": + """Create a Tool from a dictionary.""" + return cls( + name=data.get("name", ""), + description=data.get("description", ""), + parameters=data.get("parameters", {}), + ) + + def to_dict(self) -> Dict[str, Any]: + """Convert the tool to a dictionary.""" + return { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + } + + +@dataclass +class MCPResource: + """Represents an MCP resource.""" + + uri: str + name: str + description: str + mime_type: Optional[str] = None + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "MCPResource": + """Create a Resource from a dictionary.""" + return cls( + uri=data.get("uri", ""), + name=data.get("name", ""), + description=data.get("description", ""), + mime_type=data.get("mimeType") or data.get("mime_type"), + ) + + def to_dict(self) -> Dict[str, Any]: + """Convert the resource to a dictionary.""" + result = {"uri": self.uri, "name": self.name, "description": self.description} + if self.mime_type: + result["mimeType"] = self.mime_type + return result + + +@dataclass +class GeneratedScenario: + """A single scenario for testing AI agents.""" + + task: str + difficulty: int + + def __post_init__(self): + if not isinstance(self.difficulty, int) or not 1 <= self.difficulty <= 5: + raise ValueError("Difficulty must be an integer between 1 and 5") + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GeneratedScenario": + """Create a GeneratedScenario from a dictionary.""" + return cls(task=data["task"], difficulty=data["difficulty"]) + + def to_dict(self) -> Dict[str, Any]: + """Convert the scenario to a dictionary.""" + return {"task": self.task, "difficulty": self.difficulty} + + def preview(self, max_length: int = 120) -> str: + """Get a preview of the scenario task.""" + if len(self.task) <= max_length: + return self.task + return self.task[:max_length].strip() + "…" + + +class GeneratedScenarioCollection: + """A collection of scenarios with utilities for management and analysis.""" + + def __init__(self, scenarios: List[GeneratedScenario]): + self.scenarios = scenarios + + @classmethod + def from_dicts(cls, data: List[Dict[str, Any]]) -> "GeneratedScenarioCollection": + """Create a GeneratedScenarioCollection from a list of dictionaries.""" + scenarios = [GeneratedScenario.from_dict(item) for item in data] + return cls(scenarios) + + @classmethod + def from_json(cls, json_str: str) -> "GeneratedScenarioCollection": + """Create a GeneratedScenarioCollection from a JSON string.""" + data = json.loads(json_str) + if "scenarios" in data: + scenarios_data = data["scenarios"] + else: + scenarios_data = data if isinstance(data, list) else list(data.values())[0] + return cls.from_dicts(scenarios_data) + + def to_dicts(self) -> List[Dict[str, Any]]: + """Convert all scenarios to dictionaries.""" + return [scenario.to_dict() for scenario in self.scenarios] + + def to_json(self, indent: Optional[int] = None) -> str: + """Convert the collection to JSON.""" + return json.dumps({"scenarios": self.to_dicts()}, indent=indent) + + def __len__(self) -> int: + return len(self.scenarios) + + def __iter__(self): + return iter(self.scenarios) + + def __getitem__(self, index): + return self.scenarios[index] + + def shuffle(self) -> "GeneratedScenarioCollection": + """Return a new collection with shuffled scenarios.""" + shuffled = self.scenarios.copy() + random.shuffle(shuffled) + return GeneratedScenarioCollection(shuffled) + + def split( + self, train_size: int + ) -> tuple["GeneratedScenarioCollection", "GeneratedScenarioCollection"]: + """Split the collection into train and validation sets.""" + if train_size > len(self.scenarios): + raise ValueError( + f"train_size ({train_size}) cannot be larger than total scenarios ({len(self.scenarios)})" + ) + + train_scenarios = self.scenarios[:train_size] + val_scenarios = self.scenarios[train_size:] + + return GeneratedScenarioCollection( + train_scenarios + ), GeneratedScenarioCollection(val_scenarios) + + def filter_by_difficulty( + self, min_difficulty: int = 1, max_difficulty: int = 5 + ) -> "GeneratedScenarioCollection": + """Filter scenarios by difficulty range.""" + filtered = [ + scenario + for scenario in self.scenarios + if min_difficulty <= scenario.difficulty <= max_difficulty + ] + return GeneratedScenarioCollection(filtered) + + def get_difficulty_distribution(self) -> Counter: + """Get the distribution of difficulties.""" + return Counter(scenario.difficulty for scenario in self.scenarios) + + def preview(self, n: int = 5, max_task_length: int = 120) -> None: + """Preview the first n scenarios.""" + n = min(n, len(self.scenarios)) + for i in range(n): + scenario = self.scenarios[i] + preview_text = scenario.preview(max_task_length) + dim( + f" {i + 1}. {preview_text} " + f"{_C.GRAY}(difficulty {scenario.difficulty}/5){_C.RESET}" + ) + + def print_difficulty_distribution(self) -> None: + """Print a visual representation of the difficulty distribution.""" + diff_counts = self.get_difficulty_distribution() + info("Difficulty distribution:") + for d in range(1, 6): + cnt = diff_counts.get(d, 0) + bar = "█" * min(cnt, 30) + dim(f" {d}/5: {cnt:3d} {bar}") + + def get_summary(self) -> Dict[str, Any]: + """Get a summary of the scenario collection.""" + return { + "total_scenarios": len(self.scenarios), + "difficulty_distribution": dict(self.get_difficulty_distribution()), + "avg_difficulty": sum(s.difficulty for s in self.scenarios) + / len(self.scenarios) + if self.scenarios + else 0, + "avg_task_length": sum(len(s.task) for s in self.scenarios) + / len(self.scenarios) + if self.scenarios + else 0, + } diff --git a/src/art/utils/logging.py b/src/art/utils/logging.py new file mode 100644 index 000000000..2e84cdd28 --- /dev/null +++ b/src/art/utils/logging.py @@ -0,0 +1,44 @@ +import time + + +# ---------- lightweight "nice print" helpers ---------- +class _C: + RESET = "\x1b[0m" + DIM = "\x1b[2m" + BOLD = "\x1b[1m" + ITAL = "\x1b[3m" + GRAY = "\x1b[90m" + BLUE = "\x1b[34m" + CYAN = "\x1b[36m" + GREEN = "\x1b[32m" + YELLOW = "\x1b[33m" + RED = "\x1b[31m" + MAGENTA = "\x1b[35m" + + +def _ts(): + return time.strftime("%H:%M:%S") + + +def info(msg): + print(f"[{_ts()}] {_C.BLUE}INFO{_C.RESET} {msg}") + + +def step(msg): + print(f"[{_ts()}] {_C.CYAN}STEP{_C.RESET} {msg}") + + +def ok(msg): + print(f"[{_ts()}] {_C.GREEN}OK{_C.RESET} {msg}") + + +def warn(msg): + print(f"[{_ts()}] {_C.YELLOW}WARN{_C.RESET} {msg}") + + +def err(msg): + print(f"[{_ts()}] {_C.RED}ERR{_C.RESET} {msg}") + + +def dim(msg): + print(f"{_C.DIM}{msg}{_C.RESET}") From cb91ee82491a94b3b7671dc59c19a6bd7b30bd66 Mon Sep 17 00:00:00 2001 From: arcticfly Date: Tue, 26 Aug 2025 12:41:09 -0700 Subject: [PATCH 2/5] Add to_tool_schema conversion --- src/art/mcp/types.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/art/mcp/types.py b/src/art/mcp/types.py index d30ad5f74..b78d0e9b9 100644 --- a/src/art/mcp/types.py +++ b/src/art/mcp/types.py @@ -34,6 +34,13 @@ def to_dict(self) -> Dict[str, Any]: "parameters": self.parameters, } + def to_tool_schema(self) -> Dict[str, Any]: + """Convert the tool to a tool schema.""" + return { + "type": "function", + "function": self.to_dict(), + } + @dataclass class MCPResource: From e69d3baa408066419142865312897363cbbf3b7d Mon Sep 17 00:00:00 2001 From: arcticfly Date: Tue, 26 Aug 2025 12:46:41 -0700 Subject: [PATCH 3/5] Fix lint --- dev/demo_logging.py | 243 ++++++++++++++++++++++++++++++ src/art/mcp/generate_scenarios.py | 2 +- 2 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 dev/demo_logging.py diff --git a/dev/demo_logging.py b/dev/demo_logging.py new file mode 100644 index 000000000..725dd2332 --- /dev/null +++ b/dev/demo_logging.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +"""Demo of all logging functionality from art.utils.logging.""" + +import time + +from art.utils.logging import _C, _ts, dim, err, info, ok, step, warn + + +def demo_basic_logging(): + """Demonstrate the basic logging functions.""" + print("=" * 60) + print("BASIC LOGGING FUNCTIONS") + print("=" * 60) + + info("This is an informational message") + step("This indicates a step in a process") + ok("This indicates successful completion") + warn("This is a warning message") + err("This is an error message") + dim("This is dimmed/secondary text") + + print() + + +def demo_color_codes(): + """Demonstrate the color code constants.""" + print("=" * 60) + print("COLOR CODE CONSTANTS (_C class)") + print("=" * 60) + + print("Available color constants:") + print(f"{_C.RESET}RESET{_C.RESET} - Reset all formatting") + print(f"{_C.DIM}DIM{_C.RESET} - Dimmed text") + print(f"{_C.BOLD}BOLD{_C.RESET} - Bold text") + print(f"{_C.ITAL}ITAL{_C.RESET} - Italic text") + print(f"{_C.GRAY}GRAY{_C.RESET} - Gray color") + print(f"{_C.BLUE}BLUE{_C.RESET} - Blue color") + print(f"{_C.CYAN}CYAN{_C.RESET} - Cyan color") + print(f"{_C.GREEN}GREEN{_C.RESET} - Green color") + print(f"{_C.YELLOW}YELLOW{_C.RESET} - Yellow color") + print(f"{_C.RED}RED{_C.RESET} - Red color") + print(f"{_C.MAGENTA}MAGENTA{_C.RESET} - Magenta color") + + print("\nCustom formatted messages:") + print(f"{_C.BOLD}{_C.BLUE}Bold Blue Text{_C.RESET}") + print(f"{_C.ITAL}{_C.GREEN}Italic Green Text{_C.RESET}") + print(f"{_C.DIM}{_C.GRAY}Dimmed Gray Text{_C.RESET}") + + print() + + +def demo_timestamp(): + """Demonstrate the timestamp function.""" + print("=" * 60) + print("TIMESTAMP FUNCTION (_ts)") + print("=" * 60) + + print(f"Current timestamp: {_ts()}") + print(f"Timestamp format: HH:MM:SS") + print(f"Example with custom message: [{_ts()}] Custom log message") + + print() + + +def demo_real_world_usage(): + """Demonstrate real-world usage scenarios.""" + print("=" * 60) + print("REAL-WORLD USAGE SCENARIOS") + print("=" * 60) + + # Simulating a process with multiple steps + info("Starting data processing pipeline") + + step("Loading configuration file") + time.sleep(0.5) # Simulate work + ok("Configuration loaded successfully") + + step("Connecting to database") + time.sleep(0.3) # Simulate work + ok("Database connection established") + + step("Processing 1000 records") + time.sleep(0.7) # Simulate work + warn("Skipped 2 invalid records") + ok("Processed 998/1000 records successfully") + + step("Generating report") + time.sleep(0.4) # Simulate work + ok("Report generated successfully") + + info("Pipeline completed") + dim(" Total time: 2.1 seconds") + dim(" Records processed: 998") + dim(" Records skipped: 2") + + print() + + +def demo_progress_tracking(): + """Demonstrate progress tracking with logging.""" + print("=" * 60) + print("PROGRESS TRACKING EXAMPLE") + print("=" * 60) + + total_items = 5 + info(f"Processing {total_items} items") + + for i in range(1, total_items + 1): + step(f"Processing item {i}/{total_items}") + time.sleep(0.2) # Simulate work + + if i == 3: + warn(f"Item {i} required additional validation") + + ok(f"Item {i} completed") + dim(f" Progress: {i}/{total_items} ({i / total_items * 100:.0f}%)") + + ok("All items processed successfully") + + print() + + +def demo_error_scenarios(): + """Demonstrate error reporting scenarios.""" + print("=" * 60) + print("ERROR REPORTING SCENARIOS") + print("=" * 60) + + info("Testing error handling scenarios") + + step("Attempting risky operation 1") + warn("Operation completed with warnings") + dim(" Warning: Deprecated API used") + + step("Attempting risky operation 2") + err("Operation failed with error") + dim(" Error: File not found: /path/to/missing/file.txt") + dim(" Suggestion: Check file path and permissions") + + step("Attempting recovery") + ok("Successfully recovered using fallback method") + + print() + + +def demo_formatting_combinations(): + """Demonstrate various formatting combinations.""" + print("=" * 60) + print("ADVANCED FORMATTING COMBINATIONS") + print("=" * 60) + + # Combining colors and styles + print("Style combinations:") + print(f"{_C.BOLD}{_C.RED}Bold Red Error{_C.RESET}") + print(f"{_C.BOLD}{_C.GREEN}Bold Green Success{_C.RESET}") + print(f"{_C.BOLD}{_C.YELLOW}Bold Yellow Warning{_C.RESET}") + print(f"{_C.ITAL}{_C.BLUE}Italic Blue Info{_C.RESET}") + print(f"{_C.DIM}{_C.GRAY}Dimmed Gray Details{_C.RESET}") + + print("\nNested formatting:") + print( + f"Regular text with {_C.BOLD}bold{_C.RESET} and {_C.ITAL}italic{_C.RESET} sections" + ) + print( + f"{_C.BLUE}Blue text with {_C.BOLD}bold section{_C.RESET}{_C.BLUE} continuing in blue{_C.RESET}" + ) + + print("\nStatus indicators:") + print(f"[{_C.GREEN}{_C.RESET}] Success indicator") + print(f"[{_C.YELLOW}!{_C.RESET}] Warning indicator") + print(f"[{_C.RED}{_C.RESET}] Error indicator") + print(f"[{_C.BLUE}i{_C.RESET}] Info indicator") + + print() + + +def demo_log_levels(): + """Demonstrate different log levels in action.""" + print("=" * 60) + print("LOG LEVELS DEMONSTRATION") + print("=" * 60) + + print("Simulating application startup:") + info("Application starting up") + step("Initializing modules") + ok("Core modules loaded") + step("Starting services") + warn("Service A started with reduced performance mode") + ok("Service B started normally") + err("Service C failed to start") + dim(" Fallback: Using Service D instead") + ok("Service D started successfully") + info("Application startup complete") + + print("\nSimulating application shutdown:") + info("Shutting down application") + step("Stopping services") + ok("All services stopped cleanly") + step("Cleaning up resources") + ok("Resources cleaned up") + info("Application shutdown complete") + + print() + + +def main(): + """Run all logging demonstrations.""" + print(f"{_C.BOLD}{_C.CYAN}ART Logging System Demo{_C.RESET}") + print(f"Timestamp: {_ts()}") + print() + + # Run all demonstrations + demo_basic_logging() + demo_color_codes() + demo_timestamp() + demo_real_world_usage() + demo_progress_tracking() + demo_error_scenarios() + demo_formatting_combinations() + demo_log_levels() + + # Final summary + print("=" * 60) + print("DEMO COMPLETE") + print("=" * 60) + ok("All logging functionality demonstrated successfully") + info("Available functions: info(), step(), ok(), warn(), err(), dim()") + info("Available constants: _C class with color codes, _ts() for timestamps") + dim(" For more details, see: src/art/utils/logging.py") + + print(f"\n{_C.BOLD}Usage Examples:{_C.RESET}") + print("from art.utils.logging import info, step, ok, warn, err, dim, _C") + print("info('Starting process')") + print("step('Processing data')") + print("ok('Process completed')") + print("warn('Performance degraded')") + print("err('Operation failed')") + print("dim('Additional details')") + print(f"print(f'{_C.BOLD}Bold text{_C.RESET}')") + + +if __name__ == "__main__": + main() diff --git a/src/art/mcp/generate_scenarios.py b/src/art/mcp/generate_scenarios.py index 0a5233aa4..65ac2959b 100644 --- a/src/art/mcp/generate_scenarios.py +++ b/src/art/mcp/generate_scenarios.py @@ -65,7 +65,7 @@ async def generate_scenarios( # Validate that we have at least tools or resources if not tools and not resources: raise ValueError("At least one tool or resource must be provided") - + ok(f"Using model: {generator_model}") # Convert tools to dictionaries From 79ea00ec00c7688e55a0318347fcda4ebff96075 Mon Sep 17 00:00:00 2001 From: arcticfly Date: Tue, 26 Aug 2025 13:33:28 -0700 Subject: [PATCH 4/5] Fix types --- src/art/mcp/generate_scenarios.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/art/mcp/generate_scenarios.py b/src/art/mcp/generate_scenarios.py index 65ac2959b..df92ea3c4 100644 --- a/src/art/mcp/generate_scenarios.py +++ b/src/art/mcp/generate_scenarios.py @@ -70,14 +70,20 @@ async def generate_scenarios( # Convert tools to dictionaries if isinstance(tools, list) and tools and isinstance(tools[0], MCPTool): - tools_info = [tool.to_dict() for tool in tools] + tools_info = [tool.to_dict() for tool in tools] # type: ignore else: # Assume it's already a list of dictionaries tools_info = [ { - "name": tool.get("name", ""), - "description": tool.get("description", ""), - "parameters": tool.get("parameters", {}), + "name": tool.get("name", "") + if isinstance(tool, dict) + else getattr(tool, "name", ""), + "description": tool.get("description", "") + if isinstance(tool, dict) + else getattr(tool, "description", ""), + "parameters": tool.get("parameters", {}) + if isinstance(tool, dict) + else getattr(tool, "parameters", {}), } for tool in tools ] @@ -90,7 +96,7 @@ async def generate_scenarios( and resources and isinstance(resources[0], MCPResource) ): - resources_info = [resource.to_dict() for resource in resources] + resources_info = [resource.to_dict() for resource in resources] # type: ignore else: # Assume it's already a list of dictionaries resources_info = resources or [] @@ -173,6 +179,9 @@ async def generate_scenarios( ok(f"Model responded in {dt:.2f}s.") content = response.choices[0].message.content + if content is None: + err("Model response content is None.") + raise ValueError("Model response content is None") info(f"Raw content length: {len(content)} chars.") # Parse JSON @@ -182,7 +191,7 @@ async def generate_scenarios( err("Failed to parse JSON from model response.") dim(f" Exception: {e}") dim(" First 500 chars of response content:") - dim(content[:500]) + dim(content[:500] if content else "No content") raise # Extract scenarios From b3f7c60fd619b3b8bb972485bc60e63c56b7ebcf Mon Sep 17 00:00:00 2001 From: arcticfly Date: Tue, 26 Aug 2025 17:19:05 -0700 Subject: [PATCH 5/5] Release 0.4.10 --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b3dfad5d3..ba372594d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openpipe-art" -version = "0.4.9" +version = "0.4.10" description = "The OpenPipe Agent Reinforcement Training (ART) library" readme = "README.md" requires-python = ">=3.10" diff --git a/uv.lock b/uv.lock index 2f30532a6..bdf0a25de 100644 --- a/uv.lock +++ b/uv.lock @@ -4026,7 +4026,7 @@ wheels = [ [[package]] name = "openpipe-art" -version = "0.4.9" +version = "0.4.10" source = { editable = "." } dependencies = [ { name = "litellm" },