From 3a89649cd5ec6950755d756c8f77c58347a1bf7e Mon Sep 17 00:00:00 2001
From: arcticfly <davidlcorbitt@gmail.com>
Date: Wed, 27 Aug 2025 16:42:30 -0700
Subject: [PATCH] =?UTF-8?q?Add=20MCP=E2=80=A2RL=20doc,=20remove=20GSPO?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/docs.json           |   7 +-
 docs/features/mcp-rl.mdx | 211 +++++++++++++++++++++++++++++++++++++++
 src/art/mcp/types.py     |   2 -
 3 files changed, 214 insertions(+), 6 deletions(-)
 create mode 100644 docs/features/mcp-rl.mdx

diff --git a/docs/docs.json b/docs/docs.json
index e53789dff..4abddbb01 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -64,7 +64,8 @@
         "group": "Features",
         "pages": [
           "features/checkpoint-forking",
-          "features/additional-histories"
+          "features/additional-histories",
+          "features/mcp-rl"
         ]
       },
       {
@@ -88,9 +89,7 @@
       },
       {
         "group": "Experimental",
-        "pages": [
-          "experimental/gspo"
-        ]
+        "pages": []
       }
     ]
   },
diff --git a/docs/features/mcp-rl.mdx b/docs/features/mcp-rl.mdx
new file mode 100644
index 000000000..30745fb23
--- /dev/null
+++ b/docs/features/mcp-rl.mdx
@@ -0,0 +1,211 @@
+---
+title: "MCP•RL: Training Agents to Use MCP Servers"
+sidebarTitle: "MCP Training"
+description: "Learn how to use ART to train language models to work effectively with Model Context Protocol (MCP) servers"
+---
+
+MCP•RL is a specialized application of ART that teaches language models to effectively use [Model Context Protocol (MCP) servers](https://modelcontextprotocol.io/). This approach enables you to train agents that can seamlessly interact with any MCP-compatible tool or service.
+
+## What is MCP•RL?
+
+MCP•RL combines two powerful technologies:
+
+- **Model Context Protocol (MCP)**: A standard for connecting AI assistants to external tools and data sources
+- **ART (Agent Reinforcement Trainer)**: OpenPipe's framework for training better AI agents using reinforcement learning
+
+The result is a training pipeline that can automatically teach any language model to use MCP servers effectively, without requiring manually labeled training data.
+
+## How MCP•RL Works
+
+The training process follows these key steps:
+
+### 1. **Server Discovery**
+
+```python
+# Query the MCP server to understand available tools
+tools_list = await mcp_client.list_tools()
+```
+
+### 2. **Scenario Generation**
+
+```python
+# Generate diverse training scenarios automatically
+from art.mcp import generate_scenarios
+
+scenario_collection = await generate_scenarios(
+    tools=tools_list,
+    num_scenarios=24,
+    show_preview=True,
+    generator_model="openai/gpt-4.1-mini",
+    generator_api_key="your_openrouter_key",
+    generator_base_url="https://openrouter.ai/api/v1",
+)
+```
+
+ART automatically generates diverse training scenarios that exercise different aspects of the MCP server: simple single-tool usage, complex multi-step workflows, edge cases and error handling, and creative combinations of available tools.
+
+### 3. **RULER Evaluation**
+
+```python
+from art.rewards import ruler_score_group
+
+# RULER evaluates responses without labeled data
+scored_group = await ruler_score_group(
+    group,
+    judge_model="openai/o4-mini",
+)
+```
+
+Instead of requiring human-labeled examples, RULER judges response quality by assessing whether the agent accomplished the intended task, the quality of its tool usage, the efficiency of its approach, and how well it handled errors.
+
+### 4. **Reinforcement Learning**
+
+```python
+# Train using RULER feedback
+groups = await gather_trajectory_groups(
+    trajectory_groups_generator,
+    pbar_desc="train gather step",
+)
+
+scored_groups = [
+    await ruler_score_group(
+        group,
+        judge_model="openai/o4-mini",
+    )
+    for group in groups
+]
+
+await model.train(
+    scored_groups,
+    config=art.TrainConfig(learning_rate=1e-5),
+)
+```
+
+The model learns from RULER feedback using reinforcement learning, improving its ability to select appropriate tools, use correct parameters, chain tools effectively, and handle failures gracefully.
+
+## Getting Started
+
+Optimizing against an MCP server can be surprisingly straightforward!
+
+### Prerequisites
+
+- Access to an MCP server you want to train on
+- OpenRouter API key for training
+- Python environment with ART installed
+
+### Basic Training Pipeline
+
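+Here's a simplified example of training a model to use an MCP server. It assumes that `tools_list` and `resources_list` have already been fetched from your MCP server and that `rollout` is your own function that runs the model on a single scenario: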
+
+```python
+import art
+from art.mcp import generate_scenarios
+from art.rewards import ruler_score_group
+from art import gather_trajectory_groups
+
+# Initialize the model
+model = art.RemoteModel(
+    model="Qwen/Qwen2.5-3B-Instruct",
+    openrouter_api_key="your_openrouter_key"
+)
+
+# Generate training scenarios automatically
+scenario_collection = await generate_scenarios(
+    tools=tools_list,
+    resources=resources_list,
+    num_scenarios=100,
+    show_preview=False,
+    generator_model="gpt-4o-mini",
+    generator_api_key="your_openrouter_key",
+)
+
+# Gather trajectory groups
+groups = await gather_trajectory_groups(
+    (
+        art.TrajectoryGroup(
+            rollout(model, scenario, False)
+            for _ in range(4)  # rollouts per group
+        )
+        for scenario in scenario_collection
+    ),
+    pbar_desc="train gather step",
+)
+
+# Score groups using RULER
+scored_groups = [
+    await ruler_score_group(
+        group,
+        judge_model="gpt-4o-mini",
+        debug=True,
+        swallow_exceptions=True
+    )
+    for group in groups
+]
+
+# Train the model
+await model.train(
+    scored_groups,
+    config=art.TrainConfig(learning_rate=1e-5),
+)
+```
+
+### Example Use Cases
+
+- **Database Agent**: Train a model to query databases, understand schemas, and generate appropriate SQL commands via an MCP database server.
+
+- **File Management Agent**: Teach an agent to navigate file systems, read/write files, and perform complex file operations through an MCP file server.
+
+- **API Integration Agent**: Train models to interact with REST APIs, handle authentication, and process responses via MCP API wrappers.
+
+- **Development Tools Agent**: Create agents that can use development tools like Git, package managers, or testing frameworks through MCP servers.
+
+## What MCP•RL is Good At
+
+MCP•RL excels at training agents to effectively use MCP servers by:
+
+- **Tool Usage**: Teaching when and how to use specific tools with appropriate parameters
+- **Multi-Step Workflows**: Chaining tool calls and interpreting outputs to build complex workflows
+- **Domain Adaptation**: Learning specialized terminology and conventions for different server types
+
+## Best Practices
+
+- 📈 **Iterative Training** - Use checkpoint forking to experiment with different training approaches and parameters.
+
+- 🔍 **Monitor RULER Scores** - Pay attention to RULER evaluation metrics to understand where your agent excels and where it needs improvement.
+
+- 🧪 **Test Thoroughly** - Validate your trained agent on held-out scenarios that weren't used during training.
+
+- 📊 **Use Diverse Scenarios** - Ensure your training data covers the full range of tasks your agent will encounter in production.
+
+## Troubleshooting
+
+### Common Issues
+
+**Low RULER Scores**:
+
+- Check if your MCP server is responding correctly
+- Verify that generated scenarios are appropriate for your use case
+- Consider adjusting training parameters
+
+**Tool Selection Errors**:
+
+- Ensure the model has seen diverse examples of when to use each tool
+- Add more training scenarios that require careful tool selection
+
+**Parameter Issues**:
+
+- Include scenarios that demonstrate correct parameter usage
+- Consider adding validation examples to your training data
+
+## Next Steps
+
+- Explore the [complete MCP•RL notebook](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb)
+- Learn more about [RULER evaluation](/fundamentals/ruler)
+- Check out [checkpoint forking](/features/checkpoint-forking) for iterative training
+- Join our [Discord](https://discord.gg/zbBHRUpwf4) to discuss MCP•RL with the community
+
+<Note>
+  MCP•RL is particularly effective because RULER can judge response quality
+  purely from the agent's own trajectories—no labeled data required! This makes
+  it possible to train high-quality MCP agents with minimal manual intervention.
+</Note>
diff --git a/src/art/mcp/types.py b/src/art/mcp/types.py
index b78d0e9b9..9adbddc19 100644
--- a/src/art/mcp/types.py
+++ b/src/art/mcp/types.py
@@ -4,8 +4,6 @@
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
 
-from openai.types.chat.chat_completion_tool import ChatCompletionTool
-
 from art.utils.logging import _C, dim, info