From 005b2a943a3f6ec81a382e62c6a6776c4cea6782 Mon Sep 17 00:00:00 2001 From: arcticfly Date: Mon, 25 Aug 2025 13:07:24 -0700 Subject: [PATCH 1/3] Update README top section --- README.md | 53 ++++++++++++++++++++++------------------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 3bddc8b1a..933d982ab 100644 --- a/README.md +++ b/README.md @@ -21,48 +21,39 @@ Train multi-step agents for real-world tasks using GRPO. -## 🔌 MCP•RL: Teach your agents to master MCP +## 🦜🔗 LangGraph Integration: Build Smarter Multi-Step Agents - +ART's **LangGraph integration** enables you to train sophisticated ReAct-style agents that improve through reinforcement learning. Build agents that reason, use tools, and adapt their behavior over time without manual prompt engineering. -**MCP•RL** enables you to train agents to effectively use any MCP (Model Context Protocol) server with minimal setup. Simply provide a server URL and MCP•RL will: +✨ **Key Benefits:** -1. Automatically discover server tools -2. Design input tasks that utilize those tools -3. Train the model to improve performance on the MCP server using RULER -4. Test on new tasks to validate the trained model - -✨ **Key Features:** - -- **No labeled data** - MCP•RL learns what tasks a server will be used for by analyzing its tools -- **General-purpose** - Optimizes models for any MCP server -- **Strong performance** - Matches or exceeds SOTA performance in 2/3 benchmarks -- **Easy integration** - No customization of your MCP server required! 
+- **Automatic behavior improvement** - Train agents to get better at multi-step reasoning +- **Tool usage optimization** - Learn when and how to use tools more effectively +- **Seamless integration** - Drop-in replacement for LangGraph's LLM initialization +- **RULER compatibility** - Train without hand-crafted reward functions ```python -from art.rewards import ruler_score_group +import art +from art.langgraph import wrap_rollout, init_chat_model +from langgraph.prebuilt import create_react_agent -# Specialize a model for NWS MCP server -MCP_SERVER_URL = "https://server.smithery.ai/@smithery-ai/national-weather-service/mcp" +# Your existing tools +tools = [search_inbox, read_email, return_final_answer] -# Generate training scenarios based on MCP tools -scenarios = await generate_scenarios( - num_scenarios=24, - server_url=MCP_SERVER_URL, -) +@wrap_rollout(model) +async def run_agent(scenario: str) -> art.Trajectory: + # Create LangGraph agent with ART's LLM wrapper + agent = create_react_agent(init_chat_model(), tools) -# ...run the agent... + result = await agent.ainvoke({"messages": [("user", scenario)]}) + return art.Trajectory() # Automatically captured -# Use RULER to assign relative scores to each trajectory -scored_groups = [] -for group in groups: - judged_group = await ruler_score_group(group) - scored_groups.append(judged_group) - -# Train the model to improve performance on the MCP server -await model.train(scored_groups) +# Train with RULER - no reward engineering needed! +await art.train(model, reward_function="ruler") ``` +[📖 Learn more about LangGraph integration →](https://art.openpipe.ai/integrations/langgraph-integration) + ## ART Overview ART is an open-source RL framework that improves agent reliability by allowing LLMs to **learn from experience**. ART provides an ergonomic harness for integrating GRPO into any python application. For a quick hands-on introduction, run one of the notebooks below. 
When you're ready to learn more, check out the [docs](https://art.openpipe.ai). From ad78801960e75cc521381b5a7e2f06d47c01af11 Mon Sep 17 00:00:00 2001 From: arcticfly Date: Mon, 25 Aug 2025 13:10:02 -0700 Subject: [PATCH 2/3] Link to Notebook --- README.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 933d982ab..721df18a3 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ async def run_agent(scenario: str) -> art.Trajectory: await art.train(model, reward_function="ruler") ``` -[📖 Learn more about LangGraph integration →](https://art.openpipe.ai/integrations/langgraph-integration) +[📖 Learn more about LangGraph integration →](https://art.openpipe.ai/integrations/langgraph-integration) | [🏋️ Try the notebook →](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/langgraph/art-e-langgraph.ipynb) ## ART Overview @@ -60,15 +60,16 @@ ART is an open-source RL framework that improves agent reliability by allowing L ## 📒 Notebooks -| Agent Task | Example Notebook | Description | Comparative Performance | -| ------------------ | -------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **MCP•RL** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb) | Qwen 2.5 3B masters the NWS MCP server | [Link coming soon] | -| **ART•E [RULER]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen 2.5 7B learns to search emails using RULER | [benchmarks](/examples/art-e/art_e/evaluate/display_benchmarks.ipynb) | -| 
**2048** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen 2.5 3B learns to play 2048 | [benchmarks](/examples/2048/benchmark_2048.ipynb) | -| **Temporal Clue** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/temporal_clue/temporal-clue.ipynb) | Qwen 2.5 7B learns to solve Temporal Clue | [Link coming soon] | -| **Tic Tac Toe** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/tic_tac_toe/tic-tac-toe.ipynb) | Qwen 2.5 3B learns to play Tic Tac Toe | [benchmarks](/examples/tic_tac_toe/benchmark_tic_tac_toe.ipynb) | -| **Codenames** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/codenames/Codenames_RL.ipynb) | Qwen 2.5 3B learns to play Codenames | [benchmarks](/examples/codenames/Codenames_RL.ipynb) | -| **AutoRL [RULER]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/auto_rl.ipynb) | Train Qwen 2.5 7B to master any task | [Link coming soon] | +| Agent Task | Example Notebook | Description | Comparative Performance | +| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **ART•E LangGraph** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/langgraph/art-e-langgraph.ipynb) | Qwen 2.5 7B learns to search emails using LangGraph | [Link coming soon] | +| **MCP•RL** | [🏋️ Train 
agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb) | Qwen 2.5 3B masters the NWS MCP server | [Link coming soon] | +| **ART•E [RULER]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen 2.5 7B learns to search emails using RULER | [benchmarks](/examples/art-e/art_e/evaluate/display_benchmarks.ipynb) | +| **2048** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen 2.5 3B learns to play 2048 | [benchmarks](/examples/2048/benchmark_2048.ipynb) | +| **Temporal Clue** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/temporal_clue/temporal-clue.ipynb) | Qwen 2.5 7B learns to solve Temporal Clue | [Link coming soon] | +| **Tic Tac Toe** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/tic_tac_toe/tic-tac-toe.ipynb) | Qwen 2.5 3B learns to play Tic Tac Toe | [benchmarks](/examples/tic_tac_toe/benchmark_tic_tac_toe.ipynb) | +| **Codenames** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/codenames/Codenames_RL.ipynb) | Qwen 2.5 3B learns to play Codenames | [benchmarks](/examples/codenames/Codenames_RL.ipynb) | +| **AutoRL [RULER]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/auto_rl.ipynb) | Train Qwen 2.5 7B to master any task | [Link coming soon] | ## 📰 ART News From 54dc51dd8e9f283d91e91f7f239fc03c1e5f9935 Mon Sep 17 00:00:00 2001 From: arcticfly Date: Mon, 25 Aug 2025 13:51:45 -0700 Subject: [PATCH 3/3] Add link to integrations --- docs/docs.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/docs.json b/docs/docs.json index b27b0b825..e53789dff 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -67,6 +67,12 @@ 
"features/additional-histories" ] }, + { + "group": "Integrations", + "pages": [ + "integrations/langgraph-integration" + ] + }, { "group": "Tutorials", "pages": [