diff --git a/README.md b/README.md
index 5579c59dc..c58a67a93 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ from art.serverless.backend import ServerlessBackend
model = art.TrainableModel(
project="voice-agent",
name="agent-001",
- base_model="Qwen/Qwen2.5-14B-Instruct"
+ base_model="OpenPipe/Qwen3-14B-Instruct"
)
backend = ServerlessBackend(
@@ -62,8 +62,8 @@ ART is an open-source RL framework that improves agent reliability by allowing L
| Agent Task | Example Notebook | Description | Comparative Performance |
| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| **ART•E [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen 2.5 14B learns to search emails using RULER | [benchmarks](/dev/art-e/art_e/evaluate/display_benchmarks.ipynb) |
-| **2048 [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen 2.5 14B learns to play 2048 | [benchmarks](/examples/2048/display_benchmarks.ipynb) |
+| **ART•E [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen3 14B learns to search emails using RULER | [benchmarks](/dev/art-e/art_e/evaluate/display_benchmarks.ipynb) |
+| **2048 [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen3 14B learns to play 2048 | [benchmarks](/examples/2048/display_benchmarks.ipynb) |
| **ART•E LangGraph** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/langgraph/art-e-langgraph.ipynb) | Qwen 2.5 7B learns to search emails using LangGraph | [Link coming soon] |
| **MCP•RL** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb) | Qwen 2.5 3B masters the NWS MCP server | [Link coming soon] |
| **Temporal Clue** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/temporal_clue/temporal-clue.ipynb) | Qwen 2.5 7B learns to solve Temporal Clue | [Link coming soon] |
diff --git a/docs/features/checkpoint-deletion.mdx b/docs/features/checkpoint-deletion.mdx
index d58ddff40..e8d81f8d4 100644
--- a/docs/features/checkpoint-deletion.mdx
+++ b/docs/features/checkpoint-deletion.mdx
@@ -17,7 +17,7 @@ from art.serverless.backend import ServerlessBackend
model = art.TrainableModel(
name="agent-001",
project="checkpoint-deletion-demo",
- base_model="Qwen/Qwen2.5-14B-Instruct",
+ base_model="OpenPipe/Qwen3-14B-Instruct",
)
backend = ServerlessBackend()
# in order for the model to know where to look for its existing checkpoints,
@@ -55,7 +55,7 @@ TRAINING_STEPS = 50
model = art.TrainableModel(
name="agent-001",
project="checkpoint-deletion-demo",
- base_model="Qwen/Qwen2.5-14B-Instruct",
+ base_model="OpenPipe/Qwen3-14B-Instruct",
)
backend = ServerlessBackend()
await model.register(backend)
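+
+# A rough sketch of the training loop this setup feeds into. Assumption:
+# delete_checkpoints() prunes stale checkpoints after each step so that
+# storage doesn't grow unbounded; see the rest of this page for details.
+for step in range(await model.get_step(), TRAINING_STEPS):
+    # ... gather trajectory groups and call model.train(...) here ...
+    await model.delete_checkpoints()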
diff --git a/docs/features/checkpoint-forking.mdx b/docs/features/checkpoint-forking.mdx
index f7e1cf4df..c3d3603df 100644
--- a/docs/features/checkpoint-forking.mdx
+++ b/docs/features/checkpoint-forking.mdx
@@ -36,7 +36,7 @@ async def train():
model = art.TrainableModel(
name="my-model-v2",
project="my-project",
- base_model="Qwen/Qwen2.5-14B-Instruct",
+ base_model="OpenPipe/Qwen3-14B-Instruct",
)
# Copy the checkpoint from another model
@@ -104,14 +104,14 @@ Here's a practical example of using checkpoint forking to test a lower learning
base_model = art.TrainableModel(
name="summarizer-base",
project="experiments",
- base_model="Qwen/Qwen2.5-14B-Instruct",
+ base_model="OpenPipe/Qwen3-14B-Instruct",
)
# Fork at step 1000 to try lower learning rate
low_lr_model = art.TrainableModel(
name="summarizer-low-lr",
project="experiments",
- base_model="Qwen/Qwen2.5-14B-Instruct",
+ base_model="OpenPipe/Qwen3-14B-Instruct",
)
async def experiment():
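+    # Hedged sketch of the fork step; the `_experimental_fork_checkpoint`
+    # name and parameters below are assumptions drawn from this page and
+    # may differ across ART versions.
+    await backend._experimental_fork_checkpoint(
+        low_lr_model,
+        from_model="summarizer-base",
+        from_project="experiments",
+        not_after_step=1000,
+    )
+    # ... then train low_lr_model from step 1000 with the lower learning rate ...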
diff --git a/docs/features/mcp-rl.mdx b/docs/features/mcp-rl.mdx
index fe70cffd3..a9fe2830f 100644
--- a/docs/features/mcp-rl.mdx
+++ b/docs/features/mcp-rl.mdx
@@ -104,8 +104,8 @@ from art.rewards import ruler_score_group
from art import gather_trajectory_groups
# Initialize the model
-model = art.RemoteModel(
- model="Qwen/Qwen2.5-3B-Instruct",
+model = art.TrainableModel(
+    base_model="OpenPipe/Qwen3-14B-Instruct",
openrouter_api_key="your_openrouter_key"
)
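+
+# A sketch of how the imports above fit together; `rollout`, `scenarios`,
+# the group size, and the judge model id are illustrative assumptions.
+groups = await gather_trajectory_groups(
+    art.TrajectoryGroup(rollout(model, scenario) for _ in range(4))
+    for scenario in scenarios
+)
+scored = [await ruler_score_group(group, "openai/o3") for group in groups]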
diff --git a/docs/fundamentals/art-client.mdx b/docs/fundamentals/art-client.mdx
index 431f78d95..7374354d4 100644
--- a/docs/fundamentals/art-client.mdx
+++ b/docs/fundamentals/art-client.mdx
@@ -53,7 +53,7 @@ model = art.TrainableModel(
# for a given task to consistently group metrics
project="my-agentic-task",
# the model that you want to train from
- base_model="Qwen/Qwen2.5-14B-Instruct",
+ base_model="OpenPipe/Qwen3-14B-Instruct",
)
```
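+
+Once the model is registered with a backend, requests can be routed through an OpenAI-compatible client. A minimal sketch, assuming the `openai_client()` helper and that the served model id matches `model.name`:
+
+```python
+client = model.openai_client()
+response = await client.chat.completions.create(
+    model=model.name,  # assumption: served under the trainable model's name
+    messages=[{"role": "user", "content": "Hello!"}],
+)
+```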
diff --git a/docs/getting-started/installation-setup.mdx b/docs/getting-started/installation-setup.mdx
index 43aa34752..e50b15327 100644
--- a/docs/getting-started/installation-setup.mdx
+++ b/docs/getting-started/installation-setup.mdx
@@ -31,7 +31,7 @@ backend = LocalBackend()
model = TrainableModel(
name="agent-001",
project="my-agentic-task",
- base_model="Qwen/Qwen2.5-14B-Instruct",
+ base_model="OpenPipe/Qwen3-14B-Instruct",
)
await model.register(backend)
@@ -57,7 +57,7 @@ backend = ServerlessBackend()
model = TrainableModel(
name="agent-001",
project="my-agentic-task",
- base_model="Qwen/Qwen2.5-14B-Instruct",
+ base_model="OpenPipe/Qwen3-14B-Instruct",
)
await model.register(backend)
@@ -87,7 +87,7 @@ backend = await SkyPilotBackend.initialize_cluster(
model = TrainableModel(
name="agent-001",
project="my-agentic-task",
- base_model="Qwen/Qwen2.5-14B-Instruct",
+ base_model="OpenPipe/Qwen3-14B-Instruct",
)
await model.register(backend)
diff --git a/docs/getting-started/notebooks.mdx b/docs/getting-started/notebooks.mdx
index a3e5477a6..c6c8877d5 100644
--- a/docs/getting-started/notebooks.mdx
+++ b/docs/getting-started/notebooks.mdx
@@ -9,13 +9,13 @@ icon: "book"
| Agent Task | Notebook | Description | Performance |
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| **ART•E [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen 2.5 14B learns to search emails using RULER | |
-| **2048 [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen 2.5 14B learns to play 2048 | |
-| **ART•E LangGraph** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/langgraph/art-e-langgraph.ipynb) | Qwen 2.5 7B learns to search emails using LangGraph | [Link coming soon] |
-| **MCP•RL** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb) | Qwen 2.5 3B masters the NWS MCP server | [Link coming soon] |
-| **Temporal Clue** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/temporal_clue/temporal-clue.ipynb) | Qwen 2.5 7B learns to solve Temporal Clue | [Link coming soon] |
-| **Tic Tac Toe** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/tic_tac_toe/tic-tac-toe.ipynb) | Qwen 2.5 3B learns to play Tic Tac Toe | |
-| **Codenames** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/codenames/Codenames_RL.ipynb) | Qwen 2.5 3B learns to play Codenames | |
-| **AutoRL [RULER]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/auto_rl.ipynb) | Train Qwen 2.5 7B to master any task | [Link coming soon] |
+| **ART•E [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/art-e.ipynb) | Qwen3 14B learns to search emails using RULER | |
+| **2048 [Serverless]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/2048/2048.ipynb) | Qwen3 14B learns to play 2048 | |
+| **ART•E LangGraph** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/langgraph/art-e-langgraph.ipynb) | Qwen2.5 7B learns to search emails using LangGraph | [Link coming soon] |
+| **MCP•RL** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/mcp-rl/mcp-rl.ipynb) | Qwen2.5 3B masters the NWS MCP server | [Link coming soon] |
+| **Temporal Clue** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/temporal_clue/temporal-clue.ipynb) | Qwen2.5 7B learns to solve Temporal Clue | [Link coming soon] |
+| **Tic Tac Toe** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/tic_tac_toe/tic-tac-toe.ipynb) | Qwen2.5 3B learns to play Tic Tac Toe | |
+| **Codenames** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/codenames/Codenames_RL.ipynb) | Qwen2.5 3B learns to play Codenames | |
+| **AutoRL [RULER]** | [🏋️ Train agent](https://colab.research.google.com/github/openpipe/art-notebooks/blob/main/examples/auto_rl.ipynb) | Train Qwen2.5 7B to master any task | [Link coming soon] |
diff --git a/docs/getting-started/quick-start.mdx b/docs/getting-started/quick-start.mdx
index 9d6370824..58eb0ccf0 100644
--- a/docs/getting-started/quick-start.mdx
+++ b/docs/getting-started/quick-start.mdx
@@ -4,7 +4,7 @@ description: "Get started with ART in a few quick steps."
icon: "forward"
---
-In this Quick Start tutorial, we'll be training Qwen 2.5 14B to play [2048](https://play2048.co/), a simple game that requires forward planning and basic math skills.
+In this Quick Start tutorial, we'll be training Qwen3 14B Instruct to play [2048](https://play2048.co/), a simple game that requires forward planning and basic math skills.
diff --git a/docs/resources/models.mdx b/docs/resources/models.mdx
index faf701672..ce5b1e1f9 100644
--- a/docs/resources/models.mdx
+++ b/docs/resources/models.mdx
@@ -5,13 +5,21 @@ description: "Train open source models on ART."
icon: "robot"
---
-## Recommended Models
+## Serverless Models
-- [Qwen 2.5 14B Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct)
+We currently support only the following model for serverless training. We are actively adding support for both larger and smaller models. If there's a particular model you'd like to see serverless support for, please send a request to support@wandb.com.
+
+- [OpenPipe Qwen 3 14B Instruct](https://huggingface.co/OpenPipe/Qwen3-14B-Instruct)
- Good balance of performance and size. Has support for tool calling and generally trains well. This is our recommended model for users new to RL.
-- [Qwen 2.5 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct)
+
+## Recommended Local Models
+
+If you're developing locally or on your own hardware, here are a couple of other models you could try in addition to the recommended serverless model listed above.
+
+- [Qwen2.5 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct)
- Less capable than 14B, but smaller and faster
-- [Qwen 2.5 32B Instruct](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct)
+- [Qwen2.5 32B Instruct](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct)
- More capable than 14B, but larger and slower
## More Models
@@ -24,7 +32,7 @@ Here are additional models that we've tested and found to work well with ART:
- [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct)
- [Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct)
- [Llama 3.3 70B Instruct](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct)
-- [Qwen 2.5 72B Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct)
+- [Qwen2.5 72B Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct)
- Additionally, the [Qwen 3](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) family of models is well supported for single-turn workflows. For multi-turn workflows the Qwen 3 chat template removes the `<think>` tokens from previous turns, which makes training more complicated. It is still possible to use these models for multi-turn workflows by splitting each turn into a separate message history with our `additional_histories` trajectory parameter (see [Additional Histories](/features/additional-histories)).
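+
+A minimal sketch of that `additional_histories` pattern, assuming `art.History` and the field names below (consult [Additional Histories](/features/additional-histories) for the exact shape):
+
+```python
+import art
+
+traj = art.Trajectory(
+    # Turn 1 lives in the main history.
+    messages_and_choices=[
+        {"role": "user", "content": "turn 1 prompt"},
+        {"role": "assistant", "content": "turn 1 answer"},  # a Choice in real rollouts
+    ],
+    # Turn 2 becomes its own history, so the Qwen 3 chat template never
+    # re-renders turn 1 and its <think> tokens are never stripped.
+    additional_histories=[
+        art.History(
+            messages_and_choices=[
+                {"role": "user", "content": "turn 2 prompt"},
+                {"role": "assistant", "content": "turn 2 answer"},
+            ],
+        ),
+    ],
+    reward=0.0,
+)
+```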
If you're curious about a model that is not listed above, ask in the Discord [#support](https://discord.com/channels/1359674493949448375/1359674622965973185) channel.
diff --git a/docs/tutorials/open-deep-research.mdx b/docs/tutorials/open-deep-research.mdx
index cdfd53be3..27f63254f 100644
--- a/docs/tutorials/open-deep-research.mdx
+++ b/docs/tutorials/open-deep-research.mdx
@@ -5,11 +5,11 @@ description: "Train a deep research agent to exceed SOTA performance using GRPO
icon: "magnifying-glass"
---
-This tutorial demonstrates how to train your own deep research agent using GRPO to exceed Sonnet 4's perfromance. Specifically, you will be using the [ART](https://github.com/OpenPipe/ART) library to specialize Qwen 2.5 14B for [Langchain's open deep research](https://github.com/langchain-ai/open_deep_research) framework, and will evaluate your agent's performance using [DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents](https://github.com/Ayanami0730/deep_research_bench).
+This tutorial demonstrates how to train your own deep research agent using GRPO to exceed Sonnet 4's performance. Specifically, you will be using the [ART](https://github.com/OpenPipe/ART) library to specialize Qwen2.5 14B for [LangChain's open deep research](https://github.com/langchain-ai/open_deep_research) framework, and will evaluate your agent's performance using [DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents](https://github.com/Ayanami0730/deep_research_bench).
In addition to the GRPO training step, you will also run an initial SFT training run to improve the model's baseline performance.
-
+
