Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
4cd1b3c
Enhance WandbDeploymentConfig to support additional LoRA models and c…
bradhilton Dec 30, 2025
1a4e0d3
Refactor training logic by introducing shared utilities for batch pro…
bradhilton Dec 30, 2025
62064f7
Refactor health check logic in LocalBackend to include consecutive fa…
bradhilton Dec 30, 2025
054da3f
Update dependencies in pyproject.toml and uv.lock; bump versions for …
bradhilton Dec 30, 2025
3759719
Update dependencies in pyproject.toml and uv.lock; bump versions for …
bradhilton Dec 31, 2025
842afe2
Update yes-no-maybe.py to use Qwen3-30B-A3B-Instruct model and reintr…
bradhilton Dec 31, 2025
9d65f7c
Implement CPU offloading and reloading for training models in Decoupl…
bradhilton Dec 31, 2025
7a1848e
Enhance CPU offloading and reloading in UnslothState to utilize pinne…
bradhilton Dec 31, 2025
6107747
Refactor version comparison in get_model_config.py to utilize packagi…
bradhilton Dec 31, 2025
2af1940
Refactor imports and remove unused decoupling logic in get_model_conf…
bradhilton Jan 1, 2026
dd4e829
Remove unused debug logging and decoupled service logic in UnslothSta…
bradhilton Jan 1, 2026
9eca6e7
Refactor import statements in vllm modules for improved clarity and c…
bradhilton Jan 1, 2026
08f23d3
Update environment variable handling in __init__.py to remove unsuppo…
bradhilton Jan 1, 2026
a73fa92
Refactor Unsloth service architecture by consolidating TrainInputs an…
bradhilton Jan 1, 2026
ac263b4
Refactor import statements in profile.ipynb and reference_grpo_traine…
bradhilton Jan 1, 2026
581a453
Enhance guided completion functionality by asserting tool call type i…
bradhilton Jan 1, 2026
3804781
Refactor guided_completion.py to improve assertion formatting for too…
bradhilton Jan 1, 2026
803ff7a
Remove unused whitespace in patches.py to improve code readability an…
bradhilton Jan 1, 2026
39f12fc
Enhance auto_trajectory.py and openai.py by adding SSE parsing for ch…
bradhilton Jan 1, 2026
58d4e6a
Update yes-no-maybe.py to enhance model configuration by introducing …
bradhilton Jan 1, 2026
9ab7a7f
Update test_tokenize_trajectory_groups.ipynb to enhance execution met…
bradhilton Jan 1, 2026
6dc07a9
Update pyproject.toml to include 'art' as a known first-party package…
bradhilton Jan 1, 2026
c926910
Enhance import organization across multiple files by adding missing i…
bradhilton Jan 1, 2026
13a095b
Update execution metadata timestamps in test_tokenize_trajectory_grou…
bradhilton Jan 1, 2026
5a4bc2b
Refactor test_tokenize_trajectory_groups.ipynb by removing redundant …
bradhilton Jan 1, 2026
9797261
Add internal model configuration to yes-no-maybe.py for enhanced engi…
bradhilton Jan 1, 2026
a21d419
Merge branch 'main' into fix/decoupled-unsloth-service
bradhilton Jan 2, 2026
2731c59
Merge branch 'main' into fix/decoupled-unsloth-service
bradhilton Jan 2, 2026
6bbceda
Enhance import organization by adding missing imports and adjusting t…
bradhilton Jan 2, 2026
dae923a
Refactor trajectory logging to support new message types and improve …
bradhilton Jan 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dev/playwright_agent/playwright_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

import argparse
import asyncio
from dataclasses import dataclass
import json
import math
import os
import time
import uuid
from dataclasses import dataclass
from typing import Any, Dict, List
import uuid

from dotenv import load_dotenv
from mcp import ClientSession, StdioServerParameters
Expand Down
2 changes: 1 addition & 1 deletion dev/swebench/logs.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging
from logging import Handler, LogRecord

import litellm
from langfuse import Langfuse
from langfuse.decorators import langfuse_context
from langfuse.types import SpanLevel
import litellm
from sweagent.agent.agents import DefaultAgent
from sweagent.run.hooks.apply_patch import SaveApplyPatchHook

Expand Down
6 changes: 3 additions & 3 deletions dev/swebench/rollout.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import asyncio
import json
from http.client import RemoteDisconnected
import json
from pathlib import Path
from typing import Any, Literal, overload

import modal
import requests
from aiolimiter import AsyncLimiter
from config import get_config
from eval import eval_instance
from grpclib.exceptions import StreamTerminatedError
from instances import Instance
from langfuse.decorators import observe
from logs import setup_agent_logger
import modal
from pydantic import BaseModel
import requests
from requests import adapters as requests_adapters
from requests.exceptions import ConnectionError, ConnectTimeout, SSLError
from run import run
Expand Down
2 changes: 1 addition & 1 deletion dev/swebench/sandbox/test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from dotenv import load_dotenv
import pytest

from ..instances import as_instances_iter, get_filtered_swe_smith_instances_df
from .new import new_sandbox
Expand Down
4 changes: 2 additions & 2 deletions dev/swebench/train.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
"source": [
"import os\n",
"\n",
"import nest_asyncio\n",
"from dotenv import load_dotenv\n",
"import nest_asyncio\n",
"\n",
"# PREPARE ENVIRONMENT\n",
"os.environ[\"MODAL_IMAGE_BUILDER_VERSION\"] = \"2024.10\"\n",
Expand All @@ -60,8 +60,8 @@
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"from rollout import ModelConfig\n",
"import torch\n",
"\n",
"import art\n",
"from art.local import LocalBackend\n",
Expand Down
4 changes: 2 additions & 2 deletions dev/tau-bench/analyze_model_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

import argparse
import asyncio
from datetime import datetime
import json
import os
from datetime import datetime
from typing import Any, Dict, List

import litellm
from dotenv import load_dotenv
import litellm
from litellm import provider_list

# Import evaluate_model and rollout functions from run_rl
Expand Down
2 changes: 1 addition & 1 deletion dev/tau-bench/auto_error_identification.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Copyright Sierra

import argparse
import json
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
import json
from typing import Any, Dict, List

from pydantic import BaseModel
Expand Down
4 changes: 2 additions & 2 deletions dev/tau-bench/benchmark_rl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

import argparse
import asyncio
from datetime import datetime
import json
import os
from datetime import datetime
from typing import Any, Dict, List

import litellm
from dotenv import load_dotenv
import litellm
from litellm import provider_list

# Import evaluate_model and rollout functions from run_rl
Expand Down
2 changes: 1 addition & 1 deletion dev/tau-bench/run.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"from dotenv import load_dotenv\n",
"from run import RunConfig\n",
"from run_rl import train\n",
"from tau_bench.types import TauBenchPolicyConfig, TauBenchTrainingConfig\n",
"import torch\n",
"\n",
"import art\n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion dev/tau-bench/run_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import textwrap
import traceback

import sky
from dotenv import dotenv_values
import sky
from sky import ClusterStatus
from tau_bench.types import RunConfig, TauBenchPolicyConfig, TauBenchTrainingConfig

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright Sierra

import json
from copy import deepcopy
import json
from typing import Any, Dict, List

from tau_bench.envs.tool import Tool
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright Sierra

import json
from copy import deepcopy
import json
from typing import Any, Dict, List

from tau_bench.envs.tool import Tool
Expand Down
2 changes: 1 addition & 1 deletion dev/tau-bench/tau_bench/envs/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright Sierra

import random
from hashlib import sha256
import random
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union

from tau_bench.envs.tool import Tool
Expand Down
4 changes: 2 additions & 2 deletions dev/tau-bench/tau_bench/model_utils/api/cache.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from collections import defaultdict
import functools
import hashlib
import inspect
import threading
from collections import defaultdict
from multiprocessing import Lock
import threading
from typing import Any, Callable, TypeVar

from pydantic import BaseModel
Expand Down
2 changes: 1 addition & 1 deletion dev/tau-bench/tau_bench/model_utils/api/exception.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from concurrent.futures import ThreadPoolExecutor
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Callable, TypeVar

from tau_bench.model_utils.model.exception import ModelError, Result
Expand Down
4 changes: 2 additions & 2 deletions dev/tau-bench/tau_bench/rl_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from datetime import datetime
import os
import time
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional
import uuid

from langfuse import Langfuse
from openai import AsyncOpenAI
Expand Down
8 changes: 4 additions & 4 deletions dev/tau-bench/tau_bench/run.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Copyright Sierra
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
import json
from math import comb
import multiprocessing
import os
import random
import traceback
import warnings
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from math import comb
from typing import Any, Dict, List
import warnings

from langfuse import Langfuse
from litellm import provider_list
Expand Down
2 changes: 1 addition & 1 deletion dev/yes-no-maybe-vision/train.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
"metadata": {},
"outputs": [],
"source": [
"import openai\n",
"from dotenv import load_dotenv\n",
"from generate_images import generate_yes_no_maybe_prompts, save_prompt_images\n",
"import openai\n",
"\n",
"import art\n",
"from art.local import LocalBackend\n",
Expand Down
33 changes: 6 additions & 27 deletions dev/yes-no-maybe.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -12,30 +12,9 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>\n",
".cell-output-ipywidget-background {\n",
" background-color: transparent !important;\n",
"}\n",
":root {\n",
" --jp-widgets-color: var(--vscode-editor-foreground);\n",
" --jp-widgets-font-size: var(--vscode-editor-font-size);\n",
"} \n",
"</style>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"%%html\n",
"<style>\n",
Expand All @@ -57,8 +36,8 @@
"source": [
"from itertools import permutations\n",
"\n",
"import openai\n",
"from dotenv import load_dotenv\n",
"import openai\n",
"\n",
"import art\n",
"from art.local import LocalBackend\n",
Expand All @@ -67,7 +46,7 @@
"\n",
"backend = LocalBackend()\n",
"model = art.TrainableModel(\n",
" name=\"009\",\n",
" name=\"010\",\n",
" project=\"yes-no-maybe\",\n",
" base_model=\"Qwen/Qwen2.5-7B-Instruct\",\n",
" # _internal_config=art.dev.InternalModelConfig(\n",
Expand Down Expand Up @@ -149,7 +128,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
90 changes: 90 additions & 0 deletions dev/yes-no-maybe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import asyncio
from itertools import permutations
import os

from dotenv import load_dotenv
import openai

import art
from art.local import LocalBackend


async def rollout(client: openai.AsyncOpenAI, prompt: str) -> art.Trajectory:
    """Run one chat completion for *prompt* and score the reply.

    Reward schedule (exact string match): "maybe" -> 1.0, "no" -> 0.75,
    "yes" -> 0.5, anything else -> 0.0. Reads the module-level ``model``
    (published by ``main``) for the model name to query.
    """
    user_messages: art.Messages = [{"role": "user", "content": prompt}]
    completion = await client.chat.completions.create(
        messages=user_messages, model=model.name, max_tokens=100, timeout=100
    )
    top_choice = completion.choices[0]
    reply = top_choice.message.content
    assert isinstance(reply, str)
    # Exact-match reward table; any unlisted reply earns zero.
    reward = {"yes": 0.5, "no": 0.75, "maybe": 1.0}.get(reply, 0.0)
    return art.Trajectory(
        messages_and_choices=[*user_messages, top_choice], reward=reward
    )


def with_quotes(w: str) -> str:
    """Wrap *w* in single quotes, e.g. ``yes`` -> ``'yes'``."""
    return "'{}'".format(w)


async def main():
    """Train the toy yes-no-maybe model for a few steps against a LocalBackend.

    Environment overrides: BASE_MODEL, MODEL_NAME, NUM_STEPS.
    """
    load_dotenv()

    backend = LocalBackend()
    # rollout() reads the module-level `model`, so publish it globally here.
    global model
    base_model = os.environ.get("BASE_MODEL", "Qwen/Qwen3-30B-A3B-Instruct-2507")
    model = art.TrainableModel(
        name=os.environ.get("MODEL_NAME", "011"),
        project="yes-no-maybe",
        base_model=base_model,
        # Rank-1 LoRA keeps the adapter as small as possible for this toy task.
        _internal_config=art.dev.InternalModelConfig(
            engine_args=art.dev.EngineArgs(
                max_lora_rank=1,
            ),
            peft_args=art.dev.PeftArgs(
                r=1,
            ),
        ),
    )
    await model.register(backend)

    # Prompt variants: each 3- and 2-word permutation of yes/no/maybe, under two
    # prefixes, with and without quoting.
    # NOTE(review): `use_quotes` only affects the 3-word case; 2-word prompts
    # are always unquoted and therefore duplicated across the use_quotes loop —
    # confirm this weighting is intentional.
    prompts = [
        f"{prefix} with {', '.join([with_quotes(w) if use_quotes else w for w in words]) if len(words) == 3 else f'{words[0]}' + (f' or {words[1]}' if len(words) > 1 else '')}"
        for prefix in ["respond", "just respond"]
        for use_quotes in [True, False]
        for words in (
            list(p) for n in [3, 2] for p in permutations(["yes", "no", "maybe"], n)
        )
    ]

    openai_client = model.openai_client()
    max_steps = int(os.environ.get("NUM_STEPS", "4"))
    # Resume from the model's last recorded step so reruns continue training
    # rather than starting over.
    start_step = await model.get_step()
    for _ in range(start_step, start_step + max_steps):
        # 32 rollouts per prompt; each prompt's rollouts form one trajectory group.
        train_groups = await art.gather_trajectory_groups(
            (
                art.TrajectoryGroup(rollout(openai_client, prompt) for _ in range(32))
                for prompt in prompts
            )
        )
        await model.train(
            train_groups,
            config=art.TrainConfig(learning_rate=1e-4),
            # _config=art.dev.TrainConfig(
            #     precalculate_logprobs=True,
            # ),
        )


if __name__ == "__main__":
asyncio.run(main())
Loading