diff --git a/dev/math-vista/math-vista.ipynb b/dev/math-vista/math-vista.ipynb
new file mode 100644
index 000000000..a33ba5614
--- /dev/null
+++ b/dev/math-vista/math-vista.ipynb
@@ -0,0 +1,207 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "46a6ad6d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "96d51078",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%html\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7dd70e04",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import polars as pl\n",
+ "\n",
+ "splits = {\n",
+ " \"testmini\": \"data/testmini-00000-of-00001-725687bf7a18d64b.parquet\",\n",
+ " \"test\": \"data/test-*.parquet\",\n",
+ "}\n",
+ "df = pl.read_parquet(\"hf://datasets/AI4Math/MathVista/\" + splits[\"testmini\"]).sample(\n",
+ " fraction=1.0, shuffle=True, seed=42\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "81e02b97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import Iterator, TypedDict, cast\n",
+ "\n",
+ "\n",
+ "class DecodedImage(TypedDict):\n",
+ " bytes: bytes\n",
+ "\n",
+ "\n",
+ "class Scenario(TypedDict):\n",
+ " pid: int\n",
+ " question: str\n",
+ " answer: str\n",
+ " image: str\n",
+ " decoded_image: DecodedImage\n",
+ "\n",
+ "\n",
+ "val_scenarios = cast(list[Scenario], df.head(64).to_dicts())\n",
+ "train_scenarios_iter = cast(Iterator[Scenario], df.tail(-64).iter_rows(named=True))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9287d8fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re\n",
+ "\n",
+ "import art\n",
+ "from art.local import LocalBackend\n",
+ "\n",
+ "model = art.TrainableModel(\n",
+ " name=\"002\",\n",
+ " project=\"math-vista\",\n",
+ " base_model=\"Qwen/Qwen2.5-VL-7B-Instruct\",\n",
+ ")\n",
+ "backend = LocalBackend()\n",
+ "await model.register(backend)\n",
+ "client = model.openai_client()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c92b4b11",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def rollout(scenario: Scenario) -> art.Trajectory:\n",
+ " image_path = f\"/tmp/{scenario['image']}\"\n",
+ "\n",
+ " import os\n",
+ "\n",
+ " os.makedirs(os.path.dirname(image_path), exist_ok=True)\n",
+ "\n",
+ " with open(image_path, \"wb\") as f:\n",
+ " f.write(scenario[\"decoded_image\"][\"bytes\"])\n",
+ "\n",
+ " trajectory = art.Trajectory(messages_and_choices=[], reward=0.0)\n",
+ " trajectory.messages_and_choices = [\n",
+ " {\n",
+ " \"role\": \"user\",\n",
+ " \"content\": [\n",
+ " {\n",
+ " \"type\": \"text\",\n",
+ " \"text\": scenario[\"question\"]\n",
+ " + \"\\n\\nNote: Provide your answer in a LaTeX box.\",\n",
+ " },\n",
+ " {\"type\": \"image_url\", \"image_url\": {\"url\": f\"file://{image_path}\"}},\n",
+ " ],\n",
+ " }\n",
+ " ]\n",
+ " chat_completion = await client.chat.completions.create(\n",
+ " model=model.name, messages=trajectory.messages()\n",
+ " )\n",
+ " choice = chat_completion.choices[0]\n",
+ " trajectory.messages_and_choices.append(choice)\n",
+ " content = choice.message.content\n",
+ " assert content is not None\n",
+ " if matches := list(re.finditer(r\"\\\\boxed\\{(.*?)\\}\", content, re.DOTALL)):\n",
+ " match = matches[-1]\n",
+ " answer = match.group(1)\n",
+ " if answer.lower() == scenario[\"answer\"].lower():\n",
+ " trajectory.reward = 1.0\n",
+ " return trajectory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "359e530d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import asyncio\n",
+ "import itertools\n",
+ "\n",
+ "SCENARIOS_PER_STEP = 8\n",
+ "TRAJECTORY_GROUP_SIZE = 8\n",
+ "start = await model.get_step()\n",
+ "train_scenarios_iter = itertools.cycle(train_scenarios_iter)\n",
+ "for _ in range(start * SCENARIOS_PER_STEP):\n",
+ " next(train_scenarios_iter)\n",
+ "\n",
+ "for i in range(start, 1000):\n",
+ " train_scenarios = [next(train_scenarios_iter) for _ in range(SCENARIOS_PER_STEP)]\n",
+ " val_trajectories, train_trajectory_groups = await asyncio.gather(\n",
+ " art.gather_trajectories(\n",
+ " (rollout(scenario) for scenario in val_scenarios),\n",
+ " pbar_desc=\"gather(val)\",\n",
+ " max_exceptions=32,\n",
+ " ),\n",
+ " art.gather_trajectory_groups(\n",
+ " (\n",
+ " art.TrajectoryGroup(\n",
+ " rollout(scenario) for _ in range(TRAJECTORY_GROUP_SIZE)\n",
+ " )\n",
+ " for scenario in train_scenarios\n",
+ " ),\n",
+ " pbar_desc=\"gather(train)\",\n",
+ " max_exceptions=32,\n",
+ " ),\n",
+ " )\n",
+ " await model.log(val_trajectories)\n",
+ " await model.train(train_trajectory_groups)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/dev/math-vista/math-vista.py b/dev/math-vista/math-vista.py
new file mode 100644
index 000000000..455bf764c
--- /dev/null
+++ b/dev/math-vista/math-vista.py
@@ -0,0 +1,136 @@
+import argparse
+import asyncio
+import itertools
+import os
+import re
+from typing import Iterator, TypedDict, cast
+
+import polars as pl
+
+import art
+from art.local import LocalBackend
+
+
+class DecodedImage(TypedDict):
+ bytes: bytes
+
+
+class Scenario(TypedDict):
+ pid: int
+ question: str
+ answer: str
+ image: str
+ decoded_image: DecodedImage
+
+
+async def main(model_name: str, steps: int) -> None:
+ # Load and shuffle the dataset
+ df = pl.read_parquet(
+ "hf://datasets/AI4Math/MathVista/data/testmini-00000-of-00001-725687bf7a18d64b.parquet"
+ ).sample(fraction=1.0, shuffle=True, seed=42)
+
+ val_scenarios = cast(list[Scenario], df.head(64).to_dicts())
+ train_scenarios_iter = cast(Iterator[Scenario], df.tail(-64).iter_rows(named=True))
+
+ # Initialize trainable model and backend
+ model = art.TrainableModel(
+ name=model_name,
+ project="math-vista",
+ base_model="Qwen/Qwen2.5-VL-7B-Instruct",
+ )
+
+ async def rollout(scenario: Scenario) -> art.Trajectory:
+ image_path = f"/tmp/{scenario['image']}"
+ os.makedirs(os.path.dirname(image_path), exist_ok=True)
+ with open(image_path, "wb") as f:
+ f.write(scenario["decoded_image"]["bytes"])
+
+ trajectory = art.Trajectory(messages_and_choices=[], reward=0.0)
+ trajectory.messages_and_choices = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": scenario["question"]
+ + "\n\nNote: Provide your answer in a LaTeX box.",
+ },
+ {"type": "image_url", "image_url": {"url": f"file://{image_path}"}},
+ ],
+ }
+ ]
+
+ chat_completion = await client.chat.completions.create(
+ model=model.name, messages=trajectory.messages()
+ )
+ choice = chat_completion.choices[0]
+ trajectory.messages_and_choices.append(choice)
+ content = choice.message.content
+ assert content is not None
+
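+        # Reward 1.0 when the last \boxed{...} answer matches the ground truth (case-insensitive)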
+ if matches := list(re.finditer(r"\\boxed\{(.*?)\}", content, re.DOTALL)):
+ match = matches[-1]
+ answer = match.group(1)
+ if answer.lower() == scenario["answer"].lower():
+ trajectory.reward = 1.0
+ return trajectory
+
+ SCENARIOS_PER_STEP = 8
+ TRAJECTORY_GROUP_SIZE = 8
+
+ with LocalBackend() as backend:
+ await model.register(backend)
+ client = model.openai_client()
+
+ start = await model.get_step()
+ train_scenarios_iter = itertools.cycle(train_scenarios_iter)
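+        # Skip scenarios already consumed in previous steps so resumed runs continue where they left off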
+ for _ in range(start * SCENARIOS_PER_STEP):
+ next(train_scenarios_iter)
+
+ # Training loop
+ for _ in range(start, steps):
+ train_scenarios = [
+ next(train_scenarios_iter) for _ in range(SCENARIOS_PER_STEP)
+ ]
+ val_trajectories, train_trajectory_groups = await asyncio.gather(
+ art.gather_trajectories(
+ (rollout(scenario) for scenario in val_scenarios),
+ pbar_desc="gather(val)",
+ max_exceptions=32,
+ ),
+ art.gather_trajectory_groups(
+ (
+ art.TrajectoryGroup(
+ rollout(scenario) for _ in range(TRAJECTORY_GROUP_SIZE)
+ )
+ for scenario in train_scenarios
+ ),
+ pbar_desc="gather(train)",
+ max_exceptions=32,
+ ),
+ )
+ await model.log(val_trajectories)
+ await model.train(train_trajectory_groups)
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(description="Minimal MathVista trainer script")
+ parser.add_argument(
+ "-n",
+ "--name",
+ required=True,
+ help="Run/model name to use for the TrainableModel",
+ )
+ parser.add_argument(
+ "-s",
+ "--steps",
+ type=int,
+ default=1000,
+ help="Number of training steps to run",
+ )
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ asyncio.run(main(args.name, args.steps))
diff --git a/dev/yes-no-maybe-vision/generate_images.py b/dev/yes-no-maybe-vision/generate_images.py
new file mode 100644
index 000000000..6e7410fb8
--- /dev/null
+++ b/dev/yes-no-maybe-vision/generate_images.py
@@ -0,0 +1,284 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Iterable, Sequence
+
+try:
+ from PIL import Image, ImageDraw, ImageFont
+except Exception as exc: # pragma: no cover - clear import guidance
+ raise RuntimeError("Pillow is required. Install with: pip install pillow") from exc
+
+
+# Minimal font loader: tries an explicit path, then DejaVuSans, then falls back to PIL's default bitmap font
+def _load_font(font_path: str | Path | None, preferred_size: int) -> Any:
+ candidates: list[str] = []
+ if font_path is not None:
+ candidates.append(str(font_path))
+ candidates.extend(
+ [
+ "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+ "DejaVuSans.ttf",
+ ]
+ )
+ for candidate in candidates:
+ try:
+ return ImageFont.truetype(candidate, preferred_size)
+ except Exception:
+ continue
+ # Fallback to PIL's default bitmap font so the script always runs
+ return ImageFont.load_default()
+
+
+
+def generate_yes_no_maybe_prompts() -> list[str]:
+ from itertools import permutations
+
+ prompts = []
+ for prefix in ("respond", "just respond"):
+ for n in (3, 2):
+ for words in permutations(("yes", "no", "maybe"), n):
+ prompts.append(
+ f"{prefix} with {', '.join(words)}"
+ if n == 3
+ else f"{prefix} with {words[0]} or {words[1]}"
+ )
+ return prompts
+
+
+def _wrap_text_to_width(
+ draw: ImageDraw.ImageDraw, text: str, font: Any, max_width: int
+) -> list[str]:
+ """Greedy word-wrapping that ensures each line fits within `max_width`."""
+ words = text.split()
+ if not words:
+ return [""]
+
+ lines: list[str] = []
+ current: list[str] = []
+ for word in words:
+ candidate = (" ".join(current + [word])).strip()
+ left, top, right, bottom = draw.textbbox((0, 0), candidate, font=font)
+ if right - left <= max_width or not current:
+ current.append(word)
+ else:
+ lines.append(" ".join(current))
+ current = [word]
+ if current:
+ lines.append(" ".join(current))
+ return lines
+
+
+
+def _max_fit_font_size(
+ draw: ImageDraw.ImageDraw,
+ text: str,
+ image_width: int,
+ image_height: int,
+ margin_px: int,
+ min_size: int = 12,
+ max_size: int | None = None,
+ font_path: str | Path | None = None,
+) -> Any:
+ """Binary search the largest font that fits the canvas with wrapping.
+
+    This aggressively grows the font size to fill the available space
+    (subject to margins) and returns the font at the largest size that still fits.
+ """
+ if max_size is None:
+ max_size = min(image_width, image_height)
+
+ low = min_size
+ high = max_size
+ best_font: Any = _load_font(font_path, min_size)
+
+ while low <= high:
+ mid = (low + high) // 2
+ font = _load_font(font_path, mid)
+ lines = _wrap_text_to_width(draw, text, font, image_width - 2 * margin_px)
+ # measure
+ max_line_width = 0
+ line_heights: list[int] = []
+ for line in lines:
+ left, top, right, bottom = draw.textbbox((0, 0), line, font=font)
+ max_line_width = max(max_line_width, int(right - left))
+ line_heights.append(int(bottom - top))
+ avg_line_height = (
+ int(sum(line_heights) / len(line_heights)) if line_heights else mid
+ )
+ line_spacing = max(1, avg_line_height // 4)
+ total_height = sum(line_heights) + (len(lines) - 1) * line_spacing
+
+ fits = (
+ max_line_width <= image_width - 2 * margin_px
+ and total_height <= image_height - 2 * margin_px
+ )
+ if fits:
+ best_font = font
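+            # step by 2 for a coarser, faster search; the last fitting font is kept as best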
+ low = mid + 2
+ else:
+ high = mid - 2
+
+ return best_font
+
+
+def save_prompt_images(
+ prompts: Sequence[str] | Iterable[str],
+ output_dir: str | Path,
+ image_size: tuple[int, int] = (512, 512),
+ margin_px: int = 16,
+ font_path: str | Path | None = None,
+ font_size: int | None = None,
+ text_color: tuple[int, int, int] = (0, 0, 0),
+ background_color: tuple[int, int, int] = (255, 255, 255),
+) -> list[Path]:
+ """
+    Render each prompt as centered text on a solid background (white by default) and save as PNG.
+
+ Args:
+ prompts: Sequence of prompt strings.
+ output_dir: Directory to write images into (created if missing).
+ image_size: (width, height) for output images.
+ margin_px: Padding inside the canvas for text layout.
+ font_path: Optional path to a .ttf/.otf font.
+ font_size: Optional explicit font size; if None, will auto-fit.
+ text_color: RGB color for text.
+ background_color: RGB background color.
+
+ Returns:
+ List of file Paths written.
+ """
+ output_path = Path(output_dir)
+ output_path.mkdir(parents=True, exist_ok=True)
+
+ written_paths: list[Path] = []
+ used_names: set[str] = set()
+ width, height = image_size
+
+ for idx, raw_prompt in enumerate(prompts):
+ prompt = str(raw_prompt).strip()
+ if not prompt:
+ continue
+
+ image = Image.new("RGB", (width, height), color=background_color)
+ draw = ImageDraw.Draw(image)
+
+ if font_size is not None:
+ font = _load_font(font_path, font_size)
+ else:
+ # Aggressively maximize font size within margins.
+ max_size = min(width, height)
+ font = _max_fit_font_size(
+ draw,
+ prompt,
+ width,
+ height,
+ margin_px,
+ min_size=12,
+ max_size=max_size,
+ font_path=font_path,
+ )
+
+ lines = _wrap_text_to_width(draw, prompt, font, width - 2 * margin_px)
+
+ # Single drawing path: measure, center, render
+ line_bboxes = [draw.textbbox((0, 0), line, font=font) for line in lines]
+ line_heights = [int(b[3] - b[1]) for b in line_bboxes]
+ max_line_width = max((b[2] - b[0]) for b in line_bboxes) if line_bboxes else 0
+ avg_line_height = (
+ int(sum(line_heights) / len(line_heights)) if line_heights else 0
+ )
+ line_spacing = max(1, avg_line_height // 4) if avg_line_height else 8
+ total_text_height = sum(line_heights) + (len(lines) - 1) * line_spacing
+
+ y_start = (height - total_text_height) // 2
+ cursor_y = y_start
+ for i, line in enumerate(lines):
+ bbox = draw.textbbox((0, 0), line, font=font)
+ line_width = int(bbox[2] - bbox[0])
+ x = (width - line_width) // 2
+ draw.text((x, cursor_y), line, font=font, fill=text_color)
+ cursor_y += line_heights[i]
+ if i < len(lines) - 1:
+ cursor_y += line_spacing
+
+ # Build a deterministic, safe filename
+ slug = _slugify(prompt)
+ base_name = slug if slug else f"prompt_{idx:04d}"
+ name = base_name
+ suffix_counter = 1
+ while name in used_names:
+ name = f"{base_name}_{suffix_counter}"
+ suffix_counter += 1
+ used_names.add(name)
+
+ out_path = output_path / f"{name}.png"
+ image.save(out_path, format="PNG")
+ written_paths.append(out_path)
+
+ return written_paths
+
+
+def _slugify(text: str, max_length: int = 80) -> str:
+ """Create a filesystem-friendly slug from text."""
+ import re
+
+ # Lowercase, replace whitespace with underscores, strip quotes, keep alnum and _-
+ cleaned = text.lower().strip()
+ cleaned = cleaned.replace("'", "").replace('"', "")
+ cleaned = re.sub(r"\s+", "_", cleaned)
+ cleaned = re.sub(r"[^a-z0-9_\-]", "", cleaned)
+ return cleaned[:max_length].strip("_-")
+
+
+if __name__ == "__main__":
+ # Generate prompts and render images
+ import argparse
+
+ parser = argparse.ArgumentParser(description="Render prompts as text images")
+ parser.add_argument(
+ "output_dir",
+ nargs="?",
+ default="dev/yes-no-maybe-vision/images",
+ help="Directory to write images (default: dev/yes-no-maybe-images)",
+ )
+ parser.add_argument(
+ "--size", type=int, default=None, help="Square px (overrides width/height)"
+ )
+ parser.add_argument(
+ "--width", type=int, default=None, help="Width px (default 256)"
+ )
+ parser.add_argument(
+ "--height", type=int, default=None, help="Height px (default 256)"
+ )
+ parser.add_argument("--margin", type=int, default=16, help="Margin px (default 16)")
+ parser.add_argument(
+ "--font-path", type=str, default=None, help="Path to .ttf/.otf font"
+ )
+ args = parser.parse_args()
+
+ output_dir = Path(args.output_dir)
+ if args.size is not None:
+ width = height = int(args.size)
+ else:
+ width = int(args.width) if args.width is not None else 256
+ height = int(args.height) if args.height is not None else 256
+ margin = int(args.margin)
+
+ saved = save_prompt_images(
+ generate_yes_no_maybe_prompts(),
+ output_dir,
+ image_size=(width, height),
+ margin_px=margin,
+ font_path=args.font_path,
+ )
+ print(
+ f"Wrote {len(saved)} images to {output_dir.resolve()} at size {width}x{height}"
+ )
diff --git a/dev/yes-no-maybe-vision/train.ipynb b/dev/yes-no-maybe-vision/train.ipynb
new file mode 100644
index 000000000..46d29c726
--- /dev/null
+++ b/dev/yes-no-maybe-vision/train.ipynb
@@ -0,0 +1,126 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%html\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import openai\n",
+ "from dotenv import load_dotenv\n",
+ "from generate_images import generate_yes_no_maybe_prompts, save_prompt_images\n",
+ "\n",
+ "import art\n",
+ "from art.local import LocalBackend\n",
+ "\n",
+ "load_dotenv()\n",
+ "\n",
+ "backend = LocalBackend()\n",
+ "model = art.TrainableModel(\n",
+ " name=\"009\",\n",
+ " project=\"yes-no-maybe-vision\",\n",
+ " base_model=\"Qwen/Qwen2.5-VL-7B-Instruct\",\n",
+ ")\n",
+ "await model.register(backend)\n",
+ "\n",
+ "\n",
+ "async def rollout(client: openai.AsyncOpenAI, image_path: str) -> art.Trajectory:\n",
+ " messages: art.Messages = [\n",
+ " {\n",
+ " \"role\": \"user\",\n",
+ " \"content\": [{\"type\": \"image_url\", \"image_url\": {\"url\": image_path}}],\n",
+ " }\n",
+ " ]\n",
+ " chat_completion = await client.chat.completions.create(\n",
+ " model=model.name, messages=messages, max_tokens=100, timeout=100\n",
+ " )\n",
+ " choice = chat_completion.choices[0]\n",
+ " content = choice.message.content\n",
+ " assert isinstance(content, str)\n",
+ " if content == \"yes\":\n",
+ " reward = 0.5\n",
+ " elif content == \"no\":\n",
+ " reward = 0.75\n",
+ " elif content == \"maybe\":\n",
+ " reward = 1.0\n",
+ " else:\n",
+ " reward = 0.0\n",
+ " return art.Trajectory(messages_and_choices=[*messages, choice], reward=reward)\n",
+ "\n",
+ "\n",
+ "image_paths = save_prompt_images(\n",
+ " generate_yes_no_maybe_prompts(),\n",
+ " \"/tmp/yes-no-maybe-vision/images\",\n",
+ " image_size=(256, 256),\n",
+ " margin_px=16,\n",
+ " font_path=None,\n",
+ ")\n",
+ "\n",
+ "\n",
+ "openai_client = model.openai_client()\n",
+ "for _ in range(await model.get_step(), 1_000):\n",
+ " train_groups = await art.gather_trajectory_groups(\n",
+ " (\n",
+ " art.TrajectoryGroup(\n",
+ " rollout(openai_client, image_path.as_uri()) for _ in range(32)\n",
+ " )\n",
+ " for image_path in image_paths\n",
+ " )\n",
+ " )\n",
+ " await model.train(\n",
+ " train_groups,\n",
+ " config=art.TrainConfig(learning_rate=1e-4),\n",
+ " )"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/art/dev/get_model_config.py b/src/art/dev/get_model_config.py
index 7499839d4..1b3a43de6 100644
--- a/src/art/dev/get_model_config.py
+++ b/src/art/dev/get_model_config.py
@@ -34,6 +34,7 @@ def get_model_config(
init_args.pop("max_lora_rank")
init_args.pop("use_async")
engine_args = EngineArgs(
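+        # Allow vLLM to read image_url file:// references under /tmp for multimodal rollouts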
+ allowed_local_media_path="/tmp",
disable_log_requests=True,
enable_sleep_mode=enable_sleep_mode,
generation_config="vllm",
diff --git a/src/art/local/backend.py b/src/art/local/backend.py
index a470cbb75..13a906b44 100644
--- a/src/art/local/backend.py
+++ b/src/art/local/backend.py
@@ -15,7 +15,8 @@
import weave
from openai import AsyncOpenAI
from tqdm import auto as tqdm
-from transformers.models.auto.tokenization_auto import AutoTokenizer
+from transformers import AutoImageProcessor, AutoTokenizer
+from transformers.image_processing_utils import BaseImageProcessor
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from typing_extensions import Self
from wandb.sdk.wandb_run import Run
@@ -80,7 +81,8 @@ def __init__(self, *, in_process: bool = False, path: str | None = None) -> None
# Other initialization
self._services: dict[str, ModelService] = {}
- self._tokenizers: dict[str, "PreTrainedTokenizerBase"] = {}
+ self._tokenizers: dict[str, PreTrainedTokenizerBase] = {}
+ self._image_processors: dict[str, BaseImageProcessor | None] = {}
self._wandb_runs: dict[str, Run] = {}
self._weave_clients: dict[str, WeaveClient] = {}
@@ -181,6 +183,13 @@ def _get_packed_tensors(
self._tokenizers[model.base_model] = AutoTokenizer.from_pretrained(
model.base_model
)
+ if model.base_model not in self._image_processors:
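+                # Not every base model ships an image processor; fall back to None for text-only models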
+ try:
+ self._image_processors[model.base_model] = (
+ AutoImageProcessor.from_pretrained(model.base_model, use_fast=True)
+ )
+ except Exception:
+ self._image_processors[model.base_model] = None
tokenizer = self._tokenizers[model.base_model]
tokenized_results = list(
tokenize_trajectory_groups(
@@ -188,6 +197,7 @@ def _get_packed_tensors(
trajectory_groups,
allow_training_without_logprobs,
scale_rewards,
+ image_processor=self._image_processors[model.base_model],
)
)
if not tokenized_results:
@@ -488,9 +498,9 @@ async def _train_model(
num_gradient_steps = int(
result.pop("num_gradient_steps", estimated_gradient_steps)
)
- assert (
- num_gradient_steps == estimated_gradient_steps
- ), f"num_gradient_steps {num_gradient_steps} != estimated_gradient_steps {estimated_gradient_steps}"
+ assert num_gradient_steps == estimated_gradient_steps, (
+ f"num_gradient_steps {num_gradient_steps} != estimated_gradient_steps {estimated_gradient_steps}"
+ )
results.append(result)
yield {**result, "num_gradient_steps": num_gradient_steps}
pbar.update(1)
diff --git a/src/art/preprocessing/pack.py b/src/art/preprocessing/pack.py
index fb1054d02..0e99176af 100644
--- a/src/art/preprocessing/pack.py
+++ b/src/art/preprocessing/pack.py
@@ -1,9 +1,10 @@
import os
import random
import time
+from typing import Any, cast
import torch
-from typing_extensions import TypedDict, Unpack
+from typing_extensions import NotRequired, TypedDict, Unpack
from ..types import Verbosity
from .tokenize import TokenizedResult
@@ -18,12 +19,16 @@ class PackedTensors(TypedDict):
logprobs: torch.Tensor
advantages: torch.Tensor
weights: torch.Tensor
+ pixel_values: list[torch.Tensor | None]
+ image_grid_thw: list[torch.Tensor | None]
class DiskPackedTensors(TypedDict):
dir: str
num_sequences: int
sequence_length: int
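+    # (inner_dim, cumulative row offsets) describing the optional flat vision tensor files on disk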
+ pixel_values: NotRequired[tuple[int, list[int]]]
+ image_grid_thw: NotRequired[tuple[int, list[int]]]
def packed_tensors_from_tokenized_results(
@@ -43,6 +48,8 @@ def packed_tensors_from_tokenized_results(
logprobs: list[list[float]] = [[]]
advantages: list[list[float]] = [[]]
weights: list[list[float]] = [[]]
+ pixel_values: list[list[torch.Tensor]] = [[]]
+ image_grid_thw: list[list[torch.Tensor]] = [[]]
for result in tokenized_results:
if len(result.token_ids) > seq_len and not truncate_long_results:
@@ -71,6 +78,8 @@ def packed_tensors_from_tokenized_results(
logprobs.append([])
advantages.append([])
weights.append([])
+ pixel_values.append([])
+ image_grid_thw.append([])
group_id = random.randint(-(2**63), 2**63 - 1)
if result.prompt_id in group_ids[-1]:
result = result_without_prompt
@@ -85,6 +94,10 @@ def packed_tensors_from_tokenized_results(
logprobs[-1].extend(result.logprobs)
advantages[-1].extend([result.advantage] * len(result.token_ids))
weights[-1].extend([result.weight] * len(result.token_ids))
+ if result.pixel_values is not None:
+ pixel_values[-1].append(result.pixel_values)
+ if result.image_grid_thw is not None:
+ image_grid_thw[-1].append(result.image_grid_thw)
if truncate_long_results:
token_ids[-1] = token_ids[-1][:seq_len]
group_ids[-1] = group_ids[-1][:seq_len]
@@ -105,6 +118,8 @@ def packed_tensors_from_tokenized_results(
logprobs = [logprobs[i] for i in permutation]
advantages = [advantages[i] for i in permutation]
weights = [weights[i] for i in permutation]
+ pixel_values = [pixel_values[i] for i in permutation]
+ image_grid_thw = [image_grid_thw[i] for i in permutation]
def pad(values: list[list], pad_value) -> list[list]:
max_len = seq_len
@@ -150,12 +165,18 @@ def pad(values: list[list], pad_value) -> list[list]:
"logprobs": torch.tensor(pad(logprobs, float("nan"))),
"advantages": advantages_tensor,
"weights": weights_tensor,
+ "pixel_values": [
+ torch.concat(tensors) if tensors else None for tensors in pixel_values
+ ],
+ "image_grid_thw": [
+ torch.concat(tensors) if tensors else None for tensors in image_grid_thw
+ ],
}
def packed_tensors_from_dir(**kwargs: Unpack[DiskPackedTensors]) -> PackedTensors:
os.makedirs(kwargs["dir"], exist_ok=True)
- return {
+ packed_tensors = {
key: torch.from_file(
f"{kwargs['dir']}/{key}.pt",
shared=True,
@@ -172,7 +193,33 @@ def packed_tensors_from_dir(**kwargs: Unpack[DiskPackedTensors]) -> PackedTensor
"advantages": torch.float32,
"weights": torch.float32,
}.items()
- } # type: ignore
+ }
+ _add_tensor_list(packed_tensors, kwargs, "pixel_values", torch.float32)
+ _add_tensor_list(packed_tensors, kwargs, "image_grid_thw", torch.long)
+ return cast(PackedTensors, packed_tensors)
+
+
+def _add_tensor_list(
+ packed_tensors: dict[str, Any],
+ disk_packed_tensors: DiskPackedTensors,
+ key: str,
+ dtype: torch.dtype,
+) -> None:
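+    # Memory-map the flat tensor file for `key` and split it into per-sequence slices using the stored offsets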
+ if info := disk_packed_tensors.get(key):
+ packed_tensors[key] = []
+ inner_dim, offsets = cast(tuple[int, list[int]], info)
+    packed_values = torch.from_file(
+ f"{disk_packed_tensors['dir']}/{key}.pt",
+ shared=True,
+ size=offsets[-1] * inner_dim,
+ dtype=dtype,
+ ).view(-1, inner_dim)
+ for start, end in zip(offsets[:-1], offsets[1:]):
+ packed_tensors[key].append(
+            packed_values[start:end] if start < end else None
+ )
+ else:
+ packed_tensors[key] = [None] * disk_packed_tensors["num_sequences"]
def packed_tensors_to_dir(tensors: PackedTensors, dir: str) -> DiskPackedTensors:
@@ -182,11 +229,36 @@ def packed_tensors_to_dir(tensors: PackedTensors, dir: str) -> DiskPackedTensors
"num_sequences": tensors["tokens"].shape[0],
"sequence_length": tensors["tokens"].shape[1],
}
+ if info := _get_tensor_list_info(tensors["pixel_values"]):
+ disk_packed_tensors["pixel_values"] = info
+ if info := _get_tensor_list_info(tensors["image_grid_thw"]):
+ disk_packed_tensors["image_grid_thw"] = info
for key, tensor in packed_tensors_from_dir(**disk_packed_tensors).items():
- tensor.copy_(tensors[key]) # type: ignore
+ if isinstance(tensor, list):
+ for i, t in enumerate(tensor):
+ if t is not None:
+ t.copy_(tensors[key][i])
+ else:
+ tensor.copy_(tensors[key]) # type: ignore
return disk_packed_tensors
+def _get_tensor_list_info(
+ tensors: list[torch.Tensor | None],
+) -> tuple[int, list[int]] | None:
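+    # Return the shared inner dimension and cumulative row offsets of the non-None tensors, or None if there are none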
+ inner_dims = {tensor.shape[1] for tensor in tensors if tensor is not None}
+ if len(inner_dims) == 0:
+ return None
+ assert len(inner_dims) == 1, f"Inner dimensions of {tensors} are not the same"
+ offsets = [0]
+ for tensor in tensors:
+ if tensor is not None:
+ offsets.append(offsets[-1] + tensor.shape[0])
+ else:
+ offsets.append(offsets[-1])
+ return inner_dims.pop(), offsets
+
+
def plot_packed_tensors(
packed_tensors: PackedTensors, output_dir: str | None = None
) -> None:
diff --git a/src/art/preprocessing/tokenize.py b/src/art/preprocessing/tokenize.py
index c16c1348e..70dbd766a 100644
--- a/src/art/preprocessing/tokenize.py
+++ b/src/art/preprocessing/tokenize.py
@@ -4,6 +4,9 @@
from itertools import takewhile
from typing import Generator, cast
+import torch
+from PIL import Image
+from transformers.image_processing_utils import BaseImageProcessor
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from ..trajectories import History, TrajectoryGroup, get_messages
@@ -18,6 +21,8 @@ class TokenizedResult:
input_pos: list[int]
assistant_mask: list[int]
logprobs: list[float]
+ pixel_values: torch.Tensor | None
+ image_grid_thw: torch.Tensor | None
weight: float = 0.0
prompt_id: int = 0
prompt_length: int = 0
@@ -31,6 +36,8 @@ def without_prompt(self) -> "TokenizedResult":
input_pos=self.input_pos[self.prompt_length :],
assistant_mask=self.assistant_mask[self.prompt_length :],
logprobs=self.logprobs[self.prompt_length :],
+ pixel_values=None,
+ image_grid_thw=None,
weight=self.weight,
prompt_id=self.prompt_id,
prompt_length=0,
@@ -43,6 +50,7 @@ def tokenize_trajectory_groups(
allow_training_without_logprobs: bool,
scale_rewards: bool,
shuffle_group_trajectories: bool = True,
+ image_processor: BaseImageProcessor | None = None,
) -> Generator["TokenizedResult", None, None]:
for group in trajectory_groups:
if not group:
@@ -72,6 +80,7 @@ def tokenize_trajectory_groups(
]:
if result := tokenize_trajectory(
tokenizer,
+ image_processor,
history,
advantage,
allow_training_without_logprobs,
@@ -108,6 +117,7 @@ def tokenize_trajectory_groups(
def tokenize_trajectory(
tokenizer: "PreTrainedTokenizerBase",
+ image_processor: BaseImageProcessor | None,
history: History,
advantage: float,
allow_training_without_logprobs: bool,
@@ -117,15 +127,15 @@ def tokenize_trajectory(
"""
# Find the index of the last assistant message
last_assistant_index = -1
- for i, message_or_choice in enumerate(history.messages_and_choices):
+ for i, message in enumerate(history.messages_and_choices):
if (
- isinstance(message_or_choice, dict)
- and message_or_choice["role"] == "assistant"
+ isinstance(message, dict)
+ and message["role"] == "assistant"
and allow_training_without_logprobs
):
last_assistant_index = i
- elif not isinstance(message_or_choice, dict) and (
- message_or_choice.logprobs or allow_training_without_logprobs
+ elif not isinstance(message, dict) and (
+ message.logprobs or allow_training_without_logprobs
):
last_assistant_index = i
# If there are no trainable assistant messages, return None
@@ -175,16 +185,13 @@ def tokenize_trajectory(
)
assistant_mask: list[int] = [0] * len(token_ids)
logprobs = [float("nan")] * len(token_ids)
- for message_or_choice in messages_and_choices:
- if (
- isinstance(message_or_choice, dict)
- and not message_or_choice["role"] == "assistant"
- ):
+ for message in messages_and_choices:
+ if isinstance(message, dict) and not message["role"] == "assistant":
continue
start = token_ids.index(sentinal_token_id)
end = start + 1
- if isinstance(message_or_choice, dict):
- content = message_or_choice.get("content")
+ if isinstance(message, dict):
+ content = message.get("content")
assert isinstance(content, str)
content_token_ids = tokenizer.encode(
content,
@@ -194,7 +201,7 @@ def tokenize_trajectory(
logprobs[start:end] = [float("nan")] * len(content_token_ids)
assistant_mask[start:end] = [1] * len(content_token_ids)
else:
- choice = message_or_choice
+ choice = message
assert choice.logprobs or allow_training_without_logprobs, (
"Chat completion choices must have logprobs"
)
@@ -226,6 +233,47 @@ def tokenize_trajectory(
token_logprob.logprob for token_logprob in token_logprobs
)
assistant_mask[start:end] = [1] * len(token_logprobs)
+ if image_processor:
+ images: list[Image.Image] = []
+ for message in messages_and_choices:
+ if (
+ isinstance(message, dict)
+ and message["role"] == "user"
+ and isinstance(message["content"], (list, tuple))
+ ):
+ for content in message["content"]:
+ if content["type"] == "image_url":
+ image_url = content["image_url"]["url"].removeprefix("file://")
+ images.append(Image.open(image_url))
+ image_token_id = cast(
+ int,
+ getattr(image_processor, "image_token_id", None)
+ or tokenizer.convert_tokens_to_ids( # type: ignore
+ getattr(image_processor, "image_token", "<|image_pad|>")
+ ),
+ )
+ if images:
+ result = image_processor(images=images)
+ offset = 0
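+            # Expand each single image placeholder token into the full run of image tokens the model expects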
+ for num_image_tokens in (
+ image_grid_thw.prod().item()
+ // (getattr(image_processor, "merge_size", 1) ** 2)
+ for image_grid_thw in result["image_grid_thw"]
+ ):
+ start = token_ids.index(image_token_id, offset)
+ offset = start + num_image_tokens
+ end = start + 1
+ token_ids[start:end] = [image_token_id] * num_image_tokens
+ logprobs[start:end] = [float("nan")] * num_image_tokens
+ assistant_mask[start:end] = [0] * num_image_tokens
+ pixel_values = result["pixel_values"]
+ image_grid_thw = result["image_grid_thw"]
+ else:
+ pixel_values = None
+ image_grid_thw = None
+ else:
+ pixel_values = None
+ image_grid_thw = None
return TokenizedResult(
advantage=advantage,
chat=chat,
@@ -234,4 +282,6 @@ def tokenize_trajectory(
input_pos=list(range(len(token_ids))),
assistant_mask=assistant_mask,
logprobs=logprobs,
+ pixel_values=pixel_values,
+ image_grid_thw=image_grid_thw,
)
diff --git a/src/art/transformers/patches.py b/src/art/transformers/patches.py
index 97fc9fb71..6b16424fe 100644
--- a/src/art/transformers/patches.py
+++ b/src/art/transformers/patches.py
@@ -14,12 +14,12 @@
def _patched_preprocess_mask_arguments(
config: PretrainedConfig,
input_embeds: torch.Tensor,
- attention_mask: Optional[Union[torch.Tensor, BlockMask]],
+ attention_mask: Optional[Union[torch.Tensor, "BlockMask"]],
cache_position: torch.Tensor,
past_key_values: Optional[Cache],
position_ids: Optional[torch.Tensor],
layer_idx: Optional[int],
-) -> tuple[bool, Optional[Union[torch.Tensor, BlockMask]], int, int]:
+) -> tuple[bool, Optional[Union[torch.Tensor, "BlockMask"]], int, int]:
if position_ids is not None and len(position_ids.shape) == 3:
position_ids = position_ids[0]
return _preprocess_mask_arguments(
diff --git a/src/art/unsloth/decoupled_service.py b/src/art/unsloth/decoupled_service.py
index 69f0adddd..5ea3d082a 100644
--- a/src/art/unsloth/decoupled_service.py
+++ b/src/art/unsloth/decoupled_service.py
@@ -149,6 +149,12 @@ async def train(
for k, v in packed_tensors.items()
if isinstance(v, torch.Tensor)
},
+ pixel_values=packed_tensors["pixel_values"][
+ _offset : _offset + 1
+ ],
+ image_grid_thw=packed_tensors["image_grid_thw"][
+ _offset : _offset + 1
+ ],
config=config,
_config=_config,
return_new_logprobs=True,
@@ -169,6 +175,16 @@ async def train(
for k, v in packed_tensors.items()
if isinstance(v, torch.Tensor)
},
+ pixel_values=(
+ [None]
+ if warmup
+ else packed_tensors["pixel_values"][offset : offset + 1]
+ ),
+ image_grid_thw=(
+ [None]
+ if warmup
+ else packed_tensors["image_grid_thw"][offset : offset + 1]
+ ),
config=(
config.model_copy(
update={"lr": 1e-9, "beta": 0.0, "kl_coef": 0.0}
diff --git a/src/art/unsloth/service.py b/src/art/unsloth/service.py
index 802a1a0b1..621bc9b0e 100644
--- a/src/art/unsloth/service.py
+++ b/src/art/unsloth/service.py
@@ -118,6 +118,12 @@ async def train(
for k, v in packed_tensors.items()
if isinstance(v, torch.Tensor)
},
+ pixel_values=packed_tensors["pixel_values"][
+ _offset : _offset + 1
+ ],
+ image_grid_thw=packed_tensors["image_grid_thw"][
+ _offset : _offset + 1
+ ],
config=config,
_config=_config,
return_new_logprobs=True,
@@ -140,6 +146,18 @@ async def train(
for k, v in packed_tensors.items()
if isinstance(v, torch.Tensor)
},
+ pixel_values=(
+ [None]
+ if warmup
+ else packed_tensors["pixel_values"][offset : offset + 1]
+ ),
+ image_grid_thw=(
+ [None]
+ if warmup
+ else packed_tensors["image_grid_thw"][
+ offset : offset + 1
+ ]
+ ),
config=(
config.model_copy(
update={"lr": 1e-9, "beta": 0.0, "kl_coef": 0.0}
@@ -200,12 +218,21 @@ async def train(
def _set_lora(self, lora_path: str) -> None:
"""Sets the LoRA adapter with ID 1 in the vLLM engine."""
- lora_request: "LoRARequest" = self.state.peft_model.load_lora(
- lora_path,
- load_tensors=True,
- ) # type: ignore
- lora_request.lora_int_id = 1
- lora_request.lora_name = self.model_name
- lora_request.lora_path = lora_path
+ from vllm.lora.request import LoRARequest
+
+ if hasattr(self.state.peft_model, "load_lora"):
+ lora_request: LoRARequest = self.state.peft_model.load_lora(
+ lora_path,
+ load_tensors=True,
+ ) # type: ignore
+ lora_request.lora_int_id = 1
+ lora_request.lora_name = self.model_name
+ lora_request.lora_path = lora_path
+ else:
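+            # Some PEFT model wrappers don't expose load_lora; construct the LoRARequest directly instead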
+ lora_request = LoRARequest(
+ lora_name=self.model_name,
+ lora_int_id=1,
+ lora_path=lora_path,
+ )
self.state.vllm.async_engine.engine.remove_lora(1)
self.state.vllm.async_engine.engine.add_lora(lora_request) # type: ignore
diff --git a/src/art/unsloth/state.py b/src/art/unsloth/state.py
index d8d15288e..b32f646db 100644
--- a/src/art/unsloth/state.py
+++ b/src/art/unsloth/state.py
@@ -86,13 +86,15 @@ def _from_engine_args(
torch.cuda.empty_cache()
self.vllm = vLLMState(self.model.vllm_engine, enable_sleep_mode)
# Initialize PEFT model
- self.peft_model = cast(
- peft.peft_model.PeftModelForCausalLM,
- unsloth.FastLanguageModel.get_peft_model(
- self.model, **config.get("peft_args", {})
- ),
- )
- self.lora_model = cast(peft.tuners.lora.LoraModel, self.peft_model.base_model)
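+    # The model may already be a PeftModel; avoid wrapping it a second time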
+ if isinstance(self.model, peft.peft_model.PeftModelForCausalLM):
+ self.peft_model = self.model
+ else:
+ self.peft_model = cast(
+ peft.peft_model.PeftModelForCausalLM,
+ unsloth.FastLanguageModel.get_peft_model(
+ self.model, **config.get("peft_args", {})
+ ),
+ )
# Initialize trainer
data = {"prompt": ""}
self.trainer = GRPOTrainer(
diff --git a/src/art/unsloth/train.py b/src/art/unsloth/train.py
index 1e6fc3135..658efd0b2 100644
--- a/src/art/unsloth/train.py
+++ b/src/art/unsloth/train.py
@@ -69,6 +69,15 @@ def compute_loss(
# if param_group.get("weight_decay"):
# param_group["weight_decay"] = config.weight_decay
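+        # Vision inputs arrive as one-element lists; unwrap the tensors or drop the keys for text-only batches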
+ if inputs["pixel_values"][0] is not None:
+ inputs["pixel_values"] = inputs["pixel_values"][0] # type: ignore
+ else:
+ del inputs["pixel_values"] # type: ignore
+ if inputs["image_grid_thw"][0] is not None:
+ inputs["image_grid_thw"] = inputs["image_grid_thw"][0] # type: ignore
+ else:
+ del inputs["image_grid_thw"] # type: ignore
+
# Move tensors to the correct device
inputs = {
key: tensor.to(trainer.accelerator.device) # type: ignore
@@ -109,11 +118,17 @@ def compute_loss(
f"Sequence length ({seq_len}) must be evenly divisible by chunk size ({chunk_size})"
)
os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "1"
+ forward_kwargs = {}
+ if "pixel_values" in inputs:
+ forward_kwargs["pixel_values"] = inputs["pixel_values"] # type: ignore
+ if "image_grid_thw" in inputs:
+ forward_kwargs["image_grid_thw"] = inputs["image_grid_thw"] # type: ignore
new_logprobs, entropies = calculate_logprobs(
dtype_for_autocasting,
trainer,
inputs["tokens"],
attn_bias,
+ forward_kwargs,
next_input_ids,
lm_head_t,
chunk_size=chunk_size,
@@ -129,6 +144,7 @@ def compute_loss(
trainer,
inputs["tokens"],
attn_bias,
+ forward_kwargs,
next_input_ids,
lm_head_t,
chunk_size=chunk_size,
@@ -297,6 +313,7 @@ def calculate_logprobs(
trainer: "GRPOTrainer",
input_ids: torch.Tensor,
causal_mask: torch.Tensor,
+ forward_kwargs: dict[str, torch.Tensor],
next_input_ids: torch.Tensor,
lm_head_t: torch.Tensor,
chunk_size: int,
@@ -319,7 +336,7 @@ def calculate_logprobs(
torch.amp.autocast_mode.autocast(device_type="cuda", dtype=dtype_for_autocast),
):
hidden_states = trainer.model( # type: ignore
- input_ids=input_ids, causal_mask=causal_mask
+ input_ids=input_ids, causal_mask=causal_mask, **forward_kwargs
).logits # Shape [B, S, H]
return _calculate_logprobs(lm_head_t, hidden_states, next_input_ids, chunk_size)
diff --git a/src/art/utils/trajectory_logging.py b/src/art/utils/trajectory_logging.py
index 85bb2b653..42b7fdab9 100644
--- a/src/art/utils/trajectory_logging.py
+++ b/src/art/utils/trajectory_logging.py
@@ -1,5 +1,5 @@
import json
-from typing import Any, cast
+from typing import Any, Iterator, cast
import yaml
@@ -73,6 +73,9 @@ def message_or_choice_to_dict(message_or_choice: MessageOrChoice) -> dict[str, A
# item is a choice with logprobs, remove the logprobs
item_dict.pop("logprobs")
+ if "content" in item_dict and isinstance(item_dict["content"], Iterator):
+ item_dict["content"] = list(item_dict["content"]) # type: ignore
+
return dict(item_dict)
diff --git a/tests/unit/test_tokenize_trajectory_groups.ipynb b/tests/unit/test_tokenize_trajectory_groups.ipynb
index b90739eda..63ddc2eaf 100644
--- a/tests/unit/test_tokenize_trajectory_groups.ipynb
+++ b/tests/unit/test_tokenize_trajectory_groups.ipynb
@@ -20,7 +20,7 @@
{
"data": {
"text/plain": [
- "TokenizedResult(advantage=-1.0, chat='<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n<|im_start|>user\\nWhat is the capital of France?<|im_end|>\\n<|im_start|>assistant\\nLondon<|im_end|>\\n', tokens=['<|im_start|>', 'system', '\\n', 'You', ' are', ' Q', 'wen', ',', ' created', ' by', ' Alibaba', ' Cloud', '.', ' You', ' are', ' a', ' helpful', ' assistant', '.', '<|im_end|>', '\\n', '<|im_start|>', 'user', '\\n', 'What', ' is', ' the', ' capital', ' of', ' France', '?', '<|im_end|>', '\\n', '<|im_start|>', 'assistant', '\\n', 'London', '<|im_end|>', '\\n'], token_ids=[151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 3838, 374, 279, 6722, 315, 9625, 30, 151645, 198, 151644, 77091, 198, 39572, 151645, 198], input_pos=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38], assistant_mask=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], logprobs=[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], weight=1.0, prompt_id=0, prompt_length=36)"
+ "TokenizedResult(advantage=-1.0, chat='<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n<|im_start|>user\\nWhat is the capital of France?<|im_end|>\\n<|im_start|>assistant\\nLondon<|im_end|>\\n', tokens=['<|im_start|>', 'system', '\\n', 'You', ' are', ' Q', 'wen', ',', ' created', ' by', ' Alibaba', ' Cloud', '.', ' You', ' are', ' a', ' helpful', ' assistant', '.', '<|im_end|>', '\\n', '<|im_start|>', 'user', '\\n', 'What', ' is', ' the', ' capital', ' of', ' France', '?', '<|im_end|>', '\\n', '<|im_start|>', 'assistant', '\\n', 'London', '<|im_end|>', '\\n'], token_ids=[151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 3838, 374, 279, 6722, 315, 9625, 30, 151645, 198, 151644, 77091, 198, 39572, 151645, 198], input_pos=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38], assistant_mask=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], logprobs=[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], pixel_values=None, image_grid_thw=None, weight=1.0, prompt_id=0, prompt_length=36)"
]
},
"metadata": {},
@@ -29,7 +29,7 @@
{
"data": {
"text/plain": [
- "TokenizedResult(advantage=1.0, chat='<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n<|im_start|>user\\nWhat is the capital of France?<|im_end|>\\n<|im_start|>assistant\\nParis<|im_end|>\\n', tokens=['<|im_start|>', 'system', '\\n', 'You', ' are', ' Q', 'wen', ',', ' created', ' by', ' Alibaba', ' Cloud', '.', ' You', ' are', ' a', ' helpful', ' assistant', '.', '<|im_end|>', '\\n', '<|im_start|>', 'user', '\\n', 'What', ' is', ' the', ' capital', ' of', ' France', '?', '<|im_end|>', '\\n', '<|im_start|>', 'assistant', '\\n', 'Paris', '<|im_end|>', '\\n'], token_ids=[151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 3838, 374, 279, 6722, 315, 9625, 30, 151645, 198, 151644, 77091, 198, 59604, 151645, 198], input_pos=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38], assistant_mask=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], logprobs=[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, -0.01, nan, nan], weight=1.0, prompt_id=0, prompt_length=36)"
+ "TokenizedResult(advantage=1.0, chat='<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n<|im_start|>user\\nWhat is the capital of France?<|im_end|>\\n<|im_start|>assistant\\nParis<|im_end|>\\n', tokens=['<|im_start|>', 'system', '\\n', 'You', ' are', ' Q', 'wen', ',', ' created', ' by', ' Alibaba', ' Cloud', '.', ' You', ' are', ' a', ' helpful', ' assistant', '.', '<|im_end|>', '\\n', '<|im_start|>', 'user', '\\n', 'What', ' is', ' the', ' capital', ' of', ' France', '?', '<|im_end|>', '\\n', '<|im_start|>', 'assistant', '\\n', 'Paris', '<|im_end|>', '\\n'], token_ids=[151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 3838, 374, 279, 6722, 315, 9625, 30, 151645, 198, 151644, 77091, 198, 59604, 151645, 198], input_pos=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38], assistant_mask=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], logprobs=[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, -0.01, nan, nan], pixel_values=None, image_grid_thw=None, weight=1.0, prompt_id=0, prompt_length=36)"
]
},
"metadata": {},