diff --git a/.gitignore b/.gitignore index 1b6d4c0..d6d110b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ src-tauri/target/ src-tauri/resources/embedded/ .runtime-stage/ releases/ +assets/ src-tauri/gen/ .env .env.local diff --git a/CHANGELOG.md b/CHANGELOG.md index 816859b..ccb3fa4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## v0.6.0 - 2026-04-19 + +- Renamed the local `compression/` package to `cache_compression/` so it no longer shadows Python 3.14's PEP 784 stdlib `compression` namespace package. Fixes a `ModuleNotFoundError: No module named 'compression._common'` surfacing on Windows with Python 3.14 when PyTorch's import chain reached into the shadowed package. +- Made the My Models library RAM estimate use the actual on-disk size + KV cache heuristic instead of the catalog flagship's `estimatedMemoryGb`, so differently-sized variants of the same family no longer all render as the same ~76 GB value. Added a parallel compressed-cache estimate for the Compressed column. +- Video diffusion models (HunyuanVideo, Mochi, Wan2.x, LTX-Video, CogVideo, etc.) are now tagged `modelType="video"` during discovery and kept out of the chat-oriented My Models list and chat picker. They continue to surface under the dedicated Video section. +- Video-gen memory safety now includes the model footprint (with device-class fragmentation factors) in the safety verdict, preventing the 40-frame Wan 2.1 T2V 1.3B MPS crash on 64 GB Macs. +- Hardened Windows staging: `scripts/stage-runtime.mjs` now clears read-only attributes and retries on transient EPERM/EBUSY during `.runtime-stage` cleanup, and skips the dev-mode tar archive that Tauri ignores anyway. `build.ps1` pre-clears stale staging and installs the project via `pip install -e ".[desktop,images]"` so strict validation has its required extras. +- Bumped the application version to `0.6.0` across the npm, Python, and Tauri package metadata. + ## v0.5.3 - 2026-04-18 - Fixed the GitHub Actions release workflow to use the valid `includeUpdaterJson` input for `tauri-apps/tauri-action@v0.6.0`, removing the repeated `uploadUpdaterJson` warnings from release builds. diff --git a/CLAUDE.md b/CLAUDE.md index b2b4799..c69fb0e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -7,7 +7,7 @@ ChaosEngineAI is a desktop AI inference app built with: - **Desktop shell**: Tauri (Rust) — `src-tauri/` - **Backend**: Python FastAPI sidecar — `backend_service/` - **Inference engines**: MLX (Apple Silicon), llama.cpp (GGUF), vLLM (CUDA) -- **Cache strategies**: Pluggable compression via `compression/` registry +- **Cache strategies**: Pluggable compression via `cache_compression/` registry ### Key Directories @@ -19,7 +19,7 @@ ChaosEngineAI is a desktop AI inference app built with: | `backend_service/inference.py` | Core inference engine — model loading, binary routing, generation | | `backend_service/routes/` | API endpoints (14 route modules) | | `backend_service/helpers/` | System stats, settings, persistence, cache estimation | -| `compression/` | Cache strategy registry + adapters (native, rotorquant, turboquant, chaosengine, triattention) | +| `cache_compression/` | Cache strategy registry + adapters (native, rotorquant, turboquant, chaosengine, triattention). Renamed from `compression/` so it doesn't shadow Python 3.14's stdlib `compression` namespace package. 
| | `dflash/` | DFlash speculative decoding — draft model registry + availability detection | | `scripts/` | Build, install, and update scripts | | `tests/` | Python tests (pytest) | @@ -57,10 +57,11 @@ Check for updates to external repos we build from or depend on: | llama.cpp (standard) | `ggml-org/llama.cpp` | `master` | `git -C ../llama.cpp fetch && git -C ../llama.cpp log HEAD..origin/master --oneline` | | llama-server-turbo | `TheTom/llama-cpp-turboquant` | `feature/turboquant-kv-cache` | `git ls-remote https://github.com/TheTom/llama-cpp-turboquant.git refs/heads/feature/turboquant-kv-cache` | | ChaosEngine | `cryptopoly/ChaosEngine` | `main` | `git -C vendor/ChaosEngine fetch && git -C vendor/ChaosEngine log HEAD..origin/main --oneline` | -| dflash-mlx | `bstnxbt/dflash-mlx` | — | `.venv/bin/pip index versions dflash-mlx 2>/dev/null` | +| dflash-mlx | `bstnxbt/dflash-mlx` | `main` pinned to commit `f825ffb2` (upstream deleted all tags April 2026) | `git ls-remote https://github.com/bstnxbt/dflash-mlx.git refs/heads/main` | | turboquant | `back2matching/turboquant` | — | `.venv/bin/pip index versions turboquant 2>/dev/null` | -| turboquant-mlx | `sharpner/turboquant-mlx` | — | `.venv/bin/pip index versions turboquant-mlx 2>/dev/null` | +| turboquant-mlx | `arozanov/turboquant-mlx` | — | `.venv/bin/pip index versions turboquant-mlx 2>/dev/null` | | turboquant-mlx-full | `helgklaizar/turboquant_mlx` | — | `.venv/bin/pip index versions turboquant-mlx-full 2>/dev/null` | +| DDTree (ported algorithm) | `liranringel/ddtree` | `main` | `git ls-remote https://github.com/liranringel/ddtree.git HEAD` | ### 4. Cache Strategy Health - [ ] ChaosEngine `llama_cpp_cache_flags()` only emits standard types: `f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1` @@ -81,7 +82,7 @@ Check for updates to external repos we build from or depend on: | Area | Test File(s) | Command | |------|-------------|---------| -| Cache strategies (`compression/`) | `test_cache_strategies.py` | `pytest tests/test_cache_strategies.py -v` | +| Cache strategies (`cache_compression/`) | `test_cache_strategies.py` | `pytest tests/test_cache_strategies.py -v` | | DFlash / speculative decoding | `test_dflash.py` | `pytest tests/test_dflash.py -v` | | Inference / llama.cpp / binary routing | `test_inference.py` | `pytest tests/test_inference.py -v` | | Setup routes / install endpoints | `test_setup_routes.py` | `pytest tests/test_setup_routes.py -v` | diff --git a/README.md b/README.md index bde8415..7066105 100644 --- a/README.md +++ b/README.md @@ -298,7 +298,7 @@ ChaosEngineAI is three cooperating layers: - **`src-tauri/`** — Tauri 2 Rust shell + bundled runtime. - **`backend_service/`** — Python service that owns model lifecycle, the warm pool, the OpenAI-compatible API, the benchmark runner, and speculative decoding (DFlash + DDTree). - **`backend_service/routes/`** — FastAPI routes for chat, prompts, compare mode, benchmarks, plugins, images, server controls, and settings. -- **`compression/`** — Pluggable cache/compression strategy system. Ships with native f16 and optional adapters for [RotorQuant](https://github.com/scrya-com/rotorquant), [TriAttention](https://github.com/WeianMao/triattention), [TurboQuant](https://pypi.org/project/turboquant-mlx/), and [ChaosEngine](https://github.com/cryptopoly/ChaosEngine). +- **`cache_compression/`** — Pluggable cache/compression strategy system. 
Ships with native f16 and optional adapters for [RotorQuant](https://github.com/scrya-com/rotorquant), [TriAttention](https://github.com/WeianMao/triattention), [TurboQuant](https://pypi.org/project/turboquant-mlx/), and [ChaosEngine](https://github.com/cryptopoly/ChaosEngine). - **`dflash/`** — DFlash speculative decoding integration: draft model registry, fuzzy matching for quantized variants, MLX and vLLM backend detection. --- @@ -317,7 +317,7 @@ ChaosEngineAI uses a pluggable cache strategy system. Out of the box, models run Install optional backends into the backend runtime (`./.venv/bin/python3 -m pip install ...`), then restart ChaosEngineAI. TriAttention is Linux/CUDA only, the current PyPI `turboquant-mlx` package may still leave TurboQuant disabled in the current build, and ChaosEngine can now be bundled directly into desktop builds by checking out `vendor/ChaosEngine` (or setting `CHAOSENGINE_VENDOR_PATH`) before `npm run stage:runtime`. Source/dev installs can still use the local editable install from GitHub. -The system is designed so new compression methods can be added as single-file adapters in `compression/` without touching any other code. +The system is designed so new compression methods can be added as single-file adapters in `cache_compression/` without touching any other code. --- @@ -390,7 +390,7 @@ ChaosEngineAI/ ├── src-tauri/ Tauri Rust shell + bundled runtime ├── scripts/ Build, release + runtime staging scripts ├── backend_service/ Python backend (engine adapters + HTTP server) -├── compression/ Pluggable cache/compression strategy adapters +├── cache_compression/ Pluggable cache/compression strategy adapters ├── dflash/ DFlash/DDTree speculative decoding integration ├── vendor/ChaosEngine/ ChaosEngine compression (git submodule) ├── tests/ Backend integration tests diff --git a/THIRD_PARTY_NOTICES.md b/THIRD_PARTY_NOTICES.md index c8ce065..9618fb7 100644 --- a/THIRD_PARTY_NOTICES.md +++ b/THIRD_PARTY_NOTICES.md @@ -78,3 +78,17 @@ If installed by the user, each is subject to its own licence: These libraries are **not bundled** with ChaosEngineAI. They are optional pip dependencies that the user may install independently. + +--- + +## Ported Algorithms + +### DDTree (Diffusion Draft Tree) + +- **Upstream:** <https://github.com/liranringel/ddtree> +- **Licence:** MIT +- **Port location:** `backend_service/ddtree.py` +- **Usage:** The tree-building and tree-mask compilation logic is ported + to ChaosEngineAI's MLX runtime. The draft model bundle is reused from + DFlash. No upstream code is bundled verbatim; this is a re-implementation + of the published algorithm.
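+
+A minimal sketch of the tree-mask construction the port implements (illustrative
+only; the function name and list-of-lists shape are not from the upstream code):
+every node in the draft tree attends to itself and its ancestors, so the mask
+falls out of a walk up the parent pointers.
+
+```python
+def build_tree_mask(parents: list[int]) -> list[list[bool]]:
+    # parents[i] is the index of node i's parent, or -1 for the root.
+    n = len(parents)
+    mask = [[False] * n for _ in range(n)]
+    for i in range(n):
+        j = i
+        while j != -1:          # walk from node i up to the root
+            mask[i][j] = True   # node i may attend to ancestor j (and itself)
+            j = parents[j]
+    return mask
+```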
diff --git a/backend_service/app.py b/backend_service/app.py index 7d2ccd5..69aec03 100644 --- a/backend_service/app.py +++ b/backend_service/app.py @@ -18,7 +18,12 @@ ImageGenerationConfig, ImageRuntimeManager, ) -from backend_service.models import ImageGenerationRequest +from backend_service.video_runtime import ( + VideoGenerationConfig, + VideoRuntimeManager, + start_torch_warmup, +) +from backend_service.models import ImageGenerationRequest, VideoGenerationRequest from backend_service.routes import register_routes from backend_service.state import ChaosEngineState @@ -35,6 +40,12 @@ _find_image_output as _find_image_output_impl, _delete_image_output as _delete_image_output_impl, ) +from backend_service.helpers.video import ( + _load_video_outputs as _load_video_outputs_impl, + _save_video_artifact as _save_video_artifact_impl, + _find_video_output as _find_video_output_impl, + _delete_video_output as _delete_video_output_impl, +) from backend_service.helpers.settings import ( DataLocation, _default_settings as _default_settings_impl, @@ -55,9 +66,15 @@ WORKSPACE_ROOT = Path(__file__).resolve().parents[1] APP_STARTED_AT = time.time() HF_SNAPSHOT_DOWNLOAD_HELPER = ( - "import sys\n" + "import json, sys\n" "from huggingface_hub import snapshot_download\n" - "snapshot_download(repo_id=sys.argv[1], resume_download=True)\n" + "repo_id = sys.argv[1]\n" + "raw_allow = sys.argv[2] if len(sys.argv) > 2 else ''\n" + "allow_patterns = json.loads(raw_allow) if raw_allow else None\n" + "kwargs = {'repo_id': repo_id, 'resume_download': True}\n" + "if allow_patterns:\n" + " kwargs['allow_patterns'] = allow_patterns\n" + "snapshot_download(**kwargs)\n" ) DEFAULT_PORT = int(os.getenv("CHAOSENGINE_PORT", "8876")) DEFAULT_HOST = os.getenv("CHAOSENGINE_HOST", "127.0.0.1") @@ -72,6 +89,7 @@ CHAT_SESSIONS_PATH = DATA_LOCATION.chat_sessions_path DOCUMENTS_DIR = DATA_LOCATION.documents_dir IMAGE_OUTPUTS_DIR = DATA_LOCATION.image_outputs_dir +VIDEO_OUTPUTS_DIR = DATA_LOCATION.video_outputs_dir MAX_DOC_SIZE_BYTES = 50 * 1024 * 1024 # 50 MB per file MAX_SESSION_DOCS_BYTES = 200 * 1024 * 1024 # 200 MB per session DOC_ALLOWED_EXTENSIONS = { @@ -93,6 +111,7 @@ EXEMPT_AUTH_PATHS = frozenset({ "/api/health", "/api/auth/session", + "/api/system/gpu-status", }) @@ -133,20 +152,76 @@ def _save_chat_sessions(sessions: list[dict[str, Any]], path: Path = CHAT_SESSIO return _save_chat_sessions_impl(sessions, path) +def _resolve_output_dir_override(raw: str, default: Path) -> Path: + """Return the user-chosen output directory, or the default. + + Empty / whitespace-only strings restore the default. A non-empty value is + expanded (``~`` → home), resolved to an absolute path, and the directory is + created if missing. If creation fails (path is unwritable, on a missing + volume, etc.) we transparently fall back to ``default`` so generation never + crashes just because the user pointed at a stale Dropbox folder. + """ + value = (raw or "").strip() + if not value: + return default + try: + candidate = Path(os.path.expanduser(value)).resolve() + candidate.mkdir(parents=True, exist_ok=True) + return candidate + except OSError: + return default + + +def _current_image_outputs_dir() -> Path: + # The module-level ``IMAGE_OUTPUTS_DIR`` is the install-time default and + # the override target tests use to redirect output into a tempdir. Anything + # the user typed in Settings takes precedence — but only when actually set, + # so test patches still win when no setting is configured. 
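+    # Illustrative override semantics (per _resolve_output_dir_override above;
+    # the paths here are hypothetical examples):
+    #   imageOutputsDirectory=""              -> IMAGE_OUTPUTS_DIR (the default)
+    #   imageOutputsDirectory="~/Pics/CE"     -> expanded and created if missing
+    #   imageOutputsDirectory="/Volumes/gone" -> default again if mkdir raises OSError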
+ settings = _load_settings() + return _resolve_output_dir_override( + str(settings.get("imageOutputsDirectory") or ""), + IMAGE_OUTPUTS_DIR, + ) + + +def _current_video_outputs_dir() -> Path: + settings = _load_settings() + return _resolve_output_dir_override( + str(settings.get("videoOutputsDirectory") or ""), + VIDEO_OUTPUTS_DIR, + ) + + def _load_image_outputs() -> list[dict[str, Any]]: - return _load_image_outputs_impl(IMAGE_OUTPUTS_DIR) + return _load_image_outputs_impl(_current_image_outputs_dir()) def _save_image_artifact(artifact: dict[str, Any]) -> dict[str, Any]: - return _save_image_artifact_impl(artifact, IMAGE_OUTPUTS_DIR) + return _save_image_artifact_impl(artifact, _current_image_outputs_dir()) def _find_image_output(artifact_id: str) -> dict[str, Any] | None: - return _find_image_output_impl(artifact_id, IMAGE_OUTPUTS_DIR) + return _find_image_output_impl(artifact_id, _current_image_outputs_dir()) def _delete_image_output(artifact_id: str) -> bool: - return _delete_image_output_impl(artifact_id, IMAGE_OUTPUTS_DIR) + return _delete_image_output_impl(artifact_id, _current_image_outputs_dir()) + + +def _load_video_outputs() -> list[dict[str, Any]]: + return _load_video_outputs_impl(_current_video_outputs_dir()) + + +def _save_video_artifact(artifact: dict[str, Any]) -> dict[str, Any]: + return _save_video_artifact_impl(artifact, _current_video_outputs_dir()) + + +def _find_video_output(artifact_id: str) -> dict[str, Any] | None: + return _find_video_output_impl(artifact_id, _current_video_outputs_dir()) + + +def _delete_video_output(artifact_id: str) -> bool: + return _delete_video_output_impl(artifact_id, _current_video_outputs_dir()) def compute_cache_preview( @@ -196,6 +271,16 @@ def _resolve_api_token(explicit_token: str | None = None) -> str: return token or secrets.token_urlsafe(32) +def _resolve_require_api_auth(settings: dict[str, Any]) -> bool: + # Env var wins — useful for CI / headless scripts that need to drop + # the bearer requirement without touching settings.json. Accepts any + # of "0", "false", "no", "off" (case-insensitive) to disable. + env_override = os.getenv("CHAOSENGINE_REQUIRE_AUTH") + if env_override is not None: + return env_override.strip().lower() not in {"0", "false", "no", "off", ""} + return bool(settings.get("requireApiAuth", True)) + + def _is_loopback_host(host: str | None) -> bool: if not host: return False @@ -228,7 +313,7 @@ def _hf_repo_from_link(link: str | None) -> str | None: def _get_cache_strategies() -> list[dict[str, Any]]: - from compression import registry + from cache_compression import registry return registry.available() @@ -284,6 +369,74 @@ def _generate_image_artifacts( return artifacts, runtime_status +def _generate_video_artifact( + request: VideoGenerationRequest, + variant: dict[str, Any], + runtime_manager: VideoRuntimeManager, +) -> tuple[dict[str, Any], dict[str, Any]]: + """Run a single video generation and persist it to the outputs dir. + + Returns ``(artifact_dict, runtime_status_dict)``. Unlike the image path, + there is no placeholder fallback — if the runtime isn't ready or the + generation fails, the caller sees the exception and surfaces a proper + HTTP error rather than a fake clip. 
+ """ + import logging + logger = logging.getLogger("chaosengine.video") + logger.info( + "Generating video: model=%s repo=%s size=%dx%d frames=%d steps=%d", + variant.get("name"), + variant.get("repo"), + request.width, + request.height, + request.numFrames, + request.steps, + ) + + video, runtime_status = runtime_manager.generate( + VideoGenerationConfig( + modelId=request.modelId, + modelName=str(variant["name"]), + repo=str(variant["repo"]), + prompt=request.prompt, + negativePrompt=request.negativePrompt or "", + width=request.width, + height=request.height, + numFrames=request.numFrames, + fps=request.fps, + steps=request.steps, + guidance=request.guidance, + seed=request.seed, + ) + ) + + created_at = datetime.utcnow().replace(microsecond=0).isoformat() + "Z" + clip_duration = round(video.frameCount / max(1, video.fps), 3) + artifact = { + "artifactId": f"vid-{uuid.uuid4().hex[:12]}", + "modelId": request.modelId, + "modelName": variant["name"], + "prompt": request.prompt, + "negativePrompt": request.negativePrompt or "", + "width": video.width, + "height": video.height, + "numFrames": video.frameCount, + "fps": video.fps, + "steps": request.steps, + "guidance": request.guidance, + "seed": video.seed, + "createdAt": created_at, + "durationSeconds": video.durationSeconds, + "clipDurationSeconds": clip_duration, + "videoBytes": video.bytes, + "videoMimeType": video.mimeType, + "videoExtension": video.extension, + "runtimeLabel": video.runtimeLabel, + "runtimeNote": video.runtimeNote, + } + return _save_video_artifact(artifact), runtime_status + + def create_app( state: ChaosEngineState | None = None, api_token: str | None = None, @@ -300,6 +453,13 @@ def create_app( app.state.chaosengine = state or ChaosEngineState(server_port=DEFAULT_PORT) app.state.chaosengine_api_token = _resolve_api_token(api_token) app.state.chaosengine_allowed_origins = frozenset(allowed_origins) + # Bearer-token enforcement toggle. Reads from (in order) env override, + # then saved settings, defaulting to True (keep the existing secure + # default). Mutated live by state.update_settings so the user doesn't + # need to restart the server to toggle it. + app.state.chaosengine_require_api_auth = _resolve_require_api_auth( + app.state.chaosengine.settings, + ) # Shutdown hook: kill any running llama-server / MLX worker children # on backend exit. Runs on clean shutdown (uvicorn SIGTERM), Ctrl-C, @@ -360,6 +520,7 @@ async def require_api_auth(request: Request, call_next): request.method == "OPTIONS" or path in EXEMPT_AUTH_PATHS or not (path.startswith("/api/") or path.startswith("/v1/")) + or not getattr(app.state, "chaosengine_require_api_auth", True) ): return await call_next(request) @@ -399,6 +560,12 @@ async def log_requests(request, call_next): return response register_routes(app) + + # Kick off a background torch import so the first Video Studio probe + # doesn't pay the 30-60s cold-disk cost on Windows. Failures are captured + # and surfaced by probe() itself. 
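+    # (Assumed shape of start_torch_warmup, whose body isn't in this diff: a
+    # daemon thread that performs the one-off `import torch` and stashes any
+    # failure where probe() can report it, e.g.
+    #     threading.Thread(target=_warmup_torch, daemon=True).start()
+    # so create_app() returns immediately instead of blocking on the import.)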
+ start_torch_warmup() + return app diff --git a/backend_service/catalog/__init__.py b/backend_service/catalog/__init__.py index e2e4740..39e7736 100644 --- a/backend_service/catalog/__init__.py +++ b/backend_service/catalog/__init__.py @@ -1,2 +1,3 @@ from .text_models import MODEL_FAMILIES as MODEL_FAMILIES, CATALOG as CATALOG from .image_models import IMAGE_MODEL_FAMILIES as IMAGE_MODEL_FAMILIES, LATEST_IMAGE_TRACKED_SEEDS as LATEST_IMAGE_TRACKED_SEEDS +from .video_models import VIDEO_MODEL_FAMILIES as VIDEO_MODEL_FAMILIES diff --git a/backend_service/catalog/image_models.py b/backend_service/catalog/image_models.py index 6951e83..3c926e9 100644 --- a/backend_service/catalog/image_models.py +++ b/backend_service/catalog/image_models.py @@ -28,6 +28,7 @@ "recommendedResolution": "1024x1024", "note": "Fastest concepting option in the curated image catalog.", "estimatedGenerationSeconds": 4.2, + "releaseDate": "2024-08", } ], }, @@ -55,6 +56,7 @@ "recommendedResolution": "1024x1024", "note": "Quality-oriented generalist for the curated image lineup.", "estimatedGenerationSeconds": 7.4, + "releaseDate": "2024-08", } ], }, @@ -82,6 +84,7 @@ "recommendedResolution": "1024x1024", "note": "Latest Stability offering targeting a good quality-to-resource balance.", "estimatedGenerationSeconds": 5.8, + "releaseDate": "2024-10", } ], }, @@ -109,6 +112,7 @@ "recommendedResolution": "1024x1024", "note": "Fastest high-fidelity model in the curated set.", "estimatedGenerationSeconds": 3.1, + "releaseDate": "2024-10", } ], }, @@ -136,6 +140,7 @@ "recommendedResolution": "1024x1024", "note": "Widely adopted SDXL baseline with strong community and LoRA ecosystem.", "estimatedGenerationSeconds": 7.4, + "releaseDate": "2023-07", } ], }, @@ -154,6 +159,7 @@ "gated": False, "pipelineTag": "text-to-image", "updatedLabel": "Tracked latest", + "releaseDate": "2025-08", }, { "repo": "Qwen/Qwen-Image-Edit", @@ -167,6 +173,7 @@ "gated": False, "pipelineTag": "image-to-image", "updatedLabel": "Tracked latest", + "releaseDate": "2025-08", }, { "repo": "HiDream-ai/HiDream-I1-Full", @@ -180,6 +187,7 @@ "gated": False, "pipelineTag": "text-to-image", "updatedLabel": "Tracked latest", + "releaseDate": "2025-04", }, { "repo": "zai-org/GLM-Image", @@ -206,6 +214,7 @@ "gated": False, "pipelineTag": "text-to-image", "updatedLabel": "Tracked latest", + "releaseDate": "2025-03", }, { "repo": "Efficient-Large-Model/Sana_Sprint_1.6B_1024px_diffusers", @@ -219,5 +228,6 @@ "gated": False, "pipelineTag": "text-to-image", "updatedLabel": "Tracked latest", + "releaseDate": "2025-03", }, ] diff --git a/backend_service/catalog/text_models.py b/backend_service/catalog/text_models.py index 68979d1..f350348 100644 --- a/backend_service/catalog/text_models.py +++ b/backend_service/catalog/text_models.py @@ -302,6 +302,7 @@ "contextWindow": "128K", "launchMode": "direct", "backend": "llama.cpp", + "releaseDate": "2025-07", }, { "id": "mistralai/Devstral-Small-2507", @@ -317,6 +318,7 @@ "contextWindow": "128K", "launchMode": "convert", "backend": "mlx", + "releaseDate": "2025-07", }, ], "readme": [ @@ -410,6 +412,7 @@ "contextWindow": "128K", "launchMode": "convert", "backend": "mlx", + "releaseDate": "2024-09", }, { "id": "Qwen/Qwen2.5-7B-Instruct", @@ -425,6 +428,7 @@ "contextWindow": "128K", "launchMode": "convert", "backend": "mlx", + "releaseDate": "2024-09", }, { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", @@ -440,6 +444,7 @@ "contextWindow": "128K", "launchMode": "convert", "backend": "mlx", + "releaseDate": "2024-11", }, { "id": 
"Qwen/Qwen2.5-32B-Instruct", @@ -455,6 +460,7 @@ "contextWindow": "128K", "launchMode": "convert", "backend": "mlx", + "releaseDate": "2024-09", }, ], "readme": [ @@ -493,6 +499,7 @@ "contextWindow": "128K", "launchMode": "convert", "backend": "mlx", + "releaseDate": "2024-12", }, { "id": "mlx-community/Llama-3.3-70B-Instruct-4bit", @@ -508,6 +515,7 @@ "contextWindow": "128K", "launchMode": "direct", "backend": "mlx", + "releaseDate": "2024-12", }, ], "readme": [ @@ -546,6 +554,7 @@ "contextWindow": "16K", "launchMode": "convert", "backend": "mlx", + "releaseDate": "2024-12", }, { "id": "mlx-community/phi-4-4bit", @@ -561,6 +570,7 @@ "contextWindow": "16K", "launchMode": "direct", "backend": "mlx", + "releaseDate": "2025-01", }, ], "readme": [ @@ -599,6 +609,7 @@ "contextWindow": "128K", "launchMode": "convert", "backend": "mlx", + "releaseDate": "2024-07", }, ], "readme": [ diff --git a/backend_service/catalog/video_models.py b/backend_service/catalog/video_models.py new file mode 100644 index 0000000..33a8e7c --- /dev/null +++ b/backend_service/catalog/video_models.py @@ -0,0 +1,257 @@ +"""Curated catalog of video generation models we plan to support. + +This module mirrors the shape of ``image_models.py`` so the frontend can reuse +the same UI patterns (families -> variants, downloads, discover tab). + +Only the first-wave candidate engines live here today. The runtime is not +wired yet — see ``backend_service/routes/video.py`` for the API surface and +``VideoPlaceholderTab`` on the frontend for the current UX. +""" + +from __future__ import annotations + +from typing import Any + + +VIDEO_MODEL_FAMILIES: list[dict[str, Any]] = [ + { + "id": "ltx-video", + "name": "LTX-Video", + "provider": "Lightricks", + "headline": "Fast text-to-video model tuned for consumer hardware.", + "summary": "First target for local video generation. Short clips (2-5s) at 768x512 with solid motion quality.", + "updatedLabel": "Planned — first wave", + "badges": ["Fast", "Small", "Apache 2.0"], + "defaultVariantId": "Lightricks/LTX-Video", + "variants": [ + { + "id": "Lightricks/LTX-Video", + "familyId": "ltx-video", + "name": "LTX-Video", + "provider": "Lightricks", + "repo": "Lightricks/LTX-Video", + "link": "https://huggingface.co/Lightricks/LTX-Video", + "runtime": "diffusers LTXPipeline (planned)", + "styleTags": ["general", "fast", "motion"], + "taskSupport": ["txt2video"], + "sizeGb": 2.0, + "recommendedResolution": "768x512", + "defaultDurationSeconds": 4.0, + "note": "Small, fast, Apache 2.0 — best starter pick for a local video runtime.", + "estimatedGenerationSeconds": 45.0, + "availableLocally": False, + "releaseDate": "2024-11", + } + ], + }, + { + "id": "wan-2-1", + "name": "Wan 2.1", + "provider": "Alibaba", + "headline": "Smaller Wan variants — the 1.3B is the fastest starter pick for local video.", + "summary": "Wan 2.1 ships in a 1.3B size that fits on modest hardware and a 14B size for higher quality. Both use the same WanPipeline in diffusers.", + "updatedLabel": "Planned — first wave", + "badges": ["Small", "Fast", "Apache 2.0"], + "defaultVariantId": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", + "variants": [ + { + "id": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", + "familyId": "wan-2-1", + "name": "Wan 2.1 T2V 1.3B", + "provider": "Alibaba", + # The -Diffusers mirror ships the standard diffusers layout + # (model_index.json, scheduler/, text_encoder/, transformer/, + # vae/, tokenizer/) — the base Wan-AI repo uses a native Wan + # format that WanPipeline.from_pretrained can't load. 
+ "repo": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", + "link": "https://huggingface.co/Wan-AI/Wan2.1-T2V-1.3B-Diffusers", + "runtime": "diffusers WanPipeline", + "styleTags": ["general", "fast", "small"], + "taskSupport": ["txt2video"], + # ~16GB on disk — 1.3B is just the transformer. The repo also + # ships a UMT5-XXL text encoder (~11GB) and VAE/CLIP weights. + "sizeGb": 16.4, + "recommendedResolution": "832x480", + "defaultDurationSeconds": 4.0, + "note": "1.3B transformer + UMT5 text encoder. ~16GB on disk. Best starter pick for trying local video end-to-end on modest hardware.", + "estimatedGenerationSeconds": 60.0, + "availableLocally": False, + "releaseDate": "2025-02", + }, + { + "id": "Wan-AI/Wan2.1-T2V-14B-Diffusers", + "familyId": "wan-2-1", + "name": "Wan 2.1 T2V 14B", + "provider": "Alibaba", + "repo": "Wan-AI/Wan2.1-T2V-14B-Diffusers", + "link": "https://huggingface.co/Wan-AI/Wan2.1-T2V-14B-Diffusers", + "runtime": "diffusers WanPipeline", + "styleTags": ["general", "quality", "motion"], + "taskSupport": ["txt2video"], + # 14B transformer in bf16 (~28GB) + UMT5-XXL text encoder (~11GB) + # + VAE/CLIP weights. + "sizeGb": 45.0, + "recommendedResolution": "832x480", + "defaultDurationSeconds": 5.0, + "note": "Wan 2.1 quality tier. ~45GB. Same WanPipeline class as the 1.3B and Wan 2.2.", + "estimatedGenerationSeconds": 180.0, + "availableLocally": False, + "releaseDate": "2025-02", + }, + ], + }, + { + "id": "wan-2-2", + "name": "Wan 2.2", + "provider": "Alibaba", + "headline": "Strong text-to-video quality with competitive motion consistency.", + "summary": "Mid-sized Wan model that runs on 24GB+ VRAM or Apple Silicon with unified memory.", + "updatedLabel": "Planned — first wave", + "badges": ["Balanced", "Quality", "Apache 2.0"], + "defaultVariantId": "Wan-AI/Wan2.2-T2V-A14B-Diffusers", + "variants": [ + { + "id": "Wan-AI/Wan2.2-T2V-A14B-Diffusers", + "familyId": "wan-2-2", + "name": "Wan 2.2 T2V A14B", + "provider": "Alibaba", + # -Diffusers mirror ships the standard diffusers layout; the + # base Wan-AI/Wan2.2-T2V-A14B repo uses the native Wan format. + "repo": "Wan-AI/Wan2.2-T2V-A14B-Diffusers", + "link": "https://huggingface.co/Wan-AI/Wan2.2-T2V-A14B-Diffusers", + "runtime": "diffusers WanPipeline", + "styleTags": ["general", "quality", "motion"], + "taskSupport": ["txt2video"], + "sizeGb": 14.0, + "recommendedResolution": "832x480", + "defaultDurationSeconds": 5.0, + "note": "Balanced quality vs size. Works on 24GB VRAM or 64GB unified memory.", + "estimatedGenerationSeconds": 180.0, + "availableLocally": False, + "releaseDate": "2025-07", + } + ], + }, + { + "id": "hunyuan-video", + "name": "HunyuanVideo", + "provider": "Tencent", + "headline": "High-fidelity text-to-video with longer clips and stronger scene cohesion.", + "summary": "Heavy-duty model that needs 40GB+ class hardware. Ships longer clips and nicer compositions.", + "updatedLabel": "Planned — stretch target", + "badges": ["Quality", "Heavy", "Apache 2.0"], + "defaultVariantId": "hunyuanvideo-community/HunyuanVideo", + "variants": [ + { + "id": "hunyuanvideo-community/HunyuanVideo", + "familyId": "hunyuan-video", + "name": "HunyuanVideo", + "provider": "Tencent", + # Community-maintained diffusers port of tencent/HunyuanVideo. + # The base tencent repo doesn't ship model_index.json — the + # -community mirror is the one HunyuanVideoPipeline loads. 
+ "repo": "hunyuanvideo-community/HunyuanVideo", + "link": "https://huggingface.co/hunyuanvideo-community/HunyuanVideo", + "runtime": "diffusers HunyuanVideoPipeline", + "styleTags": ["general", "quality", "cinematic"], + "taskSupport": ["txt2video"], + "sizeGb": 25.0, + "recommendedResolution": "1280x720", + "defaultDurationSeconds": 5.0, + "note": "High quality. Needs 40GB+ VRAM or Apple Silicon Max/Ultra class memory.", + "estimatedGenerationSeconds": 420.0, + "availableLocally": False, + "releaseDate": "2024-12", + } + ], + }, + { + "id": "mochi-1", + "name": "Mochi 1", + "provider": "Genmo", + "headline": "Open-weight video model with competitive motion quality.", + "summary": "Apache 2.0 licence, solid motion handling, mid-sized footprint.", + "updatedLabel": "Planned — first wave", + "badges": ["Open", "Balanced", "Apache 2.0"], + "defaultVariantId": "genmo/mochi-1-preview", + "variants": [ + { + "id": "genmo/mochi-1-preview", + "familyId": "mochi-1", + "name": "Mochi 1 Preview", + "provider": "Genmo", + "repo": "genmo/mochi-1-preview", + "link": "https://huggingface.co/genmo/mochi-1-preview", + "runtime": "diffusers MochiPipeline (planned)", + "styleTags": ["general", "motion", "balanced"], + "taskSupport": ["txt2video"], + "sizeGb": 10.0, + "recommendedResolution": "848x480", + "defaultDurationSeconds": 5.4, + "note": "Apache 2.0, balanced footprint, strong motion quality.", + "estimatedGenerationSeconds": 150.0, + "availableLocally": False, + "releaseDate": "2024-10", + } + ], + }, + { + "id": "cogvideox", + "name": "CogVideoX", + "provider": "THUDM", + "headline": "Tsinghua's open-weight video model — 2B fits 8 GB VRAM, 5B is the quality tier.", + "summary": ( + "CogVideoX ships in a 2B size that runs on 8 GB consumer GPUs and a 5B size that " + "delivers higher fidelity on 24 GB+ cards or unified-memory Macs. Both use the same " + "CogVideoXPipeline in diffusers." + ), + "updatedLabel": "Planned — first wave", + "badges": ["Small", "Open", "Apache 2.0"], + "defaultVariantId": "THUDM/CogVideoX-2b", + "variants": [ + { + "id": "THUDM/CogVideoX-2b", + "familyId": "cogvideox", + "name": "CogVideoX 2B", + "provider": "THUDM", + "repo": "THUDM/CogVideoX-2b", + "link": "https://huggingface.co/THUDM/CogVideoX-2b", + "runtime": "diffusers CogVideoXPipeline", + "styleTags": ["general", "fast", "small"], + "taskSupport": ["txt2video"], + # 2B transformer in fp16 (~4 GB) + T5 text encoder (~5 GB) + + # VAE. Fits comfortably on a 12 GB card; 8 GB works with + # CPU-offload tricks. Smaller than Wan 2.1 1.3B because there's + # no UMT5-XXL — just the standard T5. + "sizeGb": 9.0, + "recommendedResolution": "720x480", + "defaultDurationSeconds": 6.0, + "note": "Smallest CogVideoX. Apache 2.0 weights, ~9 GB on disk, runs on consumer GPUs.", + "estimatedGenerationSeconds": 90.0, + "availableLocally": False, + "releaseDate": "2024-08", + }, + { + "id": "THUDM/CogVideoX-5b", + "familyId": "cogvideox", + "name": "CogVideoX 5B", + "provider": "THUDM", + "repo": "THUDM/CogVideoX-5b", + "link": "https://huggingface.co/THUDM/CogVideoX-5b", + "runtime": "diffusers CogVideoXPipeline", + "styleTags": ["general", "quality", "balanced"], + "taskSupport": ["txt2video"], + # 5B transformer (~10 GB) + T5 (~5 GB) + VAE. Lands in the + # same envelope as Wan 2.2 — needs 24 GB VRAM or 32 GB+ + # unified memory. + "sizeGb": 18.0, + "recommendedResolution": "720x480", + "defaultDurationSeconds": 6.0, + "note": "Quality tier. ~18 GB on disk. 
Same CogVideoXPipeline class as the 2B.", + "estimatedGenerationSeconds": 200.0, + "availableLocally": False, + "releaseDate": "2024-08", + }, + ], + }, +] diff --git a/backend_service/helpers/cache.py b/backend_service/helpers/cache.py index 7a3e75e..cc3069e 100644 --- a/backend_service/helpers/cache.py +++ b/backend_service/helpers/cache.py @@ -47,7 +47,7 @@ def compute_cache_preview( strategy: str = "native", build_system_snapshot=None, ) -> dict[str, Any]: - from compression import registry as _cache_registry + from cache_compression import registry as _cache_registry num_layers = max(1, num_layers) num_heads = max(1, num_heads) diff --git a/backend_service/helpers/discovery.py b/backend_service/helpers/discovery.py index 55a6a63..6847d96 100644 --- a/backend_service/helpers/discovery.py +++ b/backend_service/helpers/discovery.py @@ -329,6 +329,27 @@ def _detect_model_quantization(path: Path, fmt: str, *, name_hint: str = "") -> ) +# Video diffusion pipelines. Keep keywords specific enough that they don't +# collide with chat LLMs or image diffusion checkpoints — e.g. "hunyuanvideo" +# not "hunyuan" (which would catch the Hunyuan image model), "wan2" not "wan" +# (too generic), "mochi-1" not "mochi". New video families added to +# ``backend_service/catalog/video_models.py`` should also get a keyword here. +_VIDEO_MODEL_KEYWORDS = ( + "hunyuanvideo", + "wan-ai/", + "wan2.", + "wan2-", + "-t2v-", + "-i2v-", + "-v2v-", + "mochi-1", + "cogvideo", + "ltx-video", + "zeroscope", + "animatediff", +) + + def _looks_like_draft_model(name: str) -> bool: """Return True if this looks like a speculative decoding draft model. @@ -339,6 +360,24 @@ def _looks_like_draft_model(name: str) -> bool: return any(kw in lower for kw in _DRAFT_MODEL_KEYWORDS) +def _looks_like_video_model(name: str) -> bool: + """Return True if this looks like a video diffusion pipeline. + + Video models (LTX-Video, Wan, HunyuanVideo, Mochi, CogVideo, …) are + Diffusers pipelines with much larger VRAM footprints than LLMs and + their own dedicated Studio/Discover UI under the Video section. They + should be excluded from the chat-oriented My Models list. + + Detection is keyword-only here because video Diffusers pipelines share + the ``model_index.json`` marker with image pipelines — we can't use that + to discriminate. When a partial HF cache download hasn't yet produced + ``model_index.json``, the name-based match is what keeps them out of + the LLM list. 
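+
+    Illustrative matches against ``_VIDEO_MODEL_KEYWORDS``::
+
+        _looks_like_video_model("Wan2.1-T2V-1.3B-Diffusers")  # True ("wan2." + "-t2v-")
+        _looks_like_video_model("HunyuanVideo")               # True ("hunyuanvideo")
+        _looks_like_video_model("Qwen2.5-7B-Instruct")        # False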
+ """ + lower = name.lower() + return any(kw in lower for kw in _VIDEO_MODEL_KEYWORDS) + + def _looks_like_image_model(path: Path, name: str) -> bool: """Return True if this looks like a diffusion / image generation model.""" lower_name = name.lower() @@ -623,7 +662,9 @@ def _discover_local_models(model_directories: list[dict[str, Any]], limit: int = broken, broken_reason = _detect_broken_library_item(child, file_format, source_kind) quantization = _detect_model_quantization(child, file_format, name_hint=name) backend = "llama.cpp" if file_format == "GGUF" else "mlx" - if _looks_like_image_model(child, name): + if _looks_like_video_model(name): + model_type = "video" + elif _looks_like_image_model(child, name): model_type = "image" elif _looks_like_draft_model(name): model_type = "draft" diff --git a/backend_service/helpers/formatting.py b/backend_service/helpers/formatting.py index 71a11da..d37b60c 100644 --- a/backend_service/helpers/formatting.py +++ b/backend_service/helpers/formatting.py @@ -51,7 +51,7 @@ def _parse_context_label(label: str | None) -> int | None: def _benchmark_label(model_name: str, *, cache_strategy: str, bits: int, fp16_layers: int, context_tokens: int) -> str: - from compression import registry as _strategy_registry + from cache_compression import registry as _strategy_registry strat = _strategy_registry.get(cache_strategy) or _strategy_registry.default() cache_label = strat.label(bits, fp16_layers) return f"{model_name} / {cache_label} / {_context_label(context_tokens)} ctx" diff --git a/backend_service/helpers/gpu.py b/backend_service/helpers/gpu.py index 90f933b..2c4e84a 100644 --- a/backend_service/helpers/gpu.py +++ b/backend_service/helpers/gpu.py @@ -8,11 +8,23 @@ from __future__ import annotations import platform +import shutil import subprocess import json +import threading from typing import Any +# Windows: prevent every nvidia-smi / sysctl invocation from flashing a +# console window. Without this, FastAPI worker threads on Windows pop a +# brief cmd.exe window per probe — and on slower disks the spawn alone +# can add 1-2s of latency to ``/api/video/runtime``, blowing past the +# frontend's 15s fetch timeout and surfacing as "Failed to fetch". 
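+# Every probe below splats these kwargs into its subprocess call, e.g.
+#   subprocess.check_output(["sysctl", "-n", "hw.memsize"], text=True,
+#                           timeout=5, **_SUBPROCESS_KWARGS)
+# On non-Windows platforms the dict stays empty, so behaviour is unchanged.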
+_SUBPROCESS_KWARGS: dict[str, Any] = {} +if hasattr(subprocess, "CREATE_NO_WINDOW"): + _SUBPROCESS_KWARGS["creationflags"] = subprocess.CREATE_NO_WINDOW + + class GPUMonitor: """Cross-platform GPU/accelerator monitor.""" @@ -41,6 +53,7 @@ def _snapshot_macos(self) -> dict[str, Any]: chip = subprocess.check_output( ["sysctl", "-n", "machdep.cpu.brand_string"], text=True, timeout=5, + **_SUBPROCESS_KWARGS, ).strip() if chip: gpu_name = chip @@ -53,6 +66,7 @@ def _snapshot_macos(self) -> dict[str, Any]: total_bytes = int(subprocess.check_output( ["sysctl", "-n", "hw.memsize"], text=True, timeout=5, + **_SUBPROCESS_KWARGS, ).strip()) vram_total_gb = round(total_bytes / (1024 ** 3), 2) except Exception: @@ -72,6 +86,7 @@ def _snapshot_macos(self) -> dict[str, Any]: out = subprocess.check_output( ["ioreg", "-r", "-d", "1", "-c", "AppleARMIODevice"], text=True, timeout=5, + **_SUBPROCESS_KWARGS, ) # Best-effort — ioreg doesn't reliably expose GPU util on all chips except Exception: @@ -100,6 +115,7 @@ def _snapshot_nvidia(self) -> dict[str, Any]: ], text=True, timeout=10, + **_SUBPROCESS_KWARGS, ) parts = [p.strip() for p in out.strip().split(",")] if len(parts) >= 6: @@ -151,3 +167,119 @@ def _fallback_psutil(self) -> dict[str, Any]: def get_gpu_metrics() -> dict[str, Any]: """Return a snapshot of current GPU / accelerator metrics.""" return _monitor.snapshot() + + +# VRAM total never changes for the life of a process — caching it lets the +# video runtime probe stay snappy even when nvidia-smi takes a second or two +# to spawn on Windows. Cleared by ``reset_vram_total_cache()`` for tests. +_VRAM_TOTAL_LOCK = threading.Lock() +_VRAM_TOTAL_CACHE: dict[str, float | None] = {} + + +def get_device_vram_total_gb() -> float | None: + """Return total device memory in GB, cached for the process lifetime. + + Hot path for ``backend_service.video_runtime._detect_device_memory_gb``. + The full ``snapshot()`` call shells out to ``nvidia-smi``/``sysctl`` every + time, which is fine for the metrics endpoint (live readings) but wasteful + for the video runtime probe (which only needs total VRAM, and a value + that is fixed per machine). On Windows the subprocess startup cost was + blowing past the frontend's 15s fetch timeout under load. + """ + with _VRAM_TOTAL_LOCK: + if "value" in _VRAM_TOTAL_CACHE: + return _VRAM_TOTAL_CACHE["value"] + + try: + snapshot = _monitor.snapshot() + except Exception: + snapshot = {} + + total = snapshot.get("vram_total_gb") + value: float | None = float(total) if isinstance(total, (int, float)) and total > 0 else None + + with _VRAM_TOTAL_LOCK: + _VRAM_TOTAL_CACHE["value"] = value + return value + + +def reset_vram_total_cache() -> None: + """Clear the cached VRAM total. Used by tests.""" + with _VRAM_TOTAL_LOCK: + _VRAM_TOTAL_CACHE.clear() + + +def nvidia_gpu_present() -> bool: + """Cheap, side-effect-free check for an NVIDIA GPU on Linux/Windows. + + We only look for ``nvidia-smi`` on ``PATH`` — invoking it is deliberately + avoided because some locked-down laptops and WSL installs without the + driver shim hang on the first call. Presence on ``PATH`` is a + reliable-enough signal for the "you probably wanted CUDA" diagnostic the + image/video runtimes surface when torch falls back to CPU. 
+ """ + return shutil.which("nvidia-smi") is not None + + +_CUDA_WHEEL_HINT = ( + "Click \"Install CUDA torch\" in this banner, or run: " + "pip install --upgrade --force-reinstall torch " + "--index-url https://download.pytorch.org/whl/cu124" +) + + +def gpu_status_snapshot() -> dict[str, Any]: + """Unified GPU status for the frontend warning banner. + + Returns a dict with the host platform, whether an NVIDIA driver is + visible, whether torch can reach CUDA / MPS, and a recommendation string + when torch falls back to CPU on a machine with an NVIDIA GPU. All fields + are optional so this can be called before torch has been imported without + failing. + """ + system = platform.system() + nvidia_present = nvidia_gpu_present() + + torch_imported = False + cuda_available = False + mps_available = False + try: + import torch # type: ignore + except Exception: + torch_module = None + else: + torch_module = torch + torch_imported = True + + if torch_module is not None: + try: + cuda_available = bool(getattr(torch_module.cuda, "is_available", lambda: False)()) + except Exception: + cuda_available = False + try: + mps_module = getattr(torch_module.backends, "mps", None) + if mps_module is not None: + mps_available = bool(getattr(mps_module, "is_available", lambda: False)()) + except Exception: + mps_available = False + + if system in ("Windows", "Linux") and nvidia_present and torch_imported and not cuda_available: + recommendation = ( + "torch was imported but CUDA is unavailable — generation will run on CPU " + "(expect minutes per step). Reinstall the CUDA wheel: " + + _CUDA_WHEEL_HINT + ) + warn = True + else: + recommendation = None + warn = False + + return { + "platform": system, + "nvidiaGpuDetected": nvidia_present, + "torchImported": torch_imported, + "torchCudaAvailable": cuda_available, + "torchMpsAvailable": mps_available, + "cpuFallbackWarning": warn, + "recommendation": recommendation, + } diff --git a/backend_service/helpers/huggingface.py b/backend_service/helpers/huggingface.py index d22436a..371424f 100644 --- a/backend_service/helpers/huggingface.py +++ b/backend_service/helpers/huggingface.py @@ -11,7 +11,7 @@ from pathlib import Path from typing import Any -from backend_service.catalog import MODEL_FAMILIES, IMAGE_MODEL_FAMILIES +from backend_service.catalog import MODEL_FAMILIES, IMAGE_MODEL_FAMILIES, VIDEO_MODEL_FAMILIES from backend_service.helpers.formatting import _bytes_to_gb from backend_service.helpers.discovery import _path_size_bytes @@ -161,6 +161,7 @@ def _search_huggingface_hub(query: str, library: list[dict[str, Any]], limit: in downloads = model.get("downloads") or 0 likes = model.get("likes") or 0 last_modified = str(model.get("lastModified") or "").strip() or None + created_at = str(model.get("createdAt") or "").strip() or None results.append({ "id": model_id, @@ -176,6 +177,8 @@ def _search_huggingface_hub(query: str, library: list[dict[str, Any]], limit: in "likesLabel": f"{likes:,} likes", "lastModified": last_modified, "updatedLabel": _format_hf_updated_label(last_modified), + "createdAt": created_at, + "releaseLabel": _format_release_label(created_at), "availableLocally": available_locally, "launchMode": launch_mode, "backend": backend, @@ -416,6 +419,32 @@ def _format_hf_updated_label(value: str | None) -> str | None: return f"Updated {month_label} {parsed.day}, {parsed.year}" +def _format_release_label(value: str | None) -> str | None: + """Format a release date / HF ``createdAt`` into a short human label. 
+ + Accepts either a full ISO datetime (``2024-08-01T12:34:56Z`` — HF API) + or a year-month shorthand (``2024-08`` — curated catalog entries) and + returns ``"Released Aug 2024"``. Falls back to None when the input + can't be parsed. + """ + if not value: + return None + parsed = _parse_iso_datetime(value) + if parsed is None: + # Try ``YYYY-MM`` or ``YYYY-MM-DD`` shorthand used in curated catalog + # entries — ``_parse_iso_datetime`` only handles the full datetime form. + text = str(value).strip() + for fmt in ("%Y-%m-%d", "%Y-%m", "%Y"): + try: + parsed = datetime.strptime(text, fmt).replace(tzinfo=timezone.utc) + break + except ValueError: + continue + if parsed is None: + return None + return f"Released {parsed.strftime('%b')} {parsed.year}" + + def _hf_number_label(value: int, noun: str) -> str: return f"{value:,} {noun}" @@ -508,6 +537,17 @@ def _known_repo_size_gb(repo_id: str) -> float | None: if size_gb > 0: return size_gb + for family in VIDEO_MODEL_FAMILIES: + for variant in family["variants"]: + if str(variant.get("repo") or "") != repo_id: + continue + try: + size_gb = float(variant.get("sizeGb") or 0) + except (TypeError, ValueError): + size_gb = 0.0 + if size_gb > 0: + return size_gb + return None diff --git a/backend_service/helpers/images.py b/backend_service/helpers/images.py index 73b8d9d..169be5c 100644 --- a/backend_service/helpers/images.py +++ b/backend_service/helpers/images.py @@ -18,11 +18,12 @@ from backend_service.helpers.huggingface import ( _classify_hub_file, _format_hf_updated_label, + _format_release_label, _hf_number_label, _hf_repo_snapshot_dir, _parse_iso_datetime, ) -from backend_service.helpers.discovery import _candidate_model_dirs +from backend_service.helpers.discovery import _candidate_model_dirs, _path_size_bytes from backend_service.image_runtime import validate_local_diffusers_snapshot @@ -31,6 +32,33 @@ _LATEST_IMAGE_MODELS_CACHE: tuple[float, list[dict[str, Any]]] | None = None _LATEST_IMAGE_MODELS_TTL_SECONDS = 3 * 60 * 60 +# Cache keyed by (path, mtime_ns) — we recompute only when the snapshot dir +# actually changes. A fresh os.stat() is cheap enough to do per payload call. +_SNAPSHOT_SIZE_CACHE: dict[tuple[str, int], int] = {} + + +def _snapshot_on_disk_bytes(snapshot_dir: Path | None) -> int | None: + """Walk the HF snapshot dir and return its true on-disk byte size. + + Delegates to ``_path_size_bytes`` which dedupes by inode, so HF's + ``snapshots/<hash>/ -> blobs/`` symlink farm counts each blob + exactly once. Returns ``None`` when the path is missing or empty so + callers can distinguish "not on disk" from "zero bytes".
+ """ + if snapshot_dir is None: + return None + try: + stat_result = snapshot_dir.stat() + except OSError: + return None + cache_key = (str(snapshot_dir), stat_result.st_mtime_ns) + cached = _SNAPSHOT_SIZE_CACHE.get(cache_key) + if cached is not None: + return cached or None + total = _path_size_bytes(snapshot_dir) + _SNAPSHOT_SIZE_CACHE[cache_key] = total + return total or None + def _stable_image_hash(value: str) -> int: acc = 0 @@ -94,17 +122,30 @@ def _image_model_payloads(library: list[dict[str, Any]]) -> list[dict[str, Any]] families: list[dict[str, Any]] = [] for family in IMAGE_MODEL_FAMILIES: - variants = [ - { - **variant, - **repo_metadata.get(str(variant.get("repo") or ""), {}), - "source": "curated", - "familyName": family.get("name"), - "availableLocally": _image_variant_available_locally(variant, library), - "hasLocalData": _hf_repo_snapshot_dir(str(variant.get("repo") or "")) is not None, - } - for variant in family["variants"] - ] + variants = [] + for variant in family["variants"]: + repo_id = str(variant.get("repo") or "") + snapshot_dir = _hf_repo_snapshot_dir(repo_id) if repo_id else None + live_metadata = repo_metadata.get(repo_id, {}) + curated_release_date = str(variant.get("releaseDate") or "").strip() or None + curated_release_label = _format_release_label(curated_release_date) + release_label = curated_release_label or live_metadata.get("releaseLabel") + on_disk_bytes = _snapshot_on_disk_bytes(snapshot_dir) + variants.append( + { + **variant, + **live_metadata, + "source": "curated", + "familyName": family.get("name"), + "availableLocally": _image_variant_available_locally(variant, library), + "hasLocalData": snapshot_dir is not None, + "localPath": str(snapshot_dir) if snapshot_dir else None, + "releaseDate": curated_release_date, + "releaseLabel": release_label, + "onDiskBytes": on_disk_bytes, + "onDiskGb": _bytes_to_gb(on_disk_bytes) if on_disk_bytes else None, + } + ) families.append( { **family, @@ -202,6 +243,7 @@ def _image_repo_live_metadata(repo_id: str) -> dict[str, Any]: downloads = int(data.get("downloads") or 0) likes = int(data.get("likes") or 0) last_modified = str(data.get("lastModified") or "").strip() or None + created_at = str(data.get("createdAt") or "").strip() or None payload = { "downloads": downloads, "likes": likes, @@ -209,6 +251,8 @@ def _image_repo_live_metadata(repo_id: str) -> dict[str, Any]: "likesLabel": _hf_number_label(likes, "likes") if likes > 0 else None, "lastModified": last_modified, "updatedLabel": _format_hf_updated_label(last_modified), + "createdAt": created_at, + "releaseLabel": _format_release_label(created_at), "license": license_value, "gated": bool(data.get("gated")), "pipelineTag": str(data.get("pipeline_tag") or "").strip() or None, @@ -318,6 +362,9 @@ def _tracked_latest_seed_payloads(library: list[dict[str, Any]]) -> list[dict[st repo_id = str(seed.get("repo") or "") if not repo_id: continue + release_date = str(seed.get("releaseDate") or "").strip() or None + snapshot_dir = _hf_repo_snapshot_dir(repo_id) + on_disk_bytes = _snapshot_on_disk_bytes(snapshot_dir) payloads.append( { "id": repo_id, @@ -337,7 +384,10 @@ def _tracked_latest_seed_payloads(library: list[dict[str, Any]]) -> list[dict[st or "Tracked latest image repo surfaced by ChaosEngineAI when the live latest lane is sparse." 
), "availableLocally": _image_repo_runtime_ready(repo_id), - "hasLocalData": _hf_repo_snapshot_dir(repo_id) is not None, + "hasLocalData": snapshot_dir is not None, + "localPath": str(snapshot_dir) if snapshot_dir else None, + "onDiskBytes": on_disk_bytes, + "onDiskGb": _bytes_to_gb(on_disk_bytes) if on_disk_bytes else None, "estimatedGenerationSeconds": None, "downloads": None, "likes": None, @@ -345,6 +395,9 @@ def _tracked_latest_seed_payloads(library: list[dict[str, Any]]) -> list[dict[st "likesLabel": None, "lastModified": None, "updatedLabel": str(seed.get("updatedLabel") or "Tracked latest"), + "createdAt": None, + "releaseDate": release_date, + "releaseLabel": _format_release_label(release_date), "license": seed.get("license"), "gated": seed.get("gated"), "pipelineTag": seed.get("pipelineTag"), @@ -458,6 +511,8 @@ def _latest_image_model_payloads(library: list[dict[str, Any]], limit: int = 10) tags = [str(tag) for tag in (model.get("tags") or [])] pipeline_tag = str(model.get("pipeline_tag") or "").strip() or None metadata = _image_repo_live_metadata(model_id) + snapshot_dir = _hf_repo_snapshot_dir(model_id) + on_disk_bytes = _snapshot_on_disk_bytes(snapshot_dir) candidates.append({ "id": model_id, "familyId": "latest", @@ -476,7 +531,10 @@ def _latest_image_model_payloads(library: list[dict[str, Any]], limit: int = 10) "Review details on Hugging Face before treating it as a fully curated Studio default." ), "availableLocally": _image_repo_runtime_ready(model_id), - "hasLocalData": _hf_repo_snapshot_dir(model_id) is not None, + "hasLocalData": snapshot_dir is not None, + "localPath": str(snapshot_dir) if snapshot_dir else None, + "onDiskBytes": on_disk_bytes, + "onDiskGb": _bytes_to_gb(on_disk_bytes) if on_disk_bytes else None, "estimatedGenerationSeconds": None, "downloads": metadata.get("downloads"), "likes": metadata.get("likes"), @@ -484,6 +542,8 @@ def _latest_image_model_payloads(library: list[dict[str, Any]], limit: int = 10) "likesLabel": metadata.get("likesLabel"), "lastModified": metadata.get("lastModified"), "updatedLabel": metadata.get("updatedLabel"), + "createdAt": metadata.get("createdAt"), + "releaseLabel": metadata.get("releaseLabel"), "license": metadata.get("license"), "gated": bool(metadata.get("gated")) if metadata.get("gated") is not None else None, "pipelineTag": metadata.get("pipelineTag") or pipeline_tag, @@ -608,6 +668,50 @@ def _image_download_repo_ids() -> set[str]: return repos +# Diffusers image pipelines (FLUX, SD3.5, SDXL, Sana, HiDream, Qwen-Image, ...) +# always load from the per-component folder layout at the snapshot root. Many +# repos also ship a legacy single-file checkpoint (e.g. ``flux1-schnell.safetensors`` +# in ``black-forest-labs/FLUX.1-schnell``) for ComfyUI/kijai users — ~24 GB of +# duplicate weights the diffusers pipeline never touches. Without an allowlist +# ``snapshot_download`` pulls both copies, so a 23 GB model lands on disk as +# 57+ GB. Mirrors ``_VIDEO_DIFFUSERS_ALLOW_PATTERNS`` in ``helpers/video.py``. +_IMAGE_DIFFUSERS_ALLOW_PATTERNS: list[str] = [ + "model_index.json", + "scheduler/**", + "text_encoder/**", + "text_encoder_2/**", + "text_encoder_3/**", + "tokenizer/**", + "tokenizer_2/**", + "tokenizer_3/**", + "transformer/**", + "transformer_2/**", + "unet/**", + "vae/**", + "feature_extractor/**", + "image_encoder/**", + "safety_checker/**", + "*.md", + "LICENSE*", +] + + +def _image_repo_allow_patterns(repo_id: str) -> list[str] | None: + """Patterns to pass to ``snapshot_download`` for an image repo. 
+ + Returns ``None`` for repos that aren't known curated or tracked image + models so arbitrary Discover hub results still download in full. Returning + ``None`` (not an empty list) signals the caller to omit ``allow_patterns`` + entirely — an empty list would match nothing and download zero files. + """ + if not repo_id: + return None + known = _image_download_repo_ids() + if repo_id not in known: + return None + return list(_IMAGE_DIFFUSERS_ALLOW_PATTERNS) + + # ---- Image output CRUD ---- def _image_output_directory(image_outputs_dir: Path, created_at: str | None = None) -> Path: diff --git a/backend_service/helpers/settings.py b/backend_service/helpers/settings.py index 23d6c06..b71b087 100644 --- a/backend_service/helpers/settings.py +++ b/backend_service/helpers/settings.py @@ -181,6 +181,14 @@ def images_dir(self) -> Path: def image_outputs_dir(self) -> Path: return self.images_dir / "outputs" + @property + def videos_dir(self) -> Path: + return self.data_dir / "videos" + + @property + def video_outputs_dir(self) -> Path: + return self.videos_dir / "outputs" + def _normalize_slug(value: str, fallback: str) -> str: cleaned = "".join(character.lower() if character.isalnum() else "-" for character in value.strip()) @@ -193,11 +201,20 @@ def _default_settings(default_port: int, data_dir: Path) -> dict[str, Any]: "modelDirectories": [dict(entry) for entry in DEFAULT_MODEL_DIRECTORIES], "preferredServerPort": default_port, "allowRemoteConnections": False, + # Default on — the API token is auto-generated and passed to the + # frontend via /api/auth/session, so the built-in UI works out of + # the box. Users who connect external clients (OpenWebUI, scripts, + # another desktop app) can flip this off from the Server tab. + "requireApiAuth": True, "autoStartServer": False, "launchPreferences": dict(DEFAULT_LAUNCH_PREFERENCES), "remoteProviders": [], "huggingFaceToken": "", "dataDirectory": str(data_dir), + # Empty string means "use the default under dataDirectory". Anything + # else is treated as an absolute (or ~-relative) override path. + "imageOutputsDirectory": "", + "videoOutputsDirectory": "", } @@ -301,6 +318,9 @@ def _load_settings(path: Path, default_port: int, data_dir: Path) -> dict[str, A settings["preferredServerPort"] = default_port settings["allowRemoteConnections"] = bool(payload.get("allowRemoteConnections", False)) + # Default True: if the key is missing from an older settings.json we + # preserve the secure default rather than silently opening the API. 
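+    # (app.py's _resolve_require_api_auth can still override the stored value
+    # at runtime via the CHAOSENGINE_REQUIRE_AUTH environment variable.)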
+ settings["requireApiAuth"] = bool(payload.get("requireApiAuth", True)) settings["autoStartServer"] = bool(payload.get("autoStartServer", False)) settings["launchPreferences"] = _normalize_launch_preferences(payload.get("launchPreferences")) @@ -311,6 +331,12 @@ def _load_settings(path: Path, default_port: int, data_dir: Path) -> dict[str, A if hf_token: os.environ["HF_TOKEN"] = hf_token os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token + + for key in ("imageOutputsDirectory", "videoOutputsDirectory"): + raw = payload.get(key) + if isinstance(raw, str): + settings[key] = raw.strip() + return settings diff --git a/backend_service/helpers/system.py b/backend_service/helpers/system.py index 4e87732..3749e56 100644 --- a/backend_service/helpers/system.py +++ b/backend_service/helpers/system.py @@ -413,7 +413,7 @@ def _build_system_snapshot(app_version: str, app_started_at: float) -> dict[str, ) def _get_cache_strategies(): - from compression import registry + from cache_compression import registry return registry.available() def _get_dflash_info(): diff --git a/backend_service/helpers/video.py b/backend_service/helpers/video.py new file mode 100644 index 0000000..28b24f4 --- /dev/null +++ b/backend_service/helpers/video.py @@ -0,0 +1,307 @@ +"""Video model helpers: variant lookup, install detection, payload shaping, +output CRUD. + +Mirrors ``helpers/images.py`` so the routes for ``/api/video/*`` can drop in +alongside the image routes without a new mental model. +""" + +from __future__ import annotations + +import base64 +import json +from datetime import datetime +from pathlib import Path +from typing import Any + +from backend_service.catalog import VIDEO_MODEL_FAMILIES +from backend_service.helpers.formatting import _bytes_to_gb +from backend_service.helpers.huggingface import _format_release_label, _hf_repo_snapshot_dir +from backend_service.helpers.images import _snapshot_on_disk_bytes +from backend_service.image_runtime import validate_local_diffusers_snapshot + + +def _video_model_payloads(library: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Return the catalog families enriched with per-variant availability. + + We deliberately don't hit Hugging Face live metadata from here — the image + version does for download counts, etc. We can bolt that on later if the + discover UX needs it. For now each variant just knows whether its local + snapshot is ready to load. + """ + families: list[dict[str, Any]] = [] + for family in VIDEO_MODEL_FAMILIES: + variants: list[dict[str, Any]] = [] + for variant in family["variants"]: + enriched = dict(variant) + repo = str(enriched.get("repo") or "") + enriched["availableLocally"] = _video_repo_runtime_ready(repo) if repo else False + enriched["hasLocalData"] = enriched["availableLocally"] or _video_repo_has_any_local_data(repo) + enriched["familyName"] = family["name"] + release_date = str(enriched.get("releaseDate") or "").strip() or None + enriched["releaseDate"] = release_date + enriched["releaseLabel"] = _format_release_label(release_date) + # Absolute path to the HF snapshot, used by the Reveal File button. + # Only populated when there is actually something on disk so the + # UI can reliably hide the button otherwise. 
+ snapshot_dir = _hf_repo_snapshot_dir(repo) if (enriched["hasLocalData"] and repo) else None + enriched["localPath"] = str(snapshot_dir) if snapshot_dir else None + on_disk_bytes = _snapshot_on_disk_bytes(snapshot_dir) + enriched["onDiskBytes"] = on_disk_bytes + enriched["onDiskGb"] = _bytes_to_gb(on_disk_bytes) if on_disk_bytes else None + variants.append(enriched) + payload = dict(family) + payload["variants"] = variants + families.append(payload) + return families + + +def _find_video_variant(model_id: str) -> dict[str, Any] | None: + for family in VIDEO_MODEL_FAMILIES: + for variant in family["variants"]: + if variant["id"] == model_id: + return variant + return None + + +def _find_video_variant_by_repo(repo: str) -> dict[str, Any] | None: + for family in VIDEO_MODEL_FAMILIES: + for variant in family["variants"]: + if variant["repo"] == repo: + return variant + return None + + +def _is_video_repo(repo_id: str) -> bool: + return any( + str(variant.get("repo") or "") == repo_id + for family in VIDEO_MODEL_FAMILIES + for variant in family["variants"] + ) + + +def _video_repo_runtime_ready(repo_id: str) -> bool: + """True if the local snapshot is complete enough to load via diffusers.""" + snapshot_dir = _hf_repo_snapshot_dir(repo_id) + if snapshot_dir is None: + return False + return validate_local_diffusers_snapshot(snapshot_dir, repo_id) is None + + +def _video_repo_has_any_local_data(repo_id: str) -> bool: + """True if we have a partial or complete snapshot on disk. + + Distinct from ``_video_repo_runtime_ready`` — this is the softer signal used + to tell the UI "something downloaded for this repo" even if it's incomplete. + """ + snapshot_dir = _hf_repo_snapshot_dir(repo_id) + if snapshot_dir is None: + return False + root = Path(snapshot_dir) + if not root.exists(): + return False + try: + return any( + candidate.is_file() or candidate.is_symlink() + for candidate in root.iterdir() + if not candidate.name.startswith(".") + ) + except OSError: + return False + + +def _video_variant_available_locally(variant: dict[str, Any]) -> bool: + repo = str(variant.get("repo") or "") + if not repo: + return False + return _video_repo_runtime_ready(repo) + + +def _video_download_repo_ids() -> set[str]: + return { + str(variant.get("repo") or "") + for family in VIDEO_MODEL_FAMILIES + for variant in family["variants"] + if str(variant.get("repo") or "") + } + + +# Diffusers pipelines only need the standard per-component folders +# (scheduler/, text_encoder/, tokenizer/, transformer/ or unet/, vae/) +# plus ``model_index.json`` at the root. Video repos frequently ship +# historical checkpoints (``ltx-video-0.9.safetensors`` and friends) as +# siblings — without an allowlist ``snapshot_download`` pulls every one +# of them, which can inflate a 2 GB diffusers pipeline into a 200 GB +# download. Keep this list conservative so future component folders still +# come through, but block the legacy standalone safetensors. +_VIDEO_DIFFUSERS_ALLOW_PATTERNS: list[str] = [ + "model_index.json", + "scheduler/**", + "text_encoder/**", + "text_encoder_2/**", + "text_encoder_3/**", + "tokenizer/**", + "tokenizer_2/**", + "tokenizer_3/**", + "transformer/**", + "transformer_2/**", + "unet/**", + "vae/**", + "feature_extractor/**", + "image_encoder/**", + "safety_checker/**", + "*.md", + "LICENSE*", +] + + +def _video_repo_allow_patterns(repo_id: str) -> list[str] | None: + """Patterns to pass to ``snapshot_download`` for a video repo. 
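+
+    A caller-side sketch (``snapshot_download`` is ``huggingface_hub``'s
+    standard helper; the repo id is illustrative):
+
+        snapshot_download(
+            "Lightricks/LTX-Video",
+            allow_patterns=_video_repo_allow_patterns("Lightricks/LTX-Video"),
+        )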
+ + Returns ``None`` for non-video repos so the caller can pass the value + through unconditionally without special-casing. For video repos the + allowlist keeps the download scoped to the diffusers pipeline layout + — see the comment on ``_VIDEO_DIFFUSERS_ALLOW_PATTERNS`` for why this + matters. + """ + if not _is_video_repo(repo_id): + return None + return list(_VIDEO_DIFFUSERS_ALLOW_PATTERNS) + + +def _video_download_validation_error(repo_id: str) -> str | None: + if not _is_video_repo(repo_id): + return None + snapshot_dir = _hf_repo_snapshot_dir(repo_id) + if snapshot_dir is None: + return ( + f"Download did not produce a local snapshot for {repo_id}. " + "Retry the download and make sure the backend can access Hugging Face." + ) + return validate_local_diffusers_snapshot(snapshot_dir, repo_id) + + +# ---- Video output CRUD ---- +# +# Video artifacts differ from image artifacts in one important way: an mp4 is +# the real deliverable and there's no cheap "preview" we can embed inline. The +# frontend loads the file directly via a dedicated ``/file`` endpoint rather +# than getting a base64 data URL in the list payload. + + +def _video_output_directory(video_outputs_dir: Path, created_at: str | None = None) -> Path: + day_label = (created_at or datetime.utcnow().isoformat())[:10] + output_dir = video_outputs_dir / day_label + output_dir.mkdir(parents=True, exist_ok=True) + return output_dir + + +def _hydrate_video_artifact(payload: dict[str, Any]) -> dict[str, Any]: + prompt = str(payload.get("prompt") or "") + model_name = str(payload.get("modelName") or payload.get("modelId") or "Video model") + return { + "artifactId": str(payload.get("artifactId") or ""), + "modelId": str(payload.get("modelId") or ""), + "modelName": model_name, + "prompt": prompt, + "negativePrompt": str(payload.get("negativePrompt") or ""), + "width": int(payload.get("width") or 768), + "height": int(payload.get("height") or 512), + "numFrames": int(payload.get("numFrames") or 0), + "fps": int(payload.get("fps") or 24), + "steps": int(payload.get("steps") or 0), + "guidance": float(payload.get("guidance") or 0.0), + "seed": int(payload.get("seed") or 0), + "createdAt": str( + payload.get("createdAt") or datetime.utcnow().replace(microsecond=0).isoformat() + "Z" + ), + "durationSeconds": float(payload.get("durationSeconds") or 0.0), + "clipDurationSeconds": float(payload.get("clipDurationSeconds") or 0.0), + "videoPath": str(payload.get("videoPath") or "") or None, + "metadataPath": str(payload.get("metadataPath") or "") or None, + "videoMimeType": str(payload.get("videoMimeType") or "video/mp4"), + "videoExtension": str(payload.get("videoExtension") or "mp4"), + "runtimeLabel": str(payload.get("runtimeLabel") or ""), + "runtimeNote": str(payload.get("runtimeNote") or "") or None, + } + + +def _save_video_artifact(artifact: dict[str, Any], video_outputs_dir: Path) -> dict[str, Any]: + created_at = str( + artifact.get("createdAt") or datetime.utcnow().replace(microsecond=0).isoformat() + "Z" + ) + output_dir = _video_output_directory(video_outputs_dir, created_at) + artifact_id = str(artifact["artifactId"]) + extension = str(artifact.get("videoExtension") or "mp4").lstrip(".") + video_path = output_dir / f"{artifact_id}.{extension}" + metadata_path = output_dir / f"{artifact_id}.json" + + video_bytes = artifact.get("videoBytes") + if isinstance(video_bytes, str): + video_bytes = base64.b64decode(video_bytes.encode("ascii")) + if isinstance(video_bytes, (bytes, bytearray)): + 
video_path.write_bytes(bytes(video_bytes)) + else: + raise ValueError( + "Cannot persist video artifact: no raw bytes supplied. " + "Pass `videoBytes` as bytes from the generation pipeline." + ) + + persisted = { + **artifact, + "videoPath": str(video_path), + "metadataPath": str(metadata_path), + } + metadata_payload = { + key: value + for key, value in persisted.items() + if key not in {"videoBytes", "videoMimeType", "videoExtension"} + } + metadata_path.write_text(json.dumps(metadata_payload, indent=2), encoding="utf-8") + return _hydrate_video_artifact(persisted) + + +def _load_video_outputs(video_outputs_dir: Path) -> list[dict[str, Any]]: + if not video_outputs_dir.exists(): + return [] + outputs: list[dict[str, Any]] = [] + for metadata_path in video_outputs_dir.rglob("*.json"): + try: + payload = json.loads(metadata_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if not isinstance(payload, dict): + continue + outputs.append(_hydrate_video_artifact({**payload, "metadataPath": str(metadata_path)})) + outputs.sort(key=lambda item: str(item.get("createdAt") or ""), reverse=True) + return outputs + + +def _find_video_output(artifact_id: str, video_outputs_dir: Path) -> dict[str, Any] | None: + for output in _load_video_outputs(video_outputs_dir): + if output.get("artifactId") == artifact_id: + return output + return None + + +def _delete_video_output(artifact_id: str, video_outputs_dir: Path) -> bool: + if not video_outputs_dir.exists(): + return False + found = False + for metadata_path in video_outputs_dir.rglob(f"{artifact_id}.json"): + found = True + video_path = metadata_path.with_suffix(".mp4") + try: + payload = json.loads(metadata_path.read_text(encoding="utf-8")) + if isinstance(payload, dict) and payload.get("videoPath"): + video_path = Path(str(payload["videoPath"])) + except (OSError, json.JSONDecodeError): + pass + try: + metadata_path.unlink(missing_ok=True) + except OSError: + pass + try: + video_path.unlink(missing_ok=True) + except OSError: + pass + return found diff --git a/backend_service/image_runtime.py b/backend_service/image_runtime.py index 4c456cb..b4f4cf4 100644 --- a/backend_service/image_runtime.py +++ b/backend_service/image_runtime.py @@ -4,16 +4,28 @@ import importlib.util import io import os +import platform import textwrap import time import gc import secrets + +from backend_service.helpers.gpu import nvidia_gpu_present as _nvidia_gpu_present from colorsys import hsv_to_rgb from dataclasses import asdict, dataclass, field from pathlib import Path from threading import RLock from typing import Any +from backend_service.progress import ( + IMAGE_PROGRESS, + PHASE_DECODING, + PHASE_DIFFUSING, + PHASE_ENCODING, + PHASE_LOADING, + PHASE_SAVING, +) + WORKSPACE_ROOT = Path(__file__).resolve().parents[1] MAX_IMAGE_SEED = 2147483647 @@ -50,6 +62,59 @@ def validate_local_diffusers_snapshot(local_root: Path, repo: str | None = None) f"(missing model_index.json; found {visible_label}). {_snapshot_retry_guidance(repo)}" ) + # Verify each component listed in model_index.json actually has its folder + # on disk with a recognisable config file. Diffusers will otherwise raise a + # cryptic "no file named config.json found in directory " + # error from inside ``from_pretrained`` that points at the snapshot root, + # which is hard to action without knowing which subfolder is missing. 
+ # This typically happens when a download started before allow_patterns was + # applied — HF queues the legacy root-level safetensors first and the user + # tries to load before the per-component folders finish landing. + try: + pipeline_index = json.loads(model_index_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + return ( + "The local snapshot's model_index.json could not be read " + f"({exc}). {_snapshot_retry_guidance(repo)}" + ) + + missing_components: list[str] = [] + if isinstance(pipeline_index, dict): + # Any of these names being present in a subfolder is enough to call it + # a real component directory — diffusers picks the right one based on + # the class type at load time. + component_config_names = ( + "config.json", + "scheduler_config.json", + "tokenizer_config.json", + "preprocessor_config.json", + ) + for component_name, descriptor in pipeline_index.items(): + if component_name.startswith("_"): + continue # ``_class_name`` / ``_diffusers_version`` metadata + if not isinstance(descriptor, (list, tuple)) or len(descriptor) < 2: + continue + # Pipelines list ``[null, null]`` for optional components that the + # checkpoint deliberately omits (e.g. safety_checker on community + # models). Skip those — they aren't expected on disk. + if descriptor[0] is None or descriptor[1] is None: + continue + component_dir = local_root / component_name + if not component_dir.is_dir(): + missing_components.append(component_name) + continue + if not any((component_dir / name).exists() for name in component_config_names): + missing_components.append(component_name) + + if missing_components: + label = ", ".join(missing_components[:4]) + if len(missing_components) > 4: + label += f" (+{len(missing_components) - 4} more)" + return ( + "The local snapshot is incomplete and cannot be opened as a diffusers pipeline " + f"(missing components: {label}). {_snapshot_retry_guidance(repo)}" + ) + broken_links: list[str] = [] weight_index_paths: list[Path] = [] try: @@ -338,73 +403,144 @@ def probe(self) -> ImageRuntimeStatus: ) device = self._detect_device(torch) + message = ( + "Real local generation is available. Download an image model locally, then Image Studio " + "will use the diffusers runtime instead of the placeholder engine." + ) + # A CPU-only torch on a machine with an NVIDIA GPU is the single + # most common "image gen takes 10 minutes per step" misconfiguration + # on Windows and Linux. Detect the NVIDIA driver via nvidia-smi and, + # if torch didn't pick up CUDA, surface an actionable hint instead + # of letting users watch the progress bar crawl. + if device == "cpu" and platform.system() in ("Windows", "Linux") and _nvidia_gpu_present(): + message = ( + "torch was imported but CUDA is unavailable — diffusion will run on CPU " + "(expect minutes per step). Reinstall with the CUDA wheel: " + "pip install --upgrade --force-reinstall torch " + "--index-url https://download.pytorch.org/whl/cu121" + ) return ImageRuntimeStatus( activeEngine="diffusers", realGenerationAvailable=True, device=device, pythonExecutable=_resolve_image_python(), - message=( - "Real local generation is available. Download an image model locally, then Image Studio " - "will use the diffusers runtime instead of the placeholder engine." 
- ), + message=message, loadedModelRepo=self._loaded_repo, ) def generate(self, config: ImageGenerationConfig) -> list[GeneratedImage]: - pipeline = self._ensure_pipeline(config.repo) - torch = self._torch - if torch is None: - raise RuntimeError("PyTorch was not initialised for the diffusers runtime.") - generator_device = "cpu" if self._device == "mps" else (self._device or "cpu") - base_seed = _resolve_base_seed(config.seed) - generators = [ - torch.Generator(device=generator_device).manual_seed(base_seed + index) - for index in range(config.batchSize) - ] - - kwargs = self._build_pipeline_kwargs(config, generators if len(generators) > 1 else generators[0]) - lowered_repo = config.repo.lower() - if "flux" in lowered_repo: - kwargs.pop("negative_prompt", None) - kwargs["num_inference_steps"] = min(config.steps, 8) - if "turbo" in lowered_repo: - kwargs["num_inference_steps"] = min(config.steps, 8) - kwargs["guidance_scale"] = min(config.guidance, 2.5) - - started = time.perf_counter() + # Begin reporting progress before we touch the pipeline. ``_ensure_pipeline`` + # publishes its own ``loading`` phase if it actually has to materialise + # the pipeline, but we still want a tracker entry from the moment the + # request lands so the UI's first poll has something to render. + IMAGE_PROGRESS.begin( + run_label=self._format_run_label(config), + total_steps=max(1, int(config.steps)), + phase=PHASE_LOADING, + message=f"Preparing {config.modelName}", + ) try: - result = pipeline(**kwargs) - except TypeError: - kwargs.pop("negative_prompt", None) - result = pipeline(**kwargs) - elapsed = max(0.1, time.perf_counter() - started) - - artifacts: list[GeneratedImage] = [] - for index, image in enumerate(getattr(result, "images", []) or []): - if image.mode != "RGB": - image = image.convert("RGB") - if image.getbbox() is None: - raise RuntimeError( - "The image runtime returned an all-black frame instead of a real image. " - f"Model: {config.repo}. Device: {self._device or 'cpu'}. " - "Try restarting the backend and generating again. If this keeps happening on Apple Silicon, " - "the model likely needs a safer precision path." 
- ) - buffer = io.BytesIO() - image.save(buffer, format="PNG", optimize=True) - artifacts.append( - GeneratedImage( - seed=base_seed + index, - bytes=buffer.getvalue(), - extension="png", - mimeType="image/png", - durationSeconds=round(elapsed / max(1, config.batchSize), 1), - runtimeLabel=f"{self.runtime_label} ({self._device or 'cpu'})", - ) + pipeline = self._ensure_pipeline(config.repo) + torch = self._torch + if torch is None: + raise RuntimeError("PyTorch was not initialised for the diffusers runtime.") + IMAGE_PROGRESS.set_phase(PHASE_ENCODING, message="Encoding prompt") + generator_device = "cpu" if self._device == "mps" else (self._device or "cpu") + base_seed = _resolve_base_seed(config.seed) + generators = [ + torch.Generator(device=generator_device).manual_seed(base_seed + index) + for index in range(config.batchSize) + ] + + kwargs = self._build_pipeline_kwargs(config, generators if len(generators) > 1 else generators[0]) + lowered_repo = config.repo.lower() + if "flux" in lowered_repo: + kwargs.pop("negative_prompt", None) + kwargs["num_inference_steps"] = min(config.steps, 8) + if "turbo" in lowered_repo: + kwargs["num_inference_steps"] = min(config.steps, 8) + kwargs["guidance_scale"] = min(config.guidance, 2.5) + + # Wire the diffusers per-step callback so the UI sees the bar move + # in lockstep with denoising, which is the bulk of the wall time on + # most models. ``callback_on_step_end`` is the non-deprecated name + # in modern diffusers (>=0.27); some pipelines also accept the + # legacy ``callback`` arg, but we prefer the new one. + total_steps = int(kwargs.get("num_inference_steps", config.steps) or config.steps) + IMAGE_PROGRESS.set_phase( + PHASE_DIFFUSING, + message=self._diffuse_message(config), ) - if not artifacts: - raise RuntimeError("Diffusers returned no images.") - return artifacts + # Re-publish the totalSteps in case ``num_inference_steps`` was + # clamped above (Flux/Turbo cap at 8). + IMAGE_PROGRESS.set_step(0, total=max(1, total_steps)) + + def _on_step_end(_pipeline: Any, step: int, _timestep: Any, callback_kwargs: dict[str, Any]): + # Diffusers calls this *after* step ``step`` finishes, so step + # 0 means "one step done". Convert to the 1-indexed value the + # UI wants to display. + IMAGE_PROGRESS.set_step(step + 1, total=max(1, total_steps)) + return callback_kwargs + + kwargs.setdefault("callback_on_step_end", _on_step_end) + + started = time.perf_counter() + try: + result = pipeline(**kwargs) + except TypeError as exc: + # Older diffusers versions don't accept ``callback_on_step_end`` + # — drop it and retry once before bubbling the original error. + if "callback_on_step_end" in str(exc): + kwargs.pop("callback_on_step_end", None) + try: + result = pipeline(**kwargs) + except TypeError: + kwargs.pop("negative_prompt", None) + result = pipeline(**kwargs) + else: + kwargs.pop("negative_prompt", None) + result = pipeline(**kwargs) + elapsed = max(0.1, time.perf_counter() - started) + + IMAGE_PROGRESS.set_phase(PHASE_DECODING, message="Decoding pixels") + + artifacts: list[GeneratedImage] = [] + for index, image in enumerate(getattr(result, "images", []) or []): + if image.mode != "RGB": + image = image.convert("RGB") + if image.getbbox() is None: + raise RuntimeError( + "The image runtime returned an all-black frame instead of a real image. " + f"Model: {config.repo}. Device: {self._device or 'cpu'}. " + "Try restarting the backend and generating again. If this keeps happening on Apple Silicon, " + "the model likely needs a safer precision path." 
+ ) + buffer = io.BytesIO() + image.save(buffer, format="PNG", optimize=True) + artifacts.append( + GeneratedImage( + seed=base_seed + index, + bytes=buffer.getvalue(), + extension="png", + mimeType="image/png", + durationSeconds=round(elapsed / max(1, config.batchSize), 1), + runtimeLabel=f"{self.runtime_label} ({self._device or 'cpu'})", + ) + ) + if not artifacts: + raise RuntimeError("Diffusers returned no images.") + IMAGE_PROGRESS.set_phase(PHASE_SAVING, message="Saving to gallery") + return artifacts + finally: + IMAGE_PROGRESS.finish() + + def _diffuse_message(self, config: ImageGenerationConfig) -> str: + if config.batchSize > 1: + return f"Diffusing {config.batchSize} images" + return "Diffusing image" + + def _format_run_label(self, config: ImageGenerationConfig) -> str: + return f"{config.modelName} · {config.width}x{config.height}" def preload(self, repo: str) -> ImageRuntimeStatus: self._ensure_pipeline(repo) @@ -422,6 +558,11 @@ def _ensure_pipeline(self, repo: str) -> Any: if self._pipeline is not None and self._loaded_repo == repo: return self._pipeline + # Loading a pipeline can take 10-60s on cold disk. Surface that + # explicitly to the UI so the progress bar stops sitting at 0% + # while we read 5GB of weights from the SSD. + IMAGE_PROGRESS.set_phase(PHASE_LOADING, message=f"Loading {repo}") + if self._pipeline is not None and self._loaded_repo != repo: self._release_pipeline() diff --git a/backend_service/inference.py b/backend_service/inference.py index 9f33c9b..5a05a7c 100644 --- a/backend_service/inference.py +++ b/backend_service/inference.py @@ -1704,7 +1704,7 @@ def _build_command( *fell_back_to_native* is ``True`` when pre-validation detected unsupported cache types and silently switched to f16. """ - from compression import registry as _strategy_registry + from cache_compression import registry as _strategy_registry strategy = _strategy_registry.get(cache_strategy) or _strategy_registry.default() binary = self._select_llama_binary(strategy) @@ -1842,7 +1842,7 @@ def load_model( runtime_note = None actual_strategy = cache_strategy actual_fit = fit_model_in_memory - from compression import registry as _strategy_registry + from cache_compression import registry as _strategy_registry failed_strategy_name: str | None = None # Try the requested strategy first. If it fails, try ChaosEngine diff --git a/backend_service/mlx_worker.py b/backend_service/mlx_worker.py index 92c6dcb..a87a4b2 100644 --- a/backend_service/mlx_worker.py +++ b/backend_service/mlx_worker.py @@ -780,7 +780,7 @@ def _runtime_fields( def _make_cache(self) -> tuple[Any | None, str | None]: """Build the prompt cache for the active strategy. 
Returns (cache, note).""" - from compression import registry + from cache_compression import registry strategy = registry.get(self.cache_strategy) if strategy is None or self.cache_strategy == "native": return None, None @@ -801,7 +801,7 @@ def _make_cache(self) -> tuple[Any | None, str | None]: def _generate_dflash(self, request: dict[str, Any]) -> dict[str, Any]: """Generate using DFLASH speculative decoding.""" - from dflash_mlx.runtime import generate_dflash_once + from dflash_mlx.runtime import stream_dflash_generate # Build prompt text system_prompt = request.get("systemPrompt") @@ -822,7 +822,11 @@ def _generate_dflash(self, request: dict[str, Any]) -> dict[str, Any]: if eos_token_id is not None and int(eos_token_id) not in eos_token_ids: eos_token_ids.append(int(eos_token_id)) - summary = generate_dflash_once( + # ``stream_dflash_generate`` (upstream v0.1.4) yields per-token events + # followed by a final ``{"event": "summary", ...}`` payload whose shape + # matches what the old ``generate_dflash_once`` helper returned. + summary: dict[str, Any] = {} + for event in stream_dflash_generate( target_model=self._dflash_target or self.model, tokenizer=self.tokenizer, draft_model=self._dflash_generator, @@ -831,7 +835,9 @@ def _generate_dflash(self, request: dict[str, Any]) -> dict[str, Any]: use_chat_template=False, stop_token_ids=eos_token_ids, prompt_tokens_override=prompt_tokens, - ) + ): + if event.get("event") == "summary": + summary = dict(event) gen_tokens = [int(token_id) for token_id in summary.get("generated_token_ids", [])] text = self.tokenizer.decode(gen_tokens).strip() if gen_tokens else "" diff --git a/backend_service/models/__init__.py b/backend_service/models/__init__.py index 8b5cd6d..20af2dc 100644 --- a/backend_service/models/__init__.py +++ b/backend_service/models/__init__.py @@ -111,11 +111,16 @@ class UpdateSettingsRequest(BaseModel): modelDirectories: list[ModelDirectoryRequest] | None = None preferredServerPort: int | None = Field(default=None, ge=1024, le=65535) allowRemoteConnections: bool | None = None + requireApiAuth: bool | None = None autoStartServer: bool | None = None launchPreferences: LaunchPreferencesRequest | None = None remoteProviders: list[RemoteProviderRequest] | None = None huggingFaceToken: str | None = Field(default=None, max_length=512) dataDirectory: str | None = Field(default=None, max_length=4096) + # Per-modality output overrides. Empty string clears the override and + # restores the default (data-dir/images/outputs or data-dir/videos/outputs). + imageOutputsDirectory: str | None = Field(default=None, max_length=4096) + videoOutputsDirectory: str | None = Field(default=None, max_length=4096) class OpenAIMessage(BaseModel): @@ -207,3 +212,30 @@ class ImageRuntimePreloadRequest(BaseModel): class ImageRuntimeUnloadRequest(BaseModel): modelId: str | None = Field(default=None, min_length=1, max_length=256) + + +class VideoRuntimePreloadRequest(BaseModel): + modelId: str = Field(min_length=1, max_length=256) + + +class VideoRuntimeUnloadRequest(BaseModel): + modelId: str | None = Field(default=None, min_length=1, max_length=256) + + +class VideoGenerationRequest(BaseModel): + """Shape accepted by POST /api/video/generate. + + Defaults are intentionally conservative — num_frames and steps in particular + dominate generation time on consumer hardware, so we err on the side of a + short, fast clip and let the user dial up quality from the Studio UI. 
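+
+    A short-clip request sketch (the modelId is illustrative; omitted
+    fields fall back to the defaults declared below):
+
+        {"modelId": "ltx-video-2b", "prompt": "a fox at dawn",
+         "numFrames": 33, "steps": 25}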
+ """ + modelId: str = Field(min_length=1, max_length=256) + prompt: str = Field(min_length=1, max_length=4000) + negativePrompt: str | None = Field(default=None, max_length=4000) + width: int = Field(default=768, ge=256, le=2048) + height: int = Field(default=512, ge=256, le=2048) + numFrames: int = Field(default=97, ge=8, le=257) + fps: int = Field(default=24, ge=1, le=60) + steps: int = Field(default=50, ge=1, le=100) + guidance: float = Field(default=3.0, ge=1.0, le=20.0) + seed: int | None = Field(default=None, ge=0, le=2147483647) diff --git a/backend_service/plugins/__init__.py b/backend_service/plugins/__init__.py index 79c8814..f218910 100644 --- a/backend_service/plugins/__init__.py +++ b/backend_service/plugins/__init__.py @@ -88,7 +88,7 @@ def discover_from_directory(self, plugins_dir: Path): def register_builtins(self): """Register all built-in components as plugins.""" # Cache strategies - from compression import registry as cache_registry + from cache_compression import registry as cache_registry for strategy in cache_registry._strategies.values(): manifest = PluginManifest( id=f"cache.{strategy.strategy_id}", diff --git a/backend_service/progress.py b/backend_service/progress.py new file mode 100644 index 0000000..6d5b773 --- /dev/null +++ b/backend_service/progress.py @@ -0,0 +1,155 @@ +"""Real-time generation progress tracking for image + video runtimes. + +The diffusers pipelines used by ``image_runtime`` and ``video_runtime`` each +take 30 seconds to several minutes to finish. The frontend used to render an +arbitrary "estimated seconds" bar that drifted out of sync with reality on +slower hardware. This module gives the runtimes a tiny thread-safe scratchpad +they can update as they progress, and the routes a way to report that state +back to the UI so the progress bar reflects what's actually happening. + +A tracker exposes four operations: + +* ``begin(...)`` — call when generation starts. Resets state and stamps the + start time. +* ``set_phase(...)`` — call when the runtime moves into a new phase + (``loading``, ``encoding``, ``diffusing``, ``decoding``, ``saving``). The + string is opaque to the backend — the frontend maps it onto the same phase + IDs the modal already understands. +* ``set_step(step, total)`` — call inside ``callback_on_step_end`` to publish + per-step progress during diffusion. +* ``finish(...)`` — call after the pipeline returns (or raises). Marks the + tracker idle so the next poll cycle stops showing stale values. + +``snapshot()`` returns a JSON-serialisable dict the routes hand back to the +frontend. ``active=False`` means "no run in flight" — callers should fall +back to client-side estimates. + +Two module-level singletons (`IMAGE_PROGRESS`, `VIDEO_PROGRESS`) are exposed +so the runtimes and routes share the same instance without the +``ChaosEngineState`` plumbing having to know about it. +""" + +from __future__ import annotations + +import time +from threading import RLock +from typing import Any + + +# Phase IDs the frontend expects. Keep these in sync with the modal's phase +# list — adding a new phase here without updating the modal will just show up +# as "unknown phase" in the UI but won't crash. +PHASE_IDLE = "idle" +PHASE_LOADING = "loading" +PHASE_ENCODING = "encoding" +PHASE_DIFFUSING = "diffusing" +PHASE_DECODING = "decoding" +PHASE_SAVING = "saving" + + +class ProgressTracker: + """Thread-safe scratchpad for one in-flight generation at a time. 
+ + The runtimes are already serialised through their own ``RLock``s — only + one image (or one video) can render at a time per process — so we don't + need to multiplex multiple runs. We just need the GET endpoint and the + pipeline callback to read/write the same state without tearing. + """ + + def __init__(self, *, kind: str) -> None: + self._lock = RLock() + # ``kind`` is included in the snapshot so logs can tell image and + # video apart at a glance. + self._kind = kind + self._active = False + self._phase = PHASE_IDLE + self._message = "" + self._step = 0 + self._total_steps = 0 + self._started_at = 0.0 + self._updated_at = 0.0 + # Optional run-shape metadata so the UI can render labels like + # "Diffusing 3 images" without a separate request. + self._run_label: str | None = None + + def begin( + self, + *, + run_label: str | None = None, + total_steps: int = 0, + phase: str = PHASE_LOADING, + message: str = "", + ) -> None: + with self._lock: + now = time.time() + self._active = True + self._phase = phase + self._message = message + self._step = 0 + self._total_steps = max(0, int(total_steps)) + self._started_at = now + self._updated_at = now + self._run_label = run_label + + def set_phase(self, phase: str, message: str = "") -> None: + """Move into a new phase. Resets ``step`` so per-phase progress is + measured from zero rather than carrying over the previous phase's + counter.""" + with self._lock: + if not self._active: + # Setting a phase before ``begin()`` is meaningless — it would + # leave ``started_at`` at 0 and the elapsed time would be + # nonsense. Treat it as an implicit ``begin`` so callers don't + # have to remember the order on simple paths. + self._active = True + self._started_at = time.time() + self._step = 0 + self._total_steps = 0 + self._run_label = None + self._phase = phase + self._message = message + self._step = 0 + self._updated_at = time.time() + + def set_step(self, step: int, total: int | None = None) -> None: + with self._lock: + if not self._active: + return + self._step = max(0, int(step)) + if total is not None: + self._total_steps = max(0, int(total)) + self._updated_at = time.time() + + def finish(self, *, message: str = "") -> None: + with self._lock: + self._active = False + self._phase = PHASE_IDLE + self._message = message + self._step = 0 + self._total_steps = 0 + self._updated_at = time.time() + self._run_label = None + + def snapshot(self) -> dict[str, Any]: + with self._lock: + now = time.time() + elapsed = max(0.0, now - self._started_at) if self._active else 0.0 + return { + "kind": self._kind, + "active": self._active, + "phase": self._phase, + "message": self._message, + "step": self._step, + "totalSteps": self._total_steps, + "startedAt": self._started_at if self._active else 0.0, + "updatedAt": self._updated_at, + "elapsedSeconds": round(elapsed, 3), + "runLabel": self._run_label, + } + + +# Module-level singletons. The runtime managers and the route handlers both +# import these directly so we don't have to thread the tracker through +# ``ChaosEngineState`` constructor signatures. 
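+#
+# A runtime-side usage sketch, mirroring what ``image_runtime.generate``
+# does (the callback follows diffusers' ``callback_on_step_end`` contract):
+#
+#     IMAGE_PROGRESS.begin(run_label="model · 1024x1024", total_steps=30)
+#     try:
+#         ...  # run the pipeline; the per-step callback calls
+#         ...  # IMAGE_PROGRESS.set_step(step + 1, total=30)
+#     finally:
+#         IMAGE_PROGRESS.finish()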
+IMAGE_PROGRESS = ProgressTracker(kind="image") +VIDEO_PROGRESS = ProgressTracker(kind="video") diff --git a/backend_service/routes/__init__.py b/backend_service/routes/__init__.py index 65dde43..aec2ec6 100644 --- a/backend_service/routes/__init__.py +++ b/backend_service/routes/__init__.py @@ -11,6 +11,7 @@ def register_routes(app: FastAPI) -> None: from .models import router as models_router from .chat import router as chat_router from .images import router as images_router + from .video import router as video_router from .benchmarks import router as benchmarks_router from .cache import router as cache_router from .server import router as server_router @@ -29,6 +30,7 @@ def register_routes(app: FastAPI) -> None: app.include_router(chat_router) app.include_router(compare_router) app.include_router(images_router) + app.include_router(video_router) app.include_router(benchmarks_router) app.include_router(cache_router) app.include_router(server_router) diff --git a/backend_service/routes/health.py b/backend_service/routes/health.py index aac21a3..8ddf97e 100644 --- a/backend_service/routes/health.py +++ b/backend_service/routes/health.py @@ -4,6 +4,7 @@ from fastapi import APIRouter, Request +from backend_service.helpers.gpu import gpu_status_snapshot from backend_service.helpers.system import _runtime_label router = APIRouter() @@ -42,3 +43,15 @@ def runtime_status(request: Request) -> dict[str, Any]: active_requests=state.active_requests, requests_served=state.requests_served, ) + + +@router.get("/api/system/gpu-status") +def system_gpu_status() -> dict[str, Any]: + """Unified GPU availability summary for the frontend warning banner. + + Returns whether torch sees CUDA / MPS on the current host, whether an + NVIDIA driver is visible on ``PATH``, and a human-readable recommendation + string when torch fell back to CPU on a box that clearly has an NVIDIA + GPU. Safe to call before any model is loaded. + """ + return gpu_status_snapshot() diff --git a/backend_service/routes/images.py b/backend_service/routes/images.py index f91396a..0a27c88 100644 --- a/backend_service/routes/images.py +++ b/backend_service/routes/images.py @@ -25,6 +25,7 @@ _find_image_output, _delete_image_output, ) +from backend_service.progress import IMAGE_PROGRESS router = APIRouter() @@ -45,6 +46,17 @@ def image_runtime_status(request: Request) -> dict[str, Any]: return {"runtime": state.image_runtime.capabilities()} +@router.get("/api/images/progress") +def image_generation_progress() -> dict[str, Any]: + """Live progress snapshot for the in-flight image generation. + + Polled by the generation modal every ~500ms while the bar is visible. + When ``active`` is false the UI falls back to its own client-side + estimates rather than freezing the bar at 0%. 
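+
+    Response sketch (values illustrative; keys come from
+    ``ProgressTracker.snapshot``):
+
+        {"progress": {"kind": "image", "active": true, "phase": "diffusing",
+                      "step": 12, "totalSteps": 30, "elapsedSeconds": 8.4}}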
+ """ + return {"progress": IMAGE_PROGRESS.snapshot()} + + @router.post("/api/images/preload") def preload_image_model(request: Request, body: ImageRuntimePreloadRequest) -> dict[str, Any]: import traceback as _tb diff --git a/backend_service/routes/settings.py b/backend_service/routes/settings.py index b08139a..8e37f70 100644 --- a/backend_service/routes/settings.py +++ b/backend_service/routes/settings.py @@ -19,4 +19,10 @@ def settings(request: Request) -> dict[str, Any]: @router.patch("/api/settings") def update_settings(request: Request, body: UpdateSettingsRequest) -> dict[str, Any]: state = request.app.state.chaosengine - return state.update_settings(body) + result = state.update_settings(body) + # Hot-apply the API-auth toggle — the middleware reads this flag per + # request, so flipping it should take effect immediately without + # forcing the user to restart the server. Env var, if set, still wins. + from backend_service.app import _resolve_require_api_auth + request.app.state.chaosengine_require_api_auth = _resolve_require_api_auth(state.settings) + return result diff --git a/backend_service/routes/setup.py b/backend_service/routes/setup.py index 4c75276..e814d62 100644 --- a/backend_service/routes/setup.py +++ b/backend_service/routes/setup.py @@ -1,5 +1,6 @@ from __future__ import annotations +import shutil import subprocess import time from pathlib import Path @@ -17,8 +18,46 @@ "vllm": "vllm", "mlx": "mlx", "mlx-lm": "mlx-lm", - "dflash-mlx": "dflash-mlx", + # PyPI build is stale at 0.1.0; the up-to-date code lives on GitHub. + # The upstream removed all tags in April 2026, so we pin to a specific + # commit on main instead — v0.1.4 no longer resolves and fresh clones + # failed with "pathspec 'v0.1.4' did not match any file(s) known to + # git". Bump the pin when we validate a newer main SHA. + "dflash-mlx": "dflash-mlx @ git+https://github.com/bstnxbt/dflash-mlx.git@f825ffb268e50d531e8b6524413b0847334a14dd", "dflash": "dflash", + # Video output encoding — diffusers can produce frames without these, + # but exporting mp4/gif requires imageio + the ffmpeg plugin. The Video + # Studio surfaces a one-click installer when they're missing. + "imageio": "imageio", + "imageio-ffmpeg": "imageio-ffmpeg", + # Pipeline-specific tokenizer / text-encoder packages. Diffusers itself + # imports without them, but individual video pipelines need one or more + # at preload / generate time: + # - tiktoken: LTX-Video's T5 tokenizer ships in tiktoken format. + # - sentencepiece: Wan (UMT5-XXL), HunyuanVideo, CogVideoX, Mochi (T5). + # - protobuf: SentencePiece tokenizers HF loads. + # - ftfy: prompt-text preprocessing several pipelines use. + "tiktoken": "tiktoken", + "sentencepiece": "sentencepiece", + "protobuf": "protobuf", + "ftfy": "ftfy", + # Core image / video runtime packages. Installed together via the + # one-click button in Image Studio / Video Studio when the probe + # reports the real engine as unavailable. Each is also individually + # installable so we can retry a single failed package without redoing + # the whole set. + # + # We deliberately do not pin versions here — the backend ships with + # ``pyproject.toml`` extras that constrain them, and a bare ``pip + # install diffusers`` resolves compatibly with whatever torch the user + # already has. For a coordinated install of all of these, the Studio + # calls this endpoint once per package in order so a single failure + # doesn't abort the whole sequence. 
+ "diffusers": "diffusers", + "torch": "torch", + "accelerate": "accelerate", + "huggingface_hub": "huggingface_hub", + "pillow": "pillow", } _MANUAL_INSTALL_MESSAGES: dict[str, str] = { @@ -143,6 +182,230 @@ def refresh_capabilities_endpoint(request: Request) -> dict[str, Any]: return {"capabilities": caps.to_dict()} +# ------------------------------------------------------------------ +# CUDA torch install (Windows/Linux NVIDIA fallback recovery) +# ------------------------------------------------------------------ + +# cu124 covers Python 3.9-3.13 and driver 525+. cu121 only ships wheels +# for Python up to 3.12, so fresh Windows installs (3.13) fail on it. +# The nightly index sometimes has wheels for very new Python (e.g. 3.14) +# before they land in stable — we try it last so users on bleeding-edge +# Python aren't stuck. The endpoint walks this list in order and stops +# at the first success. +_CUDA_TORCH_INDEXES: list[str] = [ + "https://download.pytorch.org/whl/cu124", + "https://download.pytorch.org/whl/cu126", + "https://download.pytorch.org/whl/cu128", + "https://download.pytorch.org/whl/cu121", + "https://download.pytorch.org/whl/nightly/cu128", +] + + +def _read_python_version(python: str) -> str | None: + """Return e.g. ``3.13.2`` for the given Python interpreter, or ``None``.""" + try: + result = subprocess.run( + [python, "-c", "import sys; print('%d.%d.%d' % sys.version_info[:3])"], + capture_output=True, text=True, timeout=10, + ) + except (OSError, subprocess.TimeoutExpired): + return None + if result.returncode != 0: + return None + return result.stdout.strip() or None + + +def _site_packages_for(python_executable: str) -> Path | None: + """Return the site-packages directory for the given interpreter, or None.""" + try: + result = subprocess.run( + [ + python_executable, "-c", + "import sysconfig; print(sysconfig.get_paths().get('purelib') or sysconfig.get_paths().get('platlib') or '')", + ], + capture_output=True, text=True, timeout=10, + ) + except (OSError, subprocess.TimeoutExpired): + return None + if result.returncode != 0: + return None + path = (result.stdout or "").strip() + return Path(path) if path else None + + +def _purge_broken_distributions(site_packages: Path) -> list[str]: + """Delete ``~*`` stub directories pip leaves behind after an interrupted install. + + On Windows, pip atomically renames the old version of a package to ``~`` + before unpacking the new one. If the process is killed mid-install (antivirus, + a file lock, Ctrl-C) the stub is left behind. Subsequent ``pip install`` runs + then print ``WARNING: Ignoring invalid distribution ~arkupsafe`` forever and + sometimes refuse to heal the tree. Removing these stubs is cheap and safe — + they contain no authoritative data. + """ + if not site_packages.is_dir(): + return [] + removed: list[str] = [] + for entry in site_packages.iterdir(): + if not entry.name.startswith("~"): + continue + try: + if entry.is_dir(): + shutil.rmtree(entry, ignore_errors=True) + else: + entry.unlink(missing_ok=True) + if not entry.exists(): + removed.append(entry.name) + except OSError: + continue + return removed + + +def _all_attempts_lack_wheel(attempts: list[dict[str, Any]]) -> bool: + """True when pip reported 'No matching distribution' for every attempt. + + This is the signature of a Python version PyTorch doesn't ship wheels + for (either too old or too new) — the fix is a different Python, not + a different CUDA index. We surface that specifically to the UI so + the user doesn't keep retrying. 
+ """ + if not attempts: + return False + for attempt in attempts: + if attempt.get("ok"): + return False + text = (attempt.get("output") or "").lower() + if "no matching distribution" not in text and "from versions: none" not in text: + return False + return True + + +@router.post("/api/setup/install-cuda-torch") +def install_cuda_torch(request: Request) -> dict[str, Any]: + """Install a CUDA-enabled torch wheel into the backend runtime. + + The fresh-Windows-install case is Python 3.13 + system pip, which has + no cu121 wheel at all — the install fails with "Could not find a + version that satisfies the requirement torch". We try cu124 first + (broadest Python 3.9-3.13 coverage), then cu126 / cu128 / cu121 in + case the user's driver doesn't match the newest, and finally the + nightly cu128 index for very-new Python (e.g. 3.14). + + If every attempt fails with "No matching distribution", we set + ``noWheelForPython`` in the response — that means the user's Python + version is the problem, not the CUDA index, so the UI can tell them + to switch Python rather than keep retrying. The response always + includes ``pythonVersion`` so the UI can show which interpreter this + is targeting (important: it's the app's bundled venv, not the system + pip the user might reach from a shell). + + Torch already imported in this process stays CPU until the user + restarts the backend — we flag ``requiresRestart`` in the response + so the frontend can prompt appropriately. + """ + state = request.app.state.chaosengine + python = state.runtime.capabilities.pythonExecutable + python_version = _read_python_version(python) + + # Sweep pip's "~" stub directories before attempting the install. + # These are left behind by a prior interrupted install (common on Windows + # where Defender briefly locks .pyd files), and they cause two problems: + # 1. Noisy "WARNING: Ignoring invalid distribution ~arkupsafe" spam that + # confuses users reading install output. + # 2. pip sometimes tries to repair them and fails with an "Access denied" + # write to a .pyd that the running backend process has loaded (e.g. + # markupsafe/_speedups.cp314-win_amd64.pyd via FastAPI -> Jinja2). + # Removing the stubs is always safe — they hold no authoritative data. + site_packages = _site_packages_for(python) + purged: list[str] = [] + if site_packages is not None: + purged = _purge_broken_distributions(site_packages) + if purged: + state.add_log( + "server", "info", + f"Removed {len(purged)} broken pip stub(s) from {site_packages}: {', '.join(purged)}", + ) + + attempts: list[dict[str, Any]] = [] + ok = False + winning_output = "" + winning_index: str | None = None + + for index_url in _CUDA_TORCH_INDEXES: + # Two-pass install: + # Pass 1: --force-reinstall --no-deps swaps the torch wheel (CPU -> CUDA) + # without overwriting transitive deps like markupsafe. Those + # extensions are loaded into this Python process via FastAPI + # -> Jinja2; overwriting their .pyd / .so at runtime raises + # WinError 5 "Access is denied" and aborts the install. + # Pass 2: plain install (no --force) fills in any genuinely missing + # deps (e.g. nvidia-cublas-cu12 on Linux when swapping from + # CPU torch) without touching files that are already satisfied. 
+ cmd_swap = [ + python, "-m", "pip", "install", + "--upgrade", "--force-reinstall", "--no-deps", + "--index-url", index_url, + "torch>=2.4.0", + ] + state.add_log("server", "info", f"Installing CUDA torch from {index_url}") + try: + result = subprocess.run(cmd_swap, capture_output=True, text=True, timeout=900) + output = (result.stdout + "\n" + result.stderr).strip() + attempt_ok = result.returncode == 0 + except subprocess.TimeoutExpired: + output = f"Install from {index_url} timed out after 15 minutes." + attempt_ok = False + except OSError as exc: + output = f"{index_url}: {exc}" + attempt_ok = False + + if attempt_ok: + cmd_deps = [ + python, "-m", "pip", "install", + "--index-url", index_url, + "torch>=2.4.0", + ] + try: + dep_result = subprocess.run(cmd_deps, capture_output=True, text=True, timeout=900) + dep_output = (dep_result.stdout + "\n" + dep_result.stderr).strip() + output = f"{output}\n\n--- deps pass ---\n{dep_output}" if dep_output else output + except (subprocess.TimeoutExpired, OSError): + # Best-effort: torch itself swapped successfully, a missing + # transitive dep will surface at runtime via an ImportError + # the user can resolve from the Setup page. + pass + + attempts.append({"indexUrl": index_url, "ok": attempt_ok, "output": output}) + if attempt_ok: + ok = True + winning_output = output + winning_index = index_url + break + + # Re-probe so the UI can refresh its capabilities view. Note: torch + # already imported in this process is still the old module — the + # live cuda check won't flip to True without a restart. + state.runtime.refresh_capabilities(force=True) + caps = state.runtime.capabilities.to_dict() + no_wheel_for_python = (not ok) and _all_attempts_lack_wheel(attempts) + state.add_log( + "server", "info" if ok else "error", + f"CUDA torch install: {'succeeded via ' + winning_index if ok else 'failed after all candidates'}" + + (f" (no wheel for Python {python_version})" if no_wheel_for_python and python_version else ""), + ) + return { + "ok": ok, + "output": winning_output or (attempts[-1]["output"] if attempts else ""), + "indexUrl": winning_index, + "attempts": attempts, + "requiresRestart": ok, + "pythonExecutable": python, + "pythonVersion": python_version, + "noWheelForPython": no_wheel_for_python, + "capabilities": caps, + } + + # ------------------------------------------------------------------ # llama-server-turbo update check # ------------------------------------------------------------------ diff --git a/backend_service/routes/video.py b/backend_service/routes/video.py new file mode 100644 index 0000000..ada6da2 --- /dev/null +++ b/backend_service/routes/video.py @@ -0,0 +1,303 @@ +"""Video generation API routes. + +Backed by ``backend_service.video_runtime.VideoRuntimeManager``. This module +exposes the full preload / unload / download / generate / outputs lifecycle. 
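+
+A typical round-trip, sketched (bodies abridged; the generate payload shape
+is ``VideoGenerationRequest`` in ``backend_service.models``):
+
+    POST /api/video/preload   {"modelId": "..."}
+    POST /api/video/generate  {"modelId": "...", "prompt": "...", "numFrames": 33}
+    GET  /api/video/progress  -> {"progress": {"phase": "diffusing", ...}}
+    GET  /api/video/outputs   -> {"outputs": [...]}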
+""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from fastapi import APIRouter, HTTPException, Request +from fastapi.responses import FileResponse + +from backend_service.helpers.video import ( + _find_video_variant, + _find_video_variant_by_repo, + _is_video_repo, + _video_download_repo_ids, + _video_download_validation_error, + _video_model_payloads, + _video_variant_available_locally, +) +from backend_service.models import ( + DownloadModelRequest, + VideoGenerationRequest, + VideoRuntimePreloadRequest, + VideoRuntimeUnloadRequest, +) +from backend_service.progress import VIDEO_PROGRESS + + +router = APIRouter() + + +@router.get("/api/video/catalog") +def video_catalog(request: Request) -> dict[str, Any]: + """Return the curated catalog of video generation models.""" + library = request.app.state.chaosengine._library() + return { + "families": _video_model_payloads(library), + "latest": [], + } + + +@router.get("/api/video/runtime") +def video_runtime_status(request: Request) -> dict[str, Any]: + """Report the live video runtime capability from diffusers + torch.""" + state = request.app.state.chaosengine + return {"runtime": state.video_runtime.capabilities()} + + +@router.get("/api/video/progress") +def video_generation_progress() -> dict[str, Any]: + """Live progress snapshot for the in-flight video generation. + + Same shape as ``/api/images/progress`` so the frontend can reuse the same + client code. Returns ``active=false`` when nothing is running so the UI + falls back to its estimate-driven view. + """ + return {"progress": VIDEO_PROGRESS.snapshot()} + + +@router.post("/api/video/preload") +def preload_video_model(request: Request, body: VideoRuntimePreloadRequest) -> dict[str, Any]: + state = request.app.state.chaosengine + state.add_log("video", "info", f"Preload requested: modelId='{body.modelId}'") + variant = _find_video_variant(body.modelId) + if variant is None: + state.add_log("video", "error", f"Preload failed: model '{body.modelId}' not found") + raise HTTPException(status_code=404, detail=f"Unknown video model '{body.modelId}'.") + + if not _video_variant_available_locally(variant): + validation_error = _video_download_validation_error(variant["repo"]) + detail = validation_error or f"{variant['name']} is not installed locally yet." 
+ raise HTTPException(status_code=409, detail=detail) + + try: + runtime = state.video_runtime.preload(variant["repo"]) + except RuntimeError as exc: + state.add_log("video", "error", f"Failed to preload {variant['name']}: {exc}") + raise HTTPException(status_code=400, detail=f"Failed to load {variant['name']}: {exc}") from exc + except Exception as exc: + state.add_log( + "video", + "error", + f"Unexpected error preloading {variant['name']}: {type(exc).__name__}: {exc}", + ) + raise HTTPException( + status_code=500, + detail=f"Failed to load {variant['name']}: {type(exc).__name__}: {exc}", + ) from exc + + state.add_log("video", "info", f"Preloaded video model {variant['name']}.") + state.add_activity("Video model loaded", variant["name"]) + return {"runtime": runtime} + + +@router.post("/api/video/unload") +def unload_video_model(request: Request, body: VideoRuntimeUnloadRequest | None = None) -> dict[str, Any]: + state = request.app.state.chaosengine + requested_repo: str | None = None + requested_name: str | None = None + if body and body.modelId: + variant = _find_video_variant(body.modelId) + if variant is None: + raise HTTPException(status_code=404, detail=f"Unknown video model '{body.modelId}'.") + requested_repo = variant["repo"] + requested_name = variant["name"] + + current_runtime = state.video_runtime.capabilities() + current_repo = str(current_runtime.get("loadedModelRepo") or "") or None + try: + runtime = state.video_runtime.unload(requested_repo) + except RuntimeError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + unloaded_repo = requested_repo or current_repo + if unloaded_repo and (requested_repo is None or requested_repo == current_repo): + unloaded_variant = _find_video_variant_by_repo(unloaded_repo) + unloaded_name = ( + unloaded_variant["name"] + if unloaded_variant + else requested_name or unloaded_repo + ) + state.add_log("video", "info", f"Unloaded video model {unloaded_name}.") + state.add_activity("Video model unloaded", unloaded_name) + return {"runtime": runtime} + + +@router.get("/api/video/library") +def video_library(request: Request) -> dict[str, Any]: + """Return the list of locally-installed video models.""" + state = request.app.state.chaosengine + library = state._library() + installed_models: list[dict[str, Any]] = [] + for family in _video_model_payloads(library): + for variant in family["variants"]: + if variant.get("availableLocally"): + installed_models.append(variant) + return {"models": installed_models} + + +@router.get("/api/video/outputs") +def video_outputs() -> dict[str, Any]: + """Return saved video outputs, newest first.""" + from backend_service.app import _load_video_outputs + return {"outputs": _load_video_outputs()} + + +@router.get("/api/video/outputs/{artifact_id}") +def video_output_detail(artifact_id: str) -> dict[str, Any]: + from backend_service.app import _find_video_output + output = _find_video_output(artifact_id) + if output is None: + raise HTTPException(status_code=404, detail=f"Video output '{artifact_id}' not found.") + return {"artifact": output} + + +@router.get("/api/video/outputs/{artifact_id}/file") +def video_output_file(artifact_id: str) -> FileResponse: + """Stream the mp4 for a saved video output. + + The frontend wires this up as the ``src`` of an HTML5