Merged
55 commits
38caed6
Extract Sidebar into its own component (refactor, no behaviour change)
cryptopoly Apr 18, 2026
167c185
Group sidebar tabs into Models/Images/Benchmarks/Tools with SVG icons
cryptopoly Apr 18, 2026
981ab58
Add tabs-mode sidebar, SubtabBar, and Settings > Appearance toggle
cryptopoly Apr 18, 2026
4cd3183
Flip group default child to My Models
cryptopoly Apr 18, 2026
6c4648b
Scaffold Video group with placeholder tabs
cryptopoly Apr 18, 2026
dab7a13
Add video API scaffolding: catalog, routes, types, client
cryptopoly Apr 18, 2026
a279017
Add real video runtime with probe/preload/unload
cryptopoly Apr 18, 2026
ab44e4a
Add real video model downloads and Wan 2.1 support
cryptopoly Apr 18, 2026
43ae1b3
Correct Wan 2.1 sizes and isolate HF cache in video tests
cryptopoly Apr 18, 2026
1e2c6a6
Wire real video tabs and useVideoState hook
cryptopoly Apr 18, 2026
bebedca
Route video catalog at diffusers-compatible mirror repos
cryptopoly Apr 18, 2026
9a286b7
Wire video generation end-to-end
cryptopoly Apr 18, 2026
6b82e3f
Add Reveal Folder button to image + video model tabs
cryptopoly Apr 18, 2026
d3ad11b
Pin dflash-mlx to v0.1.4 and rename to stream_dflash_generate
cryptopoly Apr 18, 2026
7ea7add
Update TurboQuant-mlx references to arozanov fork
cryptopoly Apr 18, 2026
8eec18d
Document DDTree in upstream dependency table
cryptopoly Apr 18, 2026
879a91a
Scope video downloads and installed-only Studio dropdown
cryptopoly Apr 18, 2026
5acf4f5
Add in-app install for mp4 encoder deps
cryptopoly Apr 18, 2026
ee76936
Detect corrupt diffusers snapshots and add output-folder pickers
cryptopoly Apr 19, 2026
400f374
Add video generation modal and real-time progress signal
cryptopoly Apr 19, 2026
f7af76d
Expose video generation knobs with MPS-safe defaults
cryptopoly Apr 19, 2026
0895df2
Isolate video output dir in test harness
cryptopoly Apr 19, 2026
e6e6db0
Fix video Studio numeric input UX and stuck-disable recovery
cryptopoly Apr 19, 2026
478f451
Scale video gen safety by device memory and show Studio capacity
cryptopoly Apr 19, 2026
a3b9e00
Include model footprint in video gen memory estimate
cryptopoly Apr 19, 2026
08e7902
Base LLM library RAM estimate on real on-disk size
cryptopoly Apr 19, 2026
8bac30a
Make stage-runtime Windows-robust and skip dead work in dev
cryptopoly Apr 19, 2026
106c1c8
Rename compression/ -> cache_compression/ to avoid Python 3.14 collision
cryptopoly Apr 19, 2026
1898f57
Keep video diffusion models out of the LLM My Models list
cryptopoly Apr 19, 2026
a4399ab
Bump version to 0.6.0
cryptopoly Apr 19, 2026
13fdc1f
Auto-recover Video Studio after a backend sidecar crash
cryptopoly Apr 19, 2026
5ec8a5e
Split Settings page into logical sections with sub-navigation
cryptopoly Apr 19, 2026
8d77398
Drop the side-menu Settings layout, always use the tab bar
cryptopoly Apr 19, 2026
a72e43c
Redesign the Settings Storage section and surface resolved paths
cryptopoly Apr 19, 2026
e173407
Infer host device when the backend probe is silent and add CogVideoX
cryptopoly Apr 19, 2026
11fcf8c
Surface tokenizer dependencies and let the Studio install them
cryptopoly Apr 19, 2026
223b6af
Stop build.ps1 dying on git checkout's success message
cryptopoly Apr 19, 2026
37fd625
Fix Windows installer shipping without the Python runtime
cryptopoly Apr 20, 2026
0ea455b
Stop the video runtime probe timing out on Windows
cryptopoly Apr 20, 2026
1cd4aa8
Drop && from build.ps1 error message for PowerShell 5.1
cryptopoly Apr 20, 2026
3293b27
Strip apostrophes and ampersands from build.ps1 comments
cryptopoly Apr 20, 2026
cf4267a
Name the video runtime specifically when its probe fails
cryptopoly Apr 20, 2026
b4c21bf
Avoid backslash-before-paren that breaks PS 5.1 string parsing
cryptopoly Apr 20, 2026
9871a2c
Unblock Windows video/image studios during first-boot torch import
cryptopoly Apr 20, 2026
9d677a1
Show release dates and accurate on-disk sizes across model listings
cryptopoly Apr 20, 2026
2276c55
Stabilise the Windows dev/build loop and surface CUDA-vs-CPU torch
cryptopoly Apr 20, 2026
93e8f9a
Let users disable the API-token requirement for external clients
cryptopoly Apr 20, 2026
417f6a6
Warn on CPU fallback and stop Video Studio stalling on Windows
cryptopoly Apr 20, 2026
14eeb09
Windows build fix
cryptopoly Apr 20, 2026
cea16fd
CUDA fixes
cryptopoly Apr 20, 2026
422bc51
Build fixes
cryptopoly Apr 20, 2026
df9e159
CUDA pip fix
cryptopoly Apr 20, 2026
5cda1f0
Strip non-ASCII characters from build.ps1 so PS 5.1 can parse it
cryptopoly Apr 20, 2026
fb1c409
Unblock Windows builds and publish installers to /assets
cryptopoly Apr 20, 2026
1f511d2
Ship llama.cpp out of the box + fix Windows restart deadlock
cryptopoly Apr 20, 2026
1 change: 1 addition & 0 deletions .gitignore
@@ -10,6 +10,7 @@ src-tauri/target/
src-tauri/resources/embedded/
.runtime-stage/
releases/
assets/
src-tauri/gen/
.env
.env.local
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog

## v0.6.0 - 2026-04-19

- Renamed the local `compression/` package to `cache_compression/` so it no longer shadows Python 3.14's PEP 784 stdlib `compression` namespace package. Fixes the `ModuleNotFoundError: No module named 'compression._common'` that surfaced on Windows with Python 3.14 when PyTorch's import chain reached into the shadowed package.
- Made the My Models library RAM estimate use the actual on-disk size + KV cache heuristic instead of the catalog flagship's `estimatedMemoryGb`, so differently-sized variants of the same family no longer all render as the same ~76 GB value. Added a parallel compressed-cache estimate for the Compressed column.
- Video diffusion models (HunyuanVideo, Mochi, Wan2.x, LTX-Video, CogVideo, etc.) are now tagged `modelType="video"` during discovery and kept out of the chat-oriented My Models list and chat picker. They continue to surface under the dedicated Video section.
- Video-gen memory safety now includes the model footprint (with device-class fragmentation factors) in the safety verdict, preventing the 40-frame Wan 2.1 T2V 1.3B MPS crash on 64 GB Macs.
- Hardened Windows staging: `scripts/stage-runtime.mjs` now clears read-only attributes and retries on transient EPERM/EBUSY during `.runtime-stage` cleanup, and skips the dev-mode tar archive that Tauri ignores anyway. `build.ps1` pre-clears stale staging and installs the project via `pip install -e ".[desktop,images]"` so strict validation has its required extras.
- Bumped the application version to `0.6.0` across the npm, Python, and Tauri package metadata.
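
The stdlib-shadowing failure behind the first bullet can be reproduced in miniature: a local package named after a stdlib (or PEP 784 namespace) package wins import resolution whenever its parent directory precedes the stdlib on `sys.path`. The package contents below are illustrative, not ChaosEngineAI code:

```python
import importlib
import os
import sys
import tempfile

# Create a throwaway local package called "compression", mimicking a
# project-root package that collides with the Python 3.14 stdlib name.
tmp = tempfile.mkdtemp()
pkg_dir = os.path.join(tmp, "compression")
os.makedirs(pkg_dir)
with open(os.path.join(pkg_dir, "__init__.py"), "w") as f:
    f.write("SOURCE = 'local'\n")

# Project roots typically sit at the front of sys.path, ahead of stdlib
# paths — so the local package shadows the stdlib one on import.
sys.path.insert(0, tmp)
sys.modules.pop("compression", None)  # forget any prior import
mod = importlib.import_module("compression")
print(mod.SOURCE)  # -> 'local' — the stdlib namespace package never loads
```

Renaming the local package (here, to `cache_compression/`) is the only robust fix, since the shadowing happens before any project code runs.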

## v0.5.3 - 2026-04-18

- Fixed the GitHub Actions release workflow to use the valid `includeUpdaterJson` input for `tauri-apps/tauri-action@v0.6.0`, removing the repeated `uploadUpdaterJson` warnings from release builds.
11 changes: 6 additions & 5 deletions CLAUDE.md
@@ -7,7 +7,7 @@ ChaosEngineAI is a desktop AI inference app built with:
- **Desktop shell**: Tauri (Rust) — `src-tauri/`
- **Backend**: Python FastAPI sidecar — `backend_service/`
- **Inference engines**: MLX (Apple Silicon), llama.cpp (GGUF), vLLM (CUDA)
- **Cache strategies**: Pluggable compression via `compression/` registry
- **Cache strategies**: Pluggable compression via `cache_compression/` registry

### Key Directories

@@ -19,7 +19,7 @@ ChaosEngineAI is a desktop AI inference app built with:
| `backend_service/inference.py` | Core inference engine — model loading, binary routing, generation |
| `backend_service/routes/` | API endpoints (14 route modules) |
| `backend_service/helpers/` | System stats, settings, persistence, cache estimation |
| `compression/` | Cache strategy registry + adapters (native, rotorquant, turboquant, chaosengine, triattention) |
| `cache_compression/` | Cache strategy registry + adapters (native, rotorquant, turboquant, chaosengine, triattention). Renamed from `compression/` so it doesn't shadow Python 3.14's stdlib `compression` namespace package. |
| `dflash/` | DFlash speculative decoding — draft model registry + availability detection |
| `scripts/` | Build, install, and update scripts |
| `tests/` | Python tests (pytest) |
@@ -57,10 +57,11 @@ Check for updates to external repos we build from or depend on:
| llama.cpp (standard) | `ggml-org/llama.cpp` | `master` | `git -C ../llama.cpp fetch && git -C ../llama.cpp log HEAD..origin/master --oneline` |
| llama-server-turbo | `TheTom/llama-cpp-turboquant` | `feature/turboquant-kv-cache` | `git ls-remote https://github.com/TheTom/llama-cpp-turboquant.git refs/heads/feature/turboquant-kv-cache` |
| ChaosEngine | `cryptopoly/ChaosEngine` | `main` | `git -C vendor/ChaosEngine fetch && git -C vendor/ChaosEngine log HEAD..origin/main --oneline` |
| dflash-mlx | `bstnxbt/dflash-mlx` | | `.venv/bin/pip index versions dflash-mlx 2>/dev/null` |
| dflash-mlx | `bstnxbt/dflash-mlx` | `main` pinned to commit `f825ffb2` (upstream deleted all tags April 2026) | `git ls-remote https://github.com/bstnxbt/dflash-mlx.git refs/heads/main` |
| turboquant | `back2matching/turboquant` | — | `.venv/bin/pip index versions turboquant 2>/dev/null` |
| turboquant-mlx | `sharpner/turboquant-mlx` | — | `.venv/bin/pip index versions turboquant-mlx 2>/dev/null` |
| turboquant-mlx | `arozanov/turboquant-mlx` | — | `.venv/bin/pip index versions turboquant-mlx 2>/dev/null` |
| turboquant-mlx-full | `helgklaizar/turboquant_mlx` | — | `.venv/bin/pip index versions turboquant-mlx-full 2>/dev/null` |
| DDTree (ported algorithm) | `liranringel/ddtree` | `main` | `git ls-remote https://github.com/liranringel/ddtree.git HEAD` |

### 4. Cache Strategy Health
- [ ] ChaosEngine `llama_cpp_cache_flags()` only emits standard types: `f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1`
@@ -81,7 +82,7 @@ Check for updates to external repos we build from or depend on:

| Area | Test File(s) | Command |
|------|-------------|---------|
| Cache strategies (`compression/`) | `test_cache_strategies.py` | `pytest tests/test_cache_strategies.py -v` |
| Cache strategies (`cache_compression/`) | `test_cache_strategies.py` | `pytest tests/test_cache_strategies.py -v` |
| DFlash / speculative decoding | `test_dflash.py` | `pytest tests/test_dflash.py -v` |
| Inference / llama.cpp / binary routing | `test_inference.py` | `pytest tests/test_inference.py -v` |
| Setup routes / install endpoints | `test_setup_routes.py` | `pytest tests/test_setup_routes.py -v` |
6 changes: 3 additions & 3 deletions README.md
@@ -298,7 +298,7 @@ ChaosEngineAI is three cooperating layers:
- **`src-tauri/`** — Tauri 2 Rust shell + bundled runtime.
- **`backend_service/`** — Python service that owns model lifecycle, the warm pool, the OpenAI-compatible API, the benchmark runner, and speculative decoding (DFlash + DDTree).
- **`backend_service/routes/`** — FastAPI routes for chat, prompts, compare mode, benchmarks, plugins, images, server controls, and settings.
- **`compression/`** — Pluggable cache/compression strategy system. Ships with native f16 and optional adapters for [RotorQuant](https://github.com/scrya-com/rotorquant), [TriAttention](https://github.com/WeianMao/triattention), [TurboQuant](https://pypi.org/project/turboquant-mlx/), and [ChaosEngine](https://github.com/cryptopoly/ChaosEngine).
- **`cache_compression/`** — Pluggable cache/compression strategy system. Ships with native f16 and optional adapters for [RotorQuant](https://github.com/scrya-com/rotorquant), [TriAttention](https://github.com/WeianMao/triattention), [TurboQuant](https://pypi.org/project/turboquant-mlx/), and [ChaosEngine](https://github.com/cryptopoly/ChaosEngine).
- **`dflash/`** — DFlash speculative decoding integration: draft model registry, fuzzy matching for quantized variants, MLX and vLLM backend detection.

---
Expand All @@ -317,7 +317,7 @@ ChaosEngineAI uses a pluggable cache strategy system. Out of the box, models run

Install optional backends into the backend runtime (`./.venv/bin/python3 -m pip install ...`), then restart ChaosEngineAI. TriAttention is Linux/CUDA only; the PyPI `turboquant-mlx` package may still leave TurboQuant disabled in the current build; and ChaosEngine can now be bundled directly into desktop builds by checking out `vendor/ChaosEngine` (or setting `CHAOSENGINE_VENDOR_PATH`) before `npm run stage:runtime`. Source/dev installs can still use the local editable install from GitHub.

The system is designed so new compression methods can be added as single-file adapters in `compression/` without touching any other code.
The system is designed so new compression methods can be added as single-file adapters in `cache_compression/` without touching any other code.
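
The single-file-adapter design described above can be sketched as a small decorator-based registry — every name below (`register`, `available`, the strategy names) is illustrative, not the actual `cache_compression/` API:

```python
from typing import Callable

# Central registry: adapter modules self-register at import time, so
# adding a new strategy is just dropping in a new file that calls
# @register(...) — no other code needs to change.
_REGISTRY: dict[str, dict] = {}


def register(name: str, **meta) -> Callable:
    """Decorator an adapter file uses to announce its cache strategy."""
    def wrap(cls):
        _REGISTRY[name] = {"cls": cls, **meta}
        return cls
    return wrap


def available() -> list[dict]:
    """List every registered strategy with its metadata."""
    return [{"name": name, **meta} for name, meta in _REGISTRY.items()]


@register("native-f16", requires=None)
class NativeF16:
    """Baseline strategy: uncompressed f16 KV cache."""
    flags = "f16"


print([s["name"] for s in available()])  # -> ['native-f16']
```

Optional adapters (RotorQuant, TurboQuant, etc.) would guard their `register(...)` call behind an import check, so a missing pip dependency simply leaves that strategy off the `available()` list.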

---

@@ -390,7 +390,7 @@ ChaosEngineAI/
├── src-tauri/ Tauri Rust shell + bundled runtime
├── scripts/ Build, release + runtime staging scripts
├── backend_service/ Python backend (engine adapters + HTTP server)
├── compression/ Pluggable cache/compression strategy adapters
├── cache_compression/ Pluggable cache/compression strategy adapters
├── dflash/ DFlash/DDTree speculative decoding integration
├── vendor/ChaosEngine/ ChaosEngine compression (git submodule)
├── tests/ Backend integration tests
14 changes: 14 additions & 0 deletions THIRD_PARTY_NOTICES.md
@@ -78,3 +78,17 @@ If installed by the user, each is subject to its own licence:

These libraries are **not bundled** with ChaosEngineAI. They are
optional pip dependencies that the user may install independently.

---

## Ported Algorithms

### DDTree (Diffusion Draft Tree)

- **Upstream:** <https://github.com/liranringel/ddtree>
- **Licence:** MIT
- **Port location:** `backend_service/ddtree.py`
- **Usage:** The tree-building and tree-mask compilation logic is ported
to ChaosEngineAI's MLX runtime. The draft model bundle is reused from
DFlash. No upstream code is bundled verbatim; this is a re-implementation
of the published algorithm.
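
The tree-mask idea mentioned above can be illustrated with a minimal sketch. In draft-tree speculative decoding, each draft token may attend to itself and its ancestors only; the structure below is inferred from the general technique, not taken from the DDTree port:

```python
def tree_mask(parents: list[int]) -> list[list[bool]]:
    """Build a boolean attention mask for a draft tree.

    parents[i] is the index of node i's parent, or -1 for the root.
    mask[i][j] is True iff token i may attend to token j, i.e. j is
    i itself or one of i's ancestors.
    """
    n = len(parents)
    mask = [[False] * n for _ in range(n)]
    for i in range(n):
        j = i
        while j != -1:          # walk up to the root
            mask[i][j] = True
            j = parents[j]
    return mask


# Root (-1), two children of the root, one grandchild under node 1:
m = tree_mask([-1, 0, 0, 1])
print(m[3])  # -> [True, True, False, True]: grandchild sees root, node 1, itself
```

Compiling such a mask once per tree shape lets the target model score every draft branch in a single batched forward pass.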
183 changes: 175 additions & 8 deletions backend_service/app.py
@@ -18,7 +18,12 @@
ImageGenerationConfig,
ImageRuntimeManager,
)
from backend_service.models import ImageGenerationRequest
from backend_service.video_runtime import (
VideoGenerationConfig,
VideoRuntimeManager,
start_torch_warmup,
)
from backend_service.models import ImageGenerationRequest, VideoGenerationRequest
from backend_service.routes import register_routes
from backend_service.state import ChaosEngineState

@@ -35,6 +40,12 @@
_find_image_output as _find_image_output_impl,
_delete_image_output as _delete_image_output_impl,
)
from backend_service.helpers.video import (
_load_video_outputs as _load_video_outputs_impl,
_save_video_artifact as _save_video_artifact_impl,
_find_video_output as _find_video_output_impl,
_delete_video_output as _delete_video_output_impl,
)
from backend_service.helpers.settings import (
DataLocation,
_default_settings as _default_settings_impl,
@@ -55,9 +66,15 @@
WORKSPACE_ROOT = Path(__file__).resolve().parents[1]
APP_STARTED_AT = time.time()
HF_SNAPSHOT_DOWNLOAD_HELPER = (
"import sys\n"
"import json, sys\n"
"from huggingface_hub import snapshot_download\n"
"snapshot_download(repo_id=sys.argv[1], resume_download=True)\n"
"repo_id = sys.argv[1]\n"
"raw_allow = sys.argv[2] if len(sys.argv) > 2 else ''\n"
"allow_patterns = json.loads(raw_allow) if raw_allow else None\n"
"kwargs = {'repo_id': repo_id, 'resume_download': True}\n"
"if allow_patterns:\n"
" kwargs['allow_patterns'] = allow_patterns\n"
"snapshot_download(**kwargs)\n"
)
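
A minimal sketch of how a two-argv helper string like the one above is driven from the parent process — the helper body here is a stand-in that echoes its arguments, and the repo id and patterns are illustrative (no real download happens):

```python
import json
import subprocess
import sys

# Stand-in helper: same argv contract as HF_SNAPSHOT_DOWNLOAD_HELPER —
# argv[1] is the repo id, optional argv[2] is JSON-encoded allow_patterns.
HELPER = (
    "import json, sys\n"
    "repo_id = sys.argv[1]\n"
    "raw = sys.argv[2] if len(sys.argv) > 2 else ''\n"
    "patterns = json.loads(raw) if raw else None\n"
    "print(repo_id, patterns)\n"
)

result = subprocess.run(
    [sys.executable, "-c", HELPER,
     "some-org/some-video-model",               # argv[1]: repo id
     json.dumps(["*.json", "transformer/*"])],  # argv[2]: allow_patterns
    capture_output=True, text=True, check=True,
)
print(result.stdout.strip())
# -> some-org/some-video-model ['*.json', 'transformer/*']
```

Passing the patterns as a single JSON argv entry keeps the subprocess invocation shell-safe and lets the parent omit the argument entirely to download a full snapshot.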
DEFAULT_PORT = int(os.getenv("CHAOSENGINE_PORT", "8876"))
DEFAULT_HOST = os.getenv("CHAOSENGINE_HOST", "127.0.0.1")
@@ -72,6 +89,7 @@
CHAT_SESSIONS_PATH = DATA_LOCATION.chat_sessions_path
DOCUMENTS_DIR = DATA_LOCATION.documents_dir
IMAGE_OUTPUTS_DIR = DATA_LOCATION.image_outputs_dir
VIDEO_OUTPUTS_DIR = DATA_LOCATION.video_outputs_dir
MAX_DOC_SIZE_BYTES = 50 * 1024 * 1024 # 50 MB per file
MAX_SESSION_DOCS_BYTES = 200 * 1024 * 1024 # 200 MB per session
DOC_ALLOWED_EXTENSIONS = {
@@ -93,6 +111,7 @@
EXEMPT_AUTH_PATHS = frozenset({
"/api/health",
"/api/auth/session",
"/api/system/gpu-status",
})


@@ -133,20 +152,76 @@ def _save_chat_sessions(sessions: list[dict[str, Any]], path: Path = CHAT_SESSIO
return _save_chat_sessions_impl(sessions, path)


def _resolve_output_dir_override(raw: str, default: Path) -> Path:
"""Return the user-chosen output directory, or the default.

Empty / whitespace-only strings restore the default. A non-empty value is
expanded (``~`` → home), resolved to an absolute path, and the directory is
created if missing. If creation fails (path is unwritable, on a missing
volume, etc.) we transparently fall back to ``default`` so generation never
crashes just because the user pointed at a stale Dropbox folder.
"""
value = (raw or "").strip()
if not value:
return default
try:
candidate = Path(os.path.expanduser(value)).resolve()
candidate.mkdir(parents=True, exist_ok=True)
return candidate
except OSError:
return default


def _current_image_outputs_dir() -> Path:
# The module-level ``IMAGE_OUTPUTS_DIR`` is the install-time default and
# the override target tests use to redirect output into a tempdir. Anything
# the user typed in Settings takes precedence — but only when actually set,
# so test patches still win when no setting is configured.
settings = _load_settings()
return _resolve_output_dir_override(
str(settings.get("imageOutputsDirectory") or ""),
IMAGE_OUTPUTS_DIR,
)


def _current_video_outputs_dir() -> Path:
settings = _load_settings()
return _resolve_output_dir_override(
str(settings.get("videoOutputsDirectory") or ""),
VIDEO_OUTPUTS_DIR,
)


def _load_image_outputs() -> list[dict[str, Any]]:
return _load_image_outputs_impl(IMAGE_OUTPUTS_DIR)
return _load_image_outputs_impl(_current_image_outputs_dir())


def _save_image_artifact(artifact: dict[str, Any]) -> dict[str, Any]:
return _save_image_artifact_impl(artifact, IMAGE_OUTPUTS_DIR)
return _save_image_artifact_impl(artifact, _current_image_outputs_dir())


def _find_image_output(artifact_id: str) -> dict[str, Any] | None:
return _find_image_output_impl(artifact_id, IMAGE_OUTPUTS_DIR)
return _find_image_output_impl(artifact_id, _current_image_outputs_dir())


def _delete_image_output(artifact_id: str) -> bool:
return _delete_image_output_impl(artifact_id, IMAGE_OUTPUTS_DIR)
return _delete_image_output_impl(artifact_id, _current_image_outputs_dir())


def _load_video_outputs() -> list[dict[str, Any]]:
return _load_video_outputs_impl(_current_video_outputs_dir())


def _save_video_artifact(artifact: dict[str, Any]) -> dict[str, Any]:
return _save_video_artifact_impl(artifact, _current_video_outputs_dir())


def _find_video_output(artifact_id: str) -> dict[str, Any] | None:
return _find_video_output_impl(artifact_id, _current_video_outputs_dir())


def _delete_video_output(artifact_id: str) -> bool:
return _delete_video_output_impl(artifact_id, _current_video_outputs_dir())


def compute_cache_preview(
Expand Down Expand Up @@ -196,6 +271,16 @@ def _resolve_api_token(explicit_token: str | None = None) -> str:
return token or secrets.token_urlsafe(32)


def _resolve_require_api_auth(settings: dict[str, Any]) -> bool:
# Env var wins — useful for CI / headless scripts that need to drop
# the bearer requirement without touching settings.json. Accepts any
# of "0", "false", "no", "off" (case-insensitive) to disable.
env_override = os.getenv("CHAOSENGINE_REQUIRE_AUTH")
if env_override is not None:
return env_override.strip().lower() not in {"0", "false", "no", "off", ""}
return bool(settings.get("requireApiAuth", True))


def _is_loopback_host(host: str | None) -> bool:
if not host:
return False
@@ -228,7 +313,7 @@ def _hf_repo_from_link(link: str | None) -> str | None:


def _get_cache_strategies() -> list[dict[str, Any]]:
from compression import registry
from cache_compression import registry
return registry.available()


@@ -284,6 +369,74 @@ def _generate_image_artifacts(
return artifacts, runtime_status


def _generate_video_artifact(
request: VideoGenerationRequest,
variant: dict[str, Any],
runtime_manager: VideoRuntimeManager,
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Run a single video generation and persist it to the outputs dir.

Returns ``(artifact_dict, runtime_status_dict)``. Unlike the image path,
there is no placeholder fallback — if the runtime isn't ready or the
generation fails, the caller sees the exception and surfaces a proper
HTTP error rather than a fake clip.
"""
import logging
logger = logging.getLogger("chaosengine.video")
logger.info(
"Generating video: model=%s repo=%s size=%dx%d frames=%d steps=%d",
variant.get("name"),
variant.get("repo"),
request.width,
request.height,
request.numFrames,
request.steps,
)

video, runtime_status = runtime_manager.generate(
VideoGenerationConfig(
modelId=request.modelId,
modelName=str(variant["name"]),
repo=str(variant["repo"]),
prompt=request.prompt,
negativePrompt=request.negativePrompt or "",
width=request.width,
height=request.height,
numFrames=request.numFrames,
fps=request.fps,
steps=request.steps,
guidance=request.guidance,
seed=request.seed,
)
)

created_at = datetime.utcnow().replace(microsecond=0).isoformat() + "Z"
clip_duration = round(video.frameCount / max(1, video.fps), 3)
artifact = {
"artifactId": f"vid-{uuid.uuid4().hex[:12]}",
"modelId": request.modelId,
"modelName": variant["name"],
"prompt": request.prompt,
"negativePrompt": request.negativePrompt or "",
"width": video.width,
"height": video.height,
"numFrames": video.frameCount,
"fps": video.fps,
"steps": request.steps,
"guidance": request.guidance,
"seed": video.seed,
"createdAt": created_at,
"durationSeconds": video.durationSeconds,
"clipDurationSeconds": clip_duration,
"videoBytes": video.bytes,
"videoMimeType": video.mimeType,
"videoExtension": video.extension,
"runtimeLabel": video.runtimeLabel,
"runtimeNote": video.runtimeNote,
}
return _save_video_artifact(artifact), runtime_status


def create_app(
state: ChaosEngineState | None = None,
api_token: str | None = None,
Expand All @@ -300,6 +453,13 @@ def create_app(
app.state.chaosengine = state or ChaosEngineState(server_port=DEFAULT_PORT)
app.state.chaosengine_api_token = _resolve_api_token(api_token)
app.state.chaosengine_allowed_origins = frozenset(allowed_origins)
# Bearer-token enforcement toggle. Reads from (in order) env override,
# then saved settings, defaulting to True (keep the existing secure
# default). Mutated live by state.update_settings so the user doesn't
# need to restart the server to toggle it.
app.state.chaosengine_require_api_auth = _resolve_require_api_auth(
app.state.chaosengine.settings,
)

# Shutdown hook: kill any running llama-server / MLX worker children
# on backend exit. Runs on clean shutdown (uvicorn SIGTERM), Ctrl-C,
@@ -360,6 +520,7 @@ async def require_api_auth(request: Request, call_next):
request.method == "OPTIONS"
or path in EXEMPT_AUTH_PATHS
or not (path.startswith("/api/") or path.startswith("/v1/"))
or not getattr(app.state, "chaosengine_require_api_auth", True)
):
return await call_next(request)

@@ -399,6 +560,12 @@ async def log_requests(request, call_next):
return response

register_routes(app)

# Kick off a background torch import so the first Video Studio probe
# doesn't pay the 30-60s cold-disk cost on Windows. Failures are captured
# and surfaced by probe() itself.
start_torch_warmup()

return app


1 change: 1 addition & 0 deletions backend_service/catalog/__init__.py
@@ -1,2 +1,3 @@
from .text_models import MODEL_FAMILIES as MODEL_FAMILIES, CATALOG as CATALOG
from .image_models import IMAGE_MODEL_FAMILIES as IMAGE_MODEL_FAMILIES, LATEST_IMAGE_TRACKED_SEEDS as LATEST_IMAGE_TRACKED_SEEDS
from .video_models import VIDEO_MODEL_FAMILIES as VIDEO_MODEL_FAMILIES