cryptopoly · cryptopoly · May 1, 2026 · Apr 29, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/backend_service/app.py b/backend_service/app.py
@@ -8,25 +8,20 @@
 import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from starlette.responses import JSONResponse
 
-from backend_service.image_runtime import (
-    ImageGenerationConfig,
-    ImageRuntimeManager,
-)
-from backend_service.video_runtime import (
-    VideoGenerationConfig,
-    VideoRuntimeManager,
-    start_torch_warmup,
-)
 from backend_service.models import ImageGenerationRequest, VideoGenerationRequest
 from backend_service.routes import register_routes
 from backend_service.state import ChaosEngineState
 
+if TYPE_CHECKING:
+    from backend_service.image_runtime import ImageRuntimeManager
+    from backend_service.video_runtime import VideoRuntimeManager
+
 # ---------------------------------------------------------------------------
 # Helper modules -- extracted from this file for maintainability.
 # ---------------------------------------------------------------------------
@@ -121,8 +116,8 @@
 # extracted signatures require them explicitly.
 # ---------------------------------------------------------------------------
 
-def _build_system_snapshot() -> dict[str, Any]:
-    return _build_system_snapshot_impl(app_version, APP_STARTED_AT)
+def _build_system_snapshot(*, capabilities: Any | None = None) -> dict[str, Any]:
+    return _build_system_snapshot_impl(app_version, APP_STARTED_AT, capabilities=capabilities)
 
 
 def _default_settings() -> dict[str, Any]:
@@ -231,6 +226,7 @@ def compute_cache_preview(
     fp16_layers: int = 4,
     num_layers: int = 32,
     num_heads: int = 32,
+    num_kv_heads: int | None = None,
     hidden_size: int = 4096,
     context_tokens: int = 8192,
     params_b: float = 7.0,
@@ -242,6 +238,7 @@ def compute_cache_preview(
         fp16_layers=fp16_layers,
         num_layers=num_layers,
         num_heads=num_heads,
+        num_kv_heads=num_kv_heads,
         hidden_size=hidden_size,
         context_tokens=context_tokens,
         params_b=params_b,
@@ -343,6 +340,8 @@ def _generate_image_artifacts(
     runtime_manager: ImageRuntimeManager | None = None,
 ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
     import logging
+    from backend_service.image_runtime import ImageGenerationConfig, ImageRuntimeManager
+
     logger = logging.getLogger("chaosengine.images")
     effective_width, effective_height = (
         _apply_draft_resolution(request.width, request.height)
@@ -413,6 +412,8 @@ def _generate_video_artifact(
     HTTP error rather than a fake clip.
     """
     import logging
+    from backend_service.video_runtime import VideoGenerationConfig
+
     logger = logging.getLogger("chaosengine.video")
     logger.info(
         "Generating video: model=%s repo=%s size=%dx%d frames=%d steps=%d",
@@ -489,7 +490,10 @@ def create_app(
         allow_methods=["GET", "POST", "PATCH", "DELETE", "OPTIONS"],
         allow_headers=["Accept", "Authorization", "Content-Type", "X-ChaosEngine-Token"],
     )
-    app.state.chaosengine = state or ChaosEngineState(server_port=DEFAULT_PORT)
+    app.state.chaosengine = state or ChaosEngineState(
+        server_port=DEFAULT_PORT,
+        background_capability_probe=True,
+    )
     app.state.chaosengine_api_token = _resolve_api_token(api_token)
     app.state.chaosengine_allowed_origins = frozenset(allowed_origins)
     # Bearer-token enforcement toggle. Reads from (in order) env override,

diff --git a/backend_service/catalog/image_models.py b/backend_service/catalog/image_models.py
@@ -44,7 +44,7 @@
                 "taskSupport": ["txt2img"],
                 "sizeGb": 6.8,
                 "recommendedResolution": "1024x1024",
-                "note": "GGUF Q4_K_M — runs on ~8 GB VRAM / Apple Silicon with near-unchanged quality.",
+                "note": "GGUF Q4_K_M — quantizes the FLUX transformer; the full diffusers pipeline still carries the base text encoders/VAE in memory.",
                 "estimatedGenerationSeconds": 5.2,
                 "releaseDate": "2024-09",
             },
@@ -62,7 +62,7 @@
                 "taskSupport": ["txt2img"],
                 "sizeGb": 12.7,
                 "recommendedResolution": "1024x1024",
-                "note": "GGUF Q8_0 — near-bf16 quality at ~half the memory footprint.",
+                "note": "GGUF Q8_0 — near-bf16 transformer quality; text encoders/VAE still make the full FLUX runtime memory-heavy.",
                 "estimatedGenerationSeconds": 4.8,
                 "releaseDate": "2024-09",
             },
@@ -125,7 +125,7 @@
                 "taskSupport": ["txt2img"],
                 "sizeGb": 6.8,
                 "recommendedResolution": "1024x1024",
-                "note": "GGUF Q4_K_M — fits FLUX Dev on 8 GB VRAM / Apple Silicon with minimal quality loss.",
+                "note": "GGUF Q4_K_M — quantizes the FLUX Dev transformer; expect the full diffusers pipeline to remain memory-heavy from text encoders/VAE.",
                 "estimatedGenerationSeconds": 9.0,
                 "releaseDate": "2024-09",
             },
@@ -143,7 +143,7 @@
                 "taskSupport": ["txt2img"],
                 "sizeGb": 9.9,
                 "recommendedResolution": "1024x1024",
-                "note": "GGUF Q6_K — mid-point between Q4 size and Q8 quality.",
+                "note": "GGUF Q6_K — mid-point between Q4 size and Q8 transformer quality; FLUX text encoders/VAE still dominate runtime memory.",
                 "estimatedGenerationSeconds": 8.4,
                 "releaseDate": "2024-09",
             },
@@ -161,7 +161,7 @@
                 "taskSupport": ["txt2img"],
                 "sizeGb": 12.7,
                 "recommendedResolution": "1024x1024",
-                "note": "GGUF Q8_0 — near-bf16 quality at roughly half the memory.",
+                "note": "GGUF Q8_0 — near-bf16 transformer quality; text encoders/VAE still make the full FLUX runtime memory-heavy.",
                 "estimatedGenerationSeconds": 7.8,
                 "releaseDate": "2024-09",
             },
@@ -271,13 +271,164 @@
 ]
 
 LATEST_IMAGE_TRACKED_SEEDS: list[dict[str, Any]] = [
+    {
+        "repo": "baidu/ERNIE-Image",
+        "name": "ERNIE-Image",
+        "provider": "Baidu",
+        "styleTags": ["general", "detailed"],
+        "taskSupport": ["txt2img"],
+        "sizeGb": 29.43,
+        "runtimeFootprintGb": 24.0,
+        "runtimeFootprintMpsGb": 32.0,
+        "runtimeFootprintCpuGb": 36.0,
+        "coreWeightsGb": 29.43,
+        "repoSizeGb": 29.47,
+        "recommendedResolution": "1024x1024",
+        "note": "Tracked current text-to-image DiT release from Baidu.",
+        "gated": False,
+        "pipelineTag": "text-to-image",
+        "updatedLabel": "Tracked latest",
+        "releaseDate": "2026-04",
+    },
+    {
+        "repo": "baidu/ERNIE-Image-Turbo",
+        "name": "ERNIE-Image-Turbo",
+        "provider": "Baidu",
+        "styleTags": ["general", "fast"],
+        "taskSupport": ["txt2img"],
+        "sizeGb": 29.43,
+        "runtimeFootprintGb": 24.0,
+        "runtimeFootprintMpsGb": 32.0,
+        "runtimeFootprintCpuGb": 36.0,
+        "coreWeightsGb": 29.43,
+        "repoSizeGb": 29.47,
+        "recommendedResolution": "1024x1024",
+        "note": "Tracked faster ERNIE-Image lane for current local image generation discovery.",
+        "gated": False,
+        "pipelineTag": "text-to-image",
+        "updatedLabel": "Tracked latest",
+        "releaseDate": "2026-04",
+    },
+    {
+        "repo": "NucleusAI/Nucleus-Image",
+        "name": "Nucleus-Image",
+        "provider": "NucleusAI",
+        "styleTags": ["general", "detailed"],
+        "taskSupport": ["txt2img"],
+        "sizeGb": 48.09,
+        "runtimeFootprintGb": 48.0,
+        "runtimeFootprintMpsGb": 55.0,
+        "runtimeFootprintCpuGb": 60.0,
+        "coreWeightsGb": 48.09,
+        "repoSizeGb": 48.11,
+        "recommendedResolution": "1024x1024",
+        "note": "Tracked current diffusers-compatible text-to-image release.",
+        "gated": False,
+        "pipelineTag": "text-to-image",
+        "updatedLabel": "Tracked latest",
+        "releaseDate": "2026-04",
+    },
+    {
+        "repo": "black-forest-labs/FLUX.2-dev",
+        "name": "FLUX.2 Dev",
+        "provider": "Black Forest Labs",
+        "styleTags": ["general", "detailed", "flux"],
+        "taskSupport": ["txt2img", "img2img"],
+        "sizeGb": 64.7,
+        "runtimeFootprintGb": 65.0,
+        "runtimeFootprintMpsGb": 78.0,
+        "runtimeFootprintCpuGb": 90.0,
+        "recommendedResolution": "1024x1024",
+        "note": "Tracked FLUX.2 generation-and-editing release.",
+        "gated": True,
+        "pipelineTag": "image-to-image",
+        "updatedLabel": "Tracked latest",
+        "releaseDate": "2026-02",
+    },
+    {
+        "repo": "black-forest-labs/FLUX.2-klein-9B",
+        "name": "FLUX.2 Klein 9B",
+        "provider": "Black Forest Labs",
+        "styleTags": ["general", "flux", "fast"],
+        "taskSupport": ["txt2img", "img2img"],
+        "sizeGb": 49.23,
+        "runtimeFootprintGb": 49.0,
+        "runtimeFootprintMpsGb": 55.0,
+        "runtimeFootprintCpuGb": 64.0,
+        "coreWeightsGb": 49.23,
+        "repoSizeGb": 49.26,
+        "recommendedResolution": "1024x1024",
+        "note": "Tracked smaller FLUX.2 lane.",
+        "gated": False,
+        "pipelineTag": "image-to-image",
+        "updatedLabel": "Tracked latest",
+        "releaseDate": "2026-02",
+    },
+    {
+        "repo": "Tongyi-MAI/Z-Image-Turbo",
+        "name": "Z-Image-Turbo",
+        "provider": "Tongyi-MAI",
+        "styleTags": ["general", "fast"],
+        "taskSupport": ["txt2img"],
+        "sizeGb": 30.58,
+        "runtimeFootprintGb": 16.0,
+        "runtimeFootprintMpsGb": 20.0,
+        "runtimeFootprintCpuGb": 24.0,
+        "coreWeightsGb": 30.58,
+        "repoSizeGb": 30.64,
+        "recommendedResolution": "1024x1024",
+        "note": "Tracked current Z-Image turbo text-to-image release.",
+        "gated": False,
+        "pipelineTag": "text-to-image",
+        "updatedLabel": "Tracked latest",
+        "releaseDate": "2026-01",
+    },
+    {
+        "repo": "Tongyi-MAI/Z-Image",
+        "name": "Z-Image",
+        "provider": "Tongyi-MAI",
+        "styleTags": ["general", "detailed"],
+        "taskSupport": ["txt2img"],
+        "sizeGb": 19.11,
+        "runtimeFootprintGb": 22.0,
+        "runtimeFootprintMpsGb": 24.0,
+        "runtimeFootprintCpuGb": 30.0,
+        "coreWeightsGb": 19.11,
+        "repoSizeGb": 19.14,
+        "recommendedResolution": "1024x1024",
+        "note": "Tracked current Z-Image text-to-image release.",
+        "gated": False,
+        "pipelineTag": "text-to-image",
+        "updatedLabel": "Tracked latest",
+        "releaseDate": "2026-01",
+    },
+    {
+        "repo": "Qwen/Qwen-Image-Edit-2511",
+        "name": "Qwen-Image-Edit-2511",
+        "provider": "Qwen",
+        "styleTags": ["edit", "qwenimage", "general"],
+        "taskSupport": ["img2img"],
+        "sizeGb": 57.7,
+        "runtimeFootprintGb": 58.0,
+        "runtimeFootprintMpsGb": 72.0,
+        "runtimeFootprintCpuGb": 72.0,
+        "recommendedResolution": "1024x1024",
+        "note": "Tracked newer Qwen image editing release with improved consistency.",
+        "gated": False,
+        "pipelineTag": "image-to-image",
+        "updatedLabel": "Tracked latest",
+        "releaseDate": "2025-12",
+    },
     {
         "repo": "Qwen/Qwen-Image",
         "name": "Qwen-Image",
         "provider": "Qwen",
         "styleTags": ["general", "detailed", "qwenimage"],
         "taskSupport": ["txt2img"],
         "sizeGb": 57.7,
+        "runtimeFootprintGb": 58.0,
+        "runtimeFootprintMpsGb": 72.0,
+        "runtimeFootprintCpuGb": 72.0,
         "recommendedResolution": "1024x1024",
         "note": "Tracked diffusers-native Qwen image generation family.",
         "gated": False,
@@ -292,6 +443,9 @@
         "styleTags": ["edit", "qwenimage", "general"],
         "taskSupport": ["img2img"],
         "sizeGb": 57.7,
+        "runtimeFootprintGb": 58.0,
+        "runtimeFootprintMpsGb": 72.0,
+        "runtimeFootprintCpuGb": 72.0,
         "recommendedResolution": "1024x1024",
         "note": "Tracked Qwen edit lane so Image Discover can surface newer editing-capable models too.",
         "gated": False,
@@ -306,6 +460,9 @@
         "styleTags": ["hidream", "detailed", "quality"],
         "taskSupport": ["txt2img"],
         "sizeGb": 47.2,
+        "runtimeFootprintGb": 58.0,
+        "runtimeFootprintMpsGb": 62.0,
+        "runtimeFootprintCpuGb": 70.0,
         "recommendedResolution": "1024x1024",
         "note": "Tracked larger open-image generation lane from the HiDream family.",
         "gated": False,
@@ -320,6 +477,9 @@
         "styleTags": ["general", "edit", "detailed"],
         "taskSupport": ["txt2img", "img2img"],
         "sizeGb": 35.8,
+        "runtimeFootprintGb": 40.0,
+        "runtimeFootprintMpsGb": 45.0,
+        "runtimeFootprintCpuGb": 52.0,
         "recommendedResolution": "1024x1024",
         "note": "Tracked unified generation-and-editing lane from the GLM image family.",
         "gated": False,
@@ -333,6 +493,9 @@
         "styleTags": ["sana", "fast", "small"],
         "taskSupport": ["txt2img"],
         "sizeGb": 7.7,
+        "runtimeFootprintGb": 8.0,
+        "runtimeFootprintMpsGb": 10.0,
+        "runtimeFootprintCpuGb": 12.0,
         "recommendedResolution": "1024x1024",
         "note": "Tracked smaller Sana Sprint lane for faster local image generation.",
         "gated": False,
@@ -347,6 +510,9 @@
         "styleTags": ["sana", "fast", "detailed"],
         "taskSupport": ["txt2img"],
         "sizeGb": 9.74,
+        "runtimeFootprintGb": 10.0,
+        "runtimeFootprintMpsGb": 12.0,
+        "runtimeFootprintCpuGb": 15.0,
         "recommendedResolution": "1024x1024",
         "note": "Tracked larger Sana Sprint lane with a better quality-to-speed balance.",
         "gated": False,