Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 17 additions & 13 deletions backend_service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,20 @@
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import TYPE_CHECKING, Any

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from starlette.responses import JSONResponse

from backend_service.image_runtime import (
ImageGenerationConfig,
ImageRuntimeManager,
)
from backend_service.video_runtime import (
VideoGenerationConfig,
VideoRuntimeManager,
start_torch_warmup,
)
from backend_service.models import ImageGenerationRequest, VideoGenerationRequest
from backend_service.routes import register_routes
from backend_service.state import ChaosEngineState

if TYPE_CHECKING:
from backend_service.image_runtime import ImageRuntimeManager
from backend_service.video_runtime import VideoRuntimeManager

# ---------------------------------------------------------------------------
# Helper modules -- extracted from this file for maintainability.
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -121,8 +116,8 @@
# extracted signatures require them explicitly.
# ---------------------------------------------------------------------------

def _build_system_snapshot() -> dict[str, Any]:
return _build_system_snapshot_impl(app_version, APP_STARTED_AT)
def _build_system_snapshot(*, capabilities: Any | None = None) -> dict[str, Any]:
    """Build the system snapshot using this module's version and start time.

    Thin adapter around ``_build_system_snapshot_impl``: the extracted
    implementation takes the app version and start timestamp explicitly,
    so this wrapper supplies the module-level ``app_version`` and
    ``APP_STARTED_AT`` on the caller's behalf.

    Args:
        capabilities: Optional value forwarded unchanged to the
            implementation as its ``capabilities`` keyword; defaults to
            ``None``.

    Returns:
        The result of ``_build_system_snapshot_impl``.
    """
    snapshot = _build_system_snapshot_impl(
        app_version,
        APP_STARTED_AT,
        capabilities=capabilities,
    )
    return snapshot


def _default_settings() -> dict[str, Any]:
Expand Down Expand Up @@ -231,6 +226,7 @@ def compute_cache_preview(
fp16_layers: int = 4,
num_layers: int = 32,
num_heads: int = 32,
num_kv_heads: int | None = None,
hidden_size: int = 4096,
context_tokens: int = 8192,
params_b: float = 7.0,
Expand All @@ -242,6 +238,7 @@ def compute_cache_preview(
fp16_layers=fp16_layers,
num_layers=num_layers,
num_heads=num_heads,
num_kv_heads=num_kv_heads,
hidden_size=hidden_size,
context_tokens=context_tokens,
params_b=params_b,
Expand Down Expand Up @@ -343,6 +340,8 @@ def _generate_image_artifacts(
runtime_manager: ImageRuntimeManager | None = None,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
import logging
from backend_service.image_runtime import ImageGenerationConfig, ImageRuntimeManager

logger = logging.getLogger("chaosengine.images")
effective_width, effective_height = (
_apply_draft_resolution(request.width, request.height)
Expand Down Expand Up @@ -413,6 +412,8 @@ def _generate_video_artifact(
HTTP error rather than a fake clip.
"""
import logging
from backend_service.video_runtime import VideoGenerationConfig

logger = logging.getLogger("chaosengine.video")
logger.info(
"Generating video: model=%s repo=%s size=%dx%d frames=%d steps=%d",
Expand Down Expand Up @@ -489,7 +490,10 @@ def create_app(
allow_methods=["GET", "POST", "PATCH", "DELETE", "OPTIONS"],
allow_headers=["Accept", "Authorization", "Content-Type", "X-ChaosEngine-Token"],
)
app.state.chaosengine = state or ChaosEngineState(server_port=DEFAULT_PORT)
app.state.chaosengine = state or ChaosEngineState(
server_port=DEFAULT_PORT,
background_capability_probe=True,
)
app.state.chaosengine_api_token = _resolve_api_token(api_token)
app.state.chaosengine_allowed_origins = frozenset(allowed_origins)
# Bearer-token enforcement toggle. Reads from (in order) env override,
Expand Down
176 changes: 171 additions & 5 deletions backend_service/catalog/image_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"taskSupport": ["txt2img"],
"sizeGb": 6.8,
"recommendedResolution": "1024x1024",
"note": "GGUF Q4_K_M — runs on ~8 GB VRAM / Apple Silicon with near-unchanged quality.",
"note": "GGUF Q4_K_M — quantizes the FLUX transformer; the full diffusers pipeline still carries the base text encoders/VAE in memory.",
"estimatedGenerationSeconds": 5.2,
"releaseDate": "2024-09",
},
Expand All @@ -62,7 +62,7 @@
"taskSupport": ["txt2img"],
"sizeGb": 12.7,
"recommendedResolution": "1024x1024",
"note": "GGUF Q8_0 — near-bf16 quality at ~half the memory footprint.",
"note": "GGUF Q8_0 — near-bf16 transformer quality; text encoders/VAE still make the full FLUX runtime memory-heavy.",
"estimatedGenerationSeconds": 4.8,
"releaseDate": "2024-09",
},
Expand Down Expand Up @@ -125,7 +125,7 @@
"taskSupport": ["txt2img"],
"sizeGb": 6.8,
"recommendedResolution": "1024x1024",
"note": "GGUF Q4_K_M — fits FLUX Dev on 8 GB VRAM / Apple Silicon with minimal quality loss.",
"note": "GGUF Q4_K_M — quantizes the FLUX Dev transformer; expect the full diffusers pipeline to remain memory-heavy from text encoders/VAE.",
"estimatedGenerationSeconds": 9.0,
"releaseDate": "2024-09",
},
Expand All @@ -143,7 +143,7 @@
"taskSupport": ["txt2img"],
"sizeGb": 9.9,
"recommendedResolution": "1024x1024",
"note": "GGUF Q6_K — mid-point between Q4 size and Q8 quality.",
"note": "GGUF Q6_K — mid-point between Q4 size and Q8 transformer quality; FLUX text encoders/VAE still dominate runtime memory.",
"estimatedGenerationSeconds": 8.4,
"releaseDate": "2024-09",
},
Expand All @@ -161,7 +161,7 @@
"taskSupport": ["txt2img"],
"sizeGb": 12.7,
"recommendedResolution": "1024x1024",
"note": "GGUF Q8_0 — near-bf16 quality at roughly half the memory.",
"note": "GGUF Q8_0 — near-bf16 transformer quality; text encoders/VAE still make the full FLUX runtime memory-heavy.",
"estimatedGenerationSeconds": 7.8,
"releaseDate": "2024-09",
},
Expand Down Expand Up @@ -271,13 +271,164 @@
]

LATEST_IMAGE_TRACKED_SEEDS: list[dict[str, Any]] = [
{
"repo": "baidu/ERNIE-Image",
"name": "ERNIE-Image",
"provider": "Baidu",
"styleTags": ["general", "detailed"],
"taskSupport": ["txt2img"],
"sizeGb": 29.43,
"runtimeFootprintGb": 24.0,
"runtimeFootprintMpsGb": 32.0,
"runtimeFootprintCpuGb": 36.0,
"coreWeightsGb": 29.43,
"repoSizeGb": 29.47,
"recommendedResolution": "1024x1024",
"note": "Tracked current text-to-image DiT release from Baidu.",
"gated": False,
"pipelineTag": "text-to-image",
"updatedLabel": "Tracked latest",
"releaseDate": "2026-04",
},
{
"repo": "baidu/ERNIE-Image-Turbo",
"name": "ERNIE-Image-Turbo",
"provider": "Baidu",
"styleTags": ["general", "fast"],
"taskSupport": ["txt2img"],
"sizeGb": 29.43,
"runtimeFootprintGb": 24.0,
"runtimeFootprintMpsGb": 32.0,
"runtimeFootprintCpuGb": 36.0,
"coreWeightsGb": 29.43,
"repoSizeGb": 29.47,
"recommendedResolution": "1024x1024",
"note": "Tracked faster ERNIE-Image lane for current local image generation discovery.",
"gated": False,
"pipelineTag": "text-to-image",
"updatedLabel": "Tracked latest",
"releaseDate": "2026-04",
},
{
"repo": "NucleusAI/Nucleus-Image",
"name": "Nucleus-Image",
"provider": "NucleusAI",
"styleTags": ["general", "detailed"],
"taskSupport": ["txt2img"],
"sizeGb": 48.09,
"runtimeFootprintGb": 48.0,
"runtimeFootprintMpsGb": 55.0,
"runtimeFootprintCpuGb": 60.0,
"coreWeightsGb": 48.09,
"repoSizeGb": 48.11,
"recommendedResolution": "1024x1024",
"note": "Tracked current diffusers-compatible text-to-image release.",
"gated": False,
"pipelineTag": "text-to-image",
"updatedLabel": "Tracked latest",
"releaseDate": "2026-04",
},
{
"repo": "black-forest-labs/FLUX.2-dev",
"name": "FLUX.2 Dev",
"provider": "Black Forest Labs",
"styleTags": ["general", "detailed", "flux"],
"taskSupport": ["txt2img", "img2img"],
"sizeGb": 64.7,
"runtimeFootprintGb": 65.0,
"runtimeFootprintMpsGb": 78.0,
"runtimeFootprintCpuGb": 90.0,
"recommendedResolution": "1024x1024",
"note": "Tracked FLUX.2 generation-and-editing release.",
"gated": True,
"pipelineTag": "image-to-image",
"updatedLabel": "Tracked latest",
"releaseDate": "2026-02",
},
{
"repo": "black-forest-labs/FLUX.2-klein-9B",
"name": "FLUX.2 Klein 9B",
"provider": "Black Forest Labs",
"styleTags": ["general", "flux", "fast"],
"taskSupport": ["txt2img", "img2img"],
"sizeGb": 49.23,
"runtimeFootprintGb": 49.0,
"runtimeFootprintMpsGb": 55.0,
"runtimeFootprintCpuGb": 64.0,
"coreWeightsGb": 49.23,
"repoSizeGb": 49.26,
"recommendedResolution": "1024x1024",
"note": "Tracked smaller FLUX.2 lane.",
"gated": False,
"pipelineTag": "image-to-image",
"updatedLabel": "Tracked latest",
"releaseDate": "2026-02",
},
{
"repo": "Tongyi-MAI/Z-Image-Turbo",
"name": "Z-Image-Turbo",
"provider": "Tongyi-MAI",
"styleTags": ["general", "fast"],
"taskSupport": ["txt2img"],
"sizeGb": 30.58,
"runtimeFootprintGb": 16.0,
"runtimeFootprintMpsGb": 20.0,
"runtimeFootprintCpuGb": 24.0,
"coreWeightsGb": 30.58,
"repoSizeGb": 30.64,
"recommendedResolution": "1024x1024",
"note": "Tracked current Z-Image turbo text-to-image release.",
"gated": False,
"pipelineTag": "text-to-image",
"updatedLabel": "Tracked latest",
"releaseDate": "2026-01",
},
{
"repo": "Tongyi-MAI/Z-Image",
"name": "Z-Image",
"provider": "Tongyi-MAI",
"styleTags": ["general", "detailed"],
"taskSupport": ["txt2img"],
"sizeGb": 19.11,
"runtimeFootprintGb": 22.0,
"runtimeFootprintMpsGb": 24.0,
"runtimeFootprintCpuGb": 30.0,
"coreWeightsGb": 19.11,
"repoSizeGb": 19.14,
"recommendedResolution": "1024x1024",
"note": "Tracked current Z-Image text-to-image release.",
"gated": False,
"pipelineTag": "text-to-image",
"updatedLabel": "Tracked latest",
"releaseDate": "2026-01",
},
{
"repo": "Qwen/Qwen-Image-Edit-2511",
"name": "Qwen-Image-Edit-2511",
"provider": "Qwen",
"styleTags": ["edit", "qwenimage", "general"],
"taskSupport": ["img2img"],
"sizeGb": 57.7,
"runtimeFootprintGb": 58.0,
"runtimeFootprintMpsGb": 72.0,
"runtimeFootprintCpuGb": 72.0,
"recommendedResolution": "1024x1024",
"note": "Tracked newer Qwen image editing release with improved consistency.",
"gated": False,
"pipelineTag": "image-to-image",
"updatedLabel": "Tracked latest",
"releaseDate": "2025-12",
},
{
"repo": "Qwen/Qwen-Image",
"name": "Qwen-Image",
"provider": "Qwen",
"styleTags": ["general", "detailed", "qwenimage"],
"taskSupport": ["txt2img"],
"sizeGb": 57.7,
"runtimeFootprintGb": 58.0,
"runtimeFootprintMpsGb": 72.0,
"runtimeFootprintCpuGb": 72.0,
"recommendedResolution": "1024x1024",
"note": "Tracked diffusers-native Qwen image generation family.",
"gated": False,
Expand All @@ -292,6 +443,9 @@
"styleTags": ["edit", "qwenimage", "general"],
"taskSupport": ["img2img"],
"sizeGb": 57.7,
"runtimeFootprintGb": 58.0,
"runtimeFootprintMpsGb": 72.0,
"runtimeFootprintCpuGb": 72.0,
"recommendedResolution": "1024x1024",
"note": "Tracked Qwen edit lane so Image Discover can surface newer editing-capable models too.",
"gated": False,
Expand All @@ -306,6 +460,9 @@
"styleTags": ["hidream", "detailed", "quality"],
"taskSupport": ["txt2img"],
"sizeGb": 47.2,
"runtimeFootprintGb": 58.0,
"runtimeFootprintMpsGb": 62.0,
"runtimeFootprintCpuGb": 70.0,
"recommendedResolution": "1024x1024",
"note": "Tracked larger open-image generation lane from the HiDream family.",
"gated": False,
Expand All @@ -320,6 +477,9 @@
"styleTags": ["general", "edit", "detailed"],
"taskSupport": ["txt2img", "img2img"],
"sizeGb": 35.8,
"runtimeFootprintGb": 40.0,
"runtimeFootprintMpsGb": 45.0,
"runtimeFootprintCpuGb": 52.0,
"recommendedResolution": "1024x1024",
"note": "Tracked unified generation-and-editing lane from the GLM image family.",
"gated": False,
Expand All @@ -333,6 +493,9 @@
"styleTags": ["sana", "fast", "small"],
"taskSupport": ["txt2img"],
"sizeGb": 7.7,
"runtimeFootprintGb": 8.0,
"runtimeFootprintMpsGb": 10.0,
"runtimeFootprintCpuGb": 12.0,
"recommendedResolution": "1024x1024",
"note": "Tracked smaller Sana Sprint lane for faster local image generation.",
"gated": False,
Expand All @@ -347,6 +510,9 @@
"styleTags": ["sana", "fast", "detailed"],
"taskSupport": ["txt2img"],
"sizeGb": 9.74,
"runtimeFootprintGb": 10.0,
"runtimeFootprintMpsGb": 12.0,
"runtimeFootprintCpuGb": 15.0,
"recommendedResolution": "1024x1024",
"note": "Tracked larger Sana Sprint lane with a better quality-to-speed balance.",
"gated": False,
Expand Down
Loading
Loading