From 1515632ace4308c769e7a75e8fc51823079219d3 Mon Sep 17 00:00:00 2001
From: Cryptopoly <31970407+cryptopoly@users.noreply.github.com>
Date: Fri, 1 May 2026 13:02:59 +0100
Subject: [PATCH] Fix Windows CUDA detection + post-install runtime probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two related Windows-only bugs surfaced by the v0.7.2 smoke test on an
RTX 4090 box:

Bug #6 — RTX 4090 reported as 12 GB total

GPUMonitor._snapshot_nvidia() shells out to nvidia-smi, and on Windows
boxes without it on PATH (driver installed but no CUDA toolkit) it fell
through to _fallback_psutil(), which returns
psutil.virtual_memory().total — system RAM, not VRAM. The image / video
safety estimators then read that as the GPU budget and produced 'Likely
to crash' warnings on a 24 GB card holding an 11 GB FLUX model.

Fix:
- Try torch.cuda.get_device_properties(0).total_memory first. When the
  GPU bundle is installed this is the most reliable source — it reads
  through the CUDA driver, so nothing needs to be on PATH.
- Fall back to nvidia-smi as before.
- Drop the psutil fallback. When neither source answers we now return
  {'vram_total_gb': None}, which the TS estimators (utils/images.ts,
  utils/videos.ts) already treat as 'unknown' via the
  DEFAULT_*_MEMORY_GB fallbacks. Better an honest 'unknown' than a
  wrong 12 GB.

Bug #7 — Image gen produces gibberish placeholder after install

DiffusersImageEngine.probe() uses importlib.util.find_spec to decide
between the placeholder engine and the real diffusers pipeline. Once
the GPU bundle install lands new packages in the extras dir,
importlib's negative-lookup cache still answers None for the new
modules until invalidate_caches() is called. The probe therefore kept
reporting realGenerationAvailable=False, and the generation pipeline
returned the SVG placeholder, which the frontend renders as a
gibberish image when it displays it as data:image/svg+xml.

Fix:
- probe() now calls importlib.invalidate_caches() before find_spec, so
  newly installed packages are picked up without a backend restart.
- The GPU bundle worker (_gpu_bundle_job_worker) now also calls
  invalidate_caches() and resets the VRAM total cache when it
  transitions to phase=done, so the immediately following capabilities
  snapshot reflects the freshly importable torch.

Tests

tests/test_gpu_detection.py — 9 unit tests covering torch.cuda
detection, nvidia-smi precedence, the new no-system-RAM fallback path,
and the process-lifetime cache. All pass; the existing pytest suite is
still green.
---
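Notes (kept below the --- cut, so git am drops them from the commit):

A standalone sketch of the stale-finder behaviour the Bug #7 fix works
around, for anyone reproducing it outside the app. This is illustrative
only: the package name and temp dir are made up, and whether the middle
find_spec call misses the new package depends on the platform's
directory-mtime granularity.

    import importlib
    import importlib.util
    import pathlib
    import sys
    import tempfile

    # Illustrative repro; "freshpkg" and the temp dir are invented.
    extras = pathlib.Path(tempfile.mkdtemp())
    sys.path.insert(0, str(extras))

    # First lookup fails and primes the FileFinder's directory cache.
    print(importlib.util.find_spec("freshpkg"))  # None

    # Simulate pip landing a package into the already-on-path dir.
    (extras / "freshpkg").mkdir()
    (extras / "freshpkg" / "__init__.py").touch()

    # May still print None: the finder trusts its cached listing until
    # the directory mtime visibly changes (1 s granularity on some
    # filesystems).
    print(importlib.util.find_spec("freshpkg"))

    # Forcing a rescan always picks it up; this is what probe() and
    # the bundle worker now do.
    importlib.invalidate_caches()
    print(importlib.util.find_spec("freshpkg"))  # ModuleSpec(...)

To run just the new tests: pytest tests/test_gpu_detection.py -v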
 backend_service/helpers/gpu.py   |  64 +++++++++++-
 backend_service/image_runtime.py |  11 ++
 backend_service/routes/setup.py  |  17 ++++
 tests/test_gpu_detection.py      | 170 +++++++++++++++++++++++++++++++
 4 files changed, 260 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_gpu_detection.py

diff --git a/backend_service/helpers/gpu.py b/backend_service/helpers/gpu.py
index 2c4e84a..8556bf9 100644
--- a/backend_service/helpers/gpu.py
+++ b/backend_service/helpers/gpu.py
@@ -106,6 +106,14 @@ def _snapshot_macos(self) -> dict[str, Any]:
 
     # ------------------------------------------------------------------
     def _snapshot_nvidia(self) -> dict[str, Any]:
+        # Try torch.cuda first — when the GPU bundle is installed it reads
+        # the right total VRAM via the CUDA driver without shelling out,
+        # and works even if ``nvidia-smi`` isn't on PATH (common on Windows
+        # when the user installs the driver but not the CUDA toolkit).
+        torch_snapshot = self._snapshot_torch_cuda()
+        if torch_snapshot is not None:
+            return torch_snapshot
+
         try:
             out = subprocess.check_output(
                 [
@@ -130,8 +138,60 @@ def _snapshot_nvidia(self) -> dict[str, Any]:
         except (FileNotFoundError, subprocess.SubprocessError, ValueError):
             pass
 
-        # Fallback: system RAM via psutil
-        return self._fallback_psutil()
+        # No GPU detected — return a None-VRAM dict rather than reporting
+        # system RAM as if it were VRAM. The image / video safety
+        # estimators downstream treat ``vram_total_gb is None`` as
+        # "unknown" and skip the crash warning, which is the correct
+        # behaviour when we genuinely don't know the card's capacity.
+        return self._no_gpu_detected()
+
+    def _snapshot_torch_cuda(self) -> dict[str, Any] | None:
+        """Read total + used VRAM from torch.cuda when available.
+
+        Returns ``None`` if torch isn't importable, has no CUDA build, or
+        no CUDA device is currently visible (driver missing, GPU
+        passthrough disabled, etc.). The caller then falls through to
+        ``nvidia-smi``.
+
+        Importing torch is heavy (~200ms first time) but the result is
+        cached one level up by ``get_device_vram_total_gb``, so the cost
+        is paid at most once per backend session.
+        """
+        try:
+            import torch  # type: ignore
+        except Exception:
+            return None
+        try:
+            if not torch.cuda.is_available():
+                return None
+            device = torch.cuda.current_device()
+            props = torch.cuda.get_device_properties(device)
+            total_bytes = int(props.total_memory)
+            try:
+                free_bytes, _ = torch.cuda.mem_get_info(device)
+                used_bytes = max(0, total_bytes - int(free_bytes))
+            except Exception:
+                used_bytes = 0
+            return {
+                "gpu_name": props.name,
+                "vram_total_gb": round(total_bytes / (1024 ** 3), 2),
+                "vram_used_gb": round(used_bytes / (1024 ** 3), 2),
+                "utilization_pct": None,
+                "temperature_c": None,
+                "power_w": None,
+            }
+        except Exception:
+            return None
+
+    def _no_gpu_detected(self) -> dict[str, Any]:
+        return {
+            "gpu_name": "No GPU detected",
+            "vram_total_gb": None,
+            "vram_used_gb": None,
+            "utilization_pct": None,
+            "temperature_c": None,
+            "power_w": None,
+        }
 
     # ------------------------------------------------------------------
     # Fallback
diff --git a/backend_service/image_runtime.py b/backend_service/image_runtime.py
index 5fd46ea..1c73d43 100644
--- a/backend_service/image_runtime.py
+++ b/backend_service/image_runtime.py
@@ -537,6 +537,17 @@ def probe(self) -> ImageRuntimeStatus:
         # find_spec answers "is it installable?" without triggering the
         # import side effects. Device detection (cuda vs cpu) is deferred
         # to preload/generate where we're about to import torch anyway.
+        #
+        # ``invalidate_caches`` matters when the GPU bundle install has
+        # finished mid-process: pip writes the new packages into the
+        # extras dir (already on ``sys.path`` from process start), but
+        # ``importlib`` keeps a per-finder cache of negative lookups, so
+        # the find_spec calls below would still report None even though
+        # the .dist-info folders are sitting on disk. Calling
+        # ``invalidate_caches`` first re-walks the path entries so the
+        # newly installed packages are picked up without a process
+        # restart.
+        importlib.invalidate_caches()
         missing = [
             package
             for package, module_name in (
diff --git a/backend_service/routes/setup.py b/backend_service/routes/setup.py
index dcdfd92..ee381e6 100644
--- a/backend_service/routes/setup.py
+++ b/backend_service/routes/setup.py
@@ -1067,6 +1067,23 @@ def _gpu_bundle_job_worker(python: str, extras_dir: Path) -> None:
     state.cuda_verified = cuda_ok
     state.attempts.append({"phase": "verify", "ok": cuda_ok, "output": detail[-2000:]})
 
+    # Tell the import system to re-scan ``sys.path`` so packages
+    # written into the extras dir during this run are visible to the
+    # next ``importlib.util.find_spec`` call (the image-runtime probe
+    # uses one). Without this, the runtime continues reporting
+    # "placeholder" until a backend restart even though the bundle
+    # is on disk. Also reset the cached VRAM total so the post-install
+    # capabilities snapshot reflects the freshly importable torch.
+    try:
+        importlib.invalidate_caches()
+    except Exception:
+        pass
+    try:
+        from backend_service.helpers.gpu import reset_vram_total_cache
+        reset_vram_total_cache()
+    except Exception:
+        pass
+
     state.phase = "done"
     state.percent = 100.0
     state.done = True
diff --git a/tests/test_gpu_detection.py b/tests/test_gpu_detection.py
new file mode 100644
index 0000000..3a410b2
--- /dev/null
+++ b/tests/test_gpu_detection.py
@@ -0,0 +1,170 @@
+"""Tests for the Windows / Linux GPU detection helper.
+
+The pre-fix path returned system RAM via ``psutil.virtual_memory().total``
+when ``nvidia-smi`` wasn't on PATH — so an RTX 4090 box on Windows showed
+12 GB total in the safety estimator instead of 24 GB. The new path tries
+``torch.cuda`` first, falls back to ``nvidia-smi``, and only returns
+``vram_total_gb=None`` when neither answers. The frontend treats ``None``
+as "unknown" and skips the spurious crash warning.
+"""
+
+from __future__ import annotations
+
+import sys
+import types
+import unittest
+from unittest import mock
+
+from backend_service.helpers import gpu as gpu_module
+
+
+def _fake_torch_with_cuda(total_bytes: int, free_bytes: int, name: str = "NVIDIA GeForce RTX 4090") -> types.ModuleType:
+    cuda = types.SimpleNamespace()
+    cuda.is_available = lambda: True
+    cuda.current_device = lambda: 0
+
+    class _Props:
+        def __init__(self, mem: int, gpu_name: str) -> None:
+            self.total_memory = mem
+            self.name = gpu_name
+
+    cuda.get_device_properties = lambda device: _Props(total_bytes, name)
+    # ``mem_get_info`` returns (free, total) in bytes, matching torch.
+    cuda.mem_get_info = lambda device: (free_bytes, total_bytes)
+
+    fake = types.ModuleType("torch")
+    fake.cuda = cuda  # type: ignore[attr-defined]
+    return fake
+
+
+def _fake_torch_no_cuda() -> types.ModuleType:
+    cuda = types.SimpleNamespace()
+    cuda.is_available = lambda: False
+    fake = types.ModuleType("torch")
+    fake.cuda = cuda  # type: ignore[attr-defined]
+    return fake
+
+
+class SnapshotTorchCudaTests(unittest.TestCase):
+    def setUp(self) -> None:
+        gpu_module.reset_vram_total_cache()
+        self.monitor = gpu_module.GPUMonitor()
+        # Force the monitor onto the nvidia path even when running these
+        # tests on a Mac developer machine.
+        self.monitor._system = "Linux"
+
+    def tearDown(self) -> None:
+        gpu_module.reset_vram_total_cache()
+
+    def test_torch_cuda_returns_full_vram_for_rtx_4090(self) -> None:
+        twenty_four_gb = 24 * 1024 ** 3
+        free = 22 * 1024 ** 3
+        with mock.patch.dict(sys.modules, {"torch": _fake_torch_with_cuda(twenty_four_gb, free)}):
+            snapshot = self.monitor._snapshot_torch_cuda()
+        self.assertIsNotNone(snapshot)
+        assert snapshot is not None  # type narrowing
+        self.assertEqual(snapshot["gpu_name"], "NVIDIA GeForce RTX 4090")
+        self.assertEqual(snapshot["vram_total_gb"], 24.0)
+        # 24 - 22 = 2 GB used.
+        self.assertEqual(snapshot["vram_used_gb"], 2.0)
+
+    def test_torch_cuda_unavailable_returns_none(self) -> None:
+        with mock.patch.dict(sys.modules, {"torch": _fake_torch_no_cuda()}):
+            snapshot = self.monitor._snapshot_torch_cuda()
+        self.assertIsNone(snapshot)
+
+    def test_torch_not_installed_returns_none(self) -> None:
+        # Monkeypatch the import machinery to raise ImportError for torch.
+        import builtins
+
+        original_import = builtins.__import__
+
+        def fake_import(name, *args, **kwargs):
+            if name == "torch":
+                raise ImportError("No module named 'torch'")
+            return original_import(name, *args, **kwargs)
+
+        # Also remove any previously cached torch entry so the function's
+        # ``import torch`` actually invokes the patched ``__import__``
+        # instead of resolving via sys.modules; ``patch.dict`` restores
+        # the original contents on exit.
+        with mock.patch("builtins.__import__", side_effect=fake_import), \
+                mock.patch.dict(sys.modules):
+            sys.modules.pop("torch", None)
+            snapshot = self.monitor._snapshot_torch_cuda()
+        self.assertIsNone(snapshot)
+
+
+class SnapshotNvidiaTests(unittest.TestCase):
+    def setUp(self) -> None:
+        gpu_module.reset_vram_total_cache()
+        self.monitor = gpu_module.GPUMonitor()
+        self.monitor._system = "Linux"
+
+    def tearDown(self) -> None:
+        gpu_module.reset_vram_total_cache()
+
+    def test_falls_back_to_no_gpu_when_torch_and_nvidia_smi_both_fail(self) -> None:
+        with mock.patch.object(self.monitor, "_snapshot_torch_cuda", return_value=None), \
+                mock.patch("subprocess.check_output", side_effect=FileNotFoundError):
+            snapshot = self.monitor._snapshot_nvidia()
+        self.assertEqual(snapshot["gpu_name"], "No GPU detected")
+        self.assertIsNone(snapshot["vram_total_gb"])
+        self.assertIsNone(snapshot["vram_used_gb"])
+
+    def test_does_not_fall_back_to_system_ram(self) -> None:
+        """The whole point of this fix: don't lie that system RAM is VRAM."""
+        with mock.patch.object(self.monitor, "_snapshot_torch_cuda", return_value=None), \
+                mock.patch("subprocess.check_output", side_effect=FileNotFoundError):
+            snapshot = self.monitor._snapshot_nvidia()
+        self.assertNotEqual(snapshot["gpu_name"], "System RAM (no GPU detected)")
+
+    def test_torch_cuda_takes_precedence_over_nvidia_smi(self) -> None:
+        torch_snapshot = {
+            "gpu_name": "RTX 4090",
+            "vram_total_gb": 24.0,
+            "vram_used_gb": 1.0,
+            "utilization_pct": None,
+            "temperature_c": None,
+            "power_w": None,
+        }
+        with mock.patch.object(self.monitor, "_snapshot_torch_cuda", return_value=torch_snapshot), \
+                mock.patch("subprocess.check_output") as mock_subprocess:
+            snapshot = self.monitor._snapshot_nvidia()
+        self.assertEqual(snapshot["vram_total_gb"], 24.0)
+        mock_subprocess.assert_not_called()
+
+
+class GetDeviceVramTotalGbTests(unittest.TestCase):
+    def setUp(self) -> None:
+        gpu_module.reset_vram_total_cache()
+
+    def tearDown(self) -> None:
+        gpu_module.reset_vram_total_cache()
+
+    def test_returns_none_when_snapshot_has_no_vram(self) -> None:
+        with mock.patch.object(
+            gpu_module._monitor,
+            "snapshot",
+            return_value={"vram_total_gb": None},
+        ):
+            self.assertIsNone(gpu_module.get_device_vram_total_gb())
+
+    def test_returns_float_when_snapshot_has_vram(self) -> None:
+        with mock.patch.object(
+            gpu_module._monitor,
+            "snapshot",
+            return_value={"vram_total_gb": 24.0},
+        ):
+            self.assertEqual(gpu_module.get_device_vram_total_gb(), 24.0)
+
+    def test_caches_result_for_process_lifetime(self) -> None:
+        with mock.patch.object(
+            gpu_module._monitor,
+            "snapshot",
+            return_value={"vram_total_gb": 24.0},
+        ) as mock_snapshot:
+            gpu_module.get_device_vram_total_gb()
+            gpu_module.get_device_vram_total_gb()
+            gpu_module.get_device_vram_total_gb()
+        self.assertEqual(mock_snapshot.call_count, 1)
+
+
+if __name__ == "__main__":
+    unittest.main()
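
A note on the cache pair the tests lean on: get_device_vram_total_gb()
and reset_vram_total_cache() already live in backend_service/helpers/gpu.py
and are not part of this diff. For reviewers reading the tests in
isolation, a minimal sketch consistent with what they assert (names taken
from the patch, bodies assumed, not the shipped code):

    # Sketch only; the real module may differ. ``_monitor`` is the
    # module-level GPUMonitor the tests patch via gpu_module._monitor.
    _VRAM_TOTAL_GB: float | None = None
    _VRAM_PROBED = False

    def get_device_vram_total_gb() -> float | None:
        # Probe once per process; reset_vram_total_cache() re-arms the
        # probe (the GPU bundle worker calls it after installing torch).
        global _VRAM_TOTAL_GB, _VRAM_PROBED
        if not _VRAM_PROBED:
            _VRAM_TOTAL_GB = _monitor.snapshot().get("vram_total_gb")
            _VRAM_PROBED = True
        return _VRAM_TOTAL_GB

    def reset_vram_total_cache() -> None:
        global _VRAM_TOTAL_GB, _VRAM_PROBED
        _VRAM_TOTAL_GB = None
        _VRAM_PROBED = False

The separate probed flag matters because None is a legitimate cached
value ("no GPU detected"), so the cache cannot use None itself as the
"not yet probed" sentinel.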