Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 94 additions & 31 deletions backend_service/helpers/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
"""
from __future__ import annotations

import json
import os
import platform
import shutil
import subprocess
import json
import sys
import threading
from typing import Any

Expand Down Expand Up @@ -146,42 +148,103 @@ def _snapshot_nvidia(self) -> dict[str, Any]:
return self._no_gpu_detected()

def _snapshot_torch_cuda(self) -> dict[str, Any] | None:
"""Read total + used VRAM from torch.cuda when available.
"""Read total + used VRAM from torch.cuda via a short-lived subprocess.

We deliberately do NOT ``import torch`` in the backend process.
On Windows, importing torch loads ``torch/lib/*.dll`` (asmjit,
cublas, cudnn, ...) into the backend's process handle table,
and pip's ``--target`` install of a fresh torch then fails with
``[WinError 5] Access is denied`` when ``shutil.rmtree`` tries
to delete the locked DLLs:

PermissionError: [WinError 5] Access is denied:
'...\\extras\\cp312\\site-packages\\torch\\lib\\asmjit.dll'

The fix is to query torch in a child Python process that exits
as soon as it has printed the JSON — the OS releases the DLL
handles, and the next ``Install GPU runtime`` click can swap
torch in place.

Returns ``None`` if torch isn't installed, has no CUDA build,
no CUDA device is visible, or the subprocess errors. The caller
then falls through to ``nvidia-smi``.
"""
# Skip on macOS — Apple Silicon has no torch.cuda; ``_snapshot_macos``
# owns the unified-memory path.
if self._system == "Darwin":
return None

Returns ``None`` if torch isn't importable, has no CUDA build, or
no CUDA device is currently visible (driver missing, GPU
passthrough disabled, etc.). The caller then falls through to
``nvidia-smi``.
executable = self._resolve_python_executable()
if executable is None:
return None

script = (
"import json, sys\n"
"try:\n"
" import torch\n"
"except Exception:\n"
" sys.exit(0)\n"
"if not getattr(torch, 'cuda', None) or not torch.cuda.is_available():\n"
" sys.exit(0)\n"
"device = torch.cuda.current_device()\n"
"props = torch.cuda.get_device_properties(device)\n"
"total = int(props.total_memory)\n"
"try:\n"
" free, _ = torch.cuda.mem_get_info(device)\n"
" used = max(0, total - int(free))\n"
"except Exception:\n"
" used = 0\n"
"json.dump({'gpu_name': props.name, 'total': total, 'used': used}, sys.stdout)\n"
)

Importing torch is heavy (~200ms first time) but the result is
cached one level up by ``get_device_vram_total_gb``, so the cost
is paid at most once per backend session.
"""
try:
import torch # type: ignore
except Exception:
result = subprocess.run(
[executable, "-c", script],
capture_output=True,
text=True,
timeout=15,
**_SUBPROCESS_KWARGS,
)
except (FileNotFoundError, subprocess.SubprocessError):
return None
if result.returncode != 0:
return None
payload = (result.stdout or "").strip()
if not payload:
return None
try:
if not torch.cuda.is_available():
return None
device = torch.cuda.current_device()
props = torch.cuda.get_device_properties(device)
total_bytes = int(props.total_memory)
try:
free_bytes, _ = torch.cuda.mem_get_info(device)
used_bytes = max(0, total_bytes - int(free_bytes))
except Exception:
used_bytes = 0
return {
"gpu_name": props.name,
"vram_total_gb": round(total_bytes / (1024 ** 3), 2),
"vram_used_gb": round(used_bytes / (1024 ** 3), 2),
"utilization_pct": None,
"temperature_c": None,
"power_w": None,
}
except Exception:
data = json.loads(payload)
total_bytes = int(data["total"])
used_bytes = int(data.get("used") or 0)
gpu_name = str(data.get("gpu_name") or "NVIDIA GPU")
except (ValueError, KeyError, TypeError):
return None
return {
"gpu_name": gpu_name,
"vram_total_gb": round(total_bytes / (1024 ** 3), 2),
"vram_used_gb": round(used_bytes / (1024 ** 3), 2),
"utilization_pct": None,
"temperature_c": None,
"power_w": None,
}

def _resolve_python_executable(self) -> str | None:
"""Pick a Python interpreter for the torch.cuda subprocess probe.

Prefers the embedded sidecar Python (the same one pip writes the
GPU bundle wheels to) so ``import torch`` resolves the freshly
installed wheel. Falls back to the running interpreter if the
embed override isn't set.
"""
candidates: list[str] = []
embed = os.environ.get("CHAOSENGINE_EMBED_PYTHON_BIN")
if embed:
candidates.append(embed)
candidates.append(sys.executable)
for candidate in candidates:
if candidate and os.path.isfile(candidate):
return candidate
return None

def _no_gpu_detected(self) -> dict[str, Any]:
return {
Expand Down
20 changes: 13 additions & 7 deletions src/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -6265,19 +6265,25 @@ select.text-input {
margin-top: 12px;
border: 1px solid rgba(255, 255, 255, 0.08);
border-radius: 8px;
background: rgba(0, 0, 0, 0.22);
/* Fully opaque card background — the previous rgba(0, 0, 0, 0.22) let
* the Prompt and Recent Outputs panel headers bleed through visually
* during a long GPU install, making the streaming pip output look
* like it was overlapping those cards on Windows. Match the rest of
* the surface tokens so the install log reads as a solid card. */
background: var(--surface);
width: 100%;
max-width: 100%;
overflow: hidden;
/* Establishes a stacking context so the streaming pip output stays
* above the Prompt + Recent Outputs cards in Image Studio and Video
* Studio. Without these the panel renders behind those siblings on
* Windows during a long GPU bundle install — the log is still alive
* but the user can't see it. ``z-index: 5`` is enough to win against
* the surrounding ``.panel`` cards (which set no z-index of their
* own) without fighting the global tooltip portal (z-index: 1000+). */
* above the Prompt + Recent Outputs cards even on Chrome / WebKit
* versions that lay out adjacent grid rows with subpixel overlap. */
position: relative;
z-index: 5;
/* ``contain: layout`` keeps the panel's growth from leaking into
* sibling grid rows — important when the parent runtime callout is
* laid out as a flex column and the install log expands by 350+ px
* during a torch download. */
contain: layout;
}
.install-log-summary {
cursor: pointer;
Expand Down
116 changes: 69 additions & 47 deletions tests/test_gpu_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,26 @@

The pre-fix path returned system RAM via ``psutil.virtual_memory().total``
when ``nvidia-smi`` wasn't on PATH — so an RTX 4090 box on Windows showed
12 GB total in the safety estimator instead of 24 GB. The new path probes
``torch.cuda`` via a short-lived subprocess (so we don't lock torch DLLs
in the backend process and break the next ``Install GPU runtime``), then
falls back to ``nvidia-smi``, and only returns ``vram_total_gb=None`` when
neither answers. The frontend treats ``None`` as "unknown" and skips the
spurious crash warning.
"""

from __future__ import annotations

import sys
import types
import json
import unittest
from unittest import mock

from backend_service.helpers import gpu as gpu_module


def _fake_torch_with_cuda(total_bytes: int, free_bytes: int, name: str = "NVIDIA GeForce RTX 4090") -> types.ModuleType:
cuda = types.SimpleNamespace()
cuda.is_available = lambda: True
cuda.current_device = lambda: 0

class _Props:
def __init__(self, mem: int, gpu_name: str) -> None:
self.total_memory = mem
self.name = gpu_name

cuda.get_device_properties = lambda device: _Props(total_bytes, name)
cuda.mem_get_info = lambda device: (free_bytes, total_bytes)

fake = types.ModuleType("torch")
fake.cuda = cuda # type: ignore[attr-defined]
return fake


def _fake_torch_no_cuda() -> types.ModuleType:
cuda = types.SimpleNamespace()
cuda.is_available = lambda: False
fake = types.ModuleType("torch")
fake.cuda = cuda # type: ignore[attr-defined]
return fake
def _fake_completed_process(returncode: int, stdout: str, stderr: str = ""):
"""Build a CompletedProcess-shaped mock for ``subprocess.run``."""
return mock.MagicMock(returncode=returncode, stdout=stdout, stderr=stderr)


class SnapshotTorchCudaTests(unittest.TestCase):
Expand All @@ -58,7 +38,14 @@ def tearDown(self) -> None:
def test_torch_cuda_returns_full_vram_for_rtx_4090(self) -> None:
twenty_four_gb = 24 * 1024 ** 3
free = 22 * 1024 ** 3
with mock.patch.dict(sys.modules, {"torch": _fake_torch_with_cuda(twenty_four_gb, free)}):
used = twenty_four_gb - free
payload = json.dumps({
"gpu_name": "NVIDIA GeForce RTX 4090",
"total": twenty_four_gb,
"used": used,
})
with mock.patch.object(self.monitor, "_resolve_python_executable", return_value="/usr/bin/python3"), \
mock.patch("backend_service.helpers.gpu.subprocess.run", return_value=_fake_completed_process(0, payload)):
snapshot = self.monitor._snapshot_torch_cuda()
self.assertIsNotNone(snapshot)
assert snapshot is not None # type narrow
Expand All @@ -68,26 +55,61 @@ def test_torch_cuda_returns_full_vram_for_rtx_4090(self) -> None:
self.assertEqual(snapshot["vram_used_gb"], 2.0)

def test_torch_cuda_unavailable_returns_none(self) -> None:
with mock.patch.dict(sys.modules, {"torch": _fake_torch_no_cuda()}):
# Subprocess exits 0 with empty stdout — the inline script printed
# nothing because torch.cuda.is_available() was False.
with mock.patch.object(self.monitor, "_resolve_python_executable", return_value="/usr/bin/python3"), \
mock.patch("backend_service.helpers.gpu.subprocess.run", return_value=_fake_completed_process(0, "")):
snapshot = self.monitor._snapshot_torch_cuda()
self.assertIsNone(snapshot)

def test_torch_not_installed_returns_none(self) -> None:
# Monkeypatch the import to raise ImportError.
original_import = __builtins__["__import__"] if isinstance(__builtins__, dict) else __builtins__.__import__

def fake_import(name, *args, **kwargs):
if name == "torch":
raise ImportError("No module named 'torch'")
return original_import(name, *args, **kwargs)

with mock.patch("builtins.__import__", side_effect=fake_import):
# Also remove any previously cached torch entry so the
# function's ``import torch`` actually invokes the patched
# ``__import__`` instead of resolving via sys.modules.
with mock.patch.dict(sys.modules, {}, clear=False):
sys.modules.pop("torch", None)
snapshot = self.monitor._snapshot_torch_cuda()
# Subprocess exits 0 with empty stdout — the inline script's
# ``import torch`` raised, the except branch did sys.exit(0).
with mock.patch.object(self.monitor, "_resolve_python_executable", return_value="/usr/bin/python3"), \
mock.patch("backend_service.helpers.gpu.subprocess.run", return_value=_fake_completed_process(0, "")):
snapshot = self.monitor._snapshot_torch_cuda()
self.assertIsNone(snapshot)

def test_subprocess_error_returns_none(self) -> None:
    """A probe launch failure (interpreter missing) degrades to None."""
    exe_patch = mock.patch.object(
        self.monitor, "_resolve_python_executable", return_value="/usr/bin/python3"
    )
    run_patch = mock.patch(
        "backend_service.helpers.gpu.subprocess.run",
        side_effect=FileNotFoundError("python3 missing"),
    )
    with exe_patch, run_patch:
        self.assertIsNone(self.monitor._snapshot_torch_cuda())

def test_no_python_executable_returns_none(self) -> None:
    """No usable interpreter on disk means the probe is skipped outright."""
    with mock.patch.object(
        self.monitor, "_resolve_python_executable", return_value=None
    ):
        result = self.monitor._snapshot_torch_cuda()
    self.assertIsNone(result)

def test_does_not_import_torch_in_main_process(self) -> None:
    """Critical: importing torch in-process locks Windows DLLs and
    breaks the next Install GPU runtime click. The probe MUST go via
    a child process so its DLL handles are released on exit."""
    payload = json.dumps(
        {"gpu_name": "RTX 4090", "total": 24 * 1024 ** 3, "used": 0}
    )
    recorded_commands: list[list[str]] = []

    def recording_run(cmd, **kwargs):
        # Capture every spawned command line so we can inspect it below.
        recorded_commands.append(list(cmd))
        return _fake_completed_process(0, payload)

    exe_patch = mock.patch.object(
        self.monitor, "_resolve_python_executable", return_value="/usr/bin/python3"
    )
    run_patch = mock.patch(
        "backend_service.helpers.gpu.subprocess.run", side_effect=recording_run
    )
    with exe_patch, run_patch:
        self.monitor._snapshot_torch_cuda()

    self.assertEqual(len(recorded_commands), 1)
    # The probe must spawn a Python with a -c script containing
    # 'import torch'. If the implementation ever switches back to
    # an in-process import this assertion will catch it.
    interpreter_args = recorded_commands[0]
    self.assertEqual(interpreter_args[1], "-c")
    self.assertIn("import torch", interpreter_args[2])

def test_skipped_on_macos(self) -> None:
    """On Darwin the torch.cuda probe is bypassed entirely."""
    self.monitor._system = "Darwin"
    self.assertIsNone(self.monitor._snapshot_torch_cuda())


Expand Down
Loading