diff --git a/backend/main.py b/backend/main.py index f520ea4..516b298 100644 --- a/backend/main.py +++ b/backend/main.py @@ -723,6 +723,24 @@ def _relative_percent(numerator: float, denominator: float) -> Optional[float]: return round((numerator / denominator) * 100, 1) +_SAFE_DIAGNOSTIC_ERROR_CODES = { + "cpu_load_unavailable", + "gpu_runtime_unavailable", + "memory_snapshot_unavailable", + "queue_runtime_unavailable", +} + + +def _sanitize_diagnostic_error(raw_error: Any, fallback: str) -> Optional[str]: + if raw_error is None: + return None + if isinstance(raw_error, str): + normalized = raw_error.strip().lower() + if normalized in _SAFE_DIAGNOSTIC_ERROR_CODES: + return normalized + return fallback + + def _linux_memory_snapshot() -> Optional[Dict[str, Any]]: meminfo_path = "/proc/meminfo" if not os.path.exists(meminfo_path): @@ -739,7 +757,12 @@ def _linux_memory_snapshot() -> Optional[Dict[str, Any]]: if number.isdigit(): values[key.strip()] = int(number) except Exception as exc: - return {"error": str(exc)} + return { + "error": _sanitize_diagnostic_error( + exc, + "memory_snapshot_unavailable", + ) + } total_kb = values.get("MemTotal", 0) available_kb = values.get("MemAvailable", values.get("MemFree", 0)) @@ -761,7 +784,12 @@ def _windows_memory_snapshot() -> Optional[Dict[str, Any]]: if not kernel32.GlobalMemoryStatusEx(ctypes.byref(status)): return None except Exception as exc: - return {"error": str(exc)} + return { + "error": _sanitize_diagnostic_error( + exc, + "memory_snapshot_unavailable", + ) + } total_bytes = int(status.ullTotalPhys) available_bytes = int(status.ullAvailPhys) @@ -778,12 +806,18 @@ def _memory_snapshot() -> Dict[str, Any]: snapshot = _linux_memory_snapshot() if snapshot is None and os.name == "nt": snapshot = _windows_memory_snapshot() - if not snapshot: - return { + if not snapshot or snapshot.get("error"): + payload: Dict[str, Any] = { "available": False, "state": "warning", "note": "메모리 사용량을 수집하지 못했습니다.", } + if snapshot: + payload["error"] = _sanitize_diagnostic_error( + snapshot.get("error"), + "memory_snapshot_unavailable", + ) + return payload usage_percent = snapshot.get("usage_percent") critical_percent = max( @@ -868,7 +902,7 @@ def _cpu_snapshot() -> Dict[str, Any]: usage_percent: Optional[float] = None note = "CPU 부하가 정상 범위입니다." state = "ok" - error_message = "" + error_code: Optional[str] = None warning_percent = min( SAFE_COMPUTE_USAGE_LIMIT_PERCENT, int(os.getenv("RUNTIME_CPU_WARNING_PERCENT", str(SAFE_MEMORY_OCCUPANCY_LIMIT_PERCENT)) or SAFE_MEMORY_OCCUPANCY_LIMIT_PERCENT), @@ -883,7 +917,7 @@ def _cpu_snapshot() -> Dict[str, Any]: getloadavg = cast(Any, getattr(os, "getloadavg")) load_1m = round(float(getloadavg()[0]), 2) except Exception as exc: - error_message = str(exc) + error_code = _sanitize_diagnostic_error(exc, "cpu_load_unavailable") if load_1m is not None and cpu_count > 0: load_ratio_percent = _relative_percent(load_1m, cpu_count) @@ -911,29 +945,28 @@ def _cpu_snapshot() -> Dict[str, Any]: "load_ratio_percent": load_ratio_percent, "usage_percent": usage_percent, } - if error_message: - payload["error"] = error_message + if error_code: + payload["error"] = error_code return payload def _gpu_snapshot() -> Dict[str, Any]: gpu_runtime = get_gpu_runtime_info() + gpu_runtime_data = gpu_runtime if isinstance(gpu_runtime, dict) else {} + gpu_error = _sanitize_diagnostic_error( + gpu_runtime_data.get("error"), + "gpu_runtime_unavailable", + ) devices = ( - gpu_runtime.get("devices", []) - if isinstance(gpu_runtime, dict) - else [] + gpu_runtime_data.get("devices", []) ) - if not gpu_runtime.get("available"): + if not gpu_runtime_data.get("available"): return { "available": False, "state": "warning", "note": "GPU 런타임이 감지되지 않았습니다. CPU fallback 또는 드라이버 상태를 확인하세요.", "devices": [], - "error": ( - gpu_runtime.get("error") - if isinstance(gpu_runtime, dict) - else None - ), + "error": gpu_error or "gpu_runtime_unavailable", } peak_usage = 0.0 diff --git a/tests/test_health_diagnostics_sanitization.py b/tests/test_health_diagnostics_sanitization.py new file mode 100644 index 0000000..2eaadc0 --- /dev/null +++ b/tests/test_health_diagnostics_sanitization.py @@ -0,0 +1,105 @@ +import ast +import os +from pathlib import Path +from typing import Any, Dict, List, Optional, cast + + +MAIN_PATH = Path(__file__).resolve().parent.parent / "backend" / "main.py" +SAFE_DIAGNOSTIC_ERROR_CODES = { + "cpu_load_unavailable", + "gpu_runtime_unavailable", + "memory_snapshot_unavailable", + "queue_runtime_unavailable", +} + + +def _load_functions(*names: str, extra_globals: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + tree = ast.parse(MAIN_PATH.read_text(encoding="utf-8-sig"), filename=str(MAIN_PATH)) + selected = [ + node + for node in tree.body + if isinstance(node, ast.FunctionDef) and node.name in names + ] + namespace: Dict[str, Any] = { + "Any": Any, + "Dict": Dict, + "List": List, + "Optional": Optional, + "Path": Path, + "cast": cast, + "os": os, + } + if extra_globals: + namespace.update(extra_globals) + exec(compile(ast.Module(body=selected, type_ignores=[]), str(MAIN_PATH), "exec"), namespace) + return namespace + + +def test_sanitize_diagnostic_error_redacts_exception_text(): + namespace = _load_functions( + "_sanitize_diagnostic_error", + extra_globals={ + "_SAFE_DIAGNOSTIC_ERROR_CODES": SAFE_DIAGNOSTIC_ERROR_CODES, + }, + ) + + sanitize = namespace["_sanitize_diagnostic_error"] + + assert sanitize(PermissionError("cannot open /proc/meminfo"), "memory_snapshot_unavailable") == "memory_snapshot_unavailable" + assert sanitize("gpu_runtime_unavailable", "memory_snapshot_unavailable") == "gpu_runtime_unavailable" + assert sanitize(" GPU_Runtime_Unavailable ", "memory_snapshot_unavailable") == "gpu_runtime_unavailable" + assert sanitize(None, "memory_snapshot_unavailable") is None + + +def test_memory_snapshot_error_becomes_warning_payload(): + namespace = _load_functions( + "_sanitize_diagnostic_error", + "_memory_snapshot", + extra_globals={ + "_SAFE_DIAGNOSTIC_ERROR_CODES": SAFE_DIAGNOSTIC_ERROR_CODES, + "_linux_memory_snapshot": lambda: {"error": "permission denied: /proc/meminfo"}, + "_windows_memory_snapshot": lambda: None, + "SAFE_COMPUTE_USAGE_LIMIT_PERCENT": 90, + "SAFE_MEMORY_OCCUPANCY_LIMIT_PERCENT": 75, + }, + ) + + payload = namespace["_memory_snapshot"]() + + assert payload["available"] is False + assert payload["state"] == "warning" + assert payload["error"] == "memory_snapshot_unavailable" + assert "/proc/meminfo" not in payload["error"] + + +def test_cpu_and_gpu_snapshots_expose_only_safe_error_codes(monkeypatch): + namespace = _load_functions( + "_sanitize_diagnostic_error", + "_cpu_snapshot", + "_gpu_snapshot", + extra_globals={ + "_SAFE_DIAGNOSTIC_ERROR_CODES": SAFE_DIAGNOSTIC_ERROR_CODES, + "SAFE_COMPUTE_USAGE_LIMIT_PERCENT": 90, + "SAFE_MEMORY_OCCUPANCY_LIMIT_PERCENT": 75, + "_relative_percent": lambda numerator, denominator: round((numerator / denominator) * 100, 1) if denominator > 0 else None, + "_linux_cpu_usage_percent": lambda: None, + "get_gpu_runtime_info": lambda: { + "available": False, + "error": "driver init failed for /dev/nvidia0", + "devices": [], + }, + }, + ) + + def _raise_loadavg_error(): + raise OSError("cannot read /proc/loadavg") + + monkeypatch.setattr(os, "getloadavg", _raise_loadavg_error) + + cpu_payload = namespace["_cpu_snapshot"]() + gpu_payload = namespace["_gpu_snapshot"]() + + assert cpu_payload["error"] == "cpu_load_unavailable" + assert "/proc/loadavg" not in cpu_payload["error"] + assert gpu_payload["error"] == "gpu_runtime_unavailable" + assert "/dev/nvidia0" not in gpu_payload["error"]