diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 239520b..4357ddc 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -32,6 +32,12 @@ jobs: - name: Checkout repository uses: actions/checkout@v5 + - name: Setup Python + if: matrix.language == 'python' + uses: actions/setup-python@v6 + with: + python-version: '3.13' + - name: Initialize CodeQL uses: github/codeql-action/init@v4 with: diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..88555e6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,58 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +.Python +*.egg +*.egg-info/ +dist/ +build/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.whl +.installed.cfg + +# Virtual environments +.venv/ +venv/ +env/ +ENV/ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ + +# IDEs +.idea/ +.vscode/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log + +# Environment variables +.env +.env.* +!.env.example + +# Node +node_modules/ +npm-debug.log* + +# Distribution +*.tar.gz +*.zip diff --git a/backend/auth.py b/backend/auth.py index 3e4a6bf..59e49d8 100644 --- a/backend/auth.py +++ b/backend/auth.py @@ -2,7 +2,7 @@ import os import bcrypt import secrets -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Optional @@ -10,29 +10,76 @@ from fastapi import Depends, HTTPException, status from fastapi.security import OAuth2PasswordBearer +logger = logging.getLogger(__name__) + def _resolve_secret_key() -> tuple[str, bool]: configured = str(os.getenv("SECRET_KEY") or "").strip() if configured: return configured, False - fallback_path = Path( - os.getenv( - "SECRET_KEY_FILE", - str(Path(os.getenv("TEMP") or "/tmp") / "codeai_jwt_secret.key"), - ) - ) - try: - fallback_path.parent.mkdir(parents=True, exist_ok=True) - if fallback_path.exists(): - cached_secret = fallback_path.read_text(encoding="utf-8").strip() - if cached_secret: + configured_file = str(os.getenv("SECRET_KEY_FILE") or "").strip() + if configured_file: + fallback_path = Path(configured_file).expanduser() + try: + if fallback_path.exists(): + if not fallback_path.is_file(): + raise RuntimeError( + f"SECRET_KEY_FILE is configured but is not a file: {fallback_path}" + ) + cached_secret = fallback_path.read_text(encoding="utf-8").strip() + if not cached_secret: + raise RuntimeError( + f"SECRET_KEY_FILE is configured but empty: {fallback_path}" + ) return cached_secret, True - generated_secret = secrets.token_urlsafe(48) - fallback_path.write_text(generated_secret, encoding="utf-8") - return generated_secret, True - except Exception: - return secrets.token_urlsafe(48), True + + fallback_path.parent.mkdir(parents=True, exist_ok=True) + generated_secret = secrets.token_urlsafe(48) + fd = os.open(str(fallback_path), os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600) + try: + with os.fdopen(fd, "w", encoding="utf-8") as secret_file: + secret_file.write(generated_secret) + secret_file.write("\n") + except Exception: + try: + fallback_path.unlink() + except OSError: + logger.exception( + "Failed to clean up partially written SECRET_KEY_FILE at %s.", + str(fallback_path), + ) + raise + + try: + os.chmod(fallback_path, 0o600) + except OSError: + logger.exception( + "Failed to set permissions on SECRET_KEY_FILE at %s.", + str(fallback_path), + ) + raise RuntimeError( + f"Failed to set secure permissions on SECRET_KEY_FILE: {fallback_path}" + ) + + logger.warning( + "SECRET_KEY_FILE did 
not exist; generated and stored a new persistent secret at %s with mode 0600.",
+            str(fallback_path),
+        )
+        return generated_secret, True
+    except Exception as exc:
+        logger.exception(
+            "Failed to initialize SECRET_KEY_FILE at %s. Refusing to start with an ephemeral runtime secret.",
+            str(fallback_path),
+        )
+        raise RuntimeError(
+            f"Failed to initialize SECRET_KEY from SECRET_KEY_FILE: {fallback_path}"
+        ) from exc
+    else:
+        logger.error(
+            "SECRET_KEY/SECRET_KEY_FILE is not configured; generating ephemeral runtime secret that invalidates tokens on restart."
+        )
+        return secrets.token_urlsafe(48), True
 
 
 SECRET_KEY, SECRET_KEY_IS_RUNTIME_FALLBACK = _resolve_secret_key()
@@ -42,7 +89,6 @@ def _resolve_secret_key() -> tuple[str, bool]:
 )
 
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login")
-logger = logging.getLogger(__name__)
 
 
 def get_password_hash(password: str) -> str:
@@ -68,7 +114,7 @@
 ) -> str:
     to_encode = data.copy()
     if not no_expiry:
-        expire = datetime.utcnow() + (
+        expire = datetime.now(timezone.utc) + (
             expires_delta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
         )
         to_encode.update({"exp": expire})
diff --git a/backend/auth_router.py b/backend/auth_router.py
index 0fb4767..220da3c 100644
--- a/backend/auth_router.py
+++ b/backend/auth_router.py
@@ -1,4 +1,4 @@
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from secrets import compare_digest, randbelow, token_urlsafe
 import base64
 import os
@@ -7,7 +7,7 @@
 from fastapi import APIRouter, Depends, HTTPException, Request, status
 from fastapi.security import OAuth2PasswordRequestForm
 from typing import Optional, cast
-from pydantic import BaseModel, EmailStr
+from pydantic import BaseModel, ConfigDict, EmailStr
 from sqlalchemy.orm import Session
 from webauthn import (
     generate_authentication_options,
@@ -82,8 +82,7 @@ class UserResponse(BaseModel):
     business_registration_number: Optional[str] = None
     representative_name: Optional[str] = None
 
-    class Config:
-        from_attributes = True
+    model_config = ConfigDict(from_attributes=True)
 
 
 class Token(BaseModel):
@@ -254,7 +253,7 @@ class PasswordRecoveryResetResponse(BaseModel):
 
 
 def _issue_recovery_token(prefix: str) -> tuple[str, datetime]:
-    expires_at = datetime.utcnow() + timedelta(minutes=10)
+    expires_at = datetime.now(timezone.utc) + timedelta(minutes=10)
     return f"{prefix}_{token_urlsafe(24)}", expires_at
 
 
@@ -273,7 +272,7 @@ def _normalize_password_recovery_scope(scope: str) -> str:
 
 
 def _purge_expired_password_recovery_sessions() -> None:
-    now = datetime.utcnow()
+    now = datetime.now(timezone.utc)
     expired_tokens = [
         session_token
         for session_token, session_state in _password_recovery_store.items()
@@ -380,7 +379,7 @@ def start_passkey_registration(
         "user_id": int(user.id),
         "challenge": _to_base64url(options.challenge),
         "device_label": str(payload.device_label or "이 기기 패스키").strip() or "이 기기 패스키",
-        "expires_at": datetime.utcnow() + timedelta(minutes=5),
+        "expires_at": datetime.now(timezone.utc) + timedelta(minutes=5),
         "user_handle": user_handle,
         "rp_id": rp_id,
         "expected_origin": expected_origin,
@@ -403,7 +402,7 @@ def finish_passkey_registration(
         raise HTTPException(status_code=404, detail="패스키 등록 세션을 찾을 수 없습니다")
 
     expires_at = state.get("expires_at")
-    if not isinstance(expires_at, datetime) or expires_at <= datetime.utcnow():
+    if not isinstance(expires_at, datetime) or expires_at <= datetime.now(timezone.utc):
         _passkey_registration_store.pop(payload.registration_token, None)
         raise HTTPException(status_code=410, detail="패스키 등록 세션이 만료되었습니다")
 
@@ -430,7 +429,7 @@ def finish_passkey_registration(
     user.passkey_public_key = _to_base64url(verification.credential_public_key)
     user.passkey_device_label 
= str(state.get("device_label") or "이 기기 패스키") user.passkey_sign_count = int(verification.sign_count) - user.passkey_registered_at = datetime.utcnow() + user.passkey_registered_at = datetime.now(timezone.utc) db.add(user) db.commit() _passkey_registration_store.pop(payload.registration_token, None) @@ -465,7 +464,7 @@ def start_passkey_login( ) _passkey_login_store[str(user.email)] = { "challenge": _to_base64url(options.challenge), - "expires_at": datetime.utcnow() + timedelta(minutes=5), + "expires_at": datetime.now(timezone.utc) + timedelta(minutes=5), "credential_id": str(user.passkey_credential_id), "rp_id": rp_id, "expected_origin": expected_origin, @@ -492,7 +491,7 @@ def finish_passkey_login( raise HTTPException(status_code=404, detail="패스키 로그인 세션을 찾을 수 없습니다") expires_at = state.get("expires_at") - if not isinstance(expires_at, datetime) or expires_at <= datetime.utcnow(): + if not isinstance(expires_at, datetime) or expires_at <= datetime.now(timezone.utc): _passkey_login_store.pop(str(user.email), None) raise HTTPException(status_code=410, detail="패스키 로그인 세션이 만료되었습니다") @@ -581,7 +580,7 @@ def verify_password_recovery_identity(payload: PasswordRecoveryVerifyIdentityReq raise HTTPException(status_code=404, detail="복구 세션을 찾을 수 없습니다") expires_at = session_state.get("expires_at") - if not isinstance(expires_at, datetime) or expires_at <= datetime.utcnow(): + if not isinstance(expires_at, datetime) or expires_at <= datetime.now(timezone.utc): _password_recovery_store.pop(payload.recovery_session_token, None) raise HTTPException(status_code=410, detail="복구 세션이 만료되었습니다") @@ -638,7 +637,7 @@ def reset_password_via_recovery( raise HTTPException(status_code=403, detail="본인확인 검증이 완료되지 않았습니다") reset_expires_at = session_state.get("reset_expires_at") - if not isinstance(reset_expires_at, datetime) or reset_expires_at <= datetime.utcnow(): + if not isinstance(reset_expires_at, datetime) or reset_expires_at <= datetime.now(timezone.utc): _password_recovery_store.pop(session_token, None) raise HTTPException(status_code=410, detail="재설정 토큰이 만료되었습니다") diff --git a/backend/llm/orchestrator.py b/backend/llm/orchestrator.py index 0bf7c48..d60218e 100644 --- a/backend/llm/orchestrator.py +++ b/backend/llm/orchestrator.py @@ -2684,6 +2684,7 @@ class OrchestrationAcceptedResponse(BaseModel): _ORCHESTRATION_PROGRESS_STORE: Dict[str, Dict[str, Any]] = {} +_ORCHESTRATION_PROGRESS_FILE_LOCK = threading.Lock() def _runtime_progress_root() -> Path: @@ -2694,9 +2695,13 @@ def _runtime_progress_root() -> Path: return progress_root -def _orchestration_progress_path(run_id: str) -> Path: - safe_run_id = re.sub(r"[^a-zA-Z0-9_.-]+", "-", str(run_id or "unknown")).strip("-") or "unknown" - return _runtime_progress_root() / f"{safe_run_id}.json" +def _orchestration_progress_file_path(run_id: str) -> Path: + normalized_run_id = str(run_id if run_id is not None else "unknown") + if normalized_run_id == "": + normalized_run_id = "unknown" + runtime_root = _runtime_progress_root().resolve() + file_name = f"{hashlib.sha256(normalized_run_id.encode('utf-8')).hexdigest()}.json" + return runtime_root / file_name def _build_progress_poll_url(run_id: str) -> str: @@ -2712,8 +2717,44 @@ def _save_orchestration_progress(run_id: str, payload: Dict[str, Any]) -> Dict[s normalized["run_id"] = str(run_id or normalized.get("run_id") or "") normalized.setdefault("updated_at", datetime.utcnow().isoformat() + "Z") _ORCHESTRATION_PROGRESS_STORE[normalized["run_id"]] = normalized - progress_path = 
_orchestration_progress_path(normalized["run_id"]) - progress_path.write_text(json.dumps(normalized, ensure_ascii=False, indent=2), encoding="utf-8") + progress_path = _orchestration_progress_file_path(normalized["run_id"]) + trusted_temp_prefix = ( + f"progress-{hashlib.sha256(normalized['run_id'].encode('utf-8')).hexdigest()}." + if normalized["run_id"] + else "progress-unknown." + ) + with _ORCHESTRATION_PROGRESS_FILE_LOCK: + temp_path: Optional[Path] = None + try: + with tempfile.NamedTemporaryFile( + mode="w", + encoding="utf-8", + dir=str(progress_path.parent), + prefix=trusted_temp_prefix, + suffix=".tmp", + delete=False, + ) as temp_file: + temp_file.write(json.dumps(normalized, ensure_ascii=False, indent=2)) + temp_file.flush() + os.fsync(temp_file.fileno()) + temp_path = Path(temp_file.name) + os.replace(temp_path, progress_path) + except Exception: + if temp_path is not None: + try: + temp_path.unlink(missing_ok=True) + except Exception: + logger.warning( + "Failed to remove temporary orchestration progress file %s", + str(temp_path), + exc_info=True, + ) + logger.warning( + "Failed to write orchestration progress file at %s", + str(progress_path), + exc_info=True, + ) + return dict(_ORCHESTRATION_PROGRESS_STORE.get(normalized["run_id"], {})) return normalized @@ -2721,14 +2762,16 @@ def _load_orchestration_progress(run_id: str) -> Dict[str, Any]: cached = _ORCHESTRATION_PROGRESS_STORE.get(str(run_id or "")) if isinstance(cached, dict) and cached: return dict(cached) - progress_path = _orchestration_progress_path(run_id) + progress_path = _orchestration_progress_file_path(run_id) try: - if progress_path.exists() and progress_path.is_file(): - payload = json.loads(progress_path.read_text(encoding="utf-8")) - if isinstance(payload, dict): - _ORCHESTRATION_PROGRESS_STORE[str(run_id or "")] = dict(payload) - return dict(payload) + with _ORCHESTRATION_PROGRESS_FILE_LOCK: + if progress_path.exists() and progress_path.is_file(): + payload = json.loads(progress_path.read_text(encoding="utf-8")) + if isinstance(payload, dict): + _ORCHESTRATION_PROGRESS_STORE[str(run_id or "")] = dict(payload) + return dict(payload) except Exception: + logger.error("Failed to load orchestration progress for run_id=%s", str(run_id or ""), exc_info=True) return {} return {} diff --git a/backend/main.py b/backend/main.py index 65c89cc..516b298 100644 --- a/backend/main.py +++ b/backend/main.py @@ -723,6 +723,24 @@ def _relative_percent(numerator: float, denominator: float) -> Optional[float]: return round((numerator / denominator) * 100, 1) +_SAFE_DIAGNOSTIC_ERROR_CODES = { + "cpu_load_unavailable", + "gpu_runtime_unavailable", + "memory_snapshot_unavailable", + "queue_runtime_unavailable", +} + + +def _sanitize_diagnostic_error(raw_error: Any, fallback: str) -> Optional[str]: + if raw_error is None: + return None + if isinstance(raw_error, str): + normalized = raw_error.strip().lower() + if normalized in _SAFE_DIAGNOSTIC_ERROR_CODES: + return normalized + return fallback + + def _linux_memory_snapshot() -> Optional[Dict[str, Any]]: meminfo_path = "/proc/meminfo" if not os.path.exists(meminfo_path): @@ -739,7 +757,12 @@ def _linux_memory_snapshot() -> Optional[Dict[str, Any]]: if number.isdigit(): values[key.strip()] = int(number) except Exception as exc: - return {"error": str(exc)} + return { + "error": _sanitize_diagnostic_error( + exc, + "memory_snapshot_unavailable", + ) + } total_kb = values.get("MemTotal", 0) available_kb = values.get("MemAvailable", values.get("MemFree", 0)) @@ -761,7 +784,12 @@ def 
_windows_memory_snapshot() -> Optional[Dict[str, Any]]: if not kernel32.GlobalMemoryStatusEx(ctypes.byref(status)): return None except Exception as exc: - return {"error": str(exc)} + return { + "error": _sanitize_diagnostic_error( + exc, + "memory_snapshot_unavailable", + ) + } total_bytes = int(status.ullTotalPhys) available_bytes = int(status.ullAvailPhys) @@ -778,12 +806,18 @@ def _memory_snapshot() -> Dict[str, Any]: snapshot = _linux_memory_snapshot() if snapshot is None and os.name == "nt": snapshot = _windows_memory_snapshot() - if not snapshot: - return { + if not snapshot or snapshot.get("error"): + payload: Dict[str, Any] = { "available": False, "state": "warning", "note": "메모리 사용량을 수집하지 못했습니다.", } + if snapshot: + payload["error"] = _sanitize_diagnostic_error( + snapshot.get("error"), + "memory_snapshot_unavailable", + ) + return payload usage_percent = snapshot.get("usage_percent") critical_percent = max( @@ -868,7 +902,7 @@ def _cpu_snapshot() -> Dict[str, Any]: usage_percent: Optional[float] = None note = "CPU 부하가 정상 범위입니다." state = "ok" - error_message = "" + error_code: Optional[str] = None warning_percent = min( SAFE_COMPUTE_USAGE_LIMIT_PERCENT, int(os.getenv("RUNTIME_CPU_WARNING_PERCENT", str(SAFE_MEMORY_OCCUPANCY_LIMIT_PERCENT)) or SAFE_MEMORY_OCCUPANCY_LIMIT_PERCENT), @@ -883,7 +917,7 @@ def _cpu_snapshot() -> Dict[str, Any]: getloadavg = cast(Any, getattr(os, "getloadavg")) load_1m = round(float(getloadavg()[0]), 2) except Exception as exc: - error_message = str(exc) + error_code = _sanitize_diagnostic_error(exc, "cpu_load_unavailable") if load_1m is not None and cpu_count > 0: load_ratio_percent = _relative_percent(load_1m, cpu_count) @@ -911,29 +945,28 @@ def _cpu_snapshot() -> Dict[str, Any]: "load_ratio_percent": load_ratio_percent, "usage_percent": usage_percent, } - if error_message: - payload["error"] = error_message + if error_code: + payload["error"] = error_code return payload def _gpu_snapshot() -> Dict[str, Any]: gpu_runtime = get_gpu_runtime_info() + gpu_runtime_data = gpu_runtime if isinstance(gpu_runtime, dict) else {} + gpu_error = _sanitize_diagnostic_error( + gpu_runtime_data.get("error"), + "gpu_runtime_unavailable", + ) devices = ( - gpu_runtime.get("devices", []) - if isinstance(gpu_runtime, dict) - else [] + gpu_runtime_data.get("devices", []) ) - if not gpu_runtime.get("available"): + if not gpu_runtime_data.get("available"): return { "available": False, "state": "warning", "note": "GPU 런타임이 감지되지 않았습니다. 
CPU fallback 또는 드라이버 상태를 확인하세요.", "devices": [], - "error": ( - gpu_runtime.get("error") - if isinstance(gpu_runtime, dict) - else None - ), + "error": gpu_error or "gpu_runtime_unavailable", } peak_usage = 0.0 @@ -1031,6 +1064,8 @@ def _runtime_health_payload() -> Dict[str, Any]: from backend.marketplace.router import get_ad_queue_runtime_status queue_runtime = get_ad_queue_runtime_status() except Exception as exc: + logger.exception("Failed to load ad queue runtime status") + safe_queue_error = "queue_runtime_unavailable" queue_runtime = { "redis_queue": { "available": False, @@ -1038,7 +1073,7 @@ def _runtime_health_payload() -> Dict[str, Any]: "note": "Redis queue 진단을 로드하지 못했습니다.", "connection_id": "redis:video_render_queue", "queue_name": "video_render_queue", - "error": str(exc), + "error": safe_queue_error, }, "ad_worker": { "available": False, @@ -1047,7 +1082,7 @@ def _runtime_health_payload() -> Dict[str, Any]: "connection_id": "redis:video_render_queue", "queue_name": "video_render_queue", "worker_id": "ad-render-worker-001", - "error": str(exc), + "error": safe_queue_error, }, } redis_queue = queue_runtime.get("redis_queue", {}) diff --git a/backend/marketplace/router.py b/backend/marketplace/router.py index fc322c7..55cda54 100644 --- a/backend/marketplace/router.py +++ b/backend/marketplace/router.py @@ -595,15 +595,20 @@ def _persist_progress(*, percent: int, step: str, state: str, message: str) -> N ), ) except Exception as exc: + logger.exception( + "Marketplace feature orchestrate stream failed run_id=%s", + request.run_id, + ) + public_error_message = "라이브뷰 실행 중 오류가 발생했습니다. 잠시 후 다시 시도해주세요." local_metadata["popup_state"] = "failed" local_metadata["last_event"] = "failed" - local_metadata["error"] = str(exc) + local_metadata["error"] = public_error_message local_metadata["updated_at"] = _utc_now_iso() - _persist_progress(percent=100, step="failed", state="failed", message=str(exc)) + _persist_progress(percent=100, step="failed", state="failed", message=public_error_message) local_stage_run = _set_feature_metadata(local_stage_run, local_metadata) - local_stage_run = _apply_feature_popup_state(local_stage_run, "failed", str(exc)) + local_stage_run = _apply_feature_popup_state(local_stage_run, "failed", public_error_message) save_stage_run(local_stage_run) - yield _build_feature_sse_event("failed", {"run_id": request.run_id, "state": "failed", "message": str(exc)}) + yield _build_feature_sse_event("failed", {"run_id": request.run_id, "state": "failed", "message": public_error_message}) yield _build_feature_sse_event( "progress", _build_feature_progress_payload( @@ -611,7 +616,7 @@ def _persist_progress(*, percent: int, step: str, state: str, message: str) -> N percent=100, step="failed", state="failed", - message=str(exc), + message=public_error_message, ), ) @@ -1054,9 +1059,10 @@ def get_ad_queue_runtime_status() -> Dict[str, Dict[str, Any]]: if redis_client is not None: try: queue_depth = int(redis_client.llen(VIDEO_RENDER_QUEUE_NAME)) - except RedisError as exc: + except RedisError: + logger.exception("Failed to read Redis queue depth for health diagnostics") redis_available = False - redis_error = str(exc) + redis_error = "redis_queue_unavailable" with _ad_worker_lock: started_at = _ad_worker_runtime.get("started_at") diff --git a/frontend/frontend/hooks/use-feature-orchestrator.ts b/frontend/frontend/hooks/use-feature-orchestrator.ts index ad39af5..7cb8b72 100644 --- a/frontend/frontend/hooks/use-feature-orchestrator.ts +++ 
b/frontend/frontend/hooks/use-feature-orchestrator.ts
@@ -358,7 +358,7 @@ function buildDefaultCatalogItem(featureId: string): FeatureCatalogItem {
   const meta = FEATURE_EXPERIENCE_META[featureId] || FEATURE_EXPERIENCE_META['ai-sheet'];
   return {
     feature_id: featureId,
-    title: meta.popupKicker.replace('AI ', 'AI '),
+    title: meta.popupKicker,
     summary: meta.launcherSummary,
     popup_mode: preset.contextTags[1] || meta.outputKind,
     status: 'enabled',
@@ -958,4 +958,4 @@ export function useFeatureOrchestrator() {
     progressSnapshot,
     progressHistory,
   };
-}
\ No newline at end of file
+}
diff --git a/gpu-llm-server/custom-server/server.py b/gpu-llm-server/custom-server/server.py
index 606bccf..1cb3cf0 100644
--- a/gpu-llm-server/custom-server/server.py
+++ b/gpu-llm-server/custom-server/server.py
@@ -176,7 +176,7 @@ def load_model():
         logger.error(f"Failed to load model: {e}")
         model = None
         tokenizer = None
-        model_load_error = str(e)
+        model_load_error = "model_load_failed"
         logger.warning("Server will stay up without a loaded model.")
diff --git a/pyproject.toml b/pyproject.toml
index 8ceecb9..1bd5e2d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ dependencies = [
     "httpx>=0.28,<1.0",
     "requests>=2.32,<3.0",
     "redis>=5.2,<6.0",
-    "Pillow>=10.4,<12.0",
+    "Pillow>=12.2,<13.0",
     "torch>=2.6,<3.0",
     "qdrant-client>=1.12,<2.0",
     "grpcio>=1.71,<2.0",
diff --git a/run_profiler_backend.py b/run_profiler_backend.py
index efceebd..55c66ab 100644
--- a/run_profiler_backend.py
+++ b/run_profiler_backend.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import ipaddress
 import logging
 import os
 import socket
@@ -23,9 +24,44 @@
 logger = logging.getLogger(__name__)
 
 
+def _is_container_runtime() -> bool:
+    return (
+        Path("/.dockerenv").exists()
+        or Path("/run/.containerenv").exists()
+        or bool(os.getenv("KUBERNETES_SERVICE_HOST"))
+    )
+
+
 def _default_profiler_host() -> str:
-    if os.path.exists("/.dockerenv") or os.getenv("DOTNET_RUNNING_IN_CONTAINER") == "true":
-        return "0.0.0.0"
     return "127.0.0.1"
+
+
+def _resolve_profiler_host() -> str:
+    requested_host = (os.getenv("BACKEND_PROFILER_HOST") or _default_profiler_host()).strip()
+    allow_remote = (os.getenv("BACKEND_PROFILER_ALLOW_REMOTE", "") or "").strip().lower() in {"1", "true", "yes", "on"} or _is_container_runtime()
+    if requested_host == "localhost":
+        try:
+            infos = socket.getaddrinfo("localhost", None)
+            if infos and all(ipaddress.ip_address(info[4][0]).is_loopback for info in infos):
+                return requested_host
+        except Exception:
+            logger.warning("[WARN] failed to resolve localhost loopback addresses", exc_info=True)
+        logger.warning("[WARN] localhost does not resolve to loopback only; fallback to 127.0.0.1")
+        return "127.0.0.1"
+    if requested_host in {"127.0.0.1", "::1"}:
+        return requested_host
+    try:
+        requested_ip = ipaddress.ip_address(requested_host)
+    except (TypeError, ValueError):
+        logger.warning("[WARN] hostname profiler host=%s is not allowed; fallback to 127.0.0.1", requested_host)
+        return "127.0.0.1"
+    if requested_ip.is_loopback:
+        return requested_host
+    if allow_remote:
+        if requested_ip.is_unspecified:
+            logger.warning("[WARN] profiler backend is binding to all interfaces (host=%s)", requested_host)
+        return requested_host
+    logger.warning("[WARN] remote profiler host=%s blocked; set BACKEND_PROFILER_ALLOW_REMOTE=true to allow", requested_host)
+    return "127.0.0.1"
@@ -60,7 +96,7 @@ def main() -> None:
     import uvicorn
 
-    host = os.getenv("BACKEND_PROFILER_HOST", 
_default_profiler_host())
+    host = _resolve_profiler_host()
     port = _resolve_bind_port(host, int(os.getenv("BACKEND_PROFILER_PORT", "8000")))
     logger.info("[OK] profiler backend bind target: http://%s:%s", host, port)
     uvicorn.run(app, host=host, port=port, reload=False)
diff --git a/tests/test_auth_router_security.py b/tests/test_auth_router_security.py
index 5b861c5..284c533 100644
--- a/tests/test_auth_router_security.py
+++ b/tests/test_auth_router_security.py
@@ -1,7 +1,7 @@
 import importlib
 import sys
 import types
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from types import SimpleNamespace
 
 import pytest
@@ -50,6 +50,7 @@ class _StubUser:
 
     fake_models.User = _StubUser
 
+
     monkeypatch.setitem(sys.modules, "backend.database", fake_database)
     monkeypatch.setitem(sys.modules, "backend.models", fake_models)
     return importlib.import_module("backend.auth_router")
@@ -96,7 +97,7 @@
         "verified": False,
         "verification_code": "654321",
         "verification_attempts": 0,
-        "expires_at": datetime.utcnow() + timedelta(minutes=5),
+        "expires_at": datetime.now(timezone.utc) + timedelta(minutes=5),
     }
     payload = auth_router.PasswordRecoveryVerifyIdentityRequest(
         recovery_session_token=recovery_session_token,
@@ -123,8 +124,8 @@
         "scope": "admin",
         "verified": False,
         "reset_token": "reset_token",
-        "reset_expires_at": datetime.utcnow() + timedelta(minutes=5),
-        "expires_at": datetime.utcnow() + timedelta(minutes=5),
+        "reset_expires_at": datetime.now(timezone.utc) + timedelta(minutes=5),
+        "expires_at": datetime.now(timezone.utc) + timedelta(minutes=5),
     }
 
     with pytest.raises(HTTPException) as exc_info:
@@ -151,8 +152,8 @@
         "verified": True,
         "identity_session_token": "identity-proof",
         "reset_token": "reset_token",
-        "reset_expires_at": datetime.utcnow() + timedelta(minutes=5),
-        "expires_at": datetime.utcnow() + timedelta(minutes=5),
+        "reset_expires_at": datetime.now(timezone.utc) + timedelta(minutes=5),
+        "expires_at": datetime.now(timezone.utc) + timedelta(minutes=5),
     }
 
     response = auth_router.reset_password_via_recovery(
diff --git a/tests/test_health_diagnostics_sanitization.py b/tests/test_health_diagnostics_sanitization.py
new file mode 100644
index 0000000..2eaadc0
--- /dev/null
+++ b/tests/test_health_diagnostics_sanitization.py
@@ -0,0 +1,105 @@
+import ast
+import os
+from pathlib import Path
+from typing import Any, Dict, List, Optional, cast
+
+
+MAIN_PATH = Path(__file__).resolve().parent.parent / "backend" / "main.py"
+SAFE_DIAGNOSTIC_ERROR_CODES = {
+    "cpu_load_unavailable",
+    "gpu_runtime_unavailable",
+    "memory_snapshot_unavailable",
+    "queue_runtime_unavailable",
+}
+
+
+def _load_functions(*names: str, extra_globals: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+    tree = ast.parse(MAIN_PATH.read_text(encoding="utf-8-sig"), filename=str(MAIN_PATH))
+    selected = [
+        node
+        for node in tree.body
+        if isinstance(node, ast.FunctionDef) and node.name in names
+    ]
+    namespace: Dict[str, Any] = {
+        "Any": Any,
+        "Dict": Dict,
+        "List": List,
+        "Optional": Optional,
+        "Path": Path,
+        "cast": cast,
+        "os": os,
+    }
+    if extra_globals:
+        namespace.update(extra_globals)
+    exec(compile(ast.Module(body=selected, type_ignores=[]), str(MAIN_PATH), "exec"), namespace)
+    return namespace
+
+
+def test_sanitize_diagnostic_error_redacts_exception_text():
+    namespace = _load_functions(
+        "_sanitize_diagnostic_error",
+        extra_globals={
+            "_SAFE_DIAGNOSTIC_ERROR_CODES": SAFE_DIAGNOSTIC_ERROR_CODES,
+        },
+    )
+
+    sanitize = namespace["_sanitize_diagnostic_error"]
+
+    assert sanitize(PermissionError("cannot open /proc/meminfo"), "memory_snapshot_unavailable") == "memory_snapshot_unavailable"
+    assert sanitize("gpu_runtime_unavailable", 
"memory_snapshot_unavailable") == "gpu_runtime_unavailable" + assert sanitize(" GPU_Runtime_Unavailable ", "memory_snapshot_unavailable") == "gpu_runtime_unavailable" + assert sanitize(None, "memory_snapshot_unavailable") is None + + +def test_memory_snapshot_error_becomes_warning_payload(): + namespace = _load_functions( + "_sanitize_diagnostic_error", + "_memory_snapshot", + extra_globals={ + "_SAFE_DIAGNOSTIC_ERROR_CODES": SAFE_DIAGNOSTIC_ERROR_CODES, + "_linux_memory_snapshot": lambda: {"error": "permission denied: /proc/meminfo"}, + "_windows_memory_snapshot": lambda: None, + "SAFE_COMPUTE_USAGE_LIMIT_PERCENT": 90, + "SAFE_MEMORY_OCCUPANCY_LIMIT_PERCENT": 75, + }, + ) + + payload = namespace["_memory_snapshot"]() + + assert payload["available"] is False + assert payload["state"] == "warning" + assert payload["error"] == "memory_snapshot_unavailable" + assert "/proc/meminfo" not in payload["error"] + + +def test_cpu_and_gpu_snapshots_expose_only_safe_error_codes(monkeypatch): + namespace = _load_functions( + "_sanitize_diagnostic_error", + "_cpu_snapshot", + "_gpu_snapshot", + extra_globals={ + "_SAFE_DIAGNOSTIC_ERROR_CODES": SAFE_DIAGNOSTIC_ERROR_CODES, + "SAFE_COMPUTE_USAGE_LIMIT_PERCENT": 90, + "SAFE_MEMORY_OCCUPANCY_LIMIT_PERCENT": 75, + "_relative_percent": lambda numerator, denominator: round((numerator / denominator) * 100, 1) if denominator > 0 else None, + "_linux_cpu_usage_percent": lambda: None, + "get_gpu_runtime_info": lambda: { + "available": False, + "error": "driver init failed for /dev/nvidia0", + "devices": [], + }, + }, + ) + + def _raise_loadavg_error(): + raise OSError("cannot read /proc/loadavg") + + monkeypatch.setattr(os, "getloadavg", _raise_loadavg_error) + + cpu_payload = namespace["_cpu_snapshot"]() + gpu_payload = namespace["_gpu_snapshot"]() + + assert cpu_payload["error"] == "cpu_load_unavailable" + assert "/proc/loadavg" not in cpu_payload["error"] + assert gpu_payload["error"] == "gpu_runtime_unavailable" + assert "/dev/nvidia0" not in gpu_payload["error"]