diff --git a/cortex/cli.py b/cortex/cli.py index 9261a816..c74042de 100644 --- a/cortex/cli.py +++ b/cortex/cli.py @@ -1,6 +1,7 @@ import argparse import logging import os +import subprocess import sys import time from datetime import datetime @@ -19,6 +20,18 @@ format_package_list, ) from cortex.env_manager import EnvironmentManager, get_env_manager +from cortex.gpu_manager import ( + apply_gpu_mode_switch, + detect_gpu_switch_backend, + get_app_gpu_preference, + get_per_app_gpu_env, + list_app_gpu_preferences, + plan_gpu_mode_switch, + remove_app_gpu_preference, + run_command_with_env, + set_app_gpu_preference, +) +from cortex.hardware_detection import detect_gpu_mode, estimate_gpu_battery_impact from cortex.installation_history import InstallationHistory, InstallationStatus, InstallationType from cortex.llm.interpreter import CommandInterpreter from cortex.network_config import NetworkConfig @@ -1013,6 +1026,246 @@ def status(self): doctor = SystemDoctor() return doctor.run_checks() + def gpu_battery(self, args: argparse.Namespace | None = None) -> int: + """ + Estimate battery impact based on current GPU usage and mode. + + Prints: + - Heuristic estimates (unless --measured-only) + - Measured battery discharge (if available) + - Measured NVIDIA GPU power draw (when supported by nvidia-smi) + """ + try: + data = estimate_gpu_battery_impact() + except (OSError, PermissionError, ValueError, RuntimeError) as e: + cx_print(f"Failed to probe battery/GPU info: {e}", "error") + return 2 + + measured_only = bool(getattr(args, "measured_only", False)) + + mode = data.get("mode", "Unknown") + estimates = data.get("estimates") or {} + + measured = data.get("measured") or {} + battery = measured.get("battery") or {} + + # NVIDIA power draw (watts) - emitted by estimate_gpu_battery_impact() as "nvidia_power_w" + nvidia_watts = measured.get("nvidia_power_w") + + has_battery_data = bool(battery) + has_nvidia_data = nvidia_watts is not None + + cx_print(f"GPU Mode: {mode}", "info") + print() + + if not measured_only: + cx_print("Estimated power draw:", "info") + print(f"- Integrated GPU only: {estimates.get('integrated', {}).get('power', 'N/A')}") + print(f"- Hybrid (idle dGPU): {estimates.get('hybrid_idle', {}).get('power', 'N/A')}") + print(f"- NVIDIA active: {estimates.get('nvidia_active', {}).get('power', 'N/A')}") + print() + + cx_print("Estimated battery impact:", "info") + print(f"- Hybrid idle: {estimates.get('hybrid_idle', {}).get('impact', 'N/A')}") + print(f"- NVIDIA active: {estimates.get('nvidia_active', {}).get('impact', 'N/A')}") + print() + + if has_battery_data or has_nvidia_data: + cx_print("Measured (if available):", "info") + + if has_battery_data: + status = battery.get("status") + percent = battery.get("percent") + power_watts = battery.get("power_watts") + hours_remaining = battery.get("hours_remaining") + + if status: + print(f"- Battery status: {status}") + if percent is not None: + print(f"- Battery: {percent}%") + if power_watts is not None: + try: + print(f"- Battery draw: ~{float(power_watts):.2f} W") + except (TypeError, ValueError): + print(f"- Battery draw: ~{power_watts} W") + if hours_remaining is not None: + try: + print(f"- Est. time remaining: ~{float(hours_remaining):.2f} h") + except (TypeError, ValueError): + print(f"- Est. 
time remaining: ~{hours_remaining} h") + + if has_nvidia_data: + try: + print(f"- NVIDIA power draw: ~{float(nvidia_watts):.2f} W") + except (TypeError, ValueError): + print(f"- NVIDIA power draw: ~{nvidia_watts} W") + + print() + else: + if measured_only: + cx_print("No real measurements available on this system.", "warning") + cx_print("Tip: NVIDIA GPU power requires nvidia-smi.", "info") + cx_print( + "Tip: Battery draw requires BAT* metrics (may be unavailable on WSL).", "info" + ) + return 2 + + if not measured_only: + cx_print( + "Note: Estimates are heuristic and vary by hardware and workload.", + "warning", + ) + return 0 + + def gpu(self, args: argparse.Namespace) -> int: + """Handle GPU management commands (status, set, run, app). + + Args: + args: Parsed command-line arguments containing gpu_command subcommand. + + Returns: + int: 0 on success, 1 on error, 2 on missing backend or invalid usage. + """ + + if args.gpu_command == "status": + backend = detect_gpu_switch_backend() + mode = detect_gpu_mode() + apps = list_app_gpu_preferences() + cx_print(f"GPU mode: {mode}") + cx_print(f"Switch backend: {backend.value}") + cx_print(f"Per-app assignments: {len(apps)}") + cx_print( + "Tip: `cortex gpu set integrated|hybrid|nvidia --dry-run` to preview, or add `--execute` to apply." + ) + return 0 + + if args.gpu_command == "set": + # Defensive guard (argparse should prevent this, but keep runtime safety) + if getattr(args, "dry_run", False) and getattr(args, "execute", False): + cx_print("Error: --dry-run and --execute cannot be used together.", "error") + return 2 + + # Plan the switch; invalid modes raise ValueError + try: + plan = plan_gpu_mode_switch(args.mode) + except ValueError as e: + cx_print(f"Invalid target GPU mode: {e}", "error") + return 1 + + if plan is None: + cx_print("No supported GPU switch backend found.", "error") + cx_print( + "Tip: install/configure prime-select or system76-power, then retry.", + "info", + ) + return 2 + + # Always show the plan + cx_print(f"Backend: {plan.backend.value}") + cx_print(f"Target mode: {plan.target_mode}") + cx_print("Commands:") + for c in plan.commands: + cx_print(" " + " ".join(c)) + cx_print(f"Restart required: {plan.requires_restart}") + if getattr(plan, "notes", None): + cx_print(f"Notes: {plan.notes}") + + # Honor --dry-run explicitly: show plan only, do not prompt, do not execute + if getattr(args, "dry_run", False): + return 0 + + # Execute only when --execute is set (otherwise plan-only) + if getattr(args, "execute", False): + if not getattr(args, "yes", False): + console.print("\n⚠ This will run GPU switch commands with sudo.") + console.print("Proceed? 
[y/N]: ", end="") + try: + resp = input().strip().lower() + except (EOFError, KeyboardInterrupt): + resp = "" + if resp not in ("y", "yes"): + cx_print("Operation cancelled", "info") + return 0 + + return apply_gpu_mode_switch(plan, execute=True) + + # Default: plan-only (no prompt, no execution) + return 0 + + if args.gpu_command == "run": + cmd = list(args.cmd or []) + if cmd and cmd[0] == "--": + cmd = cmd[1:] + if not cmd: + cx_print("Missing command.", "error") + return 2 + + # Resolve env for the requested mode/app + if getattr(args, "nvidia", False): + env = get_per_app_gpu_env(use_nvidia=True) + elif getattr(args, "integrated", False): + env = get_per_app_gpu_env(use_nvidia=False) + elif getattr(args, "app", None): + env = get_per_app_gpu_env(app=args.app) + else: + cx_print("Specify one of: --app, --nvidia, or --integrated", "error") + return 2 + + # If we're launching an integrated run and env is empty, we must also + # explicitly UNSET PRIME vars that may be set in the parent shell. + integrated_case = bool(getattr(args, "integrated", False)) + app_case = bool(getattr(args, "app", None)) + + if (integrated_case or app_case) and not env: + cmd = [ + "env", + "-u", + "__NV_PRIME_RENDER_OFFLOAD", + "-u", + "__GLX_VENDOR_LIBRARY_NAME", + "-u", + "__VK_LAYER_NV_optimus", + "--", + ] + cmd + + return run_command_with_env(cmd, extra_env=env) + + if args.gpu_command == "app": + if args.app_action == "set": + try: + set_app_gpu_preference(args.app, args.mode) + except ValueError as e: + cx_print(f"Invalid per-app GPU assignment: {e}", "error") + return 1 + + cx_print(f"Saved: {args.app} -> {args.mode}") + return 0 + + if args.app_action == "get": + pref = get_app_gpu_preference(args.app) + if pref is None: + cx_print(f"{args.app}: GPU preference not set", "warning") + return 2 + cx_print(f"{args.app}: {pref}") + return 0 + + if args.app_action == "list": + apps = list_app_gpu_preferences() + for k, v in apps.items(): + console.print(f"{k} -> {v}") + return 0 + if args.app_action == "remove": + removed = remove_app_gpu_preference(args.app) + if removed: + cx_print(f"GPU preference removed for app '{args.app}'", "success") + return 0 + + cx_print(f"No GPU preference found for app '{args.app}'", "warning") + return 1 + + cx_print("Unknown gpu command") + return 2 + def wizard(self): """Interactive setup wizard for API key configuration""" show_banner() @@ -2030,6 +2283,8 @@ def show_rich_help(): table.add_row("demo", "See Cortex in action") table.add_row("wizard", "Configure API key") table.add_row("status", "System status") + table.add_row("gpu", "Hybrid GPU manager tools") + table.add_row("gpu-battery", "Estimate battery impact of current GPU usage") table.add_row("install ", "Install software") table.add_row("import ", "Import deps from package files") table.add_row("history", "View history") @@ -2130,6 +2385,64 @@ def main(): # Status command (includes comprehensive health checks) subparsers.add_parser("status", help="Show comprehensive system status and health checks") + # GPU battery estimation + gpu_battery_parser = subparsers.add_parser( + "gpu-battery", + help="Estimate battery impact of current GPU usage", + ) + + gpu_battery_parser.add_argument( + "--measured-only", + action="store_true", + help="Show only real measurements (if available)", + ) + + gpu_parser = subparsers.add_parser("gpu", help="Hybrid GPU manager tools") + gpu_sub = gpu_parser.add_subparsers(dest="gpu_command", required=True) + + gpu_sub.add_parser("status", help="Show GPU status") + + gpu_set = 
gpu_sub.add_parser("set", help="Switch GPU mode") + gpu_set.add_argument("mode", choices=["integrated", "hybrid", "nvidia"]) + + # Make --dry-run and --execute mutually exclusive + gpu_set_flags = gpu_set.add_mutually_exclusive_group() + gpu_set_flags.add_argument( + "--dry-run", + action="store_true", + help="Show the switch plan only (no sudo, no changes applied)", + ) + gpu_set_flags.add_argument( + "--execute", + action="store_true", + help="Execute GPU switch commands (sudo required)", + ) + + gpu_set.add_argument("-y", "--yes", action="store_true", help="Skip confirmation prompt") + + gpu_app = gpu_sub.add_parser("app", help="Per-app GPU assignment") + gpu_app_sub = gpu_app.add_subparsers(dest="app_action", required=True) + + gpu_app_sub.add_parser("list") + app_set = gpu_app_sub.add_parser("set") + app_set.add_argument("app") + app_set.add_argument("mode", choices=["nvidia", "integrated"]) + + app_get = gpu_app_sub.add_parser("get") + app_get.add_argument("app") + + app_rm = gpu_app_sub.add_parser("remove") + app_rm.add_argument("app") + + gpu_run = gpu_sub.add_parser("run") + + gpu_run_mode = gpu_run.add_mutually_exclusive_group(required=True) + gpu_run_mode.add_argument("--nvidia", action="store_true", help="Force NVIDIA GPU") + gpu_run_mode.add_argument("--integrated", action="store_true", help="Force integrated GPU") + gpu_run_mode.add_argument("--app", help="Use saved per-app GPU preference") + + gpu_run.add_argument("cmd", nargs=argparse.REMAINDER) + # Ask command ask_parser = subparsers.add_parser("ask", help="Ask a question about your system") ask_parser.add_argument("question", type=str, help="Natural language question") @@ -2531,6 +2844,11 @@ def main(): return 1 elif args.command == "env": return cli.env(args) + elif args.command == "gpu-battery": + return cli.gpu_battery(args) + elif args.command == "gpu": + return cli.gpu(args) + else: parser.print_help() return 1 diff --git a/cortex/gpu_manager.py b/cortex/gpu_manager.py new file mode 100644 index 00000000..30baaf54 --- /dev/null +++ b/cortex/gpu_manager.py @@ -0,0 +1,325 @@ +""" +Hybrid GPU Manager for Cortex Linux. + +This module supports: +- Per-app NVIDIA PRIME offload environment variables +- Real GPU mode switching via system backends (when available) + +Note: Real switching typically requires sudo and may require logout/reboot, +depending on the backend and desktop session. +""" + +from __future__ import annotations + +import json +import os +import shutil +import subprocess +import tempfile +from collections.abc import Mapping +from dataclasses import dataclass +from enum import Enum +from pathlib import Path + +PRIME_OFFLOAD_ENV: dict[str, str] = { + "__NV_PRIME_RENDER_OFFLOAD": "1", + "__GLX_VENDOR_LIBRARY_NAME": "nvidia", + "__VK_LAYER_NV_optimus": "NVIDIA_only", +} + + +class GPUSwitchBackend(str, Enum): + PRIME_SELECT = "prime-select" # Ubuntu/Debian (nvidia-prime) + SYSTEM76_POWER = "system76-power" # Pop!_OS / System76 + NONE = "none" + + +@dataclass(frozen=True) +class GPUSwitchPlan: + backend: GPUSwitchBackend + target_mode: str # "integrated" | "hybrid" | "nvidia" + commands: list[list[str]] + requires_restart: bool + notes: str + + +class GPUAppMode(str, Enum): + """Per-app GPU preference.""" + + NVIDIA = "nvidia" # Offload to NVIDIA when possible + INTEGRATED = "integrated" # Force integrated / no PRIME offload env + + +def _cortex_config_dir() -> Path: + """Return Cortex config directory. 
+ + Supports overriding for tests/portable installs via: + - CORTEX_CONFIG_DIR + """ + override = os.environ.get("CORTEX_CONFIG_DIR") + if override: + return Path(override).expanduser() + return Path.home() / ".cortex" + + +def _config_path() -> Path: + return _cortex_config_dir() / "config.yaml" + + +def _load_config() -> dict: + """Load ~/.cortex/config.yaml (best-effort). + + - If PyYAML is present, we use it. + - Otherwise we fall back to JSON (JSON is valid YAML 1.2). + """ + path = _config_path() + if not path.exists(): + return {} + + try: + text = path.read_text(encoding="utf-8") + except OSError: + return {} + + if not text.strip(): + return {} + + # Prefer YAML if available. + try: + import yaml # type: ignore + + data = yaml.safe_load(text) + return data if isinstance(data, dict) else {} + except Exception: + # Fallback to JSON + try: + data = json.loads(text) + return data if isinstance(data, dict) else {} + except Exception: + return {} + + +def _save_config(data: dict) -> None: + """Atomic write for ~/.cortex/config.yaml.""" + cfg_dir = _cortex_config_dir() + cfg_dir.mkdir(parents=True, exist_ok=True) + + # Prefer YAML if available. + serialized: str + try: + import yaml # type: ignore + + serialized = yaml.safe_dump(data, sort_keys=False) + except Exception: + serialized = json.dumps(data, indent=2, sort_keys=False) + + path = _config_path() + with tempfile.NamedTemporaryFile("w", delete=False, dir=str(cfg_dir), encoding="utf-8") as tf: + tf.write(serialized) + tmp = Path(tf.name) + os.replace(tmp, path) + + +def _get_gpu_apps_map(cfg: dict) -> dict[str, str]: + gpu = cfg.get("gpu") if isinstance(cfg.get("gpu"), dict) else {} + apps = gpu.get("apps") if isinstance(gpu.get("apps"), dict) else {} + # Normalize to str->str + out: dict[str, str] = {} + for k, v in apps.items(): + if isinstance(k, str) and isinstance(v, str): + out[k] = v + return out + + +def set_app_gpu_preference(app: str, mode: str) -> None: + """Persist a per-app GPU preference into ~/.cortex/config.yaml.""" + app_name = (app or "").strip() + if not app_name: + raise ValueError("app name is required") + + m = mode.lower().strip() + if m not in {GPUAppMode.NVIDIA.value, GPUAppMode.INTEGRATED.value}: + raise ValueError("mode must be 'nvidia' or 'integrated'") + + cfg = _load_config() + gpu = cfg.get("gpu") if isinstance(cfg.get("gpu"), dict) else {} + apps = gpu.get("apps") if isinstance(gpu.get("apps"), dict) else {} + apps[app_name] = m + gpu["apps"] = apps + cfg["gpu"] = gpu + _save_config(cfg) + + +def remove_app_gpu_preference(app: str) -> bool: + """Remove a per-app GPU preference. Returns True if removed.""" + app_name = (app or "").strip() + if not app_name: + return False + cfg = _load_config() + apps = _get_gpu_apps_map(cfg) + if app_name not in apps: + return False + apps.pop(app_name, None) + gpu = cfg.get("gpu") if isinstance(cfg.get("gpu"), dict) else {} + gpu["apps"] = apps + cfg["gpu"] = gpu + _save_config(cfg) + return True + + +def get_app_gpu_preference(app: str) -> str | None: + """Return stored preference for app, if any.""" + app_name = (app or "").strip() + if not app_name: + return None + cfg = _load_config() + apps = _get_gpu_apps_map(cfg) + val = apps.get(app_name) + if val in {GPUAppMode.NVIDIA.value, GPUAppMode.INTEGRATED.value}: + return val + return None + + +def list_app_gpu_preferences() -> dict[str, str]: + """Return mapping of app -> mode from config.""" + cfg = _load_config() + apps = _get_gpu_apps_map(cfg) + # Filter to known values. 
+ return { + k: v for k, v in apps.items() if v in {GPUAppMode.NVIDIA.value, GPUAppMode.INTEGRATED.value} + } + + +def get_per_app_gpu_env( + *, app: str | None = None, use_nvidia: bool | None = None +) -> dict[str, str]: + """Return environment variables for per-application GPU assignment. + + You can either: + - pass `use_nvidia=True/False` explicitly, OR + - pass `app=` and omit use_nvidia to read ~/.cortex/config.yaml. + + Args: + app: App name (used for lookup only). + use_nvidia: Force NVIDIA offload env vars. + """ + if use_nvidia is None and app: + pref = get_app_gpu_preference(app) + if pref == GPUAppMode.NVIDIA.value: + use_nvidia = True + elif pref == GPUAppMode.INTEGRATED.value: + use_nvidia = False + + if not use_nvidia: + return {} + + # Return a copy so callers can't mutate the module constant. + return dict(PRIME_OFFLOAD_ENV) + + +def detect_gpu_switch_backend() -> GPUSwitchBackend: + """ + Detect which system backend is available for real GPU mode switching. + + Returns: + GPUSwitchBackend: Detected backend or NONE if unsupported. + """ + if shutil.which("prime-select"): + return GPUSwitchBackend.PRIME_SELECT + if shutil.which("system76-power"): + return GPUSwitchBackend.SYSTEM76_POWER + return GPUSwitchBackend.NONE + + +def plan_gpu_mode_switch(target_mode: str) -> GPUSwitchPlan | None: + """ + Build a command plan to switch GPU mode using the available backend. + + Supported target_mode values: + - "integrated" + - "hybrid" + - "nvidia" + + Returns: + GPUSwitchPlan | None: Plan if a backend is available, else None. + """ + target = target_mode.lower().strip() + if target not in {"integrated", "hybrid", "nvidia"}: + raise ValueError(f"Invalid target_mode: {target_mode}") + + backend = detect_gpu_switch_backend() + + if backend == GPUSwitchBackend.PRIME_SELECT: + # prime-select: intel | nvidia | on-demand + mapping = { + "integrated": "intel", + "hybrid": "on-demand", + "nvidia": "nvidia", + } + cmd = ["sudo", "prime-select", mapping[target]] + return GPUSwitchPlan( + backend=backend, + target_mode=target, + commands=[cmd], + requires_restart=True, + notes="Uses nvidia-prime prime-select. Logout/reboot may be required.", + ) + + if backend == GPUSwitchBackend.SYSTEM76_POWER: + # system76-power graphics: integrated | nvidia | hybrid + cmd = ["sudo", "system76-power", "graphics", target] + return GPUSwitchPlan( + backend=backend, + target_mode=target, + commands=[cmd], + requires_restart=True, + notes="Uses system76-power graphics. Restart is typically required.", + ) + + return None + + +def run_command_with_env(cmd: list[str], extra_env: Mapping[str, str] | None = None) -> int: + """ + Run a command with optional extra environment variables. + + Args: + cmd: Command argv list. + extra_env: Extra env vars to merge into current environment. + + Returns: + int: Process return code. + """ + env = os.environ.copy() + if extra_env: + env.update(dict(extra_env)) + + completed = subprocess.run(cmd, env=env) + return int(completed.returncode) + + +def apply_gpu_mode_switch(plan: GPUSwitchPlan, *, execute: bool = False) -> int: + """Apply a switch plan. + + Args: + plan: A plan created by plan_gpu_mode_switch(). + execute: If False, do nothing and return 0 (dry-run). + + Returns: + int: 0 if successful (or dry-run), otherwise the failing command's exit code. 
+ """ + if not execute: + return 0 + for c in plan.commands: + rc = run_command_with_env(c) + if rc != 0: + return rc + return 0 + + +def get_gpu_profile(mode: str) -> dict[str, bool]: + return { + "use_integrated": mode == "Integrated", + "use_nvidia": mode == "NVIDIA", + "hybrid": mode == "Hybrid", + } diff --git a/cortex/hardware_detection.py b/cortex/hardware_detection.py index 7488a724..0fef3851 100644 --- a/cortex/hardware_detection.py +++ b/cortex/hardware_detection.py @@ -7,6 +7,8 @@ Issue: #253 """ +from __future__ import annotations + import builtins import contextlib import json @@ -752,3 +754,302 @@ def get_cpu_cores() -> int: print(f" Virtualization: {info.virtualization}") print("\n✅ Detection complete!") + + +def _run(cmd: list[str]) -> str: + """ + Run a system command and return stdout. + + Notes: + - Uses check=False so we can still parse stdout even when returncode != 0. + (Some driver/tool combinations emit useful output but exit non-zero.) + """ + try: + result = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + check=False, + ) + except OSError: + return "" + + return (result.stdout or "").strip() + + +def get_nvidia_power_draw_watts() -> float | None: + """ + Return real-time NVIDIA GPU power draw in Watts when available. + + Robust against: + - non-zero return codes (still parse stdout) + - extra units/text + - multi-GPU output (sums all GPUs) + - comma decimal separators (e.g., "123,4") + """ + try: + proc = subprocess.run( + ["nvidia-smi", "--query-gpu=power.draw", "--format=csv,noheader,nounits"], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + check=False, + ) + except OSError: + return None + + out = (proc.stdout or "").strip() + if not out: + return None + + vals: list[float] = [] + for line in out.splitlines(): + s = line.strip() + if not s or s.upper() == "N/A": + continue + # support both "123.4" and "123,4" + s = s.replace(",", ".") + try: + vals.append(float(s)) + except ValueError: + continue + + return float(sum(vals)) if vals else None + + +def detect_nvidia_gpu() -> bool: + """ + Best-effort NVIDIA presence check. + + Uses `_run(["nvidia-smi"])` to detect whether NVIDIA tooling/driver is available. + This is a lightweight, non-privileged check and does not switch GPU modes. + + Returns: + bool: True if `nvidia-smi` succeeds (NVIDIA GPU/driver detected), + False otherwise. + + Notes: + - Heuristic: `nvidia-smi` may be missing or blocked even on NVIDIA systems. + - On some hybrid laptops, NVIDIA may be detected but still idle. + """ + return bool(_run(["nvidia-smi"])) + + +def detect_gpu_mode() -> str: + """ + Detect GPU mode for NVIDIA Optimus hybrid systems. + + Returns: + "NVIDIA": NVIDIA GPU detected (nvidia-smi available) + "Hybrid": Non-NVIDIA GPU detected via lspci (e.g., AMD dGPU with iGPU) + "Integrated": No discrete GPU detected or lspci unavailable + + Note: This detection is NVIDIA-centric and may not accurately classify + non-NVIDIA discrete GPUs. Returns "Hybrid" for any PCI GPU when NVIDIA + is not present. + """ + if not _run(["lspci"]): + return "Integrated" + + if detect_nvidia_gpu(): + return "NVIDIA" + + return "Hybrid" + + +def estimate_gpu_battery_impact() -> dict[str, Any]: + """ + Estimate battery impact based on detected GPU usage mode. 
+ + This function combines: + - Best-effort GPU mode detection (Integrated / Hybrid / NVIDIA) + - Heuristic power and battery impact estimates + - Optional real measurements (battery + NVIDIA power draw) when available + + The function is safe to call in user space, does not require root access, + and gracefully degrades when system metrics are unavailable. + + Returns: + dict[str, Any]: { + "mode": str, # Integrated | Hybrid | NVIDIA + "current": str, # integrated | hybrid_idle | nvidia_active + "estimates": dict, # heuristic power & impact estimates + "measured": dict # optional real measurements (if available) + } + """ + mode = detect_gpu_mode() + nvidia_active = detect_nvidia_gpu() + + estimates = { + "integrated": { + "power": "~6–8 W", + "impact": "baseline (best battery life)", + }, + "hybrid_idle": { + "power": "~8–10 W", + "impact": "~10–15% less battery life", + }, + "nvidia_active": { + "power": "~18–25 W", + "impact": "~30–40% less battery life", + }, + } + + if mode == "Integrated": + current = "integrated" + elif nvidia_active: + current = "nvidia_active" + else: + current = "hybrid_idle" + + measured: dict[str, Any] = {} + + # ========================= + # REAL MEASUREMENTS (SAFE) + # ========================= + + # Battery metrics (Linux first, then WSL/Windows fallback) + try: + battery = get_battery_metrics() + if battery is None: + battery = get_windows_battery_metrics() + if battery: + measured["battery"] = battery + except Exception: + pass + + # NVIDIA power draw (best-effort) + try: + power_w = get_nvidia_power_draw_watts() + if power_w is not None: + measured["nvidia_power_w"] = power_w + except Exception: + pass + + result = { + "mode": mode, + "current": current, + "estimates": estimates, + } + + if measured: + result["measured"] = measured + + return result + + +def get_battery_metrics() -> dict[str, Any] | None: + """ + Best-effort Linux battery metrics via /sys/class/power_supply/BAT*. + Safe (no root). Returns None if unavailable. + """ + base = "/sys/class/power_supply" + if not os.path.isdir(base): + return None + + bats = sorted([d for d in os.listdir(base) if d.startswith("BAT")]) + if not bats: + return None + + bat_dir = os.path.join(base, bats[0]) + + def _read_str(name: str) -> str | None: + p = os.path.join(bat_dir, name) + try: + with open(p, encoding="utf-8") as f: + return f.read().strip() + except OSError: + return None + + def _read_int(name: str) -> int | None: + s = _read_str(name) + if s is None: + return None + try: + return int(s) + except ValueError: + return None + + status = _read_str("status") + percent = _read_int("capacity") + + # Power draw (if available). Units usually in µW (micro-watts). + power_now = _read_int("power_now") + if power_now is None: + # Some systems expose current_now (µA) + voltage_now (µV) + current_now = _read_int("current_now") + voltage_now = _read_int("voltage_now") + if current_now is not None and voltage_now is not None: + # (µA * µV) = pW => convert to W + power_watts = abs(current_now * voltage_now) / 1e12 + else: + power_watts = None + else: + power_watts = abs(power_now) / 1e6 + + out: dict[str, Any] = {} + if status: + out["status"] = status + if percent is not None: + out["percent"] = percent + if power_watts is not None: + out["power_watts"] = power_watts + + return out or None + + +def get_windows_battery_metrics() -> dict[str, Any] | None: + """ + Best-effort Windows/WSL battery metrics via PowerShell (if available). + Safe (no admin). Returns None if unavailable. 
+ """ + try: + import json + import subprocess + + # On Windows: "powershell" + # On WSL: typically "powershell.exe" exists if Windows interop is enabled + ps = "powershell" if os.name == "nt" else "powershell.exe" + + cmd = [ + ps, + "-NoProfile", + "-Command", + "Get-CimInstance Win32_Battery | " + "Select-Object -First 1 EstimatedChargeRemaining,BatteryStatus | " + "ConvertTo-Json -Compress", + ] + p = subprocess.run(cmd, capture_output=True, text=True, timeout=2) + + if p.returncode != 0 or not p.stdout.strip(): + return None + + data = json.loads(p.stdout) + + percent = data.get("EstimatedChargeRemaining") + status_code = data.get("BatteryStatus") + + # Minimal mapping (optional) + status_map = { + 1: "Discharging", + 2: "AC", + 3: "Fully Charged", + 4: "Low", + 5: "Critical", + 6: "Charging", + } + + out: dict[str, Any] = {} + if percent is not None: + try: + out["percent"] = int(percent) + except (TypeError, ValueError): + pass + if status_code is not None: + out["status"] = status_map.get(status_code, str(status_code)) + + return out or None + + except Exception: + return None diff --git a/docs/HYBRID_GPU_MANAGER.md b/docs/HYBRID_GPU_MANAGER.md new file mode 100644 index 00000000..92aade71 --- /dev/null +++ b/docs/HYBRID_GPU_MANAGER.md @@ -0,0 +1,394 @@ +# Hybrid GPU Manager Module + +**Issue:** Hybrid GPU (Intel/AMD + NVIDIA) switching causes latency and stuttering +**Status:** Ready for Review +**Scope:** Hybrid GPU Manager (state + per-app + switching + battery estimates) + +## Overview + +Hybrid GPU laptops can stutter when the wrong GPU mode is active (e.g., NVIDIA always-on) or when apps launch on an unintended GPU. This module adds a **hybrid GPU manager** to Cortex that: + +- Detects current GPU state (Integrated / Hybrid / NVIDIA) +- Supports per-app GPU assignment (run specific apps on NVIDIA or Integrated) +- Supports easy switching between modes (via supported backends) +- Estimates battery impact (heuristic + optional measurements when available) + +## Features + +| Feature | Description | +| ------------------------ | --------------------------------------------------------------------------------- | +| GPU State Detection | Detects current mode: `Integrated`, `Hybrid`, or `NVIDIA` | +| Switch Backend Detection | Detects supported switch tooling: `prime-select`, `system76-power` | +| Mode Switch Planning | Produces a safe plan (commands + restart requirement) | +| Mode Switch Execution | Executes switch commands (sudo) with opt-in confirmation | +| Per-App GPU Assignment | Stores app → GPU preference in `config.yaml` | +| GPU Run (Env Injection) | Runs a command with GPU env vars applied (`--nvidia`, `--integrated`, `--app`) | +| Battery Impact Estimates | Heuristic power/impact + best-effort measured NVIDIA power draw / battery metrics | + +______________________________________________________________________ + +## Quick Start + +### 1) Show current GPU status + +```bash +cortex gpu status +``` + +Expected output includes: + +- GPU mode (Integrated/Hybrid/NVIDIA) +- Switch backend (prime-select / system76-power / none) +- Per-app assignment count + +### 2) Set per-app GPU preference (persisted) + +```bash +export CORTEX_CONFIG_DIR="$(mktemp -d)" +echo "Using CORTEX_CONFIG_DIR=$CORTEX_CONFIG_DIR" + +cortex gpu app set blender nvidia +cortex gpu app get blender +cortex gpu app list + +echo "--- config.yaml (persist evidence) ---" +cat "$CORTEX_CONFIG_DIR/config.yaml" +``` + +### 3) Run a command with GPU env injected + +```bash +# Force integrated +cortex gpu run 
--integrated -- bash -lc 'echo "__NV_PRIME_RENDER_OFFLOAD=${__NV_PRIME_RENDER_OFFLOAD:-empty}"; echo "__GLX_VENDOR_LIBRARY_NAME=${__GLX_VENDOR_LIBRARY_NAME:-empty}"' + +# Force nvidia +cortex gpu run --nvidia -- bash -lc 'echo "__NV_PRIME_RENDER_OFFLOAD=${__NV_PRIME_RENDER_OFFLOAD:-empty}"; echo "__GLX_VENDOR_LIBRARY_NAME=${__GLX_VENDOR_LIBRARY_NAME:-empty}"' + +# Per-app assignment -> run --app +cortex gpu app set blender nvidia +cortex gpu run --app blender -- bash -lc 'echo "__NV_PRIME_RENDER_OFFLOAD=${__NV_PRIME_RENDER_OFFLOAD:-empty}"; echo "__GLX_VENDOR_LIBRARY_NAME=${__GLX_VENDOR_LIBRARY_NAME:-empty}"' +``` + +### 4) Battery impact estimates + +```bash +cortex gpu-battery +# or measured-only: +cortex gpu-battery --measured-only +``` + +______________________________________________________________________ + +## CLI Usage + +### GPU status + +```bash +cortex gpu status +``` + +### GPU switching + +```bash +# Dry-run: show what would be executed +cortex gpu set integrated --dry-run +cortex gpu set hybrid --dry-run +cortex gpu set nvidia --dry-run + +# Execute: actually run (sudo) +cortex gpu set hybrid --execute +# Skip confirmation: +cortex gpu set hybrid --execute --yes +``` + +**Note:** switching often requires a reboot or logout/login (reported as `Restart required: True`). + +### Per-app assignment + +```bash +cortex gpu app set nvidia|integrated +cortex gpu app get +cortex gpu app list +cortex gpu app remove +``` + +### Run with GPU preference (env injection) + +```bash +# Explicit override +cortex gpu run --nvidia -- +cortex gpu run --integrated -- + +# Per-app preference +cortex gpu run --app -- +``` + +______________________________________________________________________ + +## Configuration + +Per-app preferences are stored under the user config dir (respects `CORTEX_CONFIG_DIR`): + +```yaml +gpu: + apps: + blender: nvidia + obs: integrated +``` + +To prove persistence in review videos, use: + +```bash +export CORTEX_CONFIG_DIR="$(mktemp -d)" +echo "Using CORTEX_CONFIG_DIR=$CORTEX_CONFIG_DIR" +ls -la "$CORTEX_CONFIG_DIR" || true +cat "$CORTEX_CONFIG_DIR/config.yaml" || true +``` + +______________________________________________________________________ + +## API Reference + +### detect_gpu_mode() + +Detects current GPU mode and returns one of: + +- `"Integrated"` +- `"Hybrid"` +- `"NVIDIA"` + +```python +from cortex.hardware_detection import detect_gpu_mode + +mode = detect_gpu_mode() +print(mode) +``` + +### detect_gpu_switch_backend() + +Detects which backend can perform a mode switch. + +```python +from cortex.gpu_manager import detect_gpu_switch_backend + +backend = detect_gpu_switch_backend() +print(backend.value) # e.g. "prime-select" +``` + +### plan_gpu_mode_switch(mode) + +Creates a plan describing what commands would be executed and whether a restart is required. + +```python +from cortex.gpu_manager import plan_gpu_mode_switch + +plan = plan_gpu_mode_switch("hybrid") +print(plan.backend.value) +print(plan.target_mode) +print(plan.commands) +print(plan.requires_restart) +``` + +### apply_gpu_mode_switch(plan, execute=True) + +Executes the switch plan (runs the commands). 
+ +```python +from cortex.gpu_manager import plan_gpu_mode_switch, apply_gpu_mode_switch + +plan = plan_gpu_mode_switch("hybrid") +apply_gpu_mode_switch(plan, execute=True) +``` + +### Per-app preference helpers + +```python +from cortex.gpu_manager import ( + set_app_gpu_preference, get_app_gpu_preference, + list_app_gpu_preferences, remove_app_gpu_preference, +) + +set_app_gpu_preference("blender", "nvidia") +print(get_app_gpu_preference("blender")) + +print(list_app_gpu_preferences()) +remove_app_gpu_preference("blender") +``` + +### GPU env injection + +```python +from cortex.gpu_manager import get_per_app_gpu_env, run_command_with_env + +env = get_per_app_gpu_env(use_nvidia=True) # or use_nvidia=False +run_command_with_env(["bash", "-lc", "env | grep -E '__NV_|__GLX_'"], extra_env=env) +``` + +### estimate_gpu_battery_impact() + +Returns structured data: + +```python +from cortex.hardware_detection import estimate_gpu_battery_impact + +data = estimate_gpu_battery_impact() +print(data["mode"]) # Integrated | Hybrid | NVIDIA +print(data["current"]) # integrated | hybrid_idle | nvidia_active +print(data["estimates"]) # heuristic table +print(data.get("measured", {})) +``` + +______________________________________________________________________ + +## Switching Backends + +### prime-select (Ubuntu / NVIDIA PRIME) + +Planned commands: + +- Integrated: `sudo prime-select intel` +- Hybrid (on-demand): `sudo prime-select on-demand` +- NVIDIA: `sudo prime-select nvidia` + +### system76-power (System76 / Pop!_OS) + +Planned commands depend on the system76-power CLI modes supported by the distro. + +______________________________________________________________________ + +## Testing + +### Unit tests + +Run these locally: + +```bash +pytest -q tests/test_gpu_manager.py +pytest -q tests/test_hybrid_gpu_manager.py + +# verbose +pytest -vv tests/test_gpu_manager.py -rA +pytest -vv tests/test_hybrid_gpu_manager.py -rA +``` + +What these tests cover: + +- Backend detection (prime-select / system76-power) +- Mode switch planning (commands, restart requirement, invalid mode raises) +- Env merging and per-app config roundtrip +- Hybrid GPU mode detection safety + battery estimation structure + +### CLI / reviewer video scripts + +```bash +set -euo pipefail + +echo "=== VIDEO 1: VERSION + ENV ===" +which cortex || true +cortex --version || true +python3 --version +pytest --version + +echo +echo "=== VIDEO 2: COLLECT GPU TESTS (prove no missing tests) ===" +pytest --collect-only -q | grep -i gpu || true + +echo +echo "=== VIDEO 3: RUN UNIT TESTS (GPU MANAGER) ===" +pytest -vv tests/test_gpu_manager.py -rA + +echo +echo "=== VIDEO 4: RUN UNIT TESTS (HYBRID GPU DETECTION + BATTERY) ===" +pytest -vv tests/test_hybrid_gpu_manager.py -rA + +echo +echo "=== VIDEO 5: GPU STATUS (current state + backend + assignments count) ===" +cortex gpu status + +echo +echo "=== VIDEO 6: BATTERY IMPACT ESTIMATION (heuristic + measured if available) ===" +cortex gpu-battery || true +echo "--- measured only ---" +cortex gpu-battery --measured-only || true + +echo +echo "=== VIDEO 7: PER-APP ASSIGNMENT (persist evidence) ===" +export CORTEX_CONFIG_DIR="$(mktemp -d)" +echo "Using CORTEX_CONFIG_DIR=$CORTEX_CONFIG_DIR" + +cortex gpu app set blender nvidia +cortex gpu app get blender +cortex gpu app list + +echo "--- config.yaml (persist evidence) ---" +ls -la "$CORTEX_CONFIG_DIR" || true +cat "$CORTEX_CONFIG_DIR/config.yaml" || true + +cortex gpu app remove blender +cortex gpu app get blender || true + +echo +echo "=== VIDEO 8: GPU 
RUN (ENV INJECTION) ===" +export CORTEX_CONFIG_DIR="$(mktemp -d)" +echo "Using CORTEX_CONFIG_DIR=$CORTEX_CONFIG_DIR" + +echo "--- override integrated ---" +cortex gpu run --integrated -- bash -lc 'echo "__NV_PRIME_RENDER_OFFLOAD=${__NV_PRIME_RENDER_OFFLOAD:-empty}"; echo "__GLX_VENDOR_LIBRARY_NAME=${__GLX_VENDOR_LIBRARY_NAME:-empty}"' + +echo "--- override nvidia ---" +cortex gpu run --nvidia -- bash -lc 'echo "__NV_PRIME_RENDER_OFFLOAD=${__NV_PRIME_RENDER_OFFLOAD:-empty}"; echo "__GLX_VENDOR_LIBRARY_NAME=${__GLX_VENDOR_LIBRARY_NAME:-empty}"' + +echo "--- per-app assignment -> run --app ---" +cortex gpu app set blender nvidia +cortex gpu run --app blender -- bash -lc 'echo "__NV_PRIME_RENDER_OFFLOAD=${__NV_PRIME_RENDER_OFFLOAD:-empty}"; echo "__GLX_VENDOR_LIBRARY_NAME=${__GLX_VENDOR_LIBRARY_NAME:-empty}"' + +echo +echo "=== VIDEO 9: SWITCHING (DRY-RUN + EXECUTE PATH WITH FAKE BACKEND) ===" +cortex gpu set integrated --dry-run || true +cortex gpu set hybrid --dry-run || true +cortex gpu set nvidia --dry-run || true + +echo "--- fake backend to prove execute path runs sudo/prime-select ---" +FAKEBIN="$(mktemp -d)" +echo "Using FAKEBIN=$FAKEBIN" + +cat >"$FAKEBIN/sudo" <<'EOF' +#!/usr/bin/env bash +echo "[fake sudo] $@" >&2 +exec "$@" +EOF +chmod +x "$FAKEBIN/sudo" + +cat >"$FAKEBIN/prime-select" <<'EOF' +#!/usr/bin/env bash +echo "[fake prime-select] called with: $@" >&2 +exit 0 +EOF +chmod +x "$FAKEBIN/prime-select" + +export PATH="$FAKEBIN:$PATH" + +cortex gpu status +cortex gpu set hybrid --dry-run +cortex gpu set hybrid --execute --yes + +echo +echo "=== BONUS: NEGATIVE TESTS (validation should reject invalid choices) ===" +cortex gpu set turbo --dry-run || true +cortex gpu app set blender turbo || true + +echo +echo "=== DONE ===" +``` + +______________________________________________________________________ + +## Notes / Limitations + +- Battery *measurements* depend on system support (WSL often can’t read Linux battery metrics). +- Switching modes usually requires a reboot or logout/login. +- On systems without a supported backend, switching returns a “no supported backend” message (status still works). 
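+
+Regarding the last point above, a minimal sketch of handling the no-backend case programmatically, using only `plan_gpu_mode_switch()` / `apply_gpu_mode_switch()` as documented in the API Reference:
+
+```python
+from cortex.gpu_manager import apply_gpu_mode_switch, plan_gpu_mode_switch
+
+plan = plan_gpu_mode_switch("hybrid")  # None when neither prime-select nor system76-power is present
+if plan is None:
+    print("No supported GPU switch backend found; install prime-select or system76-power.")
+else:
+    print(plan.commands, plan.requires_restart)
+    apply_gpu_mode_switch(plan, execute=False)  # dry-run: returns 0 and runs nothing
+```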
+ +______________________________________________________________________ + +**Closes:** Hybrid GPU Manager scope (state + per-app + switching + battery estimates) diff --git a/tests/test_gpu_manager.py b/tests/test_gpu_manager.py new file mode 100644 index 00000000..d6cffe84 --- /dev/null +++ b/tests/test_gpu_manager.py @@ -0,0 +1,115 @@ +import os +from types import SimpleNamespace + +import pytest + + +def test_detect_gpu_switch_backend_prime_select(monkeypatch): + import cortex.gpu_manager as gm + + monkeypatch.setattr( + gm.shutil, + "which", + lambda name: "/usr/bin/prime-select" if name == "prime-select" else None, + ) + assert gm.detect_gpu_switch_backend() == gm.GPUSwitchBackend.PRIME_SELECT + + +def test_detect_gpu_switch_backend_system76_power(monkeypatch): + import cortex.gpu_manager as gm + + def fake_which(name: str): + if name == "prime-select": + return None + if name == "system76-power": + return "/usr/bin/system76-power" + return None + + monkeypatch.setattr(gm.shutil, "which", fake_which) + assert gm.detect_gpu_switch_backend() == gm.GPUSwitchBackend.SYSTEM76_POWER + + +def test_plan_gpu_mode_switch_prime_select(monkeypatch): + import cortex.gpu_manager as gm + + monkeypatch.setattr(gm, "detect_gpu_switch_backend", lambda: gm.GPUSwitchBackend.PRIME_SELECT) + + plan = gm.plan_gpu_mode_switch("hybrid") + assert plan is not None + assert plan.backend == gm.GPUSwitchBackend.PRIME_SELECT + assert plan.target_mode == "hybrid" + assert plan.commands == [["sudo", "prime-select", "on-demand"]] + + +def test_plan_gpu_mode_switch_system76_power(monkeypatch): + import cortex.gpu_manager as gm + + monkeypatch.setattr(gm, "detect_gpu_switch_backend", lambda: gm.GPUSwitchBackend.SYSTEM76_POWER) + + plan = gm.plan_gpu_mode_switch("nvidia") + assert plan is not None + assert plan.backend == gm.GPUSwitchBackend.SYSTEM76_POWER + assert plan.commands == [["sudo", "system76-power", "graphics", "nvidia"]] + + +def test_plan_gpu_mode_switch_invalid_mode_raises(monkeypatch): + import cortex.gpu_manager as gm + + monkeypatch.setattr(gm, "detect_gpu_switch_backend", lambda: gm.GPUSwitchBackend.NONE) + with pytest.raises(ValueError): + gm.plan_gpu_mode_switch("banana") + + +def test_run_command_with_env_merges_env(monkeypatch): + import cortex.gpu_manager as gm + + seen = {} + + def fake_run(cmd, env=None, **kwargs): + seen["cmd"] = cmd + seen["env"] = dict(env or {}) + return SimpleNamespace(returncode=0) + + monkeypatch.setattr(gm.subprocess, "run", fake_run) + + rc = gm.run_command_with_env(["echo", "hi"], extra_env={"A": "1"}) + assert rc == 0 + assert seen["cmd"] == ["echo", "hi"] + assert seen["env"].get("A") == "1" + + +def test_per_app_config_roundtrip(tmp_path, monkeypatch): + import cortex.gpu_manager as gm + + monkeypatch.setenv("CORTEX_CONFIG_DIR", str(tmp_path)) + + gm.set_app_gpu_preference("blender", "nvidia") + gm.set_app_gpu_preference("firefox", "integrated") + + assert gm.get_app_gpu_preference("blender") == "nvidia" + assert gm.get_app_gpu_preference("firefox") == "integrated" + + prefs = gm.list_app_gpu_preferences() + assert prefs["blender"] == "nvidia" + assert prefs["firefox"] == "integrated" + + env_blender = gm.get_per_app_gpu_env(app="blender") + assert env_blender.get("__NV_PRIME_RENDER_OFFLOAD") == "1" + + env_firefox = gm.get_per_app_gpu_env(app="firefox") + assert env_firefox == {} + + assert gm.remove_app_gpu_preference("blender") is True + assert gm.get_app_gpu_preference("blender") is None + + +def test_set_app_gpu_preference_validates_input(tmp_path, 
monkeypatch): + import cortex.gpu_manager as gm + + monkeypatch.setenv("CORTEX_CONFIG_DIR", str(tmp_path)) + + with pytest.raises(ValueError): + gm.set_app_gpu_preference("", "nvidia") + + with pytest.raises(ValueError): + gm.set_app_gpu_preference("blender", "bad") diff --git a/tests/test_hybrid_gpu_manager.py b/tests/test_hybrid_gpu_manager.py new file mode 100644 index 00000000..6ad27f0d --- /dev/null +++ b/tests/test_hybrid_gpu_manager.py @@ -0,0 +1,82 @@ +from cortex.hardware_detection import ( + detect_gpu_mode, + detect_nvidia_gpu, + estimate_gpu_battery_impact, +) + + +def test_detect_gpu_mode_returns_valid_state() -> None: + mode: str = detect_gpu_mode() + assert isinstance(mode, str) + assert mode in {"Integrated", "Hybrid", "NVIDIA"} + + +def test_estimate_gpu_battery_impact_structure() -> None: + result = estimate_gpu_battery_impact() + + assert isinstance(result, dict) + assert "mode" in result + assert "current" in result + assert "estimates" in result + + estimates = result["estimates"] + assert set(estimates.keys()) == { + "integrated", + "hybrid_idle", + "nvidia_active", + } + + for profile in estimates.values(): + assert "power" in profile + assert "impact" in profile + + +def test_detect_nvidia_gpu_is_safe_and_returns_bool() -> None: + result = detect_nvidia_gpu() + assert isinstance(result, bool) + + +def test_detect_gpu_mode_does_not_crash_and_returns_value() -> None: + mode = detect_gpu_mode() + assert mode is not None + + +def test_detect_gpu_mode_integrated_when_lspci_unavailable(monkeypatch) -> None: + import cortex.hardware_detection as hd + + def fake_run(cmd: list[str]) -> str: + # Simulate missing lspci + if cmd[:1] == ["lspci"]: + return "" + return "" + + monkeypatch.setattr(hd, "_run", fake_run) + assert hd.detect_gpu_mode() == "Integrated" + + +def test_detect_gpu_mode_nvidia_when_nvidia_smi_available(monkeypatch) -> None: + import cortex.hardware_detection as hd + + def fake_run(cmd: list[str]) -> str: + if cmd[:1] == ["lspci"]: + return "PCI GPU present" + if cmd[:1] == ["nvidia-smi"]: + return "GPU 0" + return "" + + monkeypatch.setattr(hd, "_run", fake_run) + assert hd.detect_gpu_mode() == "NVIDIA" + + +def test_detect_gpu_mode_hybrid_when_pci_gpu_but_no_nvidia(monkeypatch) -> None: + import cortex.hardware_detection as hd + + def fake_run(cmd: list[str]) -> str: + if cmd[:1] == ["lspci"]: + return "PCI GPU present" + if cmd[:1] == ["nvidia-smi"]: + return "" # drivers missing or GPU powered down + return "" + + monkeypatch.setattr(hd, "_run", fake_run) + assert hd.detect_gpu_mode() == "Hybrid"