diff --git a/cortex/kernel_features/__init__.py b/cortex/kernel_features/__init__.py
new file mode 100644
index 00000000..7236e373
--- /dev/null
+++ b/cortex/kernel_features/__init__.py
@@ -0,0 +1,19 @@
+"""
+Cortex Kernel Features
+
+User-space implementations of kernel-level AI concepts:
+- Model Lifecycle Manager (systemd-based LLM services)
+- KV-Cache Manager (shared memory cache pools)
+- Accelerator Limits (cgroups v2 wrapper)
+- LLM Device (/dev/llm FUSE interface)
+"""
+
+from .model_lifecycle import ModelLifecycleManager, ModelConfig
+from .kv_cache_manager import KVCacheManager, CacheConfig
+from .accelerator_limits import AcceleratorLimitsManager, ResourceLimits
+
+__all__ = [
+    'ModelLifecycleManager', 'ModelConfig',
+    'KVCacheManager', 'CacheConfig',
+    'AcceleratorLimitsManager', 'ResourceLimits',
+]
diff --git a/cortex/kernel_features/accelerator_limits.py b/cortex/kernel_features/accelerator_limits.py
new file mode 100644
index 00000000..81a5eb3d
--- /dev/null
+++ b/cortex/kernel_features/accelerator_limits.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+"""
+Cortex Accelerator-Aware Resource Limits
+
+cgroups v2 wrapper for AI workloads.
+"""
+
+import os
+import json
+import sqlite3
+import subprocess
+from pathlib import Path
+from dataclasses import dataclass, asdict
+from typing import Optional, List, Dict
+from enum import Enum
+
+CORTEX_DB = Path.home() / ".cortex/limits.db"
+CGROUP_ROOT = Path("/sys/fs/cgroup")
+
+class WorkloadPreset(Enum):
+    INFERENCE = "inference"
+    TRAINING = "training"
+    BATCH = "batch"
+    INTERACTIVE = "interactive"
+
+PRESETS = {
+    "inference": {"cpu": 400, "memory_gb": 32, "oom_adj": -500, "gpu_pct": 100},
+    "training": {"cpu": 1600, "memory_gb": 128, "oom_adj": -800, "gpu_pct": 100},
+    "batch": {"cpu": 800, "memory_gb": 64, "oom_adj": 0, "gpu_pct": 80},
+    "interactive": {"cpu": 200, "memory_gb": 16, "oom_adj": -200, "gpu_pct": 50},
+}
+
+@dataclass
+class ResourceLimits:
+    name: str
+    preset: str = "inference"
+    cpu_quota: float = 400.0
+    memory_max: int = 32 * 1024**3
+    gpu_ids: Optional[List[int]] = None
+    oom_score_adj: int = 0
+
+    def __post_init__(self):
+        self.gpu_ids = self.gpu_ids or []
+
+    @classmethod
+    def from_preset(cls, name: str, preset: str, gpus: int = 0):
+        p = PRESETS.get(preset, PRESETS["inference"])
+        # Binary gigabytes, so presets match the dataclass default (32 * 1024**3).
+        return cls(name, preset, p["cpu"], p["memory_gb"] * 1024**3,
+                   list(range(gpus)), p["oom_adj"])
+
+
+class LimitsDatabase:
+    def __init__(self):
+        CORTEX_DB.parent.mkdir(parents=True, exist_ok=True)
+        with sqlite3.connect(CORTEX_DB) as conn:
+            conn.execute("CREATE TABLE IF NOT EXISTS profiles (name TEXT PRIMARY KEY, config TEXT)")
+
+    def save(self, limits: ResourceLimits):
+        with sqlite3.connect(CORTEX_DB) as conn:
+            conn.execute("INSERT OR REPLACE INTO profiles VALUES (?,?)",
+                         (limits.name, json.dumps(asdict(limits))))
+
+    def get(self, name: str) -> Optional[ResourceLimits]:
+        with sqlite3.connect(CORTEX_DB) as conn:
+            row = conn.execute("SELECT config FROM profiles WHERE name=?", (name,)).fetchone()
+            return ResourceLimits(**json.loads(row[0])) if row else None
+
+    def list_all(self):
+        with sqlite3.connect(CORTEX_DB) as conn:
+            return [ResourceLimits(**json.loads(r[0])) for r in conn.execute("SELECT config FROM profiles")]
+
+
+class AcceleratorLimitsManager:
+    def __init__(self):
+        self.db = LimitsDatabase()
+
+    def create(self, limits: ResourceLimits) -> bool:
+        self.db.save(limits)
+        print(f"✅ Created profile '{limits.name}' (preset: {limits.preset})")
+        return True
+
+    def get_env(self, name: str) -> Dict[str, str]:
+        limits = self.db.get(name)
+        if not limits:
+            return {}
+        return {"CUDA_VISIBLE_DEVICES": ",".join(map(str, limits.gpu_ids))}
+
+    def status(self):
+        profiles = self.db.list_all()
+        print(f"\n{'NAME':<20} {'PRESET':<12} {'CPU':<8} {'MEMORY':<10} {'GPUS':<10}")
+        print("-" * 65)
+        for p in profiles:
+            gpus = ",".join(map(str, p.gpu_ids)) or "-"
+            cpus = f"{p.cpu_quota / 100:.0f}"
+            mem = f"{p.memory_max / 1024**3:.0f}G"
+            print(f"{p.name:<20} {p.preset:<12} {cpus:<8} {mem:<10} {gpus:<10}")
+
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description="Cortex Accelerator Limits")
+    sub = parser.add_subparsers(dest="cmd")
+
+    c = sub.add_parser("create")
+    c.add_argument("name")
+    c.add_argument("--preset", default="inference")
+    c.add_argument("--gpus", type=int, default=0)
+
+    sub.add_parser("env").add_argument("name")
+    sub.add_parser("status")
+    sub.add_parser("list")
+
+    args = parser.parse_args()
+    mgr = AcceleratorLimitsManager()
+
+    if args.cmd == "create":
+        mgr.create(ResourceLimits.from_preset(args.name, args.preset, args.gpus))
+    elif args.cmd == "env":
+        for k, v in mgr.get_env(args.name).items():
+            print(f"export {k}={v}")
+    elif args.cmd in ("status", "list"):
+        mgr.status()
+
+
+if __name__ == "__main__":
+    main()
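A quick sanity check of the preset flow above — a minimal sketch, assuming the package is importable from the repo root; `train-job` is a made-up profile name:

```python
from cortex.kernel_features.accelerator_limits import ResourceLimits, AcceleratorLimitsManager

# "training" preset: cpu=1600 (16 cores at 100% quota each), 128 GiB, OOM-adj -800.
limits = ResourceLimits.from_preset("train-job", "training", gpus=2)
assert limits.gpu_ids == [0, 1]
assert limits.memory_max == 128 * 1024**3

mgr = AcceleratorLimitsManager()
mgr.create(limits)               # persists the profile to ~/.cortex/limits.db
print(mgr.get_env("train-job"))  # {'CUDA_VISIBLE_DEVICES': '0,1'}
```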
diff --git a/cortex/kernel_features/kv_cache_manager.py b/cortex/kernel_features/kv_cache_manager.py
new file mode 100644
index 00000000..de383280
--- /dev/null
+++ b/cortex/kernel_features/kv_cache_manager.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+"""
+Cortex KV-Cache Manager
+
+User-space KV-cache management for LLM inference optimization.
+"""
+
+import os
+import json
+import sqlite3
+import time
+import hashlib
+from pathlib import Path
+from dataclasses import dataclass, asdict
+from typing import Optional, List, Dict
+from multiprocessing import shared_memory
+from enum import Enum
+
+CORTEX_DB = Path.home() / ".cortex/kv_cache.db"
+SHM_PREFIX = "cortex_kv_"
+
+class CachePolicy(Enum):
+    LRU = "lru"
+    LFU = "lfu"
+    FIFO = "fifo"
+
+@dataclass
+class CacheConfig:
+    name: str
+    size_bytes: int
+    policy: str = "lru"
+    max_sequences: int = 1000
+
+@dataclass
+class CacheEntry:
+    sequence_id: int
+    created_at: float
+    last_accessed: float
+    access_count: int
+    token_count: int
+    size_bytes: int
+    offset: int
+
+
+class CacheDatabase:
+    def __init__(self):
+        CORTEX_DB.parent.mkdir(parents=True, exist_ok=True)
+        with sqlite3.connect(CORTEX_DB) as conn:
+            conn.executescript("""
+                CREATE TABLE IF NOT EXISTS pools (name TEXT PRIMARY KEY, config TEXT, shm_name TEXT);
+                CREATE TABLE IF NOT EXISTS entries (seq_id INTEGER, pool TEXT, created REAL, accessed REAL,
+                    count INTEGER, tokens INTEGER, size INTEGER, offset INTEGER, PRIMARY KEY(seq_id, pool));
+                CREATE TABLE IF NOT EXISTS stats (pool TEXT PRIMARY KEY, hits INTEGER DEFAULT 0, misses INTEGER DEFAULT 0);
+            """)
+
+    def save_pool(self, cfg: CacheConfig, shm: str):
+        with sqlite3.connect(CORTEX_DB) as conn:
+            conn.execute("INSERT OR REPLACE INTO pools VALUES (?,?,?)", (cfg.name, json.dumps(asdict(cfg)), shm))
+            conn.execute("INSERT OR IGNORE INTO stats (pool) VALUES (?)", (cfg.name,))
+
+    def get_pool(self, name: str):
+        with sqlite3.connect(CORTEX_DB) as conn:
+            row = conn.execute("SELECT config, shm_name FROM pools WHERE name=?", (name,)).fetchone()
+            return (CacheConfig(**json.loads(row[0])), row[1]) if row else None
+
+    def list_pools(self):
+        with sqlite3.connect(CORTEX_DB) as conn:
+            return [CacheConfig(**json.loads(r[0])) for r in conn.execute("SELECT config FROM pools").fetchall()]
+
+
+class SharedMemoryPool:
+    def __init__(self, name: str, size: int, create: bool = True):
+        self.name = f"{SHM_PREFIX}{name}"
+        self.size = size
+        if create:
+            # Remove any stale segment left over from a previous run.
+            try:
+                old = shared_memory.SharedMemory(name=self.name)
+                old.close()
+                old.unlink()
+            except FileNotFoundError:
+                pass
+            self.shm = shared_memory.SharedMemory(name=self.name, create=True, size=size + 8192)
+        else:
+            self.shm = shared_memory.SharedMemory(name=self.name)
+
+    def get_usage(self):
+        return self.size, 0, 0  # Simplified: (total, used, entries)
+
+    def destroy(self):
+        self.shm.close()
+        try:
+            self.shm.unlink()
+        except FileNotFoundError:
+            pass
+
+
+class KVCacheManager:
+    def __init__(self):
+        self.db = CacheDatabase()
+        self.pools: Dict[str, SharedMemoryPool] = {}
+
+    def create_pool(self, cfg: CacheConfig) -> bool:
+        pool = SharedMemoryPool(cfg.name, cfg.size_bytes)
+        self.pools[cfg.name] = pool
+        self.db.save_pool(cfg, pool.name)
+        print(f"✅ Created cache pool '{cfg.name}' ({cfg.size_bytes / 1e9:.1f} GB)")
+        return True
+
+    def destroy_pool(self, name: str) -> bool:
+        if name in self.pools:
+            self.pools[name].destroy()
+            del self.pools[name]
+        else:
+            # The CLI builds a fresh manager per invocation, so attach to the
+            # existing segment before unlinking it.
+            try:
+                SharedMemoryPool(name, 0, create=False).destroy()
+            except FileNotFoundError:
+                pass
+        with sqlite3.connect(CORTEX_DB) as conn:
+            conn.execute("DELETE FROM pools WHERE name=?", (name,))
+        print(f"✅ Destroyed pool '{name}'")
+        return True
+
+    def status(self, name: Optional[str] = None):
+        if name:
+            found = self.db.get_pool(name)
+            configs = [found[0]] if found else []
+        else:
+            configs = self.db.list_pools()
+        print(f"\n{'POOL':<20} {'SIZE':<12} {'POLICY':<10}")
+        print("-" * 50)
+        for cfg in configs:
+            size = f"{cfg.size_bytes / 1e9:.1f}G"
+            print(f"{cfg.name:<20} {size:<12} {cfg.policy:<10}")
+
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description="Cortex KV-Cache Manager")
+    sub = parser.add_subparsers(dest="cmd")
+
+    c = sub.add_parser("create")
+    c.add_argument("name")
+    c.add_argument("--size", required=True)
+    c.add_argument("--policy", default="lru")
+
+    sub.add_parser("destroy").add_argument("name")
+    sub.add_parser("status").add_argument("name", nargs="?")
+    sub.add_parser("list")
+
+    args = parser.parse_args()
+    mgr = KVCacheManager()
+
+    if args.cmd == "create":
+        # Parse sizes like "16G", "512M", "64K" (decimal multipliers).
+        size_str = args.size.upper()
+        mult = {"K": 1e3, "M": 1e6, "G": 1e9}.get(size_str[-1], 1)
+        size = int(float(size_str.rstrip("KMG")) * mult)
+        mgr.create_pool(CacheConfig(args.name, size, args.policy))
+    elif args.cmd == "destroy":
+        mgr.destroy_pool(args.name)
+    elif args.cmd in ("status", "list"):
+        mgr.status(getattr(args, 'name', None))
+
+
+if __name__ == "__main__":
+    main()
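To illustrate the pool lifecycle, a minimal same-process sketch (assumes Linux `/dev/shm` semantics; `demo` is a hypothetical pool name):

```python
from cortex.kernel_features.kv_cache_manager import CacheConfig, KVCacheManager, SharedMemoryPool

mgr = KVCacheManager()
mgr.create_pool(CacheConfig("demo", 64 * 1024 * 1024))  # 64 MB segment in /dev/shm

# Another process (or handle) can attach to the same segment by name.
view = SharedMemoryPool("demo", 64 * 1024 * 1024, create=False)
view.shm.buf[:5] = b"hello"  # writes are visible through every mapping
view.shm.close()             # close() drops our mapping; only destroy() unlinks

mgr.destroy_pool("demo")
```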
+""" + +import os +import sys +import json +import time +import stat +import errno +from dataclasses import dataclass, field +from typing import Optional, Dict, List + +try: + from fuse import FUSE, FuseOSError, Operations + HAS_FUSE = True +except ImportError: + HAS_FUSE = False + class FuseOSError(Exception): + def __init__(self, e): self.errno = e + class Operations: pass + +try: + import anthropic + HAS_API = True +except ImportError: + HAS_API = False + + +@dataclass +class Session: + id: str + model: str + messages: List[Dict] = field(default_factory=list) + prompt: str = "" + response: str = "" + temp: float = 0.7 + max_tokens: int = 4096 + + +class MockLLM: + def complete(self, model, messages, max_tokens, temp, system=None): + return f"[Mock] Response to: {messages[-1]['content'][:50]}..." + + +class LLMDevice(Operations): + MODELS = {"claude": "claude-3-sonnet-20240229", "sonnet": "claude-3-5-sonnet-20241022"} + + def __init__(self): + self.sessions: Dict[str, Session] = {"default": Session("default", "claude")} + self.llm = anthropic.Anthropic() if HAS_API and os.environ.get("ANTHROPIC_API_KEY") else MockLLM() + self.start = time.time() + self.requests = 0 + + def _parse(self, path): + parts = path.strip('/').split('/') + if not parts[0]: return ('root', None, None) + if parts[0] in self.MODELS: return ('model', parts[0], parts[1] if len(parts) > 1 else None) + if parts[0] == 'sessions': return ('session', parts[1] if len(parts) > 1 else None, parts[2] if len(parts) > 2 else None) + if parts[0] == 'status': return ('status', None, None) + return ('unknown', None, None) + + def getattr(self, path, fh=None): + t, m, f = self._parse(path) + now = time.time() + if t in ('root', 'model', 'session') and not f: + return {'st_mode': stat.S_IFDIR | 0o755, 'st_nlink': 2, 'st_uid': os.getuid(), 'st_gid': os.getgid(), 'st_atime': now, 'st_mtime': now, 'st_ctime': now} + if f or t == 'status': + return {'st_mode': stat.S_IFREG | 0o644, 'st_nlink': 1, 'st_uid': os.getuid(), 'st_gid': os.getgid(), 'st_size': 0, 'st_atime': now, 'st_mtime': now, 'st_ctime': now} + raise FuseOSError(errno.ENOENT) + + def readdir(self, path, fh): + t, m, f = self._parse(path) + base = ['.', '..'] + if t == 'root': return base + list(self.MODELS.keys()) + ['sessions', 'status'] + if t == 'model': return base + ['prompt', 'response', 'config'] + if t == 'session' and not m: return base + list(self.sessions.keys()) + if t == 'session' and m: return base + ['prompt', 'response', 'history'] + return base + + def read(self, path, size, offset, fh): + t, m, f = self._parse(path) + s = self.sessions.get("default") + if t == 'model' and f == 'response': return s.response.encode()[offset:offset+size] + if t == 'status': return json.dumps({"status": "running", "uptime": time.time() - self.start, "requests": self.requests}).encode()[offset:offset+size] + return b"" + + def write(self, path, data, offset, fh): + t, m, f = self._parse(path) + if t == 'model' and f == 'prompt': + s = self.sessions["default"] + s.prompt = data.decode().strip() + s.messages.append({"role": "user", "content": s.prompt}) + try: + resp = self.llm.messages.create(model=self.MODELS.get(m, "claude-3-sonnet-20240229"), + max_tokens=s.max_tokens, messages=s.messages) if HAS_API else self.llm.complete(m, s.messages, s.max_tokens, s.temp) + s.response = resp.content[0].text if HAS_API else resp + except Exception as e: + s.response = f"Error: {e}" + s.messages.append({"role": "assistant", "content": s.response}) + self.requests += 1 + return len(data) + 
diff --git a/cortex/kernel_features/model_lifecycle.py b/cortex/kernel_features/model_lifecycle.py
new file mode 100644
index 00000000..7a4205b0
--- /dev/null
+++ b/cortex/kernel_features/model_lifecycle.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+"""
+Cortex Model Lifecycle Manager
+
+Manages LLM models as first-class system services using systemd.
+"""
+
+import os
+import sys
+import json
+import subprocess
+import sqlite3
+from pathlib import Path
+from dataclasses import dataclass, field, asdict
+from typing import Optional, List, Dict, Any
+from datetime import datetime, timezone
+
+CORTEX_DB_PATH = Path.home() / ".cortex/models.db"
+CORTEX_SERVICE_DIR = Path.home() / ".config/systemd/user"
+
+@dataclass
+class ModelConfig:
+    name: str
+    model_path: str
+    backend: str = "vllm"
+    port: int = 8000
+    gpu_memory_fraction: float = 0.9
+    max_model_len: int = 4096
+    gpu_ids: List[int] = field(default_factory=lambda: [0])
+    memory_limit: str = "32G"
+    cpu_limit: float = 4.0
+    restart_policy: str = "on-failure"
+    preload_on_boot: bool = False
+
+    def to_dict(self) -> Dict[str, Any]:
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ModelConfig':
+        return cls(**data)
+
+
+class ModelDatabase:
+    def __init__(self):
+        CORTEX_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
+        self._init_db()
+
+    def _init_db(self):
+        with sqlite3.connect(CORTEX_DB_PATH) as conn:
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS models (
+                    name TEXT PRIMARY KEY,
+                    config TEXT NOT NULL,
+                    created_at TEXT NOT NULL
+                )
+            """)
+
+    def save_model(self, config: ModelConfig):
+        with sqlite3.connect(CORTEX_DB_PATH) as conn:
+            conn.execute(
+                "INSERT OR REPLACE INTO models VALUES (?, ?, ?)",
+                (config.name, json.dumps(config.to_dict()), datetime.now(timezone.utc).isoformat())
+            )
+
+    def get_model(self, name: str) -> Optional[ModelConfig]:
+        with sqlite3.connect(CORTEX_DB_PATH) as conn:
+            row = conn.execute("SELECT config FROM models WHERE name = ?", (name,)).fetchone()
+            return ModelConfig.from_dict(json.loads(row[0])) if row else None
+
+    def list_models(self) -> List[ModelConfig]:
+        with sqlite3.connect(CORTEX_DB_PATH) as conn:
+            rows = conn.execute("SELECT config FROM models").fetchall()
+            return [ModelConfig.from_dict(json.loads(r[0])) for r in rows]
+
+    def delete_model(self, name: str):
+        with sqlite3.connect(CORTEX_DB_PATH) as conn:
+            conn.execute("DELETE FROM models WHERE name = ?", (name,))
+
+
+class ServiceGenerator:
+    BACKENDS = {
+        "vllm": "python -m vllm.entrypoints.openai.api_server --model {model_path} --port {port}",
+        "llamacpp": "llama-server -m {model_path} --port {port}",
+        "ollama": "ollama serve",
+    }
+
+    def generate(self, config: ModelConfig) -> str:
+        cmd = self.BACKENDS.get(config.backend, self.BACKENDS["vllm"]).format(**asdict(config))
+        return f"""[Unit]
+Description=Cortex Model: {config.name}
+After=network.target
+
+[Service]
+Type=simple
+ExecStart={cmd}
+Environment=CUDA_VISIBLE_DEVICES={','.join(map(str, config.gpu_ids))}
+CPUQuota={int(config.cpu_limit * 100)}%
+MemoryMax={config.memory_limit}
+Restart={config.restart_policy}
+NoNewPrivileges=true
+
+[Install]
+WantedBy=default.target
+"""
+
+
+class ModelLifecycleManager:
+    def __init__(self):
+        self.db = ModelDatabase()
+        CORTEX_SERVICE_DIR.mkdir(parents=True, exist_ok=True)
+
+    def _systemctl(self, *args):
+        return subprocess.run(["systemctl", "--user"] + list(args), capture_output=True, text=True)
+
+    def register(self, config: ModelConfig) -> bool:
+        service = ServiceGenerator().generate(config)
+        service_path = CORTEX_SERVICE_DIR / f"cortex-{config.name}.service"
+        service_path.write_text(service)
+        self.db.save_model(config)
+        self._systemctl("daemon-reload")
+        print(f"✅ Registered model '{config.name}'")
+        return True
+
+    def unregister(self, name: str) -> bool:
+        # Mirror of register(): stop the unit, drop the unit file and DB row.
+        self._systemctl("stop", f"cortex-{name}.service")
+        (CORTEX_SERVICE_DIR / f"cortex-{name}.service").unlink(missing_ok=True)
+        self.db.delete_model(name)
+        self._systemctl("daemon-reload")
+        print(f"✅ Unregistered model '{name}'")
+        return True
+
+    def start(self, name: str) -> bool:
+        result = self._systemctl("start", f"cortex-{name}.service")
+        print(f"{'✅' if result.returncode == 0 else '❌'} Start {name}: {result.stderr or 'OK'}")
+        return result.returncode == 0
+
+    def stop(self, name: str) -> bool:
+        result = self._systemctl("stop", f"cortex-{name}.service")
+        print(f"{'✅' if result.returncode == 0 else '❌'} Stop {name}")
+        return result.returncode == 0
+
+    def status(self, name: Optional[str] = None):
+        models = [self.db.get_model(name)] if name else self.db.list_models()
+        print(f"\n{'NAME':<20} {'STATE':<12} {'BACKEND':<10}")
+        print("-" * 50)
+        for m in models:
+            if m:
+                result = self._systemctl("is-active", f"cortex-{m.name}.service")
+                state = result.stdout.strip() or "unknown"
+                print(f"{m.name:<20} {state:<12} {m.backend:<10}")
+
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description="Cortex Model Lifecycle Manager")
+    sub = parser.add_subparsers(dest="cmd")
+
+    reg = sub.add_parser("register")
+    reg.add_argument("name")
+    reg.add_argument("--path", required=True)
+    reg.add_argument("--backend", default="vllm")
+    reg.add_argument("--port", type=int, default=8000)
+    reg.add_argument("--gpus", default="0")
+
+    for cmd in ["start", "stop", "unregister"]:
+        p = sub.add_parser(cmd)
+        p.add_argument("name")
+
+    sub.add_parser("status").add_argument("name", nargs="?")
+    sub.add_parser("list")
+
+    args = parser.parse_args()
+    mgr = ModelLifecycleManager()
+
+    if args.cmd == "register":
+        mgr.register(ModelConfig(args.name, args.path, args.backend, args.port,
+                                 gpu_ids=[int(x) for x in args.gpus.split(",")]))
+    elif args.cmd == "start":
+        mgr.start(args.name)
+    elif args.cmd == "stop":
+        mgr.stop(args.name)
+    elif args.cmd == "unregister":
+        mgr.unregister(args.name)
+    elif args.cmd in ("status", "list"):
+        mgr.status(getattr(args, 'name', None))
+
+
+if __name__ == "__main__":
+    main()
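For reference, the unit text that `ServiceGenerator` emits can be previewed without touching systemd — a sketch with hypothetical model values:

```python
from cortex.kernel_features.model_lifecycle import ModelConfig, ServiceGenerator

cfg = ModelConfig("llama-7b", "meta-llama/Llama-2-7b-hf", backend="vllm",
                  port=8001, gpu_ids=[0, 1])
print(ServiceGenerator().generate(cfg))
# The [Service] section will contain, among other directives:
#   ExecStart=python -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-hf --port 8001
#   Environment=CUDA_VISIBLE_DEVICES=0,1
#   CPUQuota=400%
#   MemoryMax=32G
```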
diff --git a/docs/KERNEL_FEATURES.md b/docs/KERNEL_FEATURES.md
new file mode 100644
index 00000000..bbad83fe
--- /dev/null
+++ b/docs/KERNEL_FEATURES.md
@@ -0,0 +1,48 @@
+# Cortex Kernel Features
+
+User-space implementations of kernel-level AI concepts. They apply kernel-style design ideas (device files, cgroup limits, shared caches, service lifecycles) entirely in user space, so everything runs on stock Ubuntu 24.04 without kernel patches.
+
+## Components
+
+### 1. Model Lifecycle Manager
+Systemd-based LLM service management.
+
+```bash
+cortex model register llama-70b --path meta-llama/Llama-2-70b-hf --backend vllm
+cortex model start llama-70b
+cortex model status
+```
+
+### 2. KV-Cache Manager
+Shared memory cache pools for LLM inference.
+
+```bash
+cortex cache create llama-cache --size 16G
+cortex cache status
+cortex cache destroy llama-cache
+```
+
+### 3. Accelerator Limits
+cgroups v2 wrapper for AI workloads.
+
+```bash
+cortex limits create inference-job --preset inference --gpus 2
+cortex limits status
+```
+
+### 4. /dev/llm Virtual Device
+FUSE-based file interface to LLMs.
+
+```bash
+cortex-llm-device mount /mnt/llm
+echo "Hello" > /mnt/llm/claude/prompt
+cat /mnt/llm/claude/response
+```
+
+## Architecture
+
+These are Tier 1 features from our kernel enhancement roadmap: user-space implementations that can ship now while we work on upstream kernel contributions.
+
+## Patents
+
+The KV-Cache Manager implements concepts from our provisional patent applications for kernel-managed KV-cache memory regions.
diff --git a/tests/kernel_features/__init__.py b/tests/kernel_features/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/kernel_features/test_kv_cache.py b/tests/kernel_features/test_kv_cache.py
new file mode 100644
index 00000000..ab06f703
--- /dev/null
+++ b/tests/kernel_features/test_kv_cache.py
@@ -0,0 +1,7 @@
+import pytest
+from cortex.kernel_features.kv_cache_manager import CacheConfig, KVCacheManager
+
+def test_cache_config():
+    cfg = CacheConfig("test", 1024*1024*16)
+    assert cfg.policy == "lru"
+    assert cfg.max_sequences == 1000
diff --git a/tests/kernel_features/test_model_lifecycle.py b/tests/kernel_features/test_model_lifecycle.py
new file mode 100644
index 00000000..620b836d
--- /dev/null
+++ b/tests/kernel_features/test_model_lifecycle.py
@@ -0,0 +1,14 @@
+import pytest
+from cortex.kernel_features.model_lifecycle import ModelConfig, ModelLifecycleManager
+
+def test_model_config_defaults():
+    cfg = ModelConfig("test", "/path/to/model")
+    assert cfg.backend == "vllm"
+    assert cfg.port == 8000
+
+def test_config_roundtrip():
+    cfg = ModelConfig("test", "/model", "llamacpp", 8080)
+    data = cfg.to_dict()
+    restored = ModelConfig.from_dict(data)
+    assert restored.name == cfg.name
+    assert restored.backend == cfg.backend
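The accelerator-limits module ships without a test here; a minimal companion sketch in the same style, as a hypothetical `tests/kernel_features/test_accelerator_limits.py` (expected values taken from `PRESETS` in `accelerator_limits.py`):

```python
from cortex.kernel_features.accelerator_limits import ResourceLimits

def test_from_preset_training():
    limits = ResourceLimits.from_preset("job", "training", gpus=2)
    assert limits.cpu_quota == 1600           # 16 cores at 100% quota each
    assert limits.memory_max == 128 * 1024**3
    assert limits.gpu_ids == [0, 1]

def test_unknown_preset_falls_back_to_inference():
    limits = ResourceLimits.from_preset("job", "no-such-preset")
    assert limits.cpu_quota == 400
```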