mivertowski · mivertowski · Jan 8, 2026 · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026
diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml
@@ -0,0 +1,189 @@
+name: GPU Tests
+
+on:
+  # On-demand run from the Actions UI; `backend` picks which GPU job(s) execute
+  workflow_dispatch:
+    inputs:
+      backend:
+        type: choice
+        description: "GPU backend to test"
+        required: true
+        default: all
+        options:
+          - all
+          - cuda
+          - wgpu
+          - metal
+  # Fires when a label is added to a PR; the GPU jobs then look for `gpu-test`
+  pull_request:
+    types: [labeled]
+
+env:
+  RUST_BACKTRACE: 1
+  CARGO_TERM_COLOR: always
+
+jobs:
+  # CUDA job - targets a self-hosted runner labeled `gpu` + `cuda` (NVIDIA GPU required)
+  cuda-tests:
+    name: CUDA Tests
+    if: >-
+      (github.event_name == 'workflow_dispatch' &&
+      (github.event.inputs.backend == 'all' || github.event.inputs.backend == 'cuda')) ||
+      (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'gpu-test'))
+    timeout-minutes: 30
+    runs-on: [self-hosted, gpu, cuda]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      # Both tools must be present and working, otherwise the job stops here
+      - name: Check CUDA availability
+        run: nvidia-smi && nvcc --version
+
+      - name: Cache cargo
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: gpu-cuda
+
+      - name: Run CUDA codegen tests
+        run: cargo test -p ringkernel-cuda-codegen --all-features
+
+      - name: Run CUDA backend tests
+        run: cargo test -p ringkernel-cuda --features cuda
+
+      - name: Run GPU execution verification tests
+        run: cargo test -p ringkernel-cuda --test gpu_execution_verify --features cuda
+
+      # Benchmarks are best-effort: a failure here does not fail the job
+      - name: Run WaveSim3D GPU benchmark
+        continue-on-error: true
+        run: >-
+          cargo run -p ringkernel-wavesim3d --bin wavesim3d-benchmark --release --features cuda-codegen -- --quick
+
+      - name: Run TxMon GPU benchmark
+        continue-on-error: true
+        run: >-
+          cargo run -p ringkernel-txmon --bin txmon-benchmark --release --features cuda-codegen -- --quick
+
+  # WebGPU job - any Vulkan/DX12/Metal-capable host would do; here it targets self-hosted `gpu` runners
+  wgpu-tests:
+    name: WebGPU Tests
+    if: >-
+      (github.event_name == 'workflow_dispatch' &&
+      (github.event.inputs.backend == 'all' || github.event.inputs.backend == 'wgpu')) ||
+      (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'gpu-test'))
+    timeout-minutes: 20
+    runs-on: [self-hosted, gpu]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: gpu-wgpu
+
+      - name: Run WGSL codegen tests
+        run: cargo test -p ringkernel-wgpu-codegen --all-features
+
+      - name: Run WebGPU backend tests
+        continue-on-error: true  # best-effort: failures in the `--ignored` suite do not fail the job
+        run: cargo test -p ringkernel-wgpu --features wgpu-tests -- --ignored
+
+  # Metal job - macOS only, so it uses the GitHub-hosted `macos-latest` image
+  metal-tests:
+    name: Metal Tests
+    if: >-
+      (github.event_name == 'workflow_dispatch' &&
+      (github.event.inputs.backend == 'all' || github.event.inputs.backend == 'metal')) ||
+      (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'gpu-test'))
+    timeout-minutes: 20
+    runs-on: macos-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: gpu-metal
+
+      - name: Check Metal availability  # informational: the `|| echo` fallback keeps the step from failing
+        run: >-
+          system_profiler SPDisplaysDataType | grep -i metal || echo "Metal info not available"
+
+      - name: Run Metal backend tests
+        continue-on-error: true
+        run: cargo test -p ringkernel-metal --features metal
+
+      - name: Build Metal examples
+        continue-on-error: true
+        run: cargo build -p ringkernel --examples --features metal
+
+  cpu-mock-tests:  # CPU backend standing in for a GPU; no `if` gate, Linux-only (ubuntu-latest)
+    name: CPU Mock GPU Tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 30  # explicit cap like the GPU jobs; otherwise the 360-minute default applies
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo
+        uses: Swatinem/rust-cache@v2
+
+      - name: Run CPU backend tests (GPU mock)
+        run: cargo test -p ringkernel-cpu --all-features
+
+      - name: Run core tests with CPU backend
+        run: cargo test -p ringkernel-core --all-features
+
+      - name: Run ecosystem tests with CPU mock
+        run: cargo test -p ringkernel-ecosystem --features "persistent,actix,tower,axum,grpc"
+
+  benchmark-baseline:  # CPU performance baseline; manual (workflow_dispatch) runs only
+    name: Performance Baseline
+    if: github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    timeout-minutes: 45  # explicit cap so a stuck benchmark cannot run to the 360-minute default
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo
+        uses: Swatinem/rust-cache@v2
+
+      - name: Run CPU benchmarks
+        run: cargo bench --package ringkernel -- --noplot --quick
+        continue-on-error: true  # best-effort: a benchmark failure does not fail the job
+
+      - name: Run WaveSim CPU benchmark
+        run: cargo run -p ringkernel-wavesim --example benchmark --release -- --quick
+        continue-on-error: true  # best-effort, same as above
+
+  # Summary report: always runs and writes each backend job's result to the run summary
+  summary:
+    name: Test Summary
+    needs: [cuda-tests, wgpu-tests, metal-tests, cpu-mock-tests]
+    if: always()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Report Status
+        run: |
+          {
+            printf '## GPU Test Results\n\n'
+            printf '| Backend | Status |\n|---------|--------|\n'
+            echo "| CUDA | ${{ needs.cuda-tests.result }} |"
+            echo "| WebGPU | ${{ needs.wgpu-tests.result }} |"
+            echo "| Metal | ${{ needs.metal-tests.result }} |"
+            echo "| CPU Mock | ${{ needs.cpu-mock-tests.result }} |"
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -76,6 +76,12 @@ cargo test -p ringkernel-ecosystem --features "persistent,actix,tower,axum,grpc"
 
 # Run ecosystem example (Axum REST API)
 cargo run -p ringkernel-ecosystem --example axum_persistent_api --features "axum,persistent"
+
+# RingKernel CLI tool
+cargo run -p ringkernel-cli -- new my-app --template persistent-actor
+cargo run -p ringkernel-cli -- codegen src/kernels/mod.rs --backend cuda,wgsl
+cargo run -p ringkernel-cli -- check --backends all
+cargo run -p ringkernel-cli -- init --backends cuda
 ```
 
 ## Architecture
@@ -95,6 +101,7 @@ The project is a Cargo workspace with these crates:
 - **`ringkernel-cuda-codegen`** - Rust-to-CUDA transpiler for writing GPU kernels in Rust DSL
 - **`ringkernel-wgpu-codegen`** - Rust-to-WGSL transpiler for writing GPU kernels in Rust DSL (WebGPU backend)
 - **`ringkernel-ecosystem`** - Ecosystem integrations with **persistent GPU actor support** (Actix `GpuPersistentActor`, Axum REST/SSE, Tower `PersistentKernelService`, gRPC streaming)
+- **`ringkernel-cli`** - CLI tool for project scaffolding, kernel code generation, and compatibility checking
 - **`ringkernel-audio-fft`** - Example application: GPU-accelerated audio FFT processing
 - **`ringkernel-wavesim`** - Example application: 2D acoustic wave simulation with GPU-accelerated FDTD, tile-based ring kernel actors, and educational simulation modes
 - **`ringkernel-wavesim3d`** - Example application: 3D acoustic wave simulation with binaural audio, **persistent GPU actors** (H2K/K2H messaging, K2K halo exchange, cooperative groups), and volumetric ray marching visualization
@@ -114,6 +121,58 @@ The project is a Cargo workspace with these crates:
 - **`K2KBroker`/`K2KEndpoint`** - Kernel-to-kernel direct messaging
 - **`PubSubBroker`** - Topic-based publish/subscribe with wildcards
 
+### Enterprise Features (in ringkernel-core)
+
+The following enterprise-grade features provide production-ready infrastructure:
+
+- **`RingKernelContext`** - Unified runtime managing all enterprise features
+- **`RuntimeBuilder`** - Fluent builder with `development()`, `production()`, `high_performance()` presets
+- **`ConfigBuilder`** - Unified configuration system with nested builders
+
+**Health & Resilience:**
+- **`HealthChecker`** - Liveness/readiness probes with async health checks
+- **`CircuitBreaker`** - Fault tolerance with automatic recovery
+- **`DegradationManager`** - Graceful degradation with 5 levels (Normal → Critical)
+- **`KernelWatchdog`** - Stale kernel detection with heartbeat monitoring
+
+**Observability:**
+- **`PrometheusExporter`** - Prometheus metrics export
+- **`ObservabilityContext`** - Distributed tracing with spans
+
+**Multi-GPU:**
+- **`MultiGpuCoordinator`** - Device selection with load balancing strategies
+- **`KernelMigrator`** - Live kernel migration between GPUs using checkpoints
+- **`GpuTopology`** - NVLink/PCIe topology discovery
+
+**Lifecycle:**
+- **`LifecycleState`** - Initializing → Running → Draining → ShuttingDown → Stopped
+- **`ShutdownReport`** - Final statistics on graceful shutdown
+
+```rust
+// Enterprise runtime usage: build from a preset, start, health-check, guard a call, shut down
+use ringkernel_core::prelude::*;
+
+let runtime = RuntimeBuilder::new()
+    .production()  // preset; alternatives: .development() or .high_performance()
+    .build()?;
+
+runtime.start()?;  // Transition to Running state
+
+// Run one health-check cycle and print the reported status and circuit state
+let result = runtime.run_health_check_cycle();
+println!("Health: {:?}, Circuit: {:?}", result.status, result.circuit_state);
+
+// Wrap an operation in a circuit-breaker guard
+let guard = CircuitGuard::new(&runtime, "operation");
+guard.execute(|| { /* protected operation */ })?;
+
+// Graceful shutdown; the returned report carries final statistics such as total uptime
+let report = runtime.complete_shutdown()?;
+println!("Uptime: {:?}", report.total_uptime);
+```
+
+Run the enterprise demo: `cargo run -p ringkernel --example enterprise_runtime`
+
 ### Backend System
 
 Backends implement `RingKernelRuntime` trait. Selection via features: