From 5263eae02ebd37a2bb7b6012fed3fc20ea42817f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yuniel=20Acosta=20P=C3=A9rez?= <33158051+yacosta738@users.noreply.github.com> Date: Thu, 5 Mar 2026 10:49:10 +0100 Subject: [PATCH 1/4] feat: implement comprehensive auto-update system with centralized management and safety features --- .../enhance-auto-update-system/design.md | 444 ++++++++++++++++++ .../enhance-auto-update-system/exploration.md | 55 +++ .../enhance-auto-update-system/proposal.md | 96 ++++ .../specs/update-system/spec.md | 164 +++++++ .../enhance-auto-update-system/tasks.md | 81 ++++ 5 files changed, 840 insertions(+) create mode 100644 openspec/changes/enhance-auto-update-system/design.md create mode 100644 openspec/changes/enhance-auto-update-system/exploration.md create mode 100644 openspec/changes/enhance-auto-update-system/proposal.md create mode 100644 openspec/changes/enhance-auto-update-system/specs/update-system/spec.md create mode 100644 openspec/changes/enhance-auto-update-system/tasks.md diff --git a/openspec/changes/enhance-auto-update-system/design.md b/openspec/changes/enhance-auto-update-system/design.md new file mode 100644 index 000000000..5334a65a6 --- /dev/null +++ b/openspec/changes/enhance-auto-update-system/design.md @@ -0,0 +1,444 @@ +# Design: Enhance Auto-Update System + +## Technical Approach + +Implement a single `UpdateManager` orchestration in `clients/agent-runtime/src/update/mod.rs` that owns update check, policy resolution, install planning, integrity verification, install execution routing, and audit recording. All runtime surfaces (CLI startup notice, daemon poller, in-conversation flow, admin API/dashboard) consume a single normalized status model so users see the same facts everywhere. + +The design keeps current `version_check.json` compatibility, adds process-safe lock files and append-only history, and introduces explicit install method detection with user override. 
Security-critical verification is fail-closed for artifact paths. + +## Architecture Decisions + +### Decision: Central update orchestrator with shared state model + +**Choice**: Keep update logic centered in `update/mod.rs` but refactor internally into cohesive components (`policy`, `state_store`, `method_detection`, `installer`, `audit`, `notifications`) behind an `UpdateManager` API. + +**Alternatives considered**: +- Keep current free functions with incremental patches +- Split update functionality across `main`, `daemon`, and `channels` + +**Rationale**: A single orchestrator removes drift between surfaces, enables deterministic command behavior, and makes locking/verification controls enforceable in one place. + +### Decision: Fail-closed verification contract for downloadable artifacts + +**Choice**: For any installer path that downloads/stages binaries (script/binary mode), require trusted metadata + checksum verification before activation; if metadata is missing/invalid, installation is blocked. + +**Alternatives considered**: +- Best-effort verification with warnings +- Trust package manager only for all paths + +**Rationale**: Requirement mandates verification fail-closed. This prevents silent integrity bypass and keeps default posture secure. + +### Decision: Cross-process lock files + atomic rename persistence + +**Choice**: Use file-based advisory locks for cross-process serialization and temp-file/fsync/rename for atomic writes. + +**Alternatives considered**: +- Existing in-process `OnceLock<Mutex<…>>` only +- SQLite state store migration in this change + +**Rationale**: Satisfies process-safety and interruption-safety without large storage migration risk. + +### Decision: Deterministic install method selection with explicit precedence + +**Choice**: Effective method resolution order: `user override` -> `detected method` -> `manual fallback (unsupported)`. 
+ +**Alternatives considered**: +- Try every installer opportunistically +- Auto-pick first command available in PATH + +**Rationale**: Deterministic routing is auditable, scriptable, and avoids unsafe unknown install attempts. + +### Decision: Canonical update status contract reused by CLI/channel/admin + +**Choice**: Define a single `UpdateStatusView` and consume it in CLI output, channel notices, daemon push payload, and admin response. + +**Alternatives considered**: +- Per-surface formatting/state derivation + +**Rationale**: Eliminates user confusion from mismatched versions/policy flags across surfaces. + +### Decision: JSONL audit history with bounded retention + +**Choice**: Append structured events to `workspace/state/update_history.jsonl` with optional max-entry trimming. + +**Alternatives considered**: +- Store history in single mutable JSON array file +- No persistence beyond logs + +**Rationale**: JSONL is append-friendly, resilient under partial failures, and simple to inspect from CLI. 
+ +## Data Models + +```rust +// update/mod.rs (or update/types.rs) + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum InstallMethod { + Npm, + Pnpm, + Yarn, + Bun, + Homebrew, + Cargo, + ScriptBinary, + Unknown, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UpdatePolicy { + pub checks_enabled: bool, + pub auto_install_enabled: bool, // default false + pub channel_visibility_enabled: bool, + pub cli_startup_notice_enabled: bool, + pub check_interval_minutes: u64, + pub confirmation_ttl_minutes: u64, + pub install_method_override: Option<InstallMethod>, + pub restart_policy: RestartPolicy, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum RestartPolicy { + Never, + Prompt, + AutoManagedService, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UpdateStateSnapshot { + pub schema_version: u32, + pub current_version: String, + pub latest_version: String, + pub update_available: bool, + pub last_check_at_unix: u64, + pub last_check_outcome: CheckOutcome, + pub effective_method: InstallMethod, + pub detected_method: Option<InstallMethod>, + pub overridden_method: Option<InstallMethod>, + pub install_state: InstallState, + pub pending_confirmations: Vec, + pub notified_conversations: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum InstallState { + Idle, + Installing { tx_id: String, started_at_unix: u64 }, + InstalledPendingRestart { version: String, installed_at_unix: u64 }, + Failed { tx_id: String, failed_at_unix: u64, reason_code: String }, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum CheckOutcome { + Success, + NetworkError, + ParseError, + SourceRejected, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UpdateAuditEvent { + pub event_id: String, + pub timestamp_unix: u64, + pub action: AuditAction, // check | install | policy_change | verification + pub outcome: AuditOutcome, + pub current_version: String, + pub target_version: Option<String>, 
+ pub effective_method: InstallMethod, + pub actor: String, // cli:, daemon, channel:, admin + pub reason_code: Option<String>, + pub verification: Option, +} +``` + +### Config model additions + +`UpdateConfig` in `clients/agent-runtime/src/config/schema.rs` is extended with: + +- `auto_install_enabled: bool` (default `false`) +- `channel_visibility_enabled: bool` (default `true`) +- `cli_startup_notice_enabled: bool` (default `true`) +- `install_method_override: Option` (validated enum) +- `restart_policy: String` (`never|prompt|auto_managed_service`, default `prompt`) +- `history_max_entries: u32` (bounded retention) + +Environment override keys (deterministic precedence over file): + +- `CORVUS_UPDATES_ENABLED` +- `CORVUS_UPDATE_AUTO_INSTALL` +- `CORVUS_UPDATE_CHANNEL_VISIBILITY` +- `CORVUS_UPDATE_CLI_NOTICE` +- `CORVUS_UPDATE_METHOD_OVERRIDE` +- `CORVUS_UPDATE_RESTART_POLICY` +- existing `CORVUS_DISABLE_UPDATE_CHECK` remains hard-disable gate + +Invalid env values are ignored with a warning and never relax to less-safe behavior. 
+ +## State Transitions + +Install transaction state machine: + +| Current | Trigger | Guard | Next | Notes | +|---|---|---|---|---| +| `Idle` | `update install` requested | lock acquired, policy allows | `Installing` | tx_id generated and persisted before execution | +| `Installing` | installer success + verification success | target version valid | `InstalledPendingRestart` | restart policy evaluated after state write | +| `Installing` | verification failed | always | `Failed` | fail-closed; no activation | +| `Installing` | method unsupported/prereq missing | always | `Failed` | deterministic manual instructions | +| `Installing` | second concurrent request | install lock denied | unchanged | requester gets busy/deferred result | +| `InstalledPendingRestart` | restart completed | managed service restart succeeds/manual restart acknowledged | `Idle` | current version updates on next process start/check | +| `Failed` | new install request | lock acquired | `Installing` | new tx_id | + +## Locking and Atomic Write Strategy + +### Files + +- `workspace/state/version_check.json` (state snapshot; backward-compatible path) +- `workspace/state/update_history.jsonl` (audit append log) +- `workspace/state/update_state.lock` (general state mutation lock) +- `workspace/state/update_install.lock` (single active install transaction) + +### Locking model + +1. Acquire `update_state.lock` for load-mutate-save of `version_check.json`. +2. For installation, acquire `update_install.lock` first, then `update_state.lock` (fixed order) to avoid deadlock. +3. Lock acquisition timeout returns deterministic busy outcome (`EXIT_BUSY`) without partial changes. +4. Keep existing in-process mutex as secondary guard, but file lock is authoritative across processes. + +### Atomic persistence + +1. Serialize snapshot to bytes. +2. Write to `version_check.json.tmp.<unique-suffix>`. +3. `sync_all` temporary file. +4. `rename` temp -> `version_check.json` (atomic replace). +5. `sync_directory(parent)`. +6. 
Re-read and parse for post-write sanity; on failure, emit audit failure and preserve last good snapshot. + +History append uses lock + append + fsync semantics; truncation/compaction (if entry cap exceeded) writes a new temp file atomically. + +## Method Detection Strategy + +`resolve_effective_install_method()`: + +1. Validate configured override (`updates.install_method_override` or env). If valid, use it and mark source `override`. +2. If no override: + - detect Homebrew by executable path prefixes and brew metadata query + - detect Cargo via executable path/cargo home and `cargo install --list` + - detect npm/pnpm/yarn/bun via package-manager global package inspection + - detect script/binary via unmanaged binary location heuristics +3. If none detected, set `Unknown` and return manual fallback plan only. + +Detection output includes confidence + source for audit/status. Unsupported methods never trigger unsafe generic shell paths. + +## Command Flow + +New `corvus update` command tree in `clients/agent-runtime/src/main.rs`: + +- `update status` + - loads effective policy + latest snapshot + - prints current/latest version, update availability, method (detected/effective), policy flags + - exit 0 on resolvable status + +- `update check` + - forces remote check (bypasses TTL), records check audit event + - updates snapshot atomically + - exit 0 when check succeeds (update may or may not be available), non-zero on check failure + +- `update install` + - acquires install lock + - resolves policy + method + - verifies artifacts for download paths (fail-closed) + - executes method strategy or emits deterministic manual fallback + - records install + verification audit events + - exit codes: success / no-update / blocked / busy / failed + +- `update auto-enable` / `update auto-disable` + - toggles `updates.auto_install_enabled` in config + - persists config atomically via existing config save path + - records policy_change audit event + +- `update history` + - reads 
`update_history.jsonl` in chronological order + - supports deterministic text and machine-readable JSON output mode + +Compatibility: `corvus update confirm <nonce>` remains for channel nonce confirmations; it is treated as an internal/advanced path and routed through the same install transaction guard. + +## Notification Fan-Out Design + +Canonical message payload (`UpdateNotificationPayload`) is produced once and routed to sinks: + +1. CLI startup banner (`maybe_print_update_notice`) when `cli_startup_notice_enabled`. +2. In-conversation opportunistic mention (`channels/mod.rs`) when `channel_visibility_enabled` and sender authorized. +3. Daemon push notifications (`run_daemon_update_watcher`) to configured destinations. +4. Admin API (`gateway/admin.rs`) exposes latest status/policy for dashboard. + +Dedupe key: `(latest_version, channel, recipient, authorized_sender)` using existing conversation dedupe semantics, now aligned with canonical status snapshot. + +## Data Flow + +```text +CLI/Daemon/Channel/Admin + | + v + UpdateManager + | | | + | | +--> MethodResolver + | +-------> Verifier (checksum, fail-closed) + +-----------> StateStore (lock + atomic write) + | + +--> version_check.json + +--> update_history.jsonl +``` + +### Sequence: `corvus update install` + +```mermaid +sequenceDiagram + participant U as User/CLI + participant M as UpdateManager + participant L as LockManager + participant S as StateStore + participant D as MethodResolver + participant V as Verifier + participant I as Installer + participant A as AuditLog + + U->>M: update install + M->>L: acquire install + state locks + L-->>M: lock granted or busy + M->>S: load snapshot + M->>D: resolve effective method + D-->>M: method + source + M->>V: verify metadata/artifact (if required) + V-->>M: pass/fail + alt verification pass + M->>I: execute method strategy + I-->>M: success/failure + end + M->>S: atomic save new state + M->>A: append install/verification events + M->>L: release locks + M-->>U: 
deterministic result + exit code +``` + +### Sequence: daemon check and fan-out + +```mermaid +sequenceDiagram + participant D as Daemon updater + participant M as UpdateManager + participant S as StateStore + participant N as Notification fan-out + participant C as Channel sinks/Admin status + + D->>M: poll(check_interval) + M->>S: load snapshot (lock) + M->>M: fetch latest + evaluate policy + M->>S: atomic save status + M->>N: build canonical payload + N->>C: push deduped notifications + M->>S: append check audit event +``` + +## Interfaces / Contracts + +```rust +pub struct UpdateStatusView { + pub current_version: String, + pub latest_version: Option<String>, + pub update_available: bool, + pub last_check_at_unix: Option<u64>, + pub last_check_outcome: Option<String>, + pub effective_install_method: String, + pub detected_install_method: Option<String>, + pub install_method_source: String, // override|detected|unknown + pub policy: UpdatePolicyView, +} + +pub struct UpdatePolicyView { + pub checks_enabled: bool, + pub auto_install_enabled: bool, + pub channel_visibility_enabled: bool, + pub cli_startup_notice_enabled: bool, + pub restart_policy: String, +} +``` + +Admin contract extension (`gateway/admin.rs`, dashboard type mirror): + +- `config.updates` section in admin payload with policy + effective status fields +- keep secret-safe response discipline (no tokens, no raw lock paths) + +## File Changes + +| File | Action | Description | +|------|--------|-------------| +| `clients/agent-runtime/src/update/mod.rs` | Modify | Introduce `UpdateManager`, method resolution, lock/atomic state store, verification gate, audit events, history read API | +| `clients/agent-runtime/src/main.rs` | Modify | Add `update` subcommands (`status/check/install/auto-enable/auto-disable/history`) and deterministic exit handling | +| `clients/agent-runtime/src/config/schema.rs` | Modify | Extend `UpdateConfig`, defaults, env overrides, and validation for override enums/policy values | +| 
`clients/agent-runtime/src/channels/mod.rs` | Modify | Route opportunistic/confirm flows through canonical status + policy gating and unified notification payload | +| `clients/agent-runtime/src/daemon/mod.rs` | Modify | Keep updater supervisor, call new manager APIs, emit health/audit-friendly outcomes | +| `clients/agent-runtime/src/service/mod.rs` | Modify | Add restart integration hook consumption for `InstalledPendingRestart` handling when policy requires managed restart | +| `clients/agent-runtime/src/gateway/admin.rs` | Modify | Extend admin config/status view with update state and policy contract | +| `clients/web/apps/dashboard/src/types/admin-config.ts` | Modify | Add strongly-typed `updates` fields mirroring admin API contract | + +## Security Controls + +- Release source allowlist: only configured trusted GitHub release endpoints are accepted. +- Verification fail-closed: missing checksum metadata, download failure, or digest mismatch blocks activation. +- No shell-string execution for installer commands; use fixed binary + arg vectors. +- Confirmation nonces remain hashed at rest and validated with sender/channel binding. +- Lock/state/history files created with owner-restricted permissions where supported. +- Env override validation never weakens safety defaults on parse failure (warn + ignore invalid). + +## Observability and Audit + +- Structured tracing spans: `update.check`, `update.install`, `update.verify`, `update.notify` with outcome tags. +- Audit event classes: check, install_attempt, install_result, verification_result, policy_change, restart_action. +- `corvus update history` reads structured events from `update_history.jsonl` (chronological output). +- Daemon component health remains integrated via `daemon/mod.rs` supervisor markers. 
+ +## Testing Strategy + +| Layer | What to Test | Approach | +|-------|-------------|----------| +| Unit | method detection precedence, policy/env precedence, invalid override handling, state machine transitions | Rust unit tests in `update/mod.rs` and `config/schema.rs` | +| Unit | atomic writer and lock contention behavior | tempdir-based tests with parallel tasks/process simulation | +| Integration | CLI command exit semantics and output contracts | command tests for `update status/check/install/auto-enable/auto-disable/history` | +| Integration | channel confirmation + opportunistic mention gating | channel test harness in `channels/mod.rs` with fake channel | +| Integration | admin response parity with update status model | gateway admin handler tests + dashboard TS type checks | +| Resilience | interrupted write recovery and busy install response | fault-injection tests around temp write/rename and lock denial | + +## Migration / Rollout + +No destructive migration required. + +- Existing `version_check.json` is read and upgraded in-memory to new snapshot schema (`schema_version`). +- Missing fields default safely. +- History file is additive (`update_history.jsonl`), created on first event. + +## Phased Implementation Plan + +### Phase 1: Safety + command foundation + +1. Add `update status|check|install` command surface and exit code mapping. +2. Implement lock manager + atomic state writes for `version_check.json`. +3. Introduce install state machine and install transaction guard. +4. Add method detection + deterministic unsupported fallback. + +### Phase 2: Policy model + multi-surface visibility + +1. Extend config schema/env overrides with safe defaults and validation. +2. Add `update auto-enable|auto-disable` and status reflection. +3. Unify canonical payload fan-out for CLI/channel/daemon. +4. Expose update policy/status in admin gateway + dashboard types. + +### Phase 3: Verification hardening + auditability + +1. 
Enforce checksum verification fail-closed for artifact paths. +2. Append structured update audit events and expose `update history`. +3. Integrate service restart policy handling for managed daemon mode. +4. Add fault-injection and concurrency tests for interruption/race resilience. + +## Open Questions + +- [ ] Signature verification backend selection (Sigstore/GPG) is deferred; this design adds extension points but mandates checksum now. +- [ ] Final release source canonicalization (`profiletailors` vs `dallay`) should be confirmed before implementation freeze. diff --git a/openspec/changes/enhance-auto-update-system/exploration.md b/openspec/changes/enhance-auto-update-system/exploration.md new file mode 100644 index 000000000..62c6ad2d0 --- /dev/null +++ b/openspec/changes/enhance-auto-update-system/exploration.md @@ -0,0 +1,55 @@ +## Exploration: Enhance Auto-Update System + +### Current State +Auto-update behavior is centralized in `clients/agent-runtime/src/update/mod.rs` and currently covers three surfaces: CLI startup notices, daemon background polling, and in-conversation channel nudges. + +- **Release detection** uses GitHub `releases/latest` endpoints (`profiletailors/corvus`, fallback `dallay/corvus`) with a 2s HTTP timeout and a 24h cache TTL, persisted in `workspace/state/version_check.json`. +- **CLI visibility** is limited to `agent` and `status` command paths (`clients/agent-runtime/src/main.rs`), where a best-effort bounded check prints a banner with manual update commands. +- **Daemon visibility** runs a supervisor-managed updater worker (`clients/agent-runtime/src/daemon/mod.rs`) that periodically checks and pushes channel notifications when destinations are configured. +- **Channel flow** supports opportunistic in-conversation update mentions and nonce-based confirmation (`corvus update confirm <nonce>`) before attempting auto-install. 
+- **Auto-install execution** is currently a minimal strategy: try `npm`/`pnpm`/`yarn`/`bun` global install commands, otherwise return manual instructions. No install-method detection exists. +- **Config surface** has `updates.enabled`, `updates.check_interval_minutes`, `updates.confirmation_ttl_minutes`, and `updates.notify_destinations` in `clients/agent-runtime/src/config/schema.rs`; no dedicated env overrides for these fields exist yet. Only `CORVUS_DISABLE_UPDATE_CHECK` globally disables checks. +- **Security posture** is mixed: install script (`clients/web/apps/marketing/public/install`) verifies SHA-256 and stages binary writes, but runtime auto-install path does not verify artifacts itself and relies on package manager behavior. + +### Key Touchpoints +- `clients/agent-runtime/src/update/mod.rs` — core update detection, notice text, daemon polling, nonce confirmation, and auto-install behavior. +- `clients/agent-runtime/src/main.rs` — CLI routing and startup banner trigger points; location for new `update` subcommands. +- `clients/agent-runtime/src/channels/mod.rs` — pre-memory nonce interception and opportunistic in-conversation update mention. +- `clients/agent-runtime/src/daemon/mod.rs` — updater worker supervision and daemon lifecycle coupling. +- `clients/agent-runtime/src/config/schema.rs` — update config schema/defaults and env-override extension point. +- `clients/agent-runtime/src/service/mod.rs` — service restart/start/stop hooks relevant to safe post-install daemon handling. +- `clients/agent-runtime/src/gateway/admin.rs` and `clients/web/apps/dashboard/src/types/admin-config.ts` — API/UI extension points for visible update indicators/configuration in dashboard clients. +- `clients/agent-runtime/npm/corvus-cli/lib/install.js` and `clients/web/apps/marketing/public/install` — installation-channel behavior differences (npm wrapper installer vs shell installer). 
+- `.github/workflows/_publish.yml` — release asset checksum generation; base for stronger artifact verification policy. + +### Risks +- **Process safety/races**: update state locking is in-process (`OnceLock<Mutex<…>>`) only; concurrent CLI + daemon processes can still race on `version_check.json`. +- **Non-atomic persistence**: update state writes use direct file writes, unlike the config path's temp-file + rename strategy. +- **Install-method ambiguity**: runtime cannot reliably determine whether user installed via npm/pnpm/yarn/bun, direct binary, script, cargo, or homebrew. +- **Security gap in auto-install**: runtime-side auto-install does not perform explicit artifact integrity verification for binary/script paths. +- **Operational disruption risk**: applying updates while daemon/service is active can leave mixed binary/runtime state without coordinated restart/session handling. +- **Version/source drift**: mixed org/repo/package references (`profiletailors` vs `dallay`) increase risk of wrong source selection. + +### Open Questions and Assumptions +- **Client scope**: "client UI indicators" is assumed to include CLI + channel conversations + dashboard/web admin surfaces, not native mobile/desktop apps in this repository. +- **Auto-install policy default**: assumed default should remain safe/explicit (check + notify by default, auto-install opt-in). +- **Trust model**: need confirmation whether checksum-only verification is acceptable, or whether signed provenance (e.g., Sigstore/GPG) is required for runtime auto-install. +- **Install methods in execution scope**: requirement includes detecting `npm/pnpm/yarn/bun`, binary/script, `homebrew`, and `cargo`; assumption is execution MAY be supported for subset initially, with graceful/manual fallback for unsupported methods. +- **Daemon handling contract**: need product decision on whether updater should auto-restart managed services or stage update and require explicit `corvus service restart`. 
+- **Channel confirmation UX**: assumption is nonce confirmation remains mandatory for channel-initiated install unless a strict local policy setting allows unattended updates. + +### Recommended Scope Boundaries +- **In scope (phase 1)** + - Add a first-class `corvus update` command group (`check`, `install`, `status`, and confirmation plumbing as needed). + - Introduce install-method detection and persistence (detected + user-overridable) with a safe fallback matrix. + - Expand update config with explicit policy knobs (auto-check cadence, auto-install mode, restart behavior, visibility channels) plus env overrides. + - Implement process-safe/atomic update state and install transaction guards. + - Unify notification payloads across CLI banner, in-conversation mention, and machine-readable indicator endpoints. + - Add focused tests for detection, policy gating, atomic state transitions, confirmation safety, and command UX. +- **Out of scope (phase 1)** + - Re-architecting release pipeline/package ecosystem beyond verification metadata consumption. + - Building a full standalone update UI in unrelated clients; expose API/typed fields first, then incremental frontend adoption. + - Force-updating running sessions without explicit restart strategy and rollback semantics. + +### Ready for Proposal +Yes. The codebase already has a clear update nucleus and insertion points for proactive visibility, safe auto-install policy, install-method detection, and client-facing indicators. Proposal should lock security invariants first (verification + atomicity + restart safety), then define phased UX rollout. 
diff --git a/openspec/changes/enhance-auto-update-system/proposal.md b/openspec/changes/enhance-auto-update-system/proposal.md new file mode 100644 index 000000000..b4619b9e6 --- /dev/null +++ b/openspec/changes/enhance-auto-update-system/proposal.md @@ -0,0 +1,96 @@ +# Proposal: Enhance Auto-Update System + +## Problem + +The current update flow is fragmented and only partially safe: visibility is inconsistent across CLI, daemon, and in-conversation channels; auto-install support is limited to a few package managers; update state persistence is not atomic across processes; and runtime auto-install lacks explicit artifact verification and auditability. This creates user confusion, security risk, and operational instability when mixed runtime versions are active. + +## Goals + +- Provide proactive update visibility across CLI startup, in-conversation prompts, and client UI/admin surfaces. +- Add explicit, safe-by-default auto-update policy with opt-in auto-install and environment overrides. +- Detect installation method and execute method-specific update routines (npm/pnpm/yarn/bun, binary/script, homebrew, cargo), with deterministic fallback. +- Make update operations process-safe and atomic across concurrent CLI/daemon processes. +- Add first-class `corvus update` command group: `status`, `check`, `install`, `auto-enable`, `auto-disable`, `history`. +- Enforce artifact integrity/security checks (checksum first, signature-ready contract) and produce auditable update events. + +## Non-Goals + +- Redesigning release publishing pipelines beyond consuming existing checksum/signature metadata. +- Shipping full UX redesigns for unrelated clients; this change focuses on shared indicators and admin/dashboard integration points. +- Implementing zero-downtime binary hot-swap or full rollback orchestration for all runtime modes. + +## High-Level Approach + +1. 
Build an `UpdateManager` flow in `clients/agent-runtime/src/update/mod.rs` that unifies check, policy evaluation, install planning, verification, and event recording. +2. Introduce install-method detection and persistence (detected + user override), then route installs through method executors with explicit unsupported-method handling. +3. Add a dedicated `update` command tree in `clients/agent-runtime/src/main.rs` for interactive and scriptable operations. +4. Replace non-atomic update state writes (`workspace/state/version_check.json`) with temp-file + fsync + atomic rename semantics and inter-process file locking. +5. Extend config schema in `clients/agent-runtime/src/config/schema.rs` with auto-update policy knobs and env overrides (keeping safe defaults). +6. Normalize notification payloads for CLI banners, channel messages (`clients/agent-runtime/src/channels/mod.rs`), daemon push notifications (`clients/agent-runtime/src/daemon/mod.rs`), and gateway/admin API exposure (`clients/agent-runtime/src/gateway/admin.rs`, `clients/web/apps/dashboard/src/types/admin-config.ts`). +7. Add security verification gates before install and append structured audit log events to update history. + +## Phased Scope + +### Phase 1: Safety and Command Foundation +- Add `corvus update status|check|install` command surface. +- Implement atomic state persistence, inter-process locking, and single-install transaction guards. +- Add install-method detection for currently supported methods and robust manual fallback. +- Standardize update status model used by CLI and daemon. + +### Phase 2: Auto-Update Policy and Visibility Expansion +- Add `auto-enable`, `auto-disable`, and policy/env override support. +- Unify proactive notifications across CLI/in-conversation/daemon channels. +- Expose update status + policy fields through admin gateway and dashboard types. 
+ +### Phase 3: Verification Hardening and Auditability +- Enforce checksum verification for downloaded artifacts and define signature-verification extension points. +- Add `corvus update history` backed by structured audit events. +- Add daemon-safe restart/staging behavior to avoid mixed-version runtime state. + +## Affected Areas + +| Area | Impact | Description | +|------|--------|-------------| +| `clients/agent-runtime/src/update/mod.rs` | Modified | Core update manager, method detection/execution, verification gates, history events | +| `clients/agent-runtime/src/main.rs` | Modified | New `corvus update` subcommands and CLI wiring | +| `clients/agent-runtime/src/channels/mod.rs` | Modified | In-conversation visibility and nonce-confirmed install handoff | +| `clients/agent-runtime/src/daemon/mod.rs` | Modified | Polling, notification, and safe install coordination | +| `clients/agent-runtime/src/config/schema.rs` | Modified | Auto-update policy schema/defaults/env overrides | +| `clients/agent-runtime/src/service/mod.rs` | Modified | Controlled restart/staging integration after install | +| `clients/agent-runtime/src/gateway/admin.rs` | Modified | Update status/policy fields for client UI visibility | +| `clients/web/apps/dashboard/src/types/admin-config.ts` | Modified | Typed update indicator and config fields | +| `workspace/state/version_check.json` (+ lock/history peers) | Modified/New | Atomic state, lock coordination, and audit history storage | + +## Risks and Mitigations + +| Risk | Likelihood | Mitigation | +|------|------------|------------| +| Concurrent CLI + daemon update races | High | Inter-process file locks + transaction state machine + idempotent install steps | +| Partial/corrupt update state writes | Medium | Temp-file write, fsync, atomic rename, and read-after-write validation | +| Wrong install strategy selected | Medium | Detection priority matrix, persisted method override, explicit dry-run/status output | +| Integrity bypass in 
non-package-manager paths | High | Mandatory checksum verification; signature verification hook and fail-closed policy | +| Runtime disruption from mixed versions | Medium | Staged install markers and coordinated service restart gating | +| Source/repo drift for version checks | Medium | Canonical source configuration and strict endpoint validation | + +## Rollback Plan + +- Keep existing startup banner + manual update pathway behind compatibility path while new command group is introduced. +- Guard new auto-update/install-method logic behind feature flags or config toggles so behavior can revert to notify-only mode. +- If regressions appear, disable auto-install policy defaults, retain check-only flow, and revert command handlers to existing behavior without deleting stored history. +- Revert affected modules in a single patch set (`update`, `main`, `daemon`, `channels`, `config`, `gateway`, dashboard types) and preserve state files for postmortem. + +## Dependencies + +- Existing GitHub release metadata and checksum artifacts from `.github/workflows/_publish.yml`. +- Existing installer paths (`clients/agent-runtime/npm/corvus-cli/lib/install.js`, `clients/web/apps/marketing/public/install`) for method heuristics and verification alignment. + +## Acceptance Criteria + +- [ ] `corvus update status|check|install|auto-enable|auto-disable|history` are available and return deterministic exit codes. +- [ ] Default policy is safe (`check+notify` enabled, auto-install disabled) and env overrides are documented and effective. +- [ ] Installation method is detected (or explicitly overridden), surfaced in `status`, and used for method-specific execution/fallback. +- [ ] Concurrent update attempts do not corrupt state or run parallel installs. +- [ ] Update state writes are atomic and recoverable after interruption. +- [ ] Runtime install path performs artifact integrity verification before activation. 
+- [ ] CLI, in-conversation, and admin/dashboard surfaces expose consistent update availability and policy status. +- [ ] Update attempts and outcomes are persisted in audit history and viewable via `corvus update history`. diff --git a/openspec/changes/enhance-auto-update-system/specs/update-system/spec.md b/openspec/changes/enhance-auto-update-system/specs/update-system/spec.md new file mode 100644 index 000000000..6f989dba2 --- /dev/null +++ b/openspec/changes/enhance-auto-update-system/specs/update-system/spec.md @@ -0,0 +1,164 @@ +# Update System Specification + +## Purpose + +Define a secure, observable, and deterministic update experience across CLI, conversation channels, and client-facing admin surfaces. + +## Requirements + +### Requirement: Multi-Surface Update Visibility + +The system MUST expose consistent update availability and update policy state at CLI startup, during eligible in-conversation interactions, and through client-facing admin/status surfaces. + +#### Scenario: CLI startup shows update availability + +- GIVEN update checks are enabled and a newer version is available +- WHEN a supported CLI entrypoint starts +- THEN the user is shown an update notice during startup +- AND the notice includes the current version, available version, and actionable next step + +#### Scenario: In-conversation mention is policy-gated + +- GIVEN update checks are enabled and a conversation channel is active +- WHEN update availability is evaluated during conversation flow +- THEN the system surfaces an update mention only when channel visibility is enabled by policy +- AND the mention uses the same version/status facts as CLI and admin surfaces + +#### Scenario: Client/admin surface reflects same status model + +- GIVEN the runtime has a computed update status and policy state +- WHEN a client/admin status endpoint is queried +- THEN the response includes update availability, current version, available version, last check result, and policy flags +- AND the 
values are consistent with the latest CLI-visible status + +### Requirement: Update Configuration Model and Safe Defaults + +The system MUST provide a structured update configuration model with safe defaults, where automatic checks and notifications are enabled by default and automatic installation is disabled by default. + +#### Scenario: Default policy is safe-by-default + +- GIVEN no user-specific update configuration is set +- WHEN the runtime resolves effective update policy +- THEN update checks and visibility notifications are enabled +- AND automatic installation is disabled + +#### Scenario: Environment override precedence is deterministic + +- GIVEN configuration file values and one or more update-related environment overrides are present +- WHEN effective update policy is resolved +- THEN environment overrides take precedence over persisted configuration +- AND only explicitly provided environment keys alter effective values + +#### Scenario: Invalid environment override fails safely + +- GIVEN an invalid value for an update-related environment override +- WHEN effective policy resolution is attempted +- THEN the system does not apply the invalid override +- AND the system records a validation warning without enabling less-safe behavior + +### Requirement: Installation Method Detection and Execution Routing + +The system MUST determine an effective installation method (detected or user-overridden), route update execution through the method-specific strategy, and provide deterministic fallback instructions when unsupported or unavailable. 
+ +#### Scenario: Supported method is detected and used + +- GIVEN the runtime can infer a supported installation method for the current installation +- WHEN `update install` is requested +- THEN the system selects that method as the execution strategy +- AND reports the selected method in status/output + +#### Scenario: User override takes priority over detection + +- GIVEN a valid user-configured installation method override exists +- WHEN install planning is performed +- THEN the system uses the override as the effective method +- AND the chosen method is explicitly surfaced in status/output + +#### Scenario: Unsupported method falls back safely + +- GIVEN no supported execution strategy is available for the effective installation method +- WHEN installation is requested +- THEN the system does not attempt an unsafe or unknown install path +- AND returns deterministic manual update instructions with non-success status + +### Requirement: Process Safety and Atomic Update State + +The system MUST prevent concurrent install transactions across processes and MUST persist update state atomically such that interrupted writes do not produce corrupt state. 
+ +#### Scenario: Concurrent install attempts are serialized + +- GIVEN two update install requests arrive from different runtime processes +- WHEN both attempt to start an install transaction +- THEN at most one install transaction becomes active +- AND the other request receives a deterministic busy or deferred outcome without corrupting state + +#### Scenario: Update state write is interruption-safe + +- GIVEN an update state persistence operation is interrupted before completion +- WHEN the system next loads update state +- THEN it reads a valid previous or completed state snapshot +- AND it does not read a partially written state artifact + +### Requirement: CLI Update Command Contract + +The CLI MUST provide `update status`, `update check`, `update install`, `update auto-enable`, `update auto-disable`, and `update history` commands with deterministic outputs and exit semantics suitable for interactive and scripted use. + +#### Scenario: `update status` reports effective state + +- GIVEN update metadata and effective policy are available +- WHEN the user runs `update status` +- THEN output includes current version, latest known version status, installation method, and auto-update policy state +- AND the command returns success when status can be resolved + +#### Scenario: `update check` performs explicit refresh + +- GIVEN network access to update source is available +- WHEN the user runs `update check` +- THEN the command performs an explicit availability check +- AND output reflects whether an update is available with deterministic success/failure signaling + +#### Scenario: `update install` enforces policy and method routing + +- GIVEN an update is available +- WHEN the user runs `update install` +- THEN the command evaluates policy and effective installation method before execution +- AND returns a non-success result when prerequisites fail or install cannot proceed safely + +#### Scenario: `update auto-enable` and `update auto-disable` toggle policy + +- 
GIVEN a user with permission to modify local runtime configuration +- WHEN the user runs `update auto-enable` or `update auto-disable` +- THEN the effective auto-install policy is updated accordingly +- AND `update status` reflects the new policy state in the same session + +#### Scenario: `update history` returns auditable events + +- GIVEN one or more prior update checks or install attempts exist +- WHEN the user runs `update history` +- THEN the command returns chronologically ordered update events +- AND each entry includes enough metadata to identify what occurred and outcome class + +### Requirement: Integrity Verification and Audit Logging + +The system MUST verify artifact integrity before activation for update paths that consume downloadable artifacts, MUST fail closed on verification failure, and MUST append structured audit events for update checks and install attempts. + +#### Scenario: Successful verification permits activation + +- GIVEN an install path that downloads or stages an artifact +- WHEN integrity verification succeeds against trusted release metadata +- THEN the update may proceed to activation +- AND an audit event records verification success and install outcome + +#### Scenario: Verification failure blocks activation + +- GIVEN an install path that downloads or stages an artifact +- WHEN integrity verification fails or required verification metadata is unavailable +- THEN the update MUST NOT activate the new artifact +- AND the system returns a non-success install result with a recorded audit failure event + +#### Scenario: Audit history includes both checks and installs + +- GIVEN periodic checks and user-initiated installs occur over time +- WHEN update audit history is queried +- THEN the history contains both check events and install events +- AND each event includes timestamp, action type, effective method, and outcome classification diff --git a/openspec/changes/enhance-auto-update-system/tasks.md 
b/openspec/changes/enhance-auto-update-system/tasks.md new file mode 100644 index 000000000..1a9b7b910 --- /dev/null +++ b/openspec/changes/enhance-auto-update-system/tasks.md @@ -0,0 +1,81 @@ +# Tasks: Enhance Auto-Update System + +## Phase Dependencies and Sequencing + +- Phase 1 -> Phase 2: shared update policy/model and env parsing must exist before CLI/state wiring. +- Phase 2 -> Phase 3: canonical status and notification payload must be stable before channel/daemon/admin fan-out. +- Phase 3 -> Phase 4: unified surfaces and policy toggles must be in place before verification hardening/history UX. +- Phase 4 -> Phase 5: implementation is complete before end-to-end verification and regression. + +## Phase 1: Policy and State Foundation (TDD) + +- [ ] 1.1 Add RED unit tests in `clients/agent-runtime/src/config/schema.rs` for new update fields/defaults and env override precedence (`CORVUS_UPDATES_ENABLED`, `CORVUS_UPDATE_AUTO_INSTALL`, `CORVUS_UPDATE_CHANNEL_VISIBILITY`, `CORVUS_UPDATE_CLI_NOTICE`, `CORVUS_UPDATE_METHOD_OVERRIDE`, `CORVUS_UPDATE_RESTART_POLICY`) including invalid-value fail-safe behavior. +- [ ] 1.2 Implement GREEN schema updates in `clients/agent-runtime/src/config/schema.rs` for `auto_install_enabled`, `channel_visibility_enabled`, `cli_startup_notice_enabled`, `install_method_override`, `restart_policy`, and `history_max_entries` with safe defaults and validation. +- [ ] 1.3 Add RED unit tests in `clients/agent-runtime/src/update/mod.rs` for policy resolution, install method precedence (`override -> detected -> unknown`), and install/check state transition invariants. +- [ ] 1.4 Implement GREEN core model/types in `clients/agent-runtime/src/update/mod.rs` (`InstallMethod`, `RestartPolicy`, `UpdatePolicy`, `UpdateStateSnapshot`, `InstallState`, `CheckOutcome`, `UpdateStatusView`) and refactor duplicate policy/state mapping helpers (REFACTOR). + +Verification criteria (Phase 1): +- New config/env tests pass and prove safe-by-default behavior. 
+- Update model/state tests pass and prove deterministic method and policy resolution. + +## Phase 2: CLI Commands, Locking, and Atomic State (TDD) + +- [ ] 2.1 Add RED CLI command tests for `update status`, `update check`, and `update install` deterministic output/exit semantics in runtime command test coverage associated with `clients/agent-runtime/src/main.rs`. +- [ ] 2.2 Implement GREEN `update status|check|install` command wiring and exit code mapping in `clients/agent-runtime/src/main.rs`, routed through `UpdateManager` entrypoints. +- [ ] 2.3 Add RED concurrency/resilience tests in `clients/agent-runtime/src/update/mod.rs` (or update-focused runtime tests) for cross-process busy outcomes and interrupted-write recovery of `workspace/state/version_check.json`. +- [ ] 2.4 Implement GREEN file-lock and atomic persistence flow in `clients/agent-runtime/src/update/mod.rs` (`update_state.lock`, `update_install.lock`, temp-file + fsync + rename + directory sync) plus single-install transaction guard. +- [ ] 2.5 Implement deterministic install-method execution routing and unsupported fallback messaging in `clients/agent-runtime/src/update/mod.rs` without unsafe generic shell execution. + +Dependencies: +- Depends on Phase 1 policy/model contracts. + +Verification criteria (Phase 2): +- `update status|check|install` behavior is script-stable and test-covered. +- Concurrent install attempts serialize correctly; state file remains valid after simulated interruption. + +## Phase 3: Multi-Surface Visibility and Policy Controls (TDD) + +- [ ] 3.1 Add RED tests in `clients/agent-runtime/src/channels/mod.rs` for channel visibility gating and canonical update payload parity with CLI status. +- [ ] 3.2 Implement GREEN channel integration in `clients/agent-runtime/src/channels/mod.rs` so opportunistic mentions and nonce-confirm flow use canonical status/policy gates. 
+- [ ] 3.3 Add RED daemon watcher tests in `clients/agent-runtime/src/daemon/mod.rs` for check interval behavior, deduped notifications, and policy-aware fan-out. +- [ ] 3.4 Implement GREEN daemon updater integration in `clients/agent-runtime/src/daemon/mod.rs` using canonical update payload and shared manager APIs. +- [ ] 3.5 Add RED admin contract tests in `clients/agent-runtime/src/gateway/admin.rs` and TypeScript compatibility checks in `clients/web/apps/dashboard/src/types/admin-config.ts` for `config.updates` status/policy fields. +- [ ] 3.6 Implement GREEN admin API and dashboard type updates in `clients/agent-runtime/src/gateway/admin.rs` and `clients/web/apps/dashboard/src/types/admin-config.ts`, preserving secret-safe response discipline. +- [ ] 3.7 Add RED/GREEN tasks in `clients/agent-runtime/src/main.rs` and `clients/agent-runtime/src/config/schema.rs` for `update auto-enable` and `update auto-disable`, ensuring persisted policy toggles are reflected in same-session `update status`. + +Dependencies: +- Depends on Phase 2 canonical status contract and manager entrypoints. + +Verification criteria (Phase 3): +- CLI, channel, daemon, and admin surfaces expose consistent version/policy facts. +- Policy toggles (`auto-enable/auto-disable`) persist atomically and reflect immediately. + +## Phase 4: Integrity Verification, History, and Restart Safety (TDD) + +- [ ] 4.1 Add RED verification tests in `clients/agent-runtime/src/update/mod.rs` for checksum-required artifact paths, missing metadata failures, digest mismatch failures, and fail-closed install blocking. +- [ ] 4.2 Implement GREEN verification gate and structured verification/install audit event recording in `clients/agent-runtime/src/update/mod.rs`. 
+- [ ] 4.3 Add RED tests for `update history` ordering and schema expectations, then implement GREEN command + history reader wiring in `clients/agent-runtime/src/main.rs` and `clients/agent-runtime/src/update/mod.rs` backed by `workspace/state/update_history.jsonl`. +- [ ] 4.4 Add RED restart-policy integration tests in `clients/agent-runtime/src/service/mod.rs` and daemon-facing update handling, then implement GREEN `InstalledPendingRestart` handling for `never|prompt|auto_managed_service` behavior. +- [ ] 4.5 Refactor duplicated audit/restart decision code in `clients/agent-runtime/src/update/mod.rs`, `clients/agent-runtime/src/daemon/mod.rs`, and `clients/agent-runtime/src/service/mod.rs` while keeping event taxonomy stable. + +Dependencies: +- Depends on Phase 3 fan-out/admin contract completion. + +Verification criteria (Phase 4): +- Verification failures block activation and emit auditable failure events. +- `update history` returns chronological, structured check/install events. +- Restart handling avoids mixed-version running state for managed service mode. + +## Phase 5: End-to-End Verification and Regression Gate + +- [ ] 5.1 Add/update focused integration tests under `clients/agent-runtime/tests/` for full command contract coverage (`status|check|install|auto-enable|auto-disable|history`) and concurrency outcomes. +- [ ] 5.2 Add/update integration tests under `clients/agent-runtime/tests/` for cross-surface consistency (CLI status vs admin payload vs channel/daemon notification facts). +- [ ] 5.3 Run targeted runtime verification (`cargo test -p agent-runtime update`) and dashboard type/build checks for `clients/web/apps/dashboard/src/types/admin-config.ts`, fixing regressions in touched files. +- [ ] 5.4 Run full repository regression (`make test` and `make build`) and confirm every scenario in `openspec/changes/enhance-auto-update-system/specs/update-system/spec.md` is mapped to passing tests before handoff. 
+ +Dependencies: +- Depends on completion of Phases 1-4. + +Verification criteria (Phase 5): +- All targeted and full regression suites pass. +- Each spec requirement/scenario has explicit test coverage evidence. From 50a85595f1c8b336ba0298615de741845885bb86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yuniel=20Acosta=20P=C3=A9rez?= <33158051+yacosta738@users.noreply.github.com> Date: Thu, 5 Mar 2026 11:18:32 +0100 Subject: [PATCH 2/4] feat: modularize dashboard configuration and enhance backend payload support --- .../2026-03-03-agent-loop/archive-report.md | 17 +- .../archive/2026-03-03-agent-loop/design.md | 72 ++++--- .../2026-03-03-agent-loop/exploration.md | 120 ++++++++---- .../archive/2026-03-03-agent-loop/proposal.md | 119 +++++++----- .../specs/agent-loop/spec.md | 45 +++-- .../archive/2026-03-03-agent-loop/tasks.md | 63 ++++-- .../2026-03-03-agent-loop/verify-report.md | 64 ++++--- .../design.md | 100 +++++----- .../proposal.md | 115 +++++------ .../tasks.md | 130 ++++++++++--- .../verify-report.md | 138 ++++++++------ .../verify-report.md | 87 +++++---- .../archive-report.md | 23 ++- .../2026-03-04-web-agent-config/design.md | 140 ++++++++------ .../2026-03-04-web-agent-config/proposal.md | 53 ++++-- .../specs/agent-config/spec.md | 16 +- .../specs/dashboard-ui/spec.md | 15 +- .../2026-03-04-web-agent-config/tasks.md | 101 +++++++--- .../verify-report.md | 82 ++++---- .../enhance-auto-update-system/design.md | 180 +++++++++++------- .../enhance-auto-update-system/exploration.md | 128 +++++++++---- .../enhance-auto-update-system/proposal.md | 122 +++++++----- .../specs/update-system/spec.md | 40 +++- .../enhance-auto-update-system/tasks.md | 115 ++++++++--- 24 files changed, 1343 insertions(+), 742 deletions(-) diff --git a/openspec/changes/archive/2026-03-03-agent-loop/archive-report.md b/openspec/changes/archive/2026-03-03-agent-loop/archive-report.md index c33d2ff45..42bef6575 100644 --- a/openspec/changes/archive/2026-03-03-agent-loop/archive-report.md 
+++ b/openspec/changes/archive/2026-03-03-agent-loop/archive-report.md @@ -1,16 +1,17 @@ # Archive Report -**Change**: agent-loop -**Date**: 2026-03-03 -**Artifact mode**: openspec -**Verification prerequisite**: PASS (`openspec/changes/archive/2026-03-03-agent-loop/verify-report.md`) +**Change**: agent-loop +**Date**: 2026-03-03 +**Artifact mode**: openspec +**Verification prerequisite**: PASS ( +`openspec/changes/archive/2026-03-03-agent-loop/verify-report.md`) --- ## Sync Outcome -| Domain | Action | Details | -|--------|--------|---------| +| Domain | Action | Details | +|------------|---------|-------------------------------------------------------------------------------------------------------------| | agent-loop | Created | Main spec did not exist; promoted full change spec to source of truth (`openspec/specs/agent-loop/spec.md`) | No destructive merge/removal was required for this change. @@ -24,6 +25,7 @@ Planned archive destination: `openspec/changes/archive/2026-03-03-agent-loop/` Contents expected to be preserved in archive: + - `proposal.md` - `specs/` - `design.md` @@ -35,4 +37,5 @@ Contents expected to be preserved in archive: ## Completion -The change is synchronized to main specs and ready to be finalized in archive as a complete SDD audit trail. +The change is synchronized to main specs and ready to be finalized in archive as a complete SDD +audit trail. diff --git a/openspec/changes/archive/2026-03-03-agent-loop/design.md b/openspec/changes/archive/2026-03-03-agent-loop/design.md index e19f5f356..d061adb01 100644 --- a/openspec/changes/archive/2026-03-03-agent-loop/design.md +++ b/openspec/changes/archive/2026-03-03-agent-loop/design.md @@ -2,29 +2,48 @@ ## Technical Approach -The core technical strategy is to deprecate the dual-loop architecture (`loop_.rs` and the internal loop in `agent.rs`) and consolidate them into a single, canonical `AgentLoop` state machine. 
This unified loop will be instantiated by all entry points (CLI, Channels, Gateway) to ensure consistent lifecycle management, security invariant enforcement, and stream event emitting. +The core technical strategy is to deprecate the dual-loop architecture (`loop_.rs` and the internal +loop in `agent.rs`) and consolidate them into a single, canonical `AgentLoop` state machine. This +unified loop will be instantiated by all entry points (CLI, Channels, Gateway) to ensure consistent +lifecycle management, security invariant enforcement, and stream event emitting. -The `AgentLoop` will act as the primary orchestrator. It will rely on an `Agent` for interacting with the LLM (yielding tool calls or text) and a `Dispatcher` for executing tools and enforcing risk policies. The execution will be represented as an asynchronous `Stream` of `LoopEvent`s, allowing consumers to handle streaming responses, tool progress, and approval interruptions idiomatically. +The `AgentLoop` will act as the primary orchestrator. It will rely on an `Agent` for interacting +with the LLM (yielding tool calls or text) and a `Dispatcher` for executing tools and enforcing risk +policies. The execution will be represented as an asynchronous `Stream` of `LoopEvent`s, allowing +consumers to handle streaming responses, tool progress, and approval interruptions idiomatically. ## Architecture Decisions ### Decision: Single Unified Loop Struct vs Entry-point Specific Traits -**Choice**: A single `AgentLoop` struct configured via a `LoopConfig`, containing an `Agent` and a `Dispatcher`. -**Alternatives considered**: A generic `Loop` trait implemented differently by CLI, Channel, and Gateway. -**Rationale**: To eliminate the current drift risk across surfaces, we need a single source of truth for the loop lifecycle. Behavioral differences between entry points should be handled via configuration (`LoopConfig`) rather than divergent loop implementations. 
This ensures security and compaction invariants are universally applied. +**Choice**: A single `AgentLoop` struct configured via a `LoopConfig`, containing an `Agent` and a +`Dispatcher`. +**Alternatives considered**: A generic `Loop` trait implemented differently by CLI, Channel, and +Gateway. +**Rationale**: To eliminate the current drift risk across surfaces, we need a single source of truth +for the loop lifecycle. Behavioral differences between entry points should be handled via +configuration (`LoopConfig`) rather than divergent loop implementations. This ensures security and +compaction invariants are universally applied. ### Decision: Stream Event Emitting **Choice**: `AgentLoop::run` returns an async `Stream`. -**Alternatives considered**: Passing a callback closure or a channel sender `mpsc::Sender` into the loop. -**Rationale**: Returning a `Stream` is idiomatic in modern Rust (via `async-stream` or underlying `mpsc` receivers). It provides a clean, pull-based API that allows callers to easily process events concurrently, apply timeouts, or transform the stream into channel-specific formats (e.g., SSE for the gateway, stdout for CLI). +**Alternatives considered**: Passing a callback closure or a channel sender `mpsc::Sender` into the +loop. +**Rationale**: Returning a `Stream` is idiomatic in modern Rust (via `async-stream` or underlying +`mpsc` receivers). It provides a clean, pull-based API that allows callers to easily process events +concurrently, apply timeouts, or transform the stream into channel-specific formats (e.g., SSE for +the gateway, stdout for CLI). ### Decision: Security Invariant Enforcement Boundary -**Choice**: The `Dispatcher` is responsible for evaluating risk classifications and pausing for approval, returning an `ApprovalRequired` state to the loop. +**Choice**: The `Dispatcher` is responsible for evaluating risk classifications and pausing for +approval, returning an `ApprovalRequired` state to the loop. 
**Alternatives considered**: The `AgentLoop` evaluates risk before calling the `Dispatcher`. -**Rationale**: The `Dispatcher` has the deepest knowledge of the tools (their schemas, side effects, and risk profiles). It is best positioned to evaluate risk policies. The loop simply orchestrates the suspension of execution and emission of the `ApprovalRequired` event to the client, waiting for a `resume` call. +**Rationale**: The `Dispatcher` has the deepest knowledge of the tools (their schemas, side effects, +and risk profiles). It is best positioned to evaluate risk policies. The loop simply orchestrates +the suspension of execution and emission of the `ApprovalRequired` event to the client, waiting for +a `resume` call. ## Data Flow @@ -74,15 +93,15 @@ sequenceDiagram ## File Changes -| File | Action | Description | -|------|--------|-------------| -| `clients/agent-runtime/src/agent/loop_.rs` | Delete | Remove the legacy active runtime loop. | +| File | Action | Description | +|---------------------------------------------------|--------|-----------------------------------------------------------------------| +| `clients/agent-runtime/src/agent/loop_.rs` | Delete | Remove the legacy active runtime loop. | | `clients/agent-runtime/src/agent/unified_loop.rs` | Create | Define `AgentLoop`, `LoopEvent`, `LoopConfig`, and the state machine. | -| `clients/agent-runtime/src/agent/agent.rs` | Modify | Remove internal loop logic; expose step-wise generation. | -| `clients/agent-runtime/src/agent/dispatcher.rs` | Modify | Integrate security policy enforcement and yield for approval. | -| `clients/agent-runtime/src/main.rs` | Modify | Update CLI entry point to instantiate and consume `AgentLoop`. | -| `clients/agent-runtime/src/channels/mod.rs` | Modify | Update channel runtime to map `LoopEvent`s to channel messages. | -| `clients/agent-runtime/src/gateway/mod.rs` | Modify | Update gateway webhook to use `AgentLoop` with strict session bounds. 
| +| `clients/agent-runtime/src/agent/agent.rs` | Modify | Remove internal loop logic; expose step-wise generation. | +| `clients/agent-runtime/src/agent/dispatcher.rs` | Modify | Integrate security policy enforcement and yield for approval. | +| `clients/agent-runtime/src/main.rs` | Modify | Update CLI entry point to instantiate and consume `AgentLoop`. | +| `clients/agent-runtime/src/channels/mod.rs` | Modify | Update channel runtime to map `LoopEvent`s to channel messages. | +| `clients/agent-runtime/src/gateway/mod.rs` | Modify | Update gateway webhook to use `AgentLoop` with strict session bounds. | ## Interfaces / Contracts @@ -134,18 +153,20 @@ impl AgentLoop { ## Testing Strategy -| Layer | What to Test | Approach | -|-------|-------------|----------| -| Unit | `AgentLoop` state machine | Mock `Agent` and `Dispatcher` to verify iteration limits, compaction triggers, and event emission. | -| Unit | `Dispatcher` risk policies | Provide mock tools with varying risk levels to ensure `ApprovalRequired` is yielded correctly. | -| Integration | Loop execution flow | Use a local/dummy model provider to run a full prompt -> tool -> response cycle without external IO. | -| E2E | Entry points (CLI/Gateway) | Verify that CLI output and Gateway SSE streams correctly reflect the underlying `LoopEvent`s, including approvals. | +| Layer | What to Test | Approach | +|-------------|----------------------------|--------------------------------------------------------------------------------------------------------------------| +| Unit | `AgentLoop` state machine | Mock `Agent` and `Dispatcher` to verify iteration limits, compaction triggers, and event emission. | +| Unit | `Dispatcher` risk policies | Provide mock tools with varying risk levels to ensure `ApprovalRequired` is yielded correctly. | +| Integration | Loop execution flow | Use a local/dummy model provider to run a full prompt -> tool -> response cycle without external IO. 
| +| E2E | Entry points (CLI/Gateway) | Verify that CLI output and Gateway SSE streams correctly reflect the underlying `LoopEvent`s, including approvals. | ## Migration / Rollout This change will be rolled out in phases: + 1. **Convergence**: Introduce `unified_loop.rs` alongside `loop_.rs`. -2. **Adapter Phase**: Update `main.rs` and `channels/mod.rs` to use `AgentLoop` behind a feature flag or configuration toggle to ensure parity. +2. **Adapter Phase**: Update `main.rs` and `channels/mod.rs` to use `AgentLoop` behind a feature + flag or configuration toggle to ensure parity. 3. **Hardening**: Verify compaction and timeouts behave as expected under load. 4. **Cleanup**: Remove `loop_.rs` and the old internal loop in `agent.rs`. @@ -153,5 +174,6 @@ No data migration is required, as the loop runtime is stateless across sessions. ## Open Questions -- [ ] Does the gateway require any specific `LoopConfig` overrides (e.g., shorter timeouts) compared to the CLI? +- [ ] Does the gateway require any specific `LoopConfig` overrides (e.g., shorter timeouts) compared + to the CLI? - [ ] How should `AgentLoop` handle stream disconnects from the client side during a tool execution? diff --git a/openspec/changes/archive/2026-03-03-agent-loop/exploration.md b/openspec/changes/archive/2026-03-03-agent-loop/exploration.md index aa6d2dccd..9f811d56b 100644 --- a/openspec/changes/archive/2026-03-03-agent-loop/exploration.md +++ b/openspec/changes/archive/2026-03-03-agent-loop/exploration.md @@ -1,67 +1,119 @@ ## Exploration: Agent Loop in Corvus ### Current State -`Corvus` currently has two agent-loop implementations in `clients/agent-runtime`: the exported runtime path (`agent/loop_.rs`) and a newer modular path (`agent/agent.rs`) that is not the active CLI/channel entrypoint. 
-For entry points, `clients/agent-runtime/src/main.rs` routes CLI `agent` execution into `agent::run`, which currently resolves to `agent/loop_.rs`; HTTP webhook requests in `clients/agent-runtime/src/gateway/mod.rs` use `provider.simple_chat` and do not run the full tool loop. Channel messages in `clients/agent-runtime/src/channels/mod.rs` invoke `run_tool_call_loop` and therefore exercise the full loop. +`Corvus` currently has two agent-loop implementations in `clients/agent-runtime`: the exported +runtime path (`agent/loop_.rs`) and a newer modular path (`agent/agent.rs`) that is not the active +CLI/channel entrypoint. -Session/workspace preparation is split across config and prompt assembly. `Config::load_or_init` in `clients/agent-runtime/src/config/schema.rs` resolves workspace/config paths, creates directories, decrypts secrets, and applies env overrides. Prompt context is assembled from workspace files (Corvus/AIEOS identity) in `channels::build_system_prompt` and partially in `agent/prompt.rs`. +For entry points, `clients/agent-runtime/src/main.rs` routes CLI `agent` execution into +`agent::run`, which currently resolves to `agent/loop_.rs`; HTTP webhook requests in +`clients/agent-runtime/src/gateway/mod.rs` use `provider.simple_chat` and do not run the full tool +loop. Channel messages in `clients/agent-runtime/src/channels/mod.rs` invoke `run_tool_call_loop` +and therefore exercise the full loop. -Queueing and concurrency are handled primarily in channel runtime: supervised listeners auto-restart with exponential backoff, message dispatch uses bounded mpsc + semaphore + `JoinSet`, and per-sender conversation history is cached in-memory (`channels/mod.rs`). The core loop in `agent/loop_.rs` executes tool calls sequentially per turn. +Session/workspace preparation is split across config and prompt assembly. 
`Config::load_or_init` in +`clients/agent-runtime/src/config/schema.rs` resolves workspace/config paths, creates directories, +decrypts secrets, and applies env overrides. Prompt context is assembled from workspace files +(Corvus/AIEOS identity) in `channels::build_system_prompt` and partially in `agent/prompt.rs`. -Runtime execution and tool orchestration happen in `run_tool_call_loop` (`agent/loop_.rs`): it sends history to provider, parses tool calls (native + XML/JSON fallbacks), executes tools, scrubs sensitive values, appends tool results back into history, and repeats until final text or max iterations. +Queueing and concurrency are handled primarily in channel runtime: supervised listeners auto-restart +with exponential backoff, message dispatch uses bounded mpsc + semaphore + `JoinSet`, and per-sender +conversation history is cached in-memory (`channels/mod.rs`). The core loop in `agent/loop_.rs` +executes tool calls sequentially per turn. -Event streaming is channel-scoped today: `run_tool_call_loop` supports `on_delta` streaming chunks, and channel adapters can send/update/finalize draft messages. Gateway webhook path does not stream deltas. +Runtime execution and tool orchestration happen in `run_tool_call_loop` (`agent/loop_.rs`): it sends +history to provider, parses tool calls (native + XML/JSON fallbacks), executes tools, scrubs +sensitive values, appends tool results back into history, and repeats until final text or max +iterations. -Tool execution messaging supports two formats: prompt-guided XML tags and native structured tool calls. `agent/dispatcher.rs` formalizes this in `ToolDispatcher` (XML vs native), but `loop_.rs` still contains parallel parsing/formatting logic. +Event streaming is channel-scoped today: `run_tool_call_loop` supports `on_delta` streaming chunks, +and channel adapters can send/update/finalize draft messages. Gateway webhook path does not stream +deltas.
-Hooks/extensibility are trait-first (`Provider`, `Tool`, `Channel`, `Memory`, `RuntimeAdapter`) with factories and registry wiring. Approval is a pre-tool hook in `approval/mod.rs` with session allowlist + audit log, but interactive approval is CLI-only; non-CLI channels auto-approve. +Tool execution messaging supports two formats: prompt-guided XML tags and native structured tool +calls. `agent/dispatcher.rs` formalizes this in `ToolDispatcher` (XML vs native), but `loop_.rs` +still contains parallel parsing/formatting logic. -Reply shaping/suppression exists through channel-specific delivery instructions (for example Telegram attachment markers), `silent` mode in loop execution, typing indicators, and draft update/finalization behavior in `channels/mod.rs`. +Hooks/extensibility are trait-first (`Provider`, `Tool`, `Channel`, `Memory`, `RuntimeAdapter`) with +factories and registry wiring. Approval is a pre-tool hook in `approval/mod.rs` with session +allowlist + audit log, but interactive approval is CLI-only; non-CLI channels auto-approve. -Error handling/compaction/retries are distributed: loop-level exits on provider/tool failures and max-iteration guardrails; auto-compaction summarizes older turns before hard trimming; provider-level retries/fallbacks/backoff and model failover are in `providers/reliable.rs`. +Reply shaping/suppression exists through channel-specific delivery instructions (for example +Telegram attachment markers), `silent` mode in loop execution, typing indicators, and draft +update/finalization behavior in `channels/mod.rs`. -Persistence/state is split between durable memory backends (`memory/*`) and ephemeral in-process conversation history. Memory APIs support `session_id`, but major loop paths frequently pass `None`, so session scoping is inconsistent. 
+Error handling/compaction/retries are distributed: loop-level exits on provider/tool failures and +max-iteration guardrails; auto-compaction summarizes older turns before hard trimming; +provider-level retries/fallbacks/backoff and model failover are in `providers/reliable.rs`. -Security/auth controls include security policy/risk classification (`security/policy.rs`), pairing and bearer auth for gateway, optional webhook secret hashing and constant-time checks, request limits/timeouts/body limits, and idempotency tracking. +Persistence/state is split between durable memory backends (`memory/*`) and ephemeral in-process +conversation history. Memory APIs support `session_id`, but major loop paths frequently pass `None`, +so session scoping is inconsistent. + +Security/auth controls include security policy/risk classification (`security/policy.rs`), pairing +and bearer auth for gateway, optional webhook secret hashing and constant-time checks, request +limits/timeouts/body limits, and idempotency tracking. ### Affected Areas + - `clients/agent-runtime/src/main.rs` — CLI entrypoint and command routing into the active loop. -- `clients/agent-runtime/src/agent/loop_.rs` — current authoritative loop (prompt build, tool loop, compaction, validation). -- `clients/agent-runtime/src/agent/agent.rs` — modular loop candidate with dispatcher/prompt abstractions. +- `clients/agent-runtime/src/agent/loop_.rs` — current authoritative loop (prompt build, tool loop, + compaction, validation). +- `clients/agent-runtime/src/agent/agent.rs` — modular loop candidate with dispatcher/prompt + abstractions. - `clients/agent-runtime/src/agent/dispatcher.rs` — tool-call protocol abstraction (XML/native). -- `clients/agent-runtime/src/channels/mod.rs` — channel runtime queueing, concurrency, streaming drafts, channel reply shaping. -- `clients/agent-runtime/src/gateway/mod.rs` — RPC/webhook ingress, auth, rate limit, idempotency, simple-chat path. 
-- `clients/agent-runtime/src/config/schema.rs` — workspace/config/session-relevant defaults and initialization. +- `clients/agent-runtime/src/channels/mod.rs` — channel runtime queueing, concurrency, streaming + drafts, channel reply shaping. +- `clients/agent-runtime/src/gateway/mod.rs` — RPC/webhook ingress, auth, rate limit, idempotency, + simple-chat path. +- `clients/agent-runtime/src/config/schema.rs` — workspace/config/session-relevant defaults and + initialization. - `clients/agent-runtime/src/providers/reliable.rs` — retry/backoff/failover behavior. - `clients/agent-runtime/src/security/policy.rs` — command risk gating and execution policy. - `clients/agent-runtime/src/approval/mod.rs` — approval hook behavior and audit state. ### Approaches -1. **Document Current Active Loop (`loop_.rs`-first)** — Treat current behavior as source of truth and write SDD around existing control flow. - - Pros: lowest ambiguity, fastest to proposal, directly reflects production path. - - Cons: encodes known duplication with modular `Agent` path and may harden legacy structure. - - Effort: Low. - -2. **Define Target Fundamentals Around Modular `Agent` + `ToolDispatcher`** — Use `agent/agent.rs` + `dispatcher.rs` as intended architecture and map migration from `loop_.rs`. - - Pros: cleaner separation (prompt, dispatch, memory load, execution), easier future hooks/extensibility. - - Cons: requires explicit migration plan and compatibility matrix for channels/CLI/gateway. - - Effort: Medium. -3. **Hybrid Spec (As-Is Baseline + Migration Track)** — Capture current active loop behavior, then define staged convergence to modular agent runtime. - - Pros: safest for delivery, preserves current contracts while reducing architecture drift. - - Cons: larger spec/design surface and more acceptance criteria. - - Effort: Medium. +1. **Document Current Active Loop (`loop_.rs`-first)** — Treat current behavior as source of truth + and write SDD around existing control flow. 
+ - Pros: lowest ambiguity, fastest to proposal, directly reflects production path. + - Cons: encodes known duplication with modular `Agent` path and may harden legacy structure. + - Effort: Low. + +2. **Define Target Fundamentals Around Modular `Agent` + `ToolDispatcher`** — Use `agent/agent.rs` + + `dispatcher.rs` as intended architecture and map migration from `loop_.rs`. + - Pros: cleaner separation (prompt, dispatch, memory load, execution), easier future + hooks/extensibility. + - Cons: requires explicit migration plan and compatibility matrix for channels/CLI/gateway. + - Effort: Medium. + +3. **Hybrid Spec (As-Is Baseline + Migration Track)** — Capture current active loop behavior, then + define staged convergence to modular agent runtime. + - Pros: safest for delivery, preserves current contracts while reducing architecture drift. + - Cons: larger spec/design surface and more acceptance criteria. + - Effort: Medium. ### Recommendation -Use **Hybrid Spec (Approach 3)**. The codebase currently has a production `loop_.rs` path plus a modular architecture path (`agent.rs` + `dispatcher.rs`) that already encodes better boundaries. A hybrid exploration-to-proposal flow lets us document real behavior first, then specify convergence milestones (entrypoint unification, shared tool protocol layer, and consistent session scoping) without breaking existing CLI/channel behavior. + +Use **Hybrid Spec (Approach 3)**. The codebase currently has a production `loop_.rs` path plus a +modular architecture path (`agent.rs` + `dispatcher.rs`) that already encodes better boundaries. A +hybrid exploration-to-proposal flow lets us document real behavior first, then specify convergence +milestones (entrypoint unification, shared tool protocol layer, and consistent session scoping) +without breaking existing CLI/channel behavior. ### Risks + - Dual-loop architecture can cause behavioral drift (CLI/channels vs future modular path). 
-- Gateway webhook currently bypasses full tool loop, creating inconsistent semantics versus CLI/channels. -- Session scoping is inconsistently applied (`session_id` support exists but often not wired in loop usage). +- Gateway webhook currently bypasses full tool loop, creating inconsistent semantics versus + CLI/channels. +- Session scoping is inconsistently applied (`session_id` support exists but often not wired in loop + usage). - Approval model auto-approves non-CLI channels, which may violate expected supervised semantics. - Compaction + trimming + retries are distributed across layers, increasing edge-case complexity. ### Ready for Proposal -Yes — proceed to proposal with explicit scope boundaries: (1) canonical loop contract, (2) entrypoint alignment strategy, (3) security/approval invariants, and (4) session-state consistency requirements. + +Yes — proceed to proposal with explicit scope boundaries: (1) canonical loop contract, (2) +entrypoint alignment strategy, (3) security/approval invariants, and (4) session-state consistency +requirements. diff --git a/openspec/changes/archive/2026-03-03-agent-loop/proposal.md b/openspec/changes/archive/2026-03-03-agent-loop/proposal.md index 697ad1fe8..0d5ea9cde 100644 --- a/openspec/changes/archive/2026-03-03-agent-loop/proposal.md +++ b/openspec/changes/archive/2026-03-03-agent-loop/proposal.md @@ -2,28 +2,36 @@ ## Intent -Corvus currently runs two overlapping agent-loop architectures (`agent/loop_.rs` as active runtime and -`agent/agent.rs` + `agent/dispatcher.rs` as modular target). This split creates drift risk across CLI, -channel, and gateway surfaces, weakens predictable security controls, and complicates performance tuning. +Corvus currently runs two overlapping agent-loop architectures (`agent/loop_.rs` as active runtime +and +`agent/agent.rs` + `agent/dispatcher.rs` as modular target). 
This split creates drift risk across +CLI, +channel, and gateway surfaces, weakens predictable security controls, and complicates performance +tuning. This change defines Agent Loop as a single, explicit contract for loop behavior, -tool-dispatch semantics, session scoping, and approval/security invariants so all execution paths can +tool-dispatch semantics, session scoping, and approval/security invariants so all execution paths +can converge safely without regressing existing user workflows. ## Scope ### In Scope + - Define canonical loop fundamentals for request lifecycle: prompt assembly, tool-call iteration, compaction, final response, and failure boundaries. - Define a staged convergence plan from `loop_.rs` behavior to modular `agent.rs` + `dispatcher.rs` responsibilities, preserving existing contracts during migration. -- Define entrypoint alignment requirements for CLI, channel runtime, and gateway webhook so semantics are +- Define entrypoint alignment requirements for CLI, channel runtime, and gateway webhook so + semantics are consistent where required and explicitly different where justified. -- Define session-state invariants (`session_id` propagation, memory/history boundaries) and approval policy +- Define session-state invariants (`session_id` propagation, memory/history boundaries) and approval + policy invariants across channels. - Define security and performance guardrails that all loop paths MUST satisfy. ### Out of Scope + - Rewriting all runtime code in one release. - Introducing new external provider APIs or replacing existing providers. - Redesigning channel-specific UX behavior beyond what is needed for loop-contract consistency. @@ -31,73 +39,86 @@ converge safely without regressing existing user workflows. 
## Approach -Use a hybrid approach: treat current `loop_.rs` production behavior as compatibility baseline, then phase in +Use a hybrid approach: treat current `loop_.rs` production behavior as compatibility baseline, then +phase in modular loop ownership with shared dispatcher and unified invariants. ### Phased Rollout + 1. Baseline Contract Phase - - Capture as-is loop behavior as normative fundamentals and acceptance criteria. - - Identify must-preserve behavior for CLI and channels. + - Capture as-is loop behavior as normative fundamentals and acceptance criteria. + - Identify must-preserve behavior for CLI and channels. 2. Convergence Phase - - Move protocol and orchestration responsibilities toward `agent.rs` + `dispatcher.rs` boundaries. - - Add compatibility adapters so existing entrypoints continue functioning. + - Move protocol and orchestration responsibilities toward `agent.rs` + `dispatcher.rs` boundaries. + - Add compatibility adapters so existing entrypoints continue functioning. 3. Alignment Phase - - Align gateway path semantics with canonical fundamentals (or explicitly codify narrow exceptions). - - Enforce consistent session scoping and approval/risk checks across execution surfaces. + - Align gateway path semantics with canonical fundamentals (or explicitly codify narrow + exceptions). + - Enforce consistent session scoping and approval/risk checks across execution surfaces. 4. Hardening Phase - - Validate reliability/performance budgets and remove duplicated legacy loop paths once parity is proven. + - Validate reliability/performance budgets and remove duplicated legacy loop paths once parity is + proven. ## Affected Areas -| Area | Impact | Description | -|------|--------|-------------| -| `clients/agent-runtime/src/agent/unified_entrypoint.rs` | Modified | Runtime-selectable canonical/compatibility routing via explicit flags. 
| -| `clients/agent-runtime/src/agent/agent.rs` | Modified | Modular runtime responsibilities become canonical over phases. | -| `clients/agent-runtime/src/agent/dispatcher.rs` | Modified | Shared tool protocol semantics (native/XML) and parsing boundaries. | -| `clients/agent-runtime/src/main.rs` | Modified | CLI entrypoint alignment with canonical loop contract. | -| `clients/agent-runtime/src/channels/mod.rs` | Modified | Channel runtime invocation, streaming semantics, and session invariants. | -| `clients/agent-runtime/src/gateway/mod.rs` | Modified | Gateway loop alignment, auth-preserving integration, and semantic parity rules. | -| `clients/agent-runtime/src/approval/mod.rs` | Modified | Consistent approval policy semantics and auditability expectations. | -| `clients/agent-runtime/src/security/policy.rs` | Modified | Risk-classification and enforcement invariants applied consistently. | -| `clients/agent-runtime/src/providers/reliable.rs` | Modified | Retry/backoff/failover interactions constrained by canonical loop rules. | -| `clients/agent-runtime/src/config/schema.rs` | Modified | Session/workspace defaults and initialization assumptions for loop invariants. | +| Area | Impact | Description | +|---------------------------------------------------------|----------|---------------------------------------------------------------------------------| +| `clients/agent-runtime/src/agent/unified_entrypoint.rs` | Modified | Runtime-selectable canonical/compatibility routing via explicit flags. | +| `clients/agent-runtime/src/agent/agent.rs` | Modified | Modular runtime responsibilities become canonical over phases. | +| `clients/agent-runtime/src/agent/dispatcher.rs` | Modified | Shared tool protocol semantics (native/XML) and parsing boundaries. | +| `clients/agent-runtime/src/main.rs` | Modified | CLI entrypoint alignment with canonical loop contract. 
| +| `clients/agent-runtime/src/channels/mod.rs` | Modified | Channel runtime invocation, streaming semantics, and session invariants. | +| `clients/agent-runtime/src/gateway/mod.rs` | Modified | Gateway loop alignment, auth-preserving integration, and semantic parity rules. | +| `clients/agent-runtime/src/approval/mod.rs` | Modified | Consistent approval policy semantics and auditability expectations. | +| `clients/agent-runtime/src/security/policy.rs` | Modified | Risk-classification and enforcement invariants applied consistently. | +| `clients/agent-runtime/src/providers/reliable.rs` | Modified | Retry/backoff/failover interactions constrained by canonical loop rules. | +| `clients/agent-runtime/src/config/schema.rs` | Modified | Session/workspace defaults and initialization assumptions for loop invariants. | ## Risks -| Risk | Likelihood | Mitigation | -|------|------------|------------| -| Behavioral regressions while unifying dual-loop paths | Medium | Preserve baseline acceptance tests and migrate behind staged compatibility boundaries. | -| Security policy divergence across entrypoints | Medium | Define cross-surface MUST-level approval/risk invariants and gate rollout on conformance checks. | -| Session leakage or inconsistent memory association | Medium | Require end-to-end `session_id` propagation rules and explicit fallback behavior. | -| Performance regressions from added abstraction layers | Low/Med | Define per-turn latency and iteration budgets; profile before/after each phase. | -| Gateway parity changes impacting existing integrations | Low/Med | Roll out in compatibility mode first with explicit exception list and rollback switches. 
| +| Risk | Likelihood | Mitigation | +|--------------------------------------------------------|------------|--------------------------------------------------------------------------------------------------| +| Behavioral regressions while unifying dual-loop paths | Medium | Preserve baseline acceptance tests and migrate behind staged compatibility boundaries. | +| Security policy divergence across entrypoints | Medium | Define cross-surface MUST-level approval/risk invariants and gate rollout on conformance checks. | +| Session leakage or inconsistent memory association | Medium | Require end-to-end `session_id` propagation rules and explicit fallback behavior. | +| Performance regressions from added abstraction layers | Low/Med | Define per-turn latency and iteration budgets; profile before/after each phase. | +| Gateway parity changes impacting existing integrations | Low/Med | Roll out in compatibility mode first with explicit exception list and rollback switches. | ## Security and Performance Implications -- Security first: loop fundamentals will require uniform enforcement of approval, risk classification, +- Security first: loop fundamentals will require uniform enforcement of approval, risk + classification, auth boundaries, and sensitive-value scrubbing across all entrypoints. -- Performance second: convergence should reduce duplicate logic and improve maintainability, but MUST keep +- Performance second: convergence should reduce duplicate logic and improve maintainability, but + MUST keep bounded iteration, queue backpressure, and retry discipline to avoid latency blowups. -- Operationally, each phase must include verification that compaction, retries, and tool-execution limits +- Operationally, each phase must include verification that compaction, retries, and tool-execution + limits still protect memory and runtime stability under load. 
## Rollback Plan -If convergence introduces regressions, rollback is executed by switching entrypoints back to compatibility +If convergence introduces regressions, rollback is executed by switching entrypoints back to +compatibility mode via runtime flags (`CORVUS_UNIFIED_LOOP_PREVIEW=0`, `CORVUS_UNIFIED_LOOP_ONLY=0`) and disabling convergence-specific adapters while keeping canonical code paths compiled and selectable. -**Important**: Rollback does NOT disable or weaken enforcement of approval, risk, or authentication checks. -All security controls—including approval/risk/auth enforcement and deny-by-default access controls across -CLI/channel/gateway paths—remain active and unchanged. Only convergence-specific adapter selection is toggled; +**Important**: Rollback does NOT disable or weaken enforcement of approval, risk, or authentication +checks. +All security controls—including approval/risk/auth enforcement and deny-by-default access controls +across +CLI/channel/gateway paths—remain active and unchanged. Only convergence-specific adapter selection +is toggled; canonical security checks continue to execute regardless of rollback state. Rollback criteria: + - Security invariant violations (approval/risk/auth) in any surface. - Material regression in loop completion reliability or latency budgets. - Session-state inconsistencies causing cross-conversation contamination. -Rollback steps will be documented per phase so rollback remains surgical (phase-local) instead of full +Rollback steps will be documented per phase so rollback remains surgical (phase-local) instead of +full runtime reversion. ## Dependencies @@ -108,8 +129,14 @@ runtime reversion. ## Success Criteria -- [ ] Agent Loop contract gate: `ProdBehaviorTests` and KMP/Rust cross-module contract tests pass at 100% across 3 consecutive CI runs. -- [ ] Entrypoint parity gate: behavior delta matrix shows 0 unapproved discrepancies across CLI/channels/gateway in `ProdBehaviorTests`. 
-- [ ] Session and approval/risk gate: `ApprovalConformanceTest` pass rate >= 99.0% over a 200-case suite window, with 0 critical invariant breaks. -- [ ] Migration parity gate: retry ceiling <= 3 attempts per request and fallback rate <= 1.0% on a 7-day staging window. -- [ ] Performance and security gates: `PerformanceRegressionTest` shows p95 latency regression <= 10% over 30 runs, and `SecurityGate` reports 0 new High/Critical findings (merge blocked otherwise). +- [ ] Agent Loop contract gate: `ProdBehaviorTests` and KMP/Rust cross-module contract tests pass at + 100% across 3 consecutive CI runs. +- [ ] Entrypoint parity gate: behavior delta matrix shows 0 unapproved discrepancies across + CLI/channels/gateway in `ProdBehaviorTests`. +- [ ] Session and approval/risk gate: `ApprovalConformanceTest` pass rate >= 99.0% over a 200-case + suite window, with 0 critical invariant breaks. +- [ ] Migration parity gate: retry ceiling <= 3 attempts per request and fallback rate <= 1.0% on a + 7-day staging window. +- [ ] Performance and security gates: `PerformanceRegressionTest` shows p95 latency regression <= + 10% over 30 runs, and `SecurityGate` reports 0 new High/Critical findings (merge blocked + otherwise). diff --git a/openspec/changes/archive/2026-03-03-agent-loop/specs/agent-loop/spec.md b/openspec/changes/archive/2026-03-03-agent-loop/specs/agent-loop/spec.md index 7bde96d16..29ab4a0f1 100644 --- a/openspec/changes/archive/2026-03-03-agent-loop/specs/agent-loop/spec.md +++ b/openspec/changes/archive/2026-03-03-agent-loop/specs/agent-loop/spec.md @@ -2,25 +2,33 @@ ## Purpose -This specification defines the canonical Agent Loop behavior for the Corvus project, consolidating the dual-loop paths (`loop_.rs` and `agent.rs` + `dispatcher.rs`) into a single explicit contract. It covers the loop lifecycle, tool-dispatch semantics, session scoping, approval invariants, and security requirements across all entry points (CLI, channels, and gateway). 
+This specification defines the canonical Agent Loop behavior for the Corvus project, consolidating +the dual-loop paths (`loop_.rs` and `agent.rs` + `dispatcher.rs`) into a single explicit contract. +It covers the loop lifecycle, tool-dispatch semantics, session scoping, approval invariants, and +security requirements across all entry points (CLI, channels, and gateway). ## Requirements ### Requirement: Entry Points Alignment -The system MUST provide a unified loop contract across all entry points (CLI, channels, gateway webhook). Any semantic differences MUST be explicitly justified and narrow in scope. +The system MUST provide a unified loop contract across all entry points (CLI, channels, gateway +webhook). Any semantic differences MUST be explicitly justified and narrow in scope. #### Scenario: Unified Loop Execution + - GIVEN a user prompt originating from any supported entry point (CLI, channel, or gateway) - WHEN the request enters the agent loop -- THEN the system MUST initialize the loop with consistent session invariants, applying the same approval and security policies regardless of origin +- THEN the system MUST initialize the loop with consistent session invariants, applying the same + approval and security policies regardless of origin - AND the system MUST route execution through the canonical dispatcher boundary. ### Requirement: Stream Events Lifecycle -The loop MUST emit predictable stream events during its lifecycle, ensuring callers can accurately track prompt assembly, tool execution, and final response generation. +The loop MUST emit predictable stream events during its lifecycle, ensuring callers can accurately +track prompt assembly, tool execution, and final response generation. 
#### Scenario: Standard Iteration Events + - GIVEN an active agent loop - WHEN a tool call is dispatched and completed - THEN the system MUST emit start, progress, and completion events for the tool execution @@ -28,19 +36,24 @@ The loop MUST emit predictable stream events during its lifecycle, ensuring call ### Requirement: Context Compaction -The system MUST enforce context compaction to protect memory limits and runtime stability when the loop iteration history grows beyond the configured threshold. +The system MUST enforce context compaction to protect memory limits and runtime stability when the +loop iteration history grows beyond the configured threshold. #### Scenario: Triggering Compaction + - GIVEN an agent loop iterating over multiple tool calls - WHEN the cumulative context size exceeds the predefined safety threshold - THEN the system MUST trigger a compaction routine to summarize or truncate older history -- AND the system MUST preserve the current `session_id` and essential context required for the ongoing task without interruption. +- AND the system MUST preserve the current `session_id` and essential context required for the + ongoing task without interruption. ### Requirement: Timeout Aborts -The loop MUST respect per-turn latency and total iteration budgets to prevent runaway execution or unresponsive loops. +The loop MUST respect per-turn latency and total iteration budgets to prevent runaway execution or +unresponsive loops. 
#### Scenario: Runaway Loop Abortion + - GIVEN an active agent loop with a configured iteration budget or timeout limit - WHEN the loop exceeds the maximum allowed iterations or processing time - THEN the system MUST forcefully abort the loop @@ -49,15 +62,19 @@ The loop MUST respect per-turn latency and total iteration budgets to prevent ru ### Requirement: Error Handling and Fallbacks -The system MUST gracefully handle tool execution failures, network timeouts, and model errors without crashing the agent loop, utilizing retry and backoff discipline. +The system MUST gracefully handle tool execution failures, network timeouts, and model errors +without crashing the agent loop, utilizing retry and backoff discipline. #### Scenario: Recoverable Tool Failure + - GIVEN a tool call dispatched during an active loop iteration - WHEN the tool execution fails due to a transient error (e.g., network timeout) - THEN the system SHOULD attempt to retry the tool call based on configured backoff policies -- AND if the failure persists, the system MUST return a structured error to the model to allow for an alternative strategy or graceful degradation. +- AND if the failure persists, the system MUST return a structured error to the model to allow for + an alternative strategy or graceful degradation. #### Scenario: Unrecoverable Error + - GIVEN an active agent loop - WHEN an unrecoverable error occurs (e.g., severe parsing failure or auth rejection) - THEN the system MUST terminate the loop immediately @@ -65,11 +82,15 @@ The system MUST gracefully handle tool execution failures, network timeouts, and ### Requirement: Security Profiling and Invariants -The loop MUST enforce strict approval, risk classification, and authorization boundaries at every iteration and tool dispatch phase. +The loop MUST enforce strict approval, risk classification, and authorization boundaries at every +iteration and tool dispatch phase. 
#### Scenario: Tool Dispatch with High-Risk Classification + - GIVEN a tool dispatched by the model that requires elevated privileges - WHEN the dispatcher intercepts the tool call request -- THEN the system MUST evaluate the action against the current session's risk classification and approval policy -- AND the system MUST block the execution and request explicit user approval if the action exceeds the permitted risk threshold +- THEN the system MUST evaluate the action against the current session's risk classification and + approval policy +- AND the system MUST block the execution and request explicit user approval if the action exceeds + the permitted risk threshold - AND the system MUST NOT proceed until explicit authorization is granted or the request is aborted. diff --git a/openspec/changes/archive/2026-03-03-agent-loop/tasks.md b/openspec/changes/archive/2026-03-03-agent-loop/tasks.md index d7e5b8221..5b5d681c8 100644 --- a/openspec/changes/archive/2026-03-03-agent-loop/tasks.md +++ b/openspec/changes/archive/2026-03-03-agent-loop/tasks.md @@ -2,35 +2,54 @@ ## Phase 1: Foundation / Infrastructure -- [x] 1.1 Create `clients/agent-runtime/src/agent/unified_loop.rs` and define core types: `LoopConfig`, `LoopEvent`, `AgentLoop` struct. -- [x] 1.2 Write failing unit tests in `unified_loop.rs` for `AgentLoop` initialization and configuration boundaries (RED). +- [x] 1.1 Create `clients/agent-runtime/src/agent/unified_loop.rs` and define core types: + `LoopConfig`, `LoopEvent`, `AgentLoop` struct. +- [x] 1.2 Write failing unit tests in `unified_loop.rs` for `AgentLoop` initialization and + configuration boundaries (RED). - [x] 1.3 Implement `AgentLoop::new` to make initialization tests pass (GREEN/REFACTOR). -- [x] 1.4 Update `clients/agent-runtime/src/agent/dispatcher.rs` to define `ApprovalRequired` state and write failing tests for risk policies (RED). 
-- [x] 1.5 Implement risk classification checks in `Dispatcher` to yield `ApprovalRequired` for high-risk tools (GREEN/REFACTOR). +- [x] 1.4 Update `clients/agent-runtime/src/agent/dispatcher.rs` to define `ApprovalRequired` state + and write failing tests for risk policies (RED). +- [x] 1.5 Implement risk classification checks in `Dispatcher` to yield `ApprovalRequired` for + high-risk tools (GREEN/REFACTOR). ## Phase 2: Core Implementation -- [x] 2.1 Write failing tests for `AgentLoop::run` state machine handling prompt, single tool call, and final response (RED). -- [x] 2.2 Update `clients/agent-runtime/src/agent/agent.rs` to expose step-wise generation and remove its internal loop logic. -- [x] 2.3 Implement basic `AgentLoop::run` yielding `Stream` for the happy path (GREEN). -- [x] 2.4 Write failing tests for context compaction, iteration budgets, and timeout aborts in `AgentLoop` (RED). -- [x] 2.5 Implement context compaction, iteration limits, and timeout handling in `AgentLoop::run` (GREEN/REFACTOR). -- [x] 2.6 Write failing tests for `AgentLoop::resume` to handle `ApprovalRequired` continuations (RED). -- [x] 2.7 Implement `AgentLoop::resume` to correctly resume execution after approval (GREEN/REFACTOR). +- [x] 2.1 Write failing tests for `AgentLoop::run` state machine handling prompt, single tool call, + and final response (RED). +- [x] 2.2 Update `clients/agent-runtime/src/agent/agent.rs` to expose step-wise generation and + remove its internal loop logic. +- [x] 2.3 Implement basic `AgentLoop::run` yielding `Stream` for the happy path + (GREEN). +- [x] 2.4 Write failing tests for context compaction, iteration budgets, and timeout aborts in + `AgentLoop` (RED). +- [x] 2.5 Implement context compaction, iteration limits, and timeout handling in `AgentLoop::run` + (GREEN/REFACTOR). +- [x] 2.6 Write failing tests for `AgentLoop::resume` to handle `ApprovalRequired` continuations + (RED).
+- [x] 2.7 Implement `AgentLoop::resume` to correctly resume execution after approval ( + GREEN/REFACTOR). ## Phase 3: Integration / Wiring -- [x] 3.1 Update `clients/agent-runtime/src/main.rs` (CLI) to instantiate `AgentLoop` and consume `LoopEvent` stream (behind a compatibility flag if needed). -- [x] 3.2 Update `clients/agent-runtime/src/channels/mod.rs` to map `LoopEvent`s to channel messages with consistent session invariants. -- [x] 3.3 Update `clients/agent-runtime/src/gateway/mod.rs` to use `AgentLoop` and map events to SSE streams. -- [x] 3.4 Ensure auth boundaries and sensitive-value scrubbing are correctly applied at the integration boundaries. +- [x] 3.1 Update `clients/agent-runtime/src/main.rs` (CLI) to instantiate `AgentLoop` and consume + `LoopEvent` stream (behind a compatibility flag if needed). +- [x] 3.2 Update `clients/agent-runtime/src/channels/mod.rs` to map `LoopEvent`s to channel messages + with consistent session invariants. +- [x] 3.3 Update `clients/agent-runtime/src/gateway/mod.rs` to use `AgentLoop` and map events to SSE + streams. +- [x] 3.4 Ensure auth boundaries and sensitive-value scrubbing are correctly applied at the + integration boundaries. ## Phase 4: Testing & Verification -- [x] 4.1 Write integration tests verifying the full prompt -> tool -> response cycle with a local/dummy model provider. -- [x] 4.2 Write E2E tests for the CLI entrypoint verifying stdout reflects underlying `LoopEvent`s including approval interruptions. -- [x] 4.3 Write E2E tests for the Gateway webhook verifying SSE stream correctness, timeout aborts, and session scoping. -- [x] 4.4 Verify all scenarios in `openspec/changes/agent-loop/specs/agent-loop/spec.md` pass against the unified `AgentLoop`. +- [x] 4.1 Write integration tests verifying the full prompt -> tool -> response cycle with a + local/dummy model provider. +- [x] 4.2 Write E2E tests for the CLI entrypoint verifying stdout reflects underlying `LoopEvent`s + including approval interruptions. 
+- [x] 4.3 Write E2E tests for the Gateway webhook verifying SSE stream correctness, timeout aborts, + and session scoping. +- [x] 4.4 Verify all scenarios in `openspec/changes/agent-loop/specs/agent-loop/spec.md` pass + against the unified `AgentLoop`. ## Phase 5: Cleanup @@ -42,5 +61,7 @@ - [x] R1 Promote shared unified preview execution helper across CLI/channels/gateway surfaces. - [x] R2 Add recoverable retry/backoff + fallback semantics for unified preview execution path. -- [x] R3 Add parity-focused tests for session propagation and timeout/abort semantics across entrypoints. -- [x] Remaining gap resolved: default non-preview CLI/gateway/channels now pass through canonical unified contract gates. +- [x] R3 Add parity-focused tests for session propagation and timeout/abort semantics across + entrypoints. +- [x] Remaining gap resolved: default non-preview CLI/gateway/channels now pass through canonical + unified contract gates. diff --git a/openspec/changes/archive/2026-03-03-agent-loop/verify-report.md b/openspec/changes/archive/2026-03-03-agent-loop/verify-report.md index 6903d6868..3d288e650 100644 --- a/openspec/changes/archive/2026-03-03-agent-loop/verify-report.md +++ b/openspec/changes/archive/2026-03-03-agent-loop/verify-report.md @@ -1,35 +1,39 @@ # Verification Report -**Change**: agent-loop -**Verification run**: final verification after non-preview convergence updates +**Change**: agent-loop +**Verification run**: final verification after non-preview convergence updates **Artifact mode**: openspec --- ## Completeness -| Metric | Value | -|--------|-------| -| Tasks total | 46 | -| Tasks complete | 46 | -| Tasks incomplete | 0 | +| Metric | Value | +|------------------|-------| +| Tasks total | 46 | +| Tasks complete | 46 | +| Tasks incomplete | 0 | -Task checklist result from `openspec/changes/agent-loop/tasks.md`: all phase and remediation items are marked complete, including the prior MUST-level convergence gap. 
+Task checklist result from `openspec/changes/agent-loop/tasks.md`: all phase and remediation items +are marked complete, including the prior MUST-level convergence gap. --- ### Build & Tests Execution -**Build command (from `openspec/config.yaml`)**: `make build` +**Build command (from `openspec/config.yaml`)**: `make build` **Result**: ✅ Passed (`BUILD SUCCESSFUL`) -**Test command (from `openspec/config.yaml`)**: `make test` +**Test command (from `openspec/config.yaml`)**: `make test` **Result**: ✅ Passed (`BUILD SUCCESSFUL`) Key note: -- `make build` and `make test` still skip Rust runtime behavioral coverage in this path (`:agent-runtime:cargoTest` skipped), so targeted Cargo verification was executed. + +- `make build` and `make test` still skip Rust runtime behavioral coverage in this path ( + `:agent-runtime:cargoTest` skipped), so targeted Cargo verification was executed. **Supplemental runtime verification executed:** + - `cargo test unified_loop` -> ✅ passed - `cargo test retry_backoff` -> ✅ passed - `cargo test --test cli_loop_events_e2e` -> ✅ passed @@ -47,15 +51,15 @@ Key note: ### Spec Compliance Matrix (Behavioral) -| Requirement | Scenario | Evidence | Result | -|-------------|----------|----------|--------| -| Entry Points Alignment | Unified Loop Execution | `clients/agent-runtime/src/main.rs` (`collect_unified_loop_result`, canonical gating before runtime execution); `clients/agent-runtime/src/channels/mod.rs` (`run_canonical_outcome` in `process_channel_message`); `clients/agent-runtime/src/gateway/mod.rs` (`run_canonical_outcome` in webhook non-preview path); tests: `cli_non_preview_timeout_abort_is_session_scoped`, `webhook_non_preview_*`, channel approval tests | ✅ COMPLIANT | -| Stream Events Lifecycle | Standard Iteration Events | `clients/agent-runtime/src/agent/unified_loop.rs` (`LoopEvent` lifecycle + happy path stream tests); preview/event mapping checks in CLI/Gateway/channel tests | ✅ COMPLIANT | -| Context Compaction | Triggering 
Compaction | `clients/agent-runtime/src/agent/unified_loop.rs::test_agent_loop_triggers_compaction_when_threshold_exceeded`; `spec_scenario_matrix_covers_contract_requirements` | ✅ COMPLIANT | -| Timeout Aborts | Runaway Loop Abortion | `clients/agent-runtime/src/agent/unified_loop.rs::test_agent_loop_emits_timeout_error`; `clients/agent-runtime/tests/cli_loop_events_e2e.rs::cli_non_preview_timeout_abort_is_session_scoped`; `clients/agent-runtime/src/gateway/mod.rs::tests::webhook_non_preview_timeout_aborts_with_session_scope` | ✅ COMPLIANT | -| Error Handling and Fallbacks | Recoverable Tool Failure | `clients/agent-runtime/src/agent/unified_entrypoint.rs::tests::retry_backoff_recovers_timeout_before_fallback`; `...::retry_backoff_uses_fallback_on_persistent_tool_failure` | ✅ COMPLIANT | -| Error Handling and Fallbacks | Unrecoverable Error | `clients/agent-runtime/src/agent/unified_loop.rs::test_agent_loop_resume_emits_error_when_denied`; non-preview blocking behavior in CLI/Gateway/channel tests | ✅ COMPLIANT | -| Security Profiling and Invariants | Tool Dispatch with High-Risk Classification | approval-required gating in canonical outcome path for all entry points; tests: CLI non-preview approval override, gateway non-preview approval block/unblock, channel approval block/unblock | ✅ COMPLIANT | +| Requirement | Scenario | Evidence | Result | +|-----------------------------------|---------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------| +| Entry Points Alignment | Unified Loop Execution | `clients/agent-runtime/src/main.rs` 
(`collect_unified_loop_result`, canonical gating before runtime execution); `clients/agent-runtime/src/channels/mod.rs` (`run_canonical_outcome` in `process_channel_message`); `clients/agent-runtime/src/gateway/mod.rs` (`run_canonical_outcome` in webhook non-preview path); tests: `cli_non_preview_timeout_abort_is_session_scoped`, `webhook_non_preview_*`, channel approval tests | ✅ COMPLIANT | +| Stream Events Lifecycle | Standard Iteration Events | `clients/agent-runtime/src/agent/unified_loop.rs` (`LoopEvent` lifecycle + happy path stream tests); preview/event mapping checks in CLI/Gateway/channel tests | ✅ COMPLIANT | +| Context Compaction | Triggering Compaction | `clients/agent-runtime/src/agent/unified_loop.rs::test_agent_loop_triggers_compaction_when_threshold_exceeded`; `spec_scenario_matrix_covers_contract_requirements` | ✅ COMPLIANT | +| Timeout Aborts | Runaway Loop Abortion | `clients/agent-runtime/src/agent/unified_loop.rs::test_agent_loop_emits_timeout_error`; `clients/agent-runtime/tests/cli_loop_events_e2e.rs::cli_non_preview_timeout_abort_is_session_scoped`; `clients/agent-runtime/src/gateway/mod.rs::tests::webhook_non_preview_timeout_aborts_with_session_scope` | ✅ COMPLIANT | +| Error Handling and Fallbacks | Recoverable Tool Failure | `clients/agent-runtime/src/agent/unified_entrypoint.rs::tests::retry_backoff_recovers_timeout_before_fallback`; `...::retry_backoff_uses_fallback_on_persistent_tool_failure` | ✅ COMPLIANT | +| Error Handling and Fallbacks | Unrecoverable Error | `clients/agent-runtime/src/agent/unified_loop.rs::test_agent_loop_resume_emits_error_when_denied`; non-preview blocking behavior in CLI/Gateway/channel tests | ✅ COMPLIANT | +| Security Profiling and Invariants | Tool Dispatch with High-Risk Classification | approval-required gating in canonical outcome path for all entry points; tests: CLI non-preview approval override, gateway non-preview approval block/unblock, channel approval block/unblock | ✅ COMPLIANT | **Compliance 
summary**: 7/7 fully compliant @@ -63,20 +67,27 @@ Key note: ### Correctness & Design Coherence -- Canonical non-preview convergence gates are now active across CLI, channels, and gateway prior to runtime completion paths. -- Session-scoped behavior is preserved at boundaries, with explicit approval, timeout-abort, and fallback handling in the canonical outcome contract. -- Legacy direct loop coupling remains removed (`loop_.rs` deleted, no legacy re-export/direct references), and guard tests pass. -- Design intent for staged convergence is satisfied for this change scope (shared canonical policy gate semantics across entry points). +- Canonical non-preview convergence gates are now active across CLI, channels, and gateway prior to + runtime completion paths. +- Session-scoped behavior is preserved at boundaries, with explicit approval, timeout-abort, and + fallback handling in the canonical outcome contract. +- Legacy direct loop coupling remains removed (`loop_.rs` deleted, no legacy re-export/direct + references), and guard tests pass. +- Design intent for staged convergence is satisfied for this change scope (shared canonical policy + gate semantics across entry points). --- ### Issues Found **CRITICAL** + - None. **WARNING** -- Verification still requires targeted Cargo tests in addition to `make build` and `make test` due to current Gradle task wiring (`:agent-runtime:cargoTest` skipped). + +- Verification still requires targeted Cargo tests in addition to `make build` and `make test` due + to current Gradle task wiring (`:agent-runtime:cargoTest` skipped). --- @@ -84,4 +95,5 @@ Key note: **PASS** -Implementation aligns with proposal/spec/design/tasks for the declared change scope, and the previous MUST-level non-preview convergence gap is now closed. +Implementation aligns with proposal/spec/design/tasks for the declared change scope, and the +previous MUST-level non-preview convergence gap is now closed. 
diff --git a/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/design.md b/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/design.md index 4fd530f08..8a2192bef 100644 --- a/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/design.md +++ b/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/design.md @@ -128,6 +128,7 @@ pub struct McpServerConfig { ``` Validation rules (load-time, fail-safe): + - `name`, `command` are required and must pass identifier/path safety checks. - `startup_timeout_ms`, `call_timeout_ms`, `output_limit_bytes` MUST be positive. - Reserved namespace fragments (`mcp`, invalid identifier chars) are rejected. @@ -227,15 +228,15 @@ sequenceDiagram ## Failure Modes and Handling -| Failure mode | Behavior | Safety guarantee | -|---|---|---| -| Invalid MCP config | Startup fails with structured validation error | No unsafe partial registration | -| One server fails startup | Server isolated and skipped; runtime continues | Availability without bypass | -| Discovery timeout | Abort server discovery at timeout budget | No indefinite startup block | -| Name collision | Deterministic startup error naming colliding ID | No ambiguous dispatch | -| Invocation timeout | Call aborted; structured timeout result | Loop stability preserved | -| Output overflow | Truncate/fail per policy with explicit marker | Memory/cost bounded | -| Transport/server error | Structured failure result returned | No panic/deadlock | +| Failure mode | Behavior | Safety guarantee | +|--------------------------|-------------------------------------------------|--------------------------------| +| Invalid MCP config | Startup fails with structured validation error | No unsafe partial registration | +| One server fails startup | Server isolated and skipped; runtime continues | Availability without bypass | +| Discovery timeout | Abort server discovery at timeout budget | No indefinite startup block | +| Name collision | Deterministic 
startup error naming colliding ID | No ambiguous dispatch | +| Invocation timeout | Call aborted; structured timeout result | Loop stability preserved | +| Output overflow | Truncate/fail per policy with explicit marker | Memory/cost bounded | +| Transport/server error | Structured failure result returned | No panic/deadlock | ## Observability @@ -249,59 +250,60 @@ sequenceDiagram ## File Changes (clients/agent-runtime) -| File | Action | Description | -|------|--------|-------------| -| `clients/agent-runtime/src/config/schema.rs` | Modify | Add `mcp` schema types to `Config`; add load-time validation and redaction-safe error paths. | -| `clients/agent-runtime/src/config/mod.rs` | Modify | Re-export MCP config types. | -| `clients/agent-runtime/src/tools/mod.rs` | Modify | Build and merge MCP tool adapters in `all_tools_with_runtime`; enforce collision checks. | -| `clients/agent-runtime/src/tools/traits.rs` | Modify | Extend `ToolSpec` with source metadata required for policy/audit decisions. | -| `clients/agent-runtime/src/tools/mcp/mod.rs` | Create | MCP module entrypoint and registry builder. | -| `clients/agent-runtime/src/tools/mcp/client.rs` | Create | Stdio MCP client session (`initialize`, `tools/list`, `tools/call`) with timeout controls. | -| `clients/agent-runtime/src/tools/mcp/adapter.rs` | Create | `Tool` trait adapter wrapping discovered MCP tools. | -| `clients/agent-runtime/src/tools/mcp/normalize.rs` | Create | Canonical naming, reserved namespace validation, metadata sanitization. | -| `clients/agent-runtime/src/agent/dispatcher.rs` | Modify | Source-aware MCP risk classification and approval-required defaults. | -| `clients/agent-runtime/src/agent/agent.rs` | Modify | Preserve structured MCP denial/timeout behavior in tool loop execution. | -| `clients/agent-runtime/src/security/policy.rs` | Modify | Add MCP-specific policy helpers and defaults (deny unless explicit allow/approval). 
| -| `clients/agent-runtime/src/approval/mod.rs` | Modify | Integrate MCP unknown/high-risk handling into approval decision path. | -| `clients/agent-runtime/src/channels/mod.rs` | Modify | Ensure channel tool loop applies same MCP risk/approval semantics. | -| `clients/agent-runtime/src/gateway/mod.rs` | Modify | Ensure gateway MCP tool path (when tool-enabled) uses shared risk/approval checks; no bypass. | -| `clients/agent-runtime/Cargo.toml` | Modify | Add minimal MCP transport/protocol dependencies required for stdio v1. | -| `clients/agent-runtime/tests/*` | Modify/Create | Add focused config, registration, policy, approval parity, timeout/cap, and failure isolation coverage. | +| File | Action | Description | +|----------------------------------------------------|---------------|---------------------------------------------------------------------------------------------------------| +| `clients/agent-runtime/src/config/schema.rs` | Modify | Add `mcp` schema types to `Config`; add load-time validation and redaction-safe error paths. | +| `clients/agent-runtime/src/config/mod.rs` | Modify | Re-export MCP config types. | +| `clients/agent-runtime/src/tools/mod.rs` | Modify | Build and merge MCP tool adapters in `all_tools_with_runtime`; enforce collision checks. | +| `clients/agent-runtime/src/tools/traits.rs` | Modify | Extend `ToolSpec` with source metadata required for policy/audit decisions. | +| `clients/agent-runtime/src/tools/mcp/mod.rs` | Create | MCP module entrypoint and registry builder. | +| `clients/agent-runtime/src/tools/mcp/client.rs` | Create | Stdio MCP client session (`initialize`, `tools/list`, `tools/call`) with timeout controls. | +| `clients/agent-runtime/src/tools/mcp/adapter.rs` | Create | `Tool` trait adapter wrapping discovered MCP tools. | +| `clients/agent-runtime/src/tools/mcp/normalize.rs` | Create | Canonical naming, reserved namespace validation, metadata sanitization. 
| +| `clients/agent-runtime/src/agent/dispatcher.rs` | Modify | Source-aware MCP risk classification and approval-required defaults. | +| `clients/agent-runtime/src/agent/agent.rs` | Modify | Preserve structured MCP denial/timeout behavior in tool loop execution. | +| `clients/agent-runtime/src/security/policy.rs` | Modify | Add MCP-specific policy helpers and defaults (deny unless explicit allow/approval). | +| `clients/agent-runtime/src/approval/mod.rs` | Modify | Integrate MCP unknown/high-risk handling into approval decision path. | +| `clients/agent-runtime/src/channels/mod.rs` | Modify | Ensure channel tool loop applies same MCP risk/approval semantics. | +| `clients/agent-runtime/src/gateway/mod.rs` | Modify | Ensure gateway MCP tool path (when tool-enabled) uses shared risk/approval checks; no bypass. | +| `clients/agent-runtime/Cargo.toml` | Modify | Add minimal MCP transport/protocol dependencies required for stdio v1. | +| `clients/agent-runtime/tests/*` | Modify/Create | Add focused config, registration, policy, approval parity, timeout/cap, and failure isolation coverage. | ## Requirement Traceability -| Spec requirement | Design coverage | -|---|---| -| MCP Server Configuration Validation | `Config` schema additions + load-time validation gates + redacted errors (`schema.rs`). | -| Startup Discovery and Registration | Startup MCP registry builder with bounded server introspection and disabled-server skip. | -| Namespaced Tool Identity and Collision Handling | Canonical `mcp..` normalizer + deterministic collision rejection in registry merge. | -| MCP Policy and Approval Enforcement | Dispatcher fail-closed MCP classification + shared policy/approval checks across entry points. | -| MCP Execution Limits and Timeouts | Per-server startup timeout, per-call timeout, output-limit enforcement in MCP adapter/client. | -| MCP Failure Handling and Safety | Per-server isolation on startup failures, structured invocation errors, reject non-tool MCP capabilities. 
| +| Spec requirement | Design coverage | +|-------------------------------------------------|-----------------------------------------------------------------------------------------------------------| +| MCP Server Configuration Validation | `Config` schema additions + load-time validation gates + redacted errors (`schema.rs`). | +| Startup Discovery and Registration | Startup MCP registry builder with bounded server introspection and disabled-server skip. | +| Namespaced Tool Identity and Collision Handling | Canonical `mcp..` normalizer + deterministic collision rejection in registry merge. | +| MCP Policy and Approval Enforcement | Dispatcher fail-closed MCP classification + shared policy/approval checks across entry points. | +| MCP Execution Limits and Timeouts | Per-server startup timeout, per-call timeout, output-limit enforcement in MCP adapter/client. | +| MCP Failure Handling and Safety | Per-server isolation on startup failures, structured invocation errors, reject non-tool MCP capabilities. | ## Testing Strategy -| Layer | What to Test | Approach | -|-------|-------------|----------| -| Unit | MCP config validation | Table-driven tests for missing fields, invalid limits, reserved names, redaction in errors. | -| Unit | Name normalization/collision logic | Deterministic canonicalization and rejection behavior for collisions and reserved IDs. | -| Unit | MCP adapter limits | Timeout cancellation and output cap behavior using mocked stdio responses. | -| Integration | Startup registry merge | Native + MCP merge, disabled server skip, one-server-fails isolation. | -| Integration | Risk/approval parity | Equivalent MCP tool calls via agent loop, channel loop, and gateway path enforce same deny/approval outcome. | -| Regression | Native tool invariants | Existing native tool behavior and safe tool classification unchanged with MCP enabled. 
| +| Layer | What to Test | Approach | +|-------------|------------------------------------|--------------------------------------------------------------------------------------------------------------| +| Unit | MCP config validation | Table-driven tests for missing fields, invalid limits, reserved names, redaction in errors. | +| Unit | Name normalization/collision logic | Deterministic canonicalization and rejection behavior for collisions and reserved IDs. | +| Unit | MCP adapter limits | Timeout cancellation and output cap behavior using mocked stdio responses. | +| Integration | Startup registry merge | Native + MCP merge, disabled server skip, one-server-fails isolation. | +| Integration | Risk/approval parity | Equivalent MCP tool calls via agent loop, channel loop, and gateway path enforce same deny/approval outcome. | +| Regression | Native tool invariants | Existing native tool behavior and safe tool classification unchanged with MCP enabled. | ## Rollout Plan 1. Phase 1: Config + discovery scaffolding - - Add schema, validation, MCP client, startup discovery behind `mcp.enabled`. + - Add schema, validation, MCP client, startup discovery behind `mcp.enabled`. 2. Phase 2: Identity + dispatch + policy - - Introduce namespaced registration and centralized risk/approval handling. + - Introduce namespaced registration and centralized risk/approval handling. 3. Phase 3: Hardening - - Enforce timeouts/output caps, redaction diagnostics, and failure-isolation tests. + - Enforce timeouts/output caps, redaction diagnostics, and failure-isolation tests. 4. Verification gates - - Targeted unit/integration tests for all scenarios in spec delta. + - Targeted unit/integration tests for all scenarios in spec delta. Rollback: + - Set `mcp.enabled = false` (or remove `mcp.servers`) to revert to native-only tool registry. - Keep policy engine unchanged; MCP path is additive and can be disabled without affecting native tool behavior. 
@@ -309,6 +311,6 @@ Rollback: ## Open Questions - [ ] Should v1 use only environment-variable secret references for MCP server env, or also allow - encrypted inline values in `config.toml`? + encrypted inline values in `config.toml`? - [ ] Should gateway switch fully to unified tool loop for tool-enabled webhook paths in this - change, or gate MCP in gateway until that migration lands? + change, or gate MCP in gateway until that migration lands? diff --git a/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/proposal.md b/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/proposal.md index 3281486f1..df7044a3d 100644 --- a/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/proposal.md +++ b/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/proposal.md @@ -25,7 +25,8 @@ This change adds a secure v1 MCP runtime path that integrates into existing tool (`src/agent/dispatcher.rs`, `src/agent/agent.rs`). - Enforce explicit risk/approval policy for MCP tools, including unknown/high-risk tool handling (`src/security/policy.rs`, `src/approval/mod.rs`). -- Add secure secret handling for server configuration and execution (no accidental logging/exposure). +- Add secure secret handling for server configuration and execution (no accidental + logging/exposure). - Add bounded execution controls (timeouts, output caps) and focused tests for correctness/safety. ## Non-goals @@ -38,52 +39,53 @@ This change adds a secure v1 MCP runtime path that integrates into existing tool ## Proposed approach 1. Runtime integration - - Introduce an MCP tool adapter implementing the existing `Tool` trait contract. - - Extend `all_tools_with_runtime` to merge native + MCP-derived tools into one dispatchable set. - - Keep provider layer unchanged; MCP remains runtime/tooling concern. + - Introduce an MCP tool adapter implementing the existing `Tool` trait contract. 
+ - Extend `all_tools_with_runtime` to merge native + MCP-derived tools into one dispatchable set. + - Keep provider layer unchanged; MCP remains runtime/tooling concern. 2. Configuration model - - Extend config schema with an `mcp.servers` collection, initially stdio-only. - - Define server identity, command/args, environment references, startup and per-call timeouts, - output limits, and enabled/disabled flags. - - Validate config strictly at load time and fail-safe for malformed or unsafe definitions. + - Extend config schema with an `mcp.servers` collection, initially stdio-only. + - Define server identity, command/args, environment references, startup and per-call timeouts, + output limits, and enabled/disabled flags. + - Validate config strictly at load time and fail-safe for malformed or unsafe definitions. 3. Tool naming and dispatch - - Map MCP tools to canonical namespaced identifiers (for example `mcp.<server>.<tool>`). - - Preserve source metadata in `ToolSpec` for policy/approval/audit decisions. - - Resolve collisions deterministically (deny ambiguous registration and emit actionable errors). + - Map MCP tools to canonical namespaced identifiers (for example `mcp.<server>.<tool>`). + - Preserve source metadata in `ToolSpec` for policy/approval/audit decisions. + - Resolve collisions deterministically (deny ambiguous registration and emit actionable errors). 4. Policy and approvals - - Classify MCP tool invocations as explicit risk-bearing operations by default. - - Require policy evaluation before invocation; route unresolved/unknown classes through approval. - - Ensure gateway/channel paths share the same MCP approval semantics (`src/gateway/mod.rs`, - `src/channels/mod.rs`). + - Classify MCP tool invocations as explicit risk-bearing operations by default. + - Require policy evaluation before invocation; route unresolved/unknown classes through approval. + - Ensure gateway/channel paths share the same MCP approval semantics (`src/gateway/mod.rs`, + `src/channels/mod.rs`). 5. 
Execution hardening - - Enforce per-server/per-tool timeout ceilings and output byte/token caps. - - Sanitize/log-redact secrets and sensitive configuration values. - - Bound startup-time MCP discovery to avoid blocking runtime initialization indefinitely. + - Enforce per-server/per-tool timeout ceilings and output byte/token caps. + - Sanitize/log-redact secrets and sensitive configuration values. + - Bound startup-time MCP discovery to avoid blocking runtime initialization indefinitely. ### Affected areas -| Area | Impact | Description | -|------|--------|-------------| -| `src/tools/mod.rs` | Modified | Register and merge MCP tools in runtime registry path. | -| `src/tools/traits.rs` | Modified | Ensure MCP adapter satisfies `Tool` contract and metadata expectations. | -| `src/agent/agent.rs` | Modified | Use combined tool set and preserve stable runtime semantics. | -| `src/agent/dispatcher.rs` | Modified | Dispatch namespaced MCP tools and propagate policy context. | -| `src/config/schema.rs` | Modified | Add MCP server schema and validation rules. | -| `src/config/mod.rs` | Modified | Parse/load MCP config and enforce fail-safe defaults. | -| `src/security/policy.rs` | Modified | Risk classification and deny/allow defaults for MCP invocations. | -| `src/approval/mod.rs` | Modified | Approval flow for unknown/high-risk MCP tools without deadlocks. | -| `src/gateway/mod.rs` | Modified | Ensure gateway execution applies MCP security + approval checks. | -| `src/channels/mod.rs` | Modified | Ensure channel runtime applies MCP security + approval checks. | -| `Cargo.toml` | Modified | Add minimal MCP/runtime dependencies needed for stdio integration. | -| `tests/` | Modified/New | Focused tests for config validation, dispatch, policy, approvals, and limits. 
| +| Area | Impact | Description | +|---------------------------|--------------|-------------------------------------------------------------------------------| +| `src/tools/mod.rs` | Modified | Register and merge MCP tools in runtime registry path. | +| `src/tools/traits.rs` | Modified | Ensure MCP adapter satisfies `Tool` contract and metadata expectations. | +| `src/agent/agent.rs` | Modified | Use combined tool set and preserve stable runtime semantics. | +| `src/agent/dispatcher.rs` | Modified | Dispatch namespaced MCP tools and propagate policy context. | +| `src/config/schema.rs` | Modified | Add MCP server schema and validation rules. | +| `src/config/mod.rs` | Modified | Parse/load MCP config and enforce fail-safe defaults. | +| `src/security/policy.rs` | Modified | Risk classification and deny/allow defaults for MCP invocations. | +| `src/approval/mod.rs` | Modified | Approval flow for unknown/high-risk MCP tools without deadlocks. | +| `src/gateway/mod.rs` | Modified | Ensure gateway execution applies MCP security + approval checks. | +| `src/channels/mod.rs` | Modified | Ensure channel runtime applies MCP security + approval checks. | +| `Cargo.toml` | Modified | Add minimal MCP/runtime dependencies needed for stdio integration. | +| `tests/` | Modified/New | Focused tests for config validation, dispatch, policy, approvals, and limits. | ## Security considerations -- Treat all MCP servers as untrusted by default; apply deny-by-default policy until explicitly allowed. +- Treat all MCP servers as untrusted by default; apply deny-by-default policy until explicitly + allowed. - Prevent credential leakage by supporting secret references and redacting sensitive values in logs, traces, and error surfaces. 
- Defend against schema/prompt injection by sanitizing MCP-provided tool metadata and constraining @@ -96,48 +98,51 @@ This change adds a secure v1 MCP runtime path that integrates into existing tool - Startup: batch/parallelize safe MCP introspection where possible, with hard time budgets. - Runtime: cache validated MCP tool manifests for session/runtime lifetime (v1 startup-time model). - Bound latency and memory with per-call timeouts, output caps, and conservative defaults. -- Keep dispatch overhead near existing path by reusing current tool registry and avoiding provider-level +- Keep dispatch overhead near existing path by reusing current tool registry and avoiding + provider-level indirection. ## Rollout/testing plan 1. Phase 1 - Configuration and registration - - Implement schema/config parsing for `mcp.servers` (stdio). - - Register namespaced MCP tools in `all_tools_with_runtime` behind feature/config flag. - - Tests: config validation, invalid server rejection, collision detection. + - Implement schema/config parsing for `mcp.servers` (stdio). + - Register namespaced MCP tools in `all_tools_with_runtime` behind feature/config flag. + - Tests: config validation, invalid server rejection, collision detection. 2. Phase 2 - Dispatch, policy, and approvals - - Wire MCP tools through dispatcher and agent loop with source metadata. - - Apply security policy classification and approval flow integration. - - Tests: approval behavior for unknown/high-risk MCP tools, cross-entrypoint parity. + - Wire MCP tools through dispatcher and agent loop with source metadata. + - Apply security policy classification and approval flow integration. + - Tests: approval behavior for unknown/high-risk MCP tools, cross-entrypoint parity. 3. Phase 3 - Hardening and limits - - Enforce timeout/output caps, startup timeout, secret redaction checks. - - Tests: timeout and cap enforcement, redaction assertions, failure-mode behavior. 
+ - Enforce timeout/output caps, startup timeout, secret redaction checks. + - Tests: timeout and cap enforcement, redaction assertions, failure-mode behavior. 4. Verification gates - - Unit tests for config/schema + tool normalization. - - Integration tests for end-to-end invocation via agent runtime and channels/gateway. - - Regression tests to ensure existing native tool behavior remains unchanged. + - Unit tests for config/schema + tool normalization. + - Integration tests for end-to-end invocation via agent runtime and channels/gateway. + - Regression tests to ensure existing native tool behavior remains unchanged. ### Rollback plan - Disable MCP integration via runtime config/feature flag and fall back to existing native-only tool registry. -- Keep security policy enforcement unchanged during rollback; only MCP registration/execution path is +- Keep security policy enforcement unchanged during rollback; only MCP registration/execution path + is disabled. -- Revert schema fields as optional/no-op to preserve backward compatibility for existing deployments. +- Revert schema fields as optional/no-op to preserve backward compatibility for existing + deployments. ## Risks -| Risk | Likelihood | Mitigation | -|------|------------|------------| -| Untrusted MCP servers execute unsafe operations | Medium/High | Deny-by-default policy, explicit allow/approval controls, strict runtime limits. | -| Credential leakage through config/logs/errors | Medium | Secret references, redaction pipeline, no raw env dump in diagnostics. | -| Schema/tool metadata injection into prompt/runtime | Medium | Validate/sanitize metadata and enforce constrained normalization. | -| Tool-name collisions break dispatch correctness | Medium | Mandatory namespace + deterministic collision rejection. | -| Approval flow blocks legitimate unknown tools | Medium | Clear policy classes, explicit fallback approval path, focused integration tests. 
| -| Many MCP tools increase startup latency | Medium | Startup discovery budgets, bounded introspection, manifest caching. | +| Risk | Likelihood | Mitigation | +|----------------------------------------------------|-------------|-----------------------------------------------------------------------------------| +| Untrusted MCP servers execute unsafe operations | Medium/High | Deny-by-default policy, explicit allow/approval controls, strict runtime limits. | +| Credential leakage through config/logs/errors | Medium | Secret references, redaction pipeline, no raw env dump in diagnostics. | +| Schema/tool metadata injection into prompt/runtime | Medium | Validate/sanitize metadata and enforce constrained normalization. | +| Tool-name collisions break dispatch correctness | Medium | Mandatory namespace + deterministic collision rejection. | +| Approval flow blocks legitimate unknown tools | Medium | Clear policy classes, explicit fallback approval path, focused integration tests. | +| Many MCP tools increase startup latency | Medium | Startup discovery budgets, bounded introspection, manifest caching. | ## Open questions diff --git a/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/tasks.md b/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/tasks.md index e0e2829f9..a35fa8025 100644 --- a/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/tasks.md +++ b/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/tasks.md @@ -2,51 +2,123 @@ ## Phase 1: Config and Rollout Guard Foundation -- [x] 1.1 (RED) Add failing config-validation coverage in `clients/agent-runtime/tests/mcp_config_validation.rs` for malformed server definitions, non-positive limits/timeouts, and secret redaction in diagnostics; **Acceptance:** tests fail against current runtime and map to spec validation scenarios. 
-- [x] 1.2 Add MCP config schema types and defaults in `clients/agent-runtime/src/config/schema.rs` (`McpConfig`, `McpServerConfig`, `mcp` field on `Config`) with strict load-time validation and structured redacted errors; **Acceptance:** config load rejects invalid MCP definitions and never prints raw secret env values. -- [x] 1.3 Re-export MCP config models in `clients/agent-runtime/src/config/mod.rs` and ensure config load/init paths include MCP validation; **Acceptance:** MCP config types are available through existing config module APIs and validation runs on startup. -- [x] 1.4 Add rollout guard behavior keyed by `mcp.enabled` in config handling and tool bootstrap entry paths (`clients/agent-runtime/src/tools/mod.rs`, `clients/agent-runtime/src/agent/agent.rs` as needed); **Acceptance:** MCP discovery path is unreachable when disabled and native tool behavior remains unchanged. -- [x] 1.5 Add MCP runtime dependencies and feature wiring in `clients/agent-runtime/Cargo.toml` with minimal crate surface; **Acceptance:** build resolves with MCP support enabled and no unnecessary dependency additions. +- [x] 1.1 (RED) Add failing config-validation coverage in + `clients/agent-runtime/tests/mcp_config_validation.rs` for malformed server definitions, + non-positive limits/timeouts, and secret redaction in diagnostics; **Acceptance:** tests fail + against current runtime and map to spec validation scenarios. +- [x] 1.2 Add MCP config schema types and defaults in `clients/agent-runtime/src/config/schema.rs` ( + `McpConfig`, `McpServerConfig`, `mcp` field on `Config`) with strict load-time validation and + structured redacted errors; **Acceptance:** config load rejects invalid MCP definitions and never + prints raw secret env values. 
+- [x] 1.3 Re-export MCP config models in `clients/agent-runtime/src/config/mod.rs` and ensure config + load/init paths include MCP validation; **Acceptance:** MCP config types are available through + existing config module APIs and validation runs on startup. +- [x] 1.4 Add rollout guard behavior keyed by `mcp.enabled` in config handling and tool bootstrap + entry paths (`clients/agent-runtime/src/tools/mod.rs`, `clients/agent-runtime/src/agent/agent.rs` + as needed); **Acceptance:** MCP discovery path is unreachable when disabled and native tool + behavior remains unchanged. +- [x] 1.5 Add MCP runtime dependencies and feature wiring in `clients/agent-runtime/Cargo.toml` with + minimal crate surface; **Acceptance:** build resolves with MCP support enabled and no unnecessary + dependency additions. ## Phase 2: Discovery, Registry Merge, and Namespacing -- [x] 2.1 (RED) Add failing discovery/merge tests in `clients/agent-runtime/src/tools/mod.rs` tests and `clients/agent-runtime/tests/mcp_registry_integration.rs` for enabled discovery, disabled-server skip, startup timeout bound, and collision rejection; **Acceptance:** tests express all startup registration scenarios before implementation. -- [x] 2.2 Create MCP module scaffolding in `clients/agent-runtime/src/tools/mcp/mod.rs`, `clients/agent-runtime/src/tools/mcp/client.rs`, and `clients/agent-runtime/src/tools/mcp/adapter.rs` for stdio initialize/list/call flow; **Acceptance:** module compiles, startup discovery returns adapter instances for valid servers. -- [x] 2.3 Implement canonical identifier normalization and reserved namespace checks in `clients/agent-runtime/src/tools/mcp/normalize.rs`; **Acceptance:** discovered `search` from server `docs` normalizes to `mcp.docs.search`, invalid/reserved names are rejected deterministically. 
-- [x] 2.4 Extend tool metadata in `clients/agent-runtime/src/tools/traits.rs` (for source/provider/server/original name) and propagate metadata from MCP adapter into `ToolSpec`; **Acceptance:** MCP tool specs include source metadata used by policy and audit logic. -- [x] 2.5 Integrate MCP discovery into `all_tools_with_runtime` in `clients/agent-runtime/src/tools/mod.rs` with deterministic native+MCP merge and actionable collision errors; **Acceptance:** unified registry includes MCP tools when enabled and fails closed on ambiguous IDs. -- [x] 2.6 Enforce startup failure isolation in MCP registry builder (`clients/agent-runtime/src/tools/mcp/mod.rs`) so one failing server does not abort healthy servers; **Acceptance:** failed server is skipped with redacted diagnostics while other valid servers register. +- [x] 2.1 (RED) Add failing discovery/merge tests in `clients/agent-runtime/src/tools/mod.rs` tests + and `clients/agent-runtime/tests/mcp_registry_integration.rs` for enabled discovery, + disabled-server skip, startup timeout bound, and collision rejection; **Acceptance:** tests + express all startup registration scenarios before implementation. +- [x] 2.2 Create MCP module scaffolding in `clients/agent-runtime/src/tools/mcp/mod.rs`, + `clients/agent-runtime/src/tools/mcp/client.rs`, and + `clients/agent-runtime/src/tools/mcp/adapter.rs` for stdio initialize/list/call flow; + **Acceptance:** module compiles, startup discovery returns adapter instances for valid servers. +- [x] 2.3 Implement canonical identifier normalization and reserved namespace checks in + `clients/agent-runtime/src/tools/mcp/normalize.rs`; **Acceptance:** discovered `search` from + server `docs` normalizes to `mcp.docs.search`, invalid/reserved names are rejected + deterministically.
+- [x] 2.4 Extend tool metadata in `clients/agent-runtime/src/tools/traits.rs` (for + source/provider/server/original name) and propagate metadata from MCP adapter into `ToolSpec`; + **Acceptance:** MCP tool specs include source metadata used by policy and audit logic. +- [x] 2.5 Integrate MCP discovery into `all_tools_with_runtime` in + `clients/agent-runtime/src/tools/mod.rs` with deterministic native+MCP merge and actionable + collision errors; **Acceptance:** unified registry includes MCP tools when enabled and fails + closed on ambiguous IDs. +- [x] 2.6 Enforce startup failure isolation in MCP registry builder ( + `clients/agent-runtime/src/tools/mcp/mod.rs`) so one failing server does not abort healthy + servers; **Acceptance:** failed server is skipped with redacted diagnostics while other valid + servers register. ## Phase 3: Policy, Approval, and Entry-Point Parity -- [x] 3.1 (RED) Add failing policy/approval tests in `clients/agent-runtime/src/agent/tests.rs` and `clients/agent-runtime/tests/mcp_policy_approval_parity.rs` for deny-by-default MCP execution and unknown/high-risk approval gating; **Acceptance:** tests fail until dispatcher/policy/approval updates are wired. -- [x] 3.2 Update MCP risk classification in `clients/agent-runtime/src/agent/dispatcher.rs` so `mcp.*` calls are treated as risk-bearing and fail closed without explicit allow/approval outcome; **Acceptance:** dispatcher returns `ApprovalRequired` for MCP by default. -- [x] 3.3 Add MCP-aware policy helpers in `clients/agent-runtime/src/security/policy.rs` for source-aware allow/deny evaluation; **Acceptance:** policy layer can differentiate native vs MCP tool decisions with secure defaults. -- [x] 3.4 Integrate unknown/high-risk MCP handling in `clients/agent-runtime/src/approval/mod.rs` and return structured denial results when approval is absent/denied; **Acceptance:** denied MCP calls are blocked without execution and include stable structured denial payloads.
-- [x] 3.5 Wire parity paths in `clients/agent-runtime/src/channels/mod.rs` and `clients/agent-runtime/src/gateway/mod.rs` to reuse shared dispatcher policy/approval decisions; **Acceptance:** CLI, channel, and gateway enforce equivalent MCP approval outcomes with no bypass path. +- [x] 3.1 (RED) Add failing policy/approval tests in `clients/agent-runtime/src/agent/tests.rs` and + `clients/agent-runtime/tests/mcp_policy_approval_parity.rs` for deny-by-default MCP execution and + unknown/high-risk approval gating; **Acceptance:** tests fail until dispatcher/policy/approval + updates are wired. +- [x] 3.2 Update MCP risk classification in `clients/agent-runtime/src/agent/dispatcher.rs` so + `mcp.*` calls are treated as risk-bearing and fail closed without explicit allow/approval outcome; + **Acceptance:** dispatcher returns `ApprovalRequired` for MCP by default. +- [x] 3.3 Add MCP-aware policy helpers in `clients/agent-runtime/src/security/policy.rs` for + source-aware allow/deny evaluation; **Acceptance:** policy layer can differentiate native vs MCP + tool decisions with secure defaults. +- [x] 3.4 Integrate unknown/high-risk MCP handling in `clients/agent-runtime/src/approval/mod.rs` + and return structured denial results when approval is absent/denied; **Acceptance:** denied MCP + calls are blocked without execution and include stable structured denial payloads. +- [x] 3.5 Wire parity paths in `clients/agent-runtime/src/channels/mod.rs` and + `clients/agent-runtime/src/gateway/mod.rs` to reuse shared dispatcher policy/approval decisions; + **Acceptance:** CLI, channel, and gateway enforce equivalent MCP approval outcomes with no bypass + path.
## Phase 4: Execution Limits, Failure Safety, and Observability -- [x] 4.1 (RED) Add failing limit and failure-path tests in `clients/agent-runtime/tests/mcp_execution_limits.rs` for per-call timeout, output-cap enforcement, transport failure handling, and native-tool regression; **Acceptance:** tests fail before limit enforcement is implemented. -- [x] 4.2 Implement per-call timeout enforcement in `clients/agent-runtime/src/tools/mcp/client.rs` and `clients/agent-runtime/src/tools/mcp/adapter.rs`; **Acceptance:** over-budget MCP calls are canceled/aborted and return structured timeout failures without hanging loops. -- [x] 4.3 Implement output byte/token cap enforcement in `clients/agent-runtime/src/tools/mcp/adapter.rs` with explicit limit marker behavior; **Acceptance:** oversized MCP output is truncated or failed per policy and result indicates limit enforcement. -- [x] 4.4 Add structured invocation failure mapping and non-tool capability filtering in `clients/agent-runtime/src/tools/mcp/mod.rs` and `clients/agent-runtime/src/tools/mcp/client.rs`; **Acceptance:** transport/server errors surface as structured failures and resources/prompts are ignored/rejected in v1 registration. -- [x] 4.5 Add MCP observability and redacted diagnostics in `clients/agent-runtime/src/agent/agent.rs`, `clients/agent-runtime/src/tools/mcp/mod.rs`, and existing observer/log surfaces; **Acceptance:** startup and call events expose MCP tool/server context, timeout/cap/collision events are logged, and secrets remain redacted. +- [x] 4.1 (RED) Add failing limit and failure-path tests in + `clients/agent-runtime/tests/mcp_execution_limits.rs` for per-call timeout, output-cap + enforcement, transport failure handling, and native-tool regression; **Acceptance:** tests fail + before limit enforcement is implemented. 
+- [x] 4.2 Implement per-call timeout enforcement in `clients/agent-runtime/src/tools/mcp/client.rs` + and `clients/agent-runtime/src/tools/mcp/adapter.rs`; **Acceptance:** over-budget MCP calls are + canceled/aborted and return structured timeout failures without hanging loops. +- [x] 4.3 Implement output byte/token cap enforcement in + `clients/agent-runtime/src/tools/mcp/adapter.rs` with explicit limit marker behavior; + **Acceptance:** oversized MCP output is truncated or failed per policy and result indicates limit + enforcement. +- [x] 4.4 Add structured invocation failure mapping and non-tool capability filtering in + `clients/agent-runtime/src/tools/mcp/mod.rs` and `clients/agent-runtime/src/tools/mcp/client.rs`; + **Acceptance:** transport/server errors surface as structured failures and resources/prompts are + ignored/rejected in v1 registration. +- [x] 4.5 Add MCP observability and redacted diagnostics in + `clients/agent-runtime/src/agent/agent.rs`, `clients/agent-runtime/src/tools/mcp/mod.rs`, and + existing observer/log surfaces; **Acceptance:** startup and call events expose MCP tool/server + context, timeout/cap/collision events are logged, and secrets remain redacted. ## Phase 5: Integration Verification and Rollout Readiness -- [x] 5.1 Add end-to-end startup/invocation integration tests in `clients/agent-runtime/tests/mcp_runtime_e2e.rs` covering valid registration, one-server-fails isolation, and disabled-server behavior; **Acceptance:** integration scenarios match spec startup/failure requirements. -- [x] 5.2 Add regression coverage ensuring native tools are unchanged when MCP is enabled/disabled in `clients/agent-runtime/tests/agent_loop_integration.rs` (or dedicated `clients/agent-runtime/tests/mcp_native_regression.rs`); **Acceptance:** existing native dispatch semantics and outputs stay stable.
-- [x] 5.3 Update MCP configuration and security rollout docs in `docs/en/guides/configuration.md`, `docs/es/guides/configuration.md`, and `docs/en/clients/agent-runtime/architecture.md`; **Acceptance:** docs include enable/disable guard (`mcp.enabled`), safe defaults, approval expectations, and rollback instructions. -- [x] 5.4 Run verification gates (`cargo fmt --all -- --check`, `cargo clippy --all-targets -- -D warnings`, `cargo test`) from `clients/agent-runtime`; **Acceptance:** all checks pass and test artifacts demonstrate coverage for each spec requirement area. +- [x] 5.1 Add end-to-end startup/invocation integration tests in + `clients/agent-runtime/tests/mcp_runtime_e2e.rs` covering valid registration, one-server-fails + isolation, and disabled-server behavior; **Acceptance:** integration scenarios match spec + startup/failure requirements. +- [x] 5.2 Add regression coverage ensuring native tools are unchanged when MCP is enabled/disabled + in `clients/agent-runtime/tests/agent_loop_integration.rs` (or dedicated + `clients/agent-runtime/tests/mcp_native_regression.rs`); **Acceptance:** existing native dispatch + semantics and outputs stay stable. +- [x] 5.3 Update MCP configuration and security rollout docs in `docs/en/guides/configuration.md`, + `docs/es/guides/configuration.md`, and `docs/en/clients/agent-runtime/architecture.md`; + **Acceptance:** docs include enable/disable guard (`mcp.enabled`), safe defaults, approval + expectations, and rollback instructions. +- [x] 5.4 Run verification gates (`cargo fmt --all -- --check`, + `cargo clippy --all-targets -- -D warnings`, `cargo test`) from `clients/agent-runtime`; + **Acceptance:** all checks pass and test artifacts demonstrate coverage for each spec requirement + area. ## Dependency Order and Parallelism - Sequential backbone: `Phase 1` -> `Phase 2` -> `Phase 3` -> `Phase 4` -> `Phase 5`.
-- Hard dependencies: 2.x depends on 1.2-1.5; 3.x depends on 2.4-2.5; 4.x depends on 2.2-2.6 and 3.2-3.5; 5.x depends on all prior implementation tasks. +- Hard dependencies: 2.x depends on 1.2-1.5; 3.x depends on 2.4-2.5; 4.x depends on 2.2-2.6 and + 3.2-3.5; 5.x depends on all prior implementation tasks. - Parallelizable within phases after prerequisites: - - `Phase 2`: 2.2 and 2.3 can run in parallel after 1.2; 2.4 can start once adapter interfaces are stable. + - `Phase 2`: 2.2 and 2.3 can run in parallel after 1.2; 2.4 can start once adapter interfaces are + stable. - `Phase 3`: 3.3 and 3.4 can run in parallel after 3.2 baseline classification is in place. - - `Phase 4`: 4.2 and 4.3 can run in parallel after 2.2; 4.5 can run in parallel with 4.4 after result/error shape is stable. + - `Phase 4`: 4.2 and 4.3 can run in parallel after 2.2; 4.5 can run in parallel with 4.4 after + result/error shape is stable. - `Phase 5`: 5.1 and 5.3 can run in parallel; 5.4 runs last. -- TDD cadence requirement for each feature area: execute RED tasks first (1.1, 2.1, 3.1, 4.1), then GREEN implementation tasks, then refactor without changing behavior. +- TDD cadence requirement for each feature area: execute RED tasks first (1.1, 2.1, 3.1, 4.1), then + GREEN implementation tasks, then refactor without changing behavior. 
diff --git a/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/verify-report.md b/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/verify-report.md index 06d29216b..7aadcffac 100644 --- a/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/verify-report.md +++ b/openspec/changes/archive/2026-03-03-support-mcps-agent-runtime/verify-report.md @@ -6,11 +6,12 @@ --- ### Completeness -| Metric | Value | -|--------|-------| -| Tasks total | 25 | -| Tasks complete | 25 | -| Tasks incomplete | 0 | + +| Metric | Value | +|------------------|-------| +| Tasks total | 25 | +| Tasks complete | 25 | +| Tasks incomplete | 0 | All tasks in `openspec/changes/support-mcps-agent-runtime/tasks.md` are marked complete (`[x]`). @@ -19,6 +20,7 @@ All tasks in `openspec/changes/support-mcps-agent-runtime/tasks.md` are marked c ### Build & Tests Execution **Build**: ❌ Failed + ```text Command: make build Result: exit code 2 @@ -27,6 +29,7 @@ Error: ERR_PNPM_CATALOG_ENTRY_NOT_FOUND_FOR_SPEC No catalog entry 'tailwind-merg ``` **Tests**: ✅ 4403 passed / ❌ 0 failed / ⚠️ 0 skipped + ```text Primary verify command (from openspec/config.yaml): make test - Exit code: 0 @@ -56,90 +59,111 @@ Aggregation report indicates: "No class files specified." 
### Spec Compliance Matrix -| Requirement | Scenario | Test | Result | -|-------------|----------|------|--------| -| MCP Server Configuration Validation | Reject malformed server definition | `clients/agent-runtime/tests/mcp_config_validation.rs > rejects_malformed_server_definition` | ✅ COMPLIANT | -| MCP Server Configuration Validation | Reject unsafe timeout and limit values | `clients/agent-runtime/tests/mcp_config_validation.rs > rejects_non_positive_timeouts_and_limits` | ✅ COMPLIANT | -| MCP Server Configuration Validation | Secret references are protected in diagnostics | `clients/agent-runtime/tests/mcp_config_validation.rs > validation_error_redacts_secret_values` | ✅ COMPLIANT | -| Startup Discovery and Registration | Register MCP tools during startup | `clients/agent-runtime/tests/mcp_runtime_e2e.rs > runtime_registers_and_invokes_mcp_tool_when_enabled` | ✅ COMPLIANT | -| Startup Discovery and Registration | Bound startup discovery duration | `clients/agent-runtime/tests/mcp_registry_integration.rs > discovery_is_bounded_by_startup_timeout` | ✅ COMPLIANT | -| Startup Discovery and Registration | Disabled servers are not loaded | `clients/agent-runtime/tests/mcp_registry_integration.rs > discovery_skips_disabled_servers` | ✅ COMPLIANT | -| Namespaced Tool Identity and Collision Handling | Canonical MCP tool naming | `clients/agent-runtime/src/tools/mcp/normalize.rs > canonical_name_uses_mcp_server_tool_format` | ✅ COMPLIANT | -| Namespaced Tool Identity and Collision Handling | Collision with existing tool identity | `clients/agent-runtime/src/tools/mod.rs > all_tools_fails_closed_on_mcp_name_collisions` | ⚠️ PARTIAL | -| Namespaced Tool Identity and Collision Handling | Reserved namespace protection | `clients/agent-runtime/src/tools/mcp/normalize.rs > reserved_identifier_is_rejected` | ✅ COMPLIANT | -| MCP Policy and Approval Enforcement | Deny-by-default policy for MCP tools | `clients/agent-runtime/tests/mcp_policy_approval_parity.rs > 
mcp_tools_are_deny_by_default_in_dispatcher` | ✅ COMPLIANT | -| MCP Policy and Approval Enforcement | Unknown or high-risk MCP action requires approval | `clients/agent-runtime/tests/mcp_policy_approval_parity.rs > unknown_and_high_risk_tools_require_approval` | ✅ COMPLIANT | -| MCP Policy and Approval Enforcement | Entry-point parity for approval behavior | `src agent/channels/gateway tests > turn_blocks_mcp_tool_by_default_with_structured_denial_payload; process_channel_message_blocks_on_approval_by_default; webhook_non_preview_blocks_approval_and_keeps_session_id` | ✅ COMPLIANT | -| MCP Execution Limits and Timeouts | Per-call timeout enforcement | `clients/agent-runtime/tests/mcp_execution_limits.rs > mcp_call_timeout_returns_structured_timeout_failure` | ✅ COMPLIANT | -| MCP Execution Limits and Timeouts | Output cap enforcement | `clients/agent-runtime/tests/mcp_execution_limits.rs > mcp_output_cap_enforcement_marks_limited_output` | ✅ COMPLIANT | -| MCP Execution Limits and Timeouts | Limit enforcement does not affect native tools | `clients/agent-runtime/tests/mcp_execution_limits.rs > native_tool_dispatch_still_works_with_mcp_limits_enabled` | ✅ COMPLIANT | -| MCP Failure Handling and Safety | Startup failure for one server does not crash runtime | `clients/agent-runtime/tests/mcp_runtime_e2e.rs > runtime_isolates_failing_server_and_keeps_healthy_server` | ✅ COMPLIANT | -| MCP Failure Handling and Safety | Invocation failure returns structured error | `clients/agent-runtime/tests/mcp_execution_limits.rs > mcp_transport_failures_return_stable_structured_errors` | ✅ COMPLIANT | -| MCP Failure Handling and Safety | Out-of-scope MCP capabilities are rejected | (no dedicated passing test found) | ❌ UNTESTED | +| Requirement | Scenario | Test | Result | 
+|-------------------------------------------------|-------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------| +| MCP Server Configuration Validation | Reject malformed server definition | `clients/agent-runtime/tests/mcp_config_validation.rs > rejects_malformed_server_definition` | ✅ COMPLIANT | +| MCP Server Configuration Validation | Reject unsafe timeout and limit values | `clients/agent-runtime/tests/mcp_config_validation.rs > rejects_non_positive_timeouts_and_limits` | ✅ COMPLIANT | +| MCP Server Configuration Validation | Secret references are protected in diagnostics | `clients/agent-runtime/tests/mcp_config_validation.rs > validation_error_redacts_secret_values` | ✅ COMPLIANT | +| Startup Discovery and Registration | Register MCP tools during startup | `clients/agent-runtime/tests/mcp_runtime_e2e.rs > runtime_registers_and_invokes_mcp_tool_when_enabled` | ✅ COMPLIANT | +| Startup Discovery and Registration | Bound startup discovery duration | `clients/agent-runtime/tests/mcp_registry_integration.rs > discovery_is_bounded_by_startup_timeout` | ✅ COMPLIANT | +| Startup Discovery and Registration | Disabled servers are not loaded | `clients/agent-runtime/tests/mcp_registry_integration.rs > discovery_skips_disabled_servers` | ✅ COMPLIANT | +| Namespaced Tool Identity and Collision Handling | Canonical MCP tool naming | `clients/agent-runtime/src/tools/mcp/normalize.rs > canonical_name_uses_mcp_server_tool_format` | ✅ COMPLIANT | +| Namespaced Tool Identity and Collision Handling | Collision with existing tool identity | `clients/agent-runtime/src/tools/mod.rs > all_tools_fails_closed_on_mcp_name_collisions` | ⚠️ PARTIAL | +| Namespaced Tool Identity and Collision Handling | Reserved namespace protection | 
`clients/agent-runtime/src/tools/mcp/normalize.rs > reserved_identifier_is_rejected` | ✅ COMPLIANT | +| MCP Policy and Approval Enforcement | Deny-by-default policy for MCP tools | `clients/agent-runtime/tests/mcp_policy_approval_parity.rs > mcp_tools_are_deny_by_default_in_dispatcher` | ✅ COMPLIANT | +| MCP Policy and Approval Enforcement | Unknown or high-risk MCP action requires approval | `clients/agent-runtime/tests/mcp_policy_approval_parity.rs > unknown_and_high_risk_tools_require_approval` | ✅ COMPLIANT | +| MCP Policy and Approval Enforcement | Entry-point parity for approval behavior | `src agent/channels/gateway tests > turn_blocks_mcp_tool_by_default_with_structured_denial_payload; process_channel_message_blocks_on_approval_by_default; webhook_non_preview_blocks_approval_and_keeps_session_id` | ✅ COMPLIANT | +| MCP Execution Limits and Timeouts | Per-call timeout enforcement | `clients/agent-runtime/tests/mcp_execution_limits.rs > mcp_call_timeout_returns_structured_timeout_failure` | ✅ COMPLIANT | +| MCP Execution Limits and Timeouts | Output cap enforcement | `clients/agent-runtime/tests/mcp_execution_limits.rs > mcp_output_cap_enforcement_marks_limited_output` | ✅ COMPLIANT | +| MCP Execution Limits and Timeouts | Limit enforcement does not affect native tools | `clients/agent-runtime/tests/mcp_execution_limits.rs > native_tool_dispatch_still_works_with_mcp_limits_enabled` | ✅ COMPLIANT | +| MCP Failure Handling and Safety | Startup failure for one server does not crash runtime | `clients/agent-runtime/tests/mcp_runtime_e2e.rs > runtime_isolates_failing_server_and_keeps_healthy_server` | ✅ COMPLIANT | +| MCP Failure Handling and Safety | Invocation failure returns structured error | `clients/agent-runtime/tests/mcp_execution_limits.rs > mcp_transport_failures_return_stable_structured_errors` | ✅ COMPLIANT | +| MCP Failure Handling and Safety | Out-of-scope MCP capabilities are rejected | (no dedicated passing test found) | ❌ UNTESTED | **Compliance 
summary**: 16/18 scenarios compliant (1 partial, 1 untested) --- ### Correctness (Static - Structural Evidence) -| Requirement | Status | Notes | -|------------|--------|-------| -| MCP Server Configuration Validation | ✅ Implemented | `validate_for_runtime()` calls `validate_mcp_servers()` with strict checks in `clients/agent-runtime/src/config/schema.rs`. | -| Startup Discovery and Registration | ✅ Implemented | MCP discovery occurs in `clients/agent-runtime/src/tools/mcp/mod.rs` and merges through `clients/agent-runtime/src/tools/mod.rs`. | -| Namespaced Tool Identity and Collision Handling | ⚠️ Partial | Canonical naming and reserved checks exist, but collision path in `all_tools()` logs and skips MCP registration instead of surfacing explicit actionable startup error to caller. | -| MCP Policy and Approval Enforcement | ✅ Implemented | `evaluate_tool_risk()` and shared denial payload path are wired in dispatcher, channels, and gateway. | -| MCP Execution Limits and Timeouts | ✅ Implemented | Timeout and output limits are enforced in `clients/agent-runtime/src/tools/mcp/client.rs` and `clients/agent-runtime/src/tools/mcp/adapter.rs`. | -| MCP Failure Handling and Safety | ⚠️ Partial | Failure isolation and structured errors are implemented; explicit non-tool capability filtering/rejection is not evidenced by dedicated tests. | + +| Requirement | Status | Notes | +|-------------------------------------------------|---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| MCP Server Configuration Validation | ✅ Implemented | `validate_for_runtime()` calls `validate_mcp_servers()` with strict checks in `clients/agent-runtime/src/config/schema.rs`. 
| +| Startup Discovery and Registration | ✅ Implemented | MCP discovery occurs in `clients/agent-runtime/src/tools/mcp/mod.rs` and merges through `clients/agent-runtime/src/tools/mod.rs`. | +| Namespaced Tool Identity and Collision Handling | ⚠️ Partial | Canonical naming and reserved checks exist, but collision path in `all_tools()` logs and skips MCP registration instead of surfacing explicit actionable startup error to caller. | +| MCP Policy and Approval Enforcement | ✅ Implemented | `evaluate_tool_risk()` and shared denial payload path are wired in dispatcher, channels, and gateway. | +| MCP Execution Limits and Timeouts | ✅ Implemented | Timeout and output limits are enforced in `clients/agent-runtime/src/tools/mcp/client.rs` and `clients/agent-runtime/src/tools/mcp/adapter.rs`. | +| MCP Failure Handling and Safety | ⚠️ Partial | Failure isolation and structured errors are implemented; explicit non-tool capability filtering/rejection is not evidenced by dedicated tests. | --- ### Coherence (Design) -| Decision | Followed? | Notes | -|----------|-----------|-------| -| MCP as Tool Adapter, Not Provider Feature | ✅ Yes | MCP code is under `src/tools/mcp`; no provider-layer MCP implementation found. | -| Startup Discovery + Immutable Runtime Manifest | ✅ Yes | Discovery occurs during tool bootstrap; no hot-reload path detected. | -| Canonical Namespaced Identity | ✅ Yes | `mcp..` is enforced in `clients/agent-runtime/src/tools/mcp/normalize.rs`. | -| Fail-Closed Registration and Risk Gating | ⚠️ Deviated | Risk gating is fail-closed; registration collision handling is safe but degrades to warning+skip rather than explicit startup failure surfaced to operator. | -| Shared Approval/Risk Engine Across Entry Points | ✅ Yes | Agent/channel/gateway use common dispatcher risk evaluation and structured denial semantics. 
| -File change coherence: design-listed files are present and modified, but additional modified files outside the design table also exist (e.g., several provider and onboarding files), indicating scope expansion in the working tree. +| Decision | Followed? | Notes | +|-------------------------------------------------|-------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| +| MCP as Tool Adapter, Not Provider Feature | ✅ Yes | MCP code is under `src/tools/mcp`; no provider-layer MCP implementation found. | +| Startup Discovery + Immutable Runtime Manifest | ✅ Yes | Discovery occurs during tool bootstrap; no hot-reload path detected. | +| Canonical Namespaced Identity | ✅ Yes | `mcp..` is enforced in `clients/agent-runtime/src/tools/mcp/normalize.rs`. | +| Fail-Closed Registration and Risk Gating | ⚠️ Deviated | Risk gating is fail-closed; registration collision handling is safe but degrades to warning+skip rather than explicit startup failure surfaced to operator. | +| Shared Approval/Risk Engine Across Entry Points | ✅ Yes | Agent/channel/gateway use common dispatcher risk evaluation and structured denial semantics. | + +File change coherence: design-listed files are present and modified, but additional modified files +outside the design table also exist (e.g., several provider and onboarding files), indicating scope +expansion in the working tree. --- ### Issues Found **CRITICAL** (must fix before archive): -- `make build` fails in workspace web install step (`:web:workspaceInstall`) with missing pnpm catalog entry `tailwind-merge`. -- Spec scenario "Out-of-scope MCP capabilities are rejected" has no dedicated passing runtime test evidence (currently ❌ UNTESTED). + +- `make build` fails in workspace web install step (`:web:workspaceInstall`) with missing pnpm + catalog entry `tailwind-merge`. 
+- Spec scenario "Out-of-scope MCP capabilities are rejected" has no dedicated passing runtime test + evidence (currently ❌ UNTESTED). **WARNING** (should fix): -- Collision scenario is only partially validated against requirement wording (actionable surfaced error path is weak at runtime integration boundary). -- Coverage threshold (60%) cannot be evaluated for this change scope with current command/report setup. -- Working tree includes extra modified files outside design scope, increasing verification uncertainty. + +- Collision scenario is only partially validated against requirement wording (actionable surfaced + error path is weak at runtime integration boundary). +- Coverage threshold (60%) cannot be evaluated for this change scope with current command/report + setup. +- Working tree includes extra modified files outside design scope, increasing verification + uncertainty. **SUGGESTION** (nice to have): -- Add explicit integration test for resources/prompts in MCP discovery payload to prove reject/ignore behavior. -- Add assertion that collision diagnostics are operator-actionable and surfaced at the correct boundary. -- Add Rust coverage reporting to verification pipeline for `clients/agent-runtime` so threshold checks are meaningful. + +- Add explicit integration test for resources/prompts in MCP discovery payload to prove + reject/ignore behavior. +- Add assertion that collision diagnostics are operator-actionable and surfaced at the correct + boundary. +- Add Rust coverage reporting to verification pipeline for `clients/agent-runtime` so threshold + checks are meaningful. --- ### Verdict + FAIL -Implementation is largely correct and heavily tested for MCP runtime behavior, but verification fails due to a real build break and one spec scenario remaining unproven at runtime. +Implementation is largely correct and heavily tested for MCP runtime behavior, but verification +fails due to a real build break and one spec scenario remaining unproven at runtime. 
--- ## Verification Addendum (2026-03-03) ### Targeted Fixes Applied -- Added explicit non-tool MCP capability handling in `clients/agent-runtime/src/tools/mcp/client.rs`: + +- Added explicit non-tool MCP capability handling in + `clients/agent-runtime/src/tools/mcp/client.rs`: - resources/prompts are explicitly detected and ignored for v1 registration. - payloads with only non-tool capabilities now resolve to an empty tool set. -- Strengthened collision diagnostics in `clients/agent-runtime/src/tools/mcp/mod.rs` with operator-actionable remediation text containing canonical identifier context. +- Strengthened collision diagnostics in `clients/agent-runtime/src/tools/mcp/mod.rs` with + operator-actionable remediation text containing canonical identifier context. ### New / Updated Test Evidence + - `clients/agent-runtime/tests/mcp_registry_integration.rs` - `discovery_ignores_non_tool_capabilities_and_registers_only_tools` ✅ - `discovery_reports_actionable_collision_errors` ✅ @@ -150,6 +174,7 @@ Implementation is largely correct and heavily tested for MCP runtime behavior, b - `collision_error_message_is_actionable_for_operators` ✅ ### Re-run Commands + ```text clients/agent-runtime: - cargo fmt --all -- --check -> passed @@ -165,9 +190,14 @@ repo root: ``` ### Build Blocker Disposition -- Previous blocker `:web:workspaceInstall` with `ERR_PNPM_CATALOG_ENTRY_NOT_FOUND_FOR_SPEC` for `tailwind-merge` is resolved by adding catalog entry in `clients/web/pnpm-workspace.yaml`. + +- Previous blocker `:web:workspaceInstall` with `ERR_PNPM_CATALOG_ENTRY_NOT_FOUND_FOR_SPEC` for + `tailwind-merge` is resolved by adding catalog entry in `clients/web/pnpm-workspace.yaml`. ### Updated Verdict + PASS (addendum scope) -The prior FAIL findings are addressed for this change scope: non-tool capability handling is now explicitly tested, collision diagnostics are actionable, and the workspace install blocker is cleared. 
+The prior FAIL findings are addressed for this change scope: non-tool capability handling is now +explicitly tested, collision diagnostics are actionable, and the workspace install blocker is +cleared. diff --git a/openspec/changes/archive/2026-03-04-agent-runtime-mission-layer/verify-report.md b/openspec/changes/archive/2026-03-04-agent-runtime-mission-layer/verify-report.md index 3b7c261c7..d8824cc8d 100644 --- a/openspec/changes/archive/2026-03-04-agent-runtime-mission-layer/verify-report.md +++ b/openspec/changes/archive/2026-03-04-agent-runtime-mission-layer/verify-report.md @@ -8,11 +8,11 @@ ### Completeness -| Metric | Value | -|--------|-------| -| Tasks total | 18 | -| Tasks complete | 18 | -| Tasks incomplete | 0 | +| Metric | Value | +|------------------|-------| +| Tasks total | 18 | +| Tasks complete | 18 | +| Tasks incomplete | 0 | All tasks in `openspec/changes/agent-runtime-mission-layer/tasks.md` are checked complete. @@ -46,8 +46,9 @@ Additional mission/runtime verification evidence: - `cargo fmt --all -- --check` -> ✅ passed - `cargo clippy --all-targets -- -D warnings` -> ✅ passed -- `cargo test -p corvus --test legacy_loop_guard --test mission_lifecycle_integration --test mission_governance_integration --test mission_security_parity --test mission_config_toggle --test mission_entrypoint_parity` -> ✅ passed (25 passed / 0 failed) -- `cargo test -p corvus concurrent_transition_attempts_are_serialized_with_single_winner` -> ✅ passed +- `cargo test -p corvus --test legacy_loop_guard --test mission_lifecycle_integration --test mission_governance_integration --test mission_security_parity --test mission_config_toggle --test mission_entrypoint_parity` -> ✅ passed (25 passed / 0 failed) +- `cargo test -p corvus concurrent_transition_attempts_are_serialized_with_single_winner` -> ✅ + passed - `cargo test -p corvus --quiet` -> ✅ full runtime suite passed (0 failed) **Coverage**: 7.1% line / threshold: 60% -> ⚠️ Below threshold @@ -62,20 +65,20 @@
Coverage evidence: ### Spec Compliance Matrix -| Requirement | Scenario | Test | Result | -|-------------|----------|------|--------| -| Mission Lifecycle Contract | Mission completes through planned checkpoints | `clients/agent-runtime/tests/mission_lifecycle_integration.rs > mission_runs_objective_intake_and_ordered_checkpoints` | ✅ COMPLIANT | -| Mission Lifecycle Contract | Mission replans after checkpoint failure | `clients/agent-runtime/tests/mission_lifecycle_integration.rs > mission_replans_after_recoverable_checkpoint_failure` | ✅ COMPLIANT | -| Mission Lifecycle Contract | Concurrent state transition attempts | `clients/agent-runtime/src/agent/mission.rs > concurrent_transition_attempts_are_serialized_with_single_winner` | ✅ COMPLIANT | -| Mission Governance and Fail-Closed Enforcement | Mission terminated by budget ceiling | `clients/agent-runtime/tests/mission_governance_integration.rs > mission_terminates_with_budget_exhausted_before_next_checkpoint` | ✅ COMPLIANT | -| Mission Governance and Fail-Closed Enforcement | Mission terminated by SLA ceiling | `clients/agent-runtime/tests/mission_governance_integration.rs > mission_terminates_with_sla_exceeded_after_checkpoint_accounting` | ✅ COMPLIANT | -| Delegated Mission Orchestration Parity | Delegated mission step requires approval | `clients/agent-runtime/tests/mission_security_parity.rs > mission_dispatcher_risk_classification_has_no_bypass_path`; `... > mission_approval_gate_follows_standard_path` | ✅ COMPLIANT | -| Delegated Mission Orchestration Parity | Delegated mission step denied by policy | `clients/agent-runtime/tests/mission_security_parity.rs > mission_denial_payload_preserves_structured_fields`; `... 
> mission_policy_denial_path_blocks_tool_side_effects` | ✅ COMPLIANT | -| Mission KPI Telemetry | Mission progress telemetry per checkpoint | `clients/agent-runtime/tests/mission_lifecycle_integration.rs > mission_runs_objective_intake_and_ordered_checkpoints` | ✅ COMPLIANT | -| Mission KPI Telemetry | Guardrail violation telemetry on governance stop | `clients/agent-runtime/tests/mission_governance_integration.rs > mission_terminates_with_sla_exceeded_after_checkpoint_accounting`; `... > mission_error_events_are_sanitized_before_observer_record` | ✅ COMPLIANT | -| Mission Integration and Backward Compatibility Coverage | Integration suite validates mission lifecycle and governance | `clients/agent-runtime/tests/mission_lifecycle_integration.rs`; `clients/agent-runtime/tests/mission_governance_integration.rs` | ✅ COMPLIANT | -| Mission Integration and Backward Compatibility Coverage | Legacy loop path remains behaviorally stable | `clients/agent-runtime/tests/legacy_loop_guard.rs > mission_disabled_routes_to_legacy_turn_semantics`; `... 
> mission_disabled_does_not_emit_rollback_without_prior_checkpoint` | ✅ COMPLIANT | -| Entry Points Alignment | Mission behavior parity across entry points | `clients/agent-runtime/tests/mission_entrypoint_parity.rs > mission_behavior_parity_is_preserved_across_cli_channel_and_gateway_paths` | ✅ COMPLIANT | +| Requirement | Scenario | Test | Result | +|---------------------------------------------------------|--------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------| +| Mission Lifecycle Contract | Mission completes through planned checkpoints | `clients/agent-runtime/tests/mission_lifecycle_integration.rs > mission_runs_objective_intake_and_ordered_checkpoints` | ✅ COMPLIANT | +| Mission Lifecycle Contract | Mission replans after checkpoint failure | `clients/agent-runtime/tests/mission_lifecycle_integration.rs > mission_replans_after_recoverable_checkpoint_failure` | ✅ COMPLIANT | +| Mission Lifecycle Contract | Concurrent state transition attempts | `clients/agent-runtime/src/agent/mission.rs > concurrent_transition_attempts_are_serialized_with_single_winner` | ✅ COMPLIANT | +| Mission Governance and Fail-Closed Enforcement | Mission terminated by budget ceiling | `clients/agent-runtime/tests/mission_governance_integration.rs > mission_terminates_with_budget_exhausted_before_next_checkpoint` | ✅ COMPLIANT | +| Mission Governance and Fail-Closed Enforcement | Mission terminated by SLA ceiling | `clients/agent-runtime/tests/mission_governance_integration.rs > mission_terminates_with_sla_exceeded_after_checkpoint_accounting` | ✅ COMPLIANT | +| Delegated Mission Orchestration Parity | Delegated mission step requires approval | `clients/agent-runtime/tests/mission_security_parity.rs > mission_dispatcher_risk_classification_has_no_bypass_path`; `... 
> mission_approval_gate_follows_standard_path` | ✅ COMPLIANT | +| Delegated Mission Orchestration Parity | Delegated mission step denied by policy | `clients/agent-runtime/tests/mission_security_parity.rs > mission_denial_payload_preserves_structured_fields`; `... > mission_policy_denial_path_blocks_tool_side_effects` | ✅ COMPLIANT | +| Mission KPI Telemetry | Mission progress telemetry per checkpoint | `clients/agent-runtime/tests/mission_lifecycle_integration.rs > mission_runs_objective_intake_and_ordered_checkpoints` | ✅ COMPLIANT | +| Mission KPI Telemetry | Guardrail violation telemetry on governance stop | `clients/agent-runtime/tests/mission_governance_integration.rs > mission_terminates_with_sla_exceeded_after_checkpoint_accounting`; `... > mission_error_events_are_sanitized_before_observer_record` | ✅ COMPLIANT | +| Mission Integration and Backward Compatibility Coverage | Integration suite validates mission lifecycle and governance | `clients/agent-runtime/tests/mission_lifecycle_integration.rs`; `clients/agent-runtime/tests/mission_governance_integration.rs` | ✅ COMPLIANT | +| Mission Integration and Backward Compatibility Coverage | Legacy loop path remains behaviorally stable | `clients/agent-runtime/tests/legacy_loop_guard.rs > mission_disabled_routes_to_legacy_turn_semantics`; `... 
> mission_disabled_does_not_emit_rollback_without_prior_checkpoint` | ✅ COMPLIANT | +| Entry Points Alignment | Mission behavior parity across entry points | `clients/agent-runtime/tests/mission_entrypoint_parity.rs > mission_behavior_parity_is_preserved_across_cli_channel_and_gateway_paths` | ✅ COMPLIANT | **Compliance summary**: 12/12 scenarios compliant, 0/12 partial, 0/12 untested, 0/12 failing @@ -83,32 +86,34 @@ Coverage evidence: ### Correctness (Static - Structural Evidence) -| Requirement | Status | Notes | -|------------|--------|-------| -| Mission Lifecycle Contract | ✅ Implemented | Mission state machine + guarded transitions in `clients/agent-runtime/src/agent/mission.rs`; orchestration path in `clients/agent-runtime/src/agent/agent.rs`; concurrent transition serialization enforced by mutex-guarded transition and race test. | -| Mission Governance and Fail-Closed Enforcement | ✅ Implemented | Strict mission governance validation and fail-closed accounting (`validate`, `from_config_strict`, `from_json_strict`, pre/post checkpoint enforcement, checked arithmetic overflow). | -| Delegated Mission Orchestration Parity | ✅ Implemented | Mission-originated delegation follows dispatcher/policy/approval path; policy denial preserves structured semantics and terminates fail-closed with no tool side effects. | -| Mission KPI Telemetry | ✅ Implemented | Mission lifecycle/guardrail/termination events emitted via observer surface; error payloads pass through runtime/header/diagnostic sanitization chain before recording. | -| Mission Integration and Backward Compatibility Coverage | ✅ Implemented | Mission lifecycle/governance/security parity/rollback compatibility suites exist and pass. | -| Entry Points Alignment (modified) | ✅ Implemented | Dedicated parity test validates equivalent mission outcomes and guardrail behavior across CLI/channel/gateway entry simulation paths. 
| +| Requirement | Status | Notes | +|---------------------------------------------------------|---------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Mission Lifecycle Contract | ✅ Implemented | Mission state machine + guarded transitions in `clients/agent-runtime/src/agent/mission.rs`; orchestration path in `clients/agent-runtime/src/agent/agent.rs`; concurrent transition serialization enforced by mutex-guarded transition and race test. | +| Mission Governance and Fail-Closed Enforcement | ✅ Implemented | Strict mission governance validation and fail-closed accounting (`validate`, `from_config_strict`, `from_json_strict`, pre/post checkpoint enforcement, checked arithmetic overflow). | +| Delegated Mission Orchestration Parity | ✅ Implemented | Mission-originated delegation follows dispatcher/policy/approval path; policy denial preserves structured semantics and terminates fail-closed with no tool side effects. | +| Mission KPI Telemetry | ✅ Implemented | Mission lifecycle/guardrail/termination events emitted via observer surface; error payloads pass through runtime/header/diagnostic sanitization chain before recording. | +| Mission Integration and Backward Compatibility Coverage | ✅ Implemented | Mission lifecycle/governance/security parity/rollback compatibility suites exist and pass. | +| Entry Points Alignment (modified) | ✅ Implemented | Dedicated parity test validates equivalent mission outcomes and guardrail behavior across CLI/channel/gateway entry simulation paths. | --- ### Coherence (Design) -| Decision | Followed? | Notes | -|----------|-----------|-------| -| Additive Mission Coordinator Over Existing Loop | ✅ Yes | Mission coordinator remains additive and called from existing agent runtime execution path. 
| -| Explicit Mission State Machine With Deterministic Termination | ✅ Yes | `MissionState`/`MissionTerminationReason` enums and transition guards are implemented and tested. | -| Governance Enforced at Mission and Step Boundaries | ✅ Yes | Governance checks and accounting occur before and after checkpoint execution with deterministic reasons. | -| Reuse Existing Dispatcher/Policy/Approval Path for Delegation | ✅ Yes | Delegation uses existing dispatcher/policy/approval interfaces with no bypass path. | -| Mission KPIs Through Existing Observer Surface | ✅ Yes | Mission events added through `ObserverEvent` variants without changing observer trait signatures. | +| Decision | Followed? | Notes | +|---------------------------------------------------------------|-----------|----------------------------------------------------------------------------------------------------------| +| Additive Mission Coordinator Over Existing Loop | ✅ Yes | Mission coordinator remains additive and called from existing agent runtime execution path. | +| Explicit Mission State Machine With Deterministic Termination | ✅ Yes | `MissionState`/`MissionTerminationReason` enums and transition guards are implemented and tested. | +| Governance Enforced at Mission and Step Boundaries | ✅ Yes | Governance checks and accounting occur before and after checkpoint execution with deterministic reasons. | +| Reuse Existing Dispatcher/Policy/Approval Path for Delegation | ✅ Yes | Delegation uses existing dispatcher/policy/approval interfaces with no bypass path. | +| Mission KPIs Through Existing Observer Surface | ✅ Yes | Mission events added through `ObserverEvent` variants without changing observer trait signatures. | Design/file-change coherence notes: -- Expected created mission files exist (including `clients/agent-runtime/src/agent/mission.rs` and mission integration suites). 
+- Expected created mission files exist (including `clients/agent-runtime/src/agent/mission.rs` and + mission integration suites). - Expected modified files from design are present and aligned. -- Additional tests (`mission_entrypoint_parity.rs`, sanitization and policy-denial mission tests) close previously partial verification evidence. +- Additional tests (`mission_entrypoint_parity.rs`, sanitization and policy-denial mission tests) + close previously partial verification evidence. --- @@ -124,7 +129,8 @@ None. **SUGGESTION** (nice to have): -1. Add mission/runtime-focused coverage collection to the verify rules so threshold evaluation reflects the changed Rust mission surface directly. +1. Add mission/runtime-focused coverage collection to the verify rules so threshold evaluation + reflects the changed Rust mission surface directly. --- @@ -132,4 +138,5 @@ None. PASS WITH WARNINGS -All mission-layer spec scenarios are now verified as compliant by passing runtime evidence; only coverage threshold remains below the configured baseline. +All mission-layer spec scenarios are now verified as compliant by passing runtime evidence; only +coverage threshold remains below the configured baseline. diff --git a/openspec/changes/archive/2026-03-04-web-agent-config/archive-report.md b/openspec/changes/archive/2026-03-04-web-agent-config/archive-report.md index 69bddd147..9e46b4bda 100644 --- a/openspec/changes/archive/2026-03-04-web-agent-config/archive-report.md +++ b/openspec/changes/archive/2026-03-04-web-agent-config/archive-report.md @@ -9,24 +9,29 @@ ## Executive Summary -Completed change `web-agent-config` was archived after syncing delta specs into the main OpenSpec source of truth. No critical issues were reported in verification, so archive proceeded. Existing warnings were carried forward for follow-up. +Completed change `web-agent-config` was archived after syncing delta specs into the main OpenSpec +source of truth. 
No critical issues were reported in verification, so archive proceeded. Existing +warnings were carried forward for follow-up. ## Specs Synced -| Domain | Action | Details | -|---|---|---| +| Domain | Action | Details | +|--------------|---------|---------------------------------------------------------------------------------------------| | agent-config | Created | Main spec did not exist; copied full delta spec into `openspec/specs/agent-config/spec.md`. | | dashboard-ui | Created | Main spec did not exist; copied full delta spec into `openspec/specs/dashboard-ui/spec.md`. | ## Archive Operation -- Moved `openspec/changes/web-agent-config/` to `openspec/changes/archive/2026-03-04-web-agent-config/`. +- Moved `openspec/changes/web-agent-config/` to + `openspec/changes/archive/2026-03-04-web-agent-config/`. - Preserved proposal, design, tasks, specs, and verification artifacts in archive. ## Warnings Carried Forward -1. Coverage evidence below configured threshold (60%) in available Kover output (`composeApp` line coverage 7.1%), and no unified Rust + web coverage metric. -2. `make build` logs include dashboard Biome diagnostics while still succeeding, indicating lint quality gate may be too permissive. +1. Coverage evidence below configured threshold (60%) in available Kover output (`composeApp` line + coverage 7.1%), and no unified Rust + web coverage metric. +2. `make build` logs include dashboard Biome diagnostics while still succeeding, indicating lint + quality gate may be too permissive. 3. Minor design-to-implementation drift in file-change table for config validation location. ## Artifacts @@ -43,10 +48,12 @@ Completed change `web-agent-config` was archived after syncing delta specs into ## Next Recommended -- Add a combined coverage pipeline across Gradle, Rust, and dashboard surfaces before enforcing threshold as a hard archive gate. 
+- Add a combined coverage pipeline across Gradle, Rust, and dashboard surfaces before enforcing + threshold as a hard archive gate. - Tighten web lint/build wiring to fail aggregate build on policy-level diagnostics. - Align design file-change table with final implementation locations. ## Risks -- Remaining warnings can reduce confidence in quality-gate strictness despite compliant behavior and passing core test/build commands. +- Remaining warnings can reduce confidence in quality-gate strictness despite compliant behavior and + passing core test/build commands. diff --git a/openspec/changes/archive/2026-03-04-web-agent-config/design.md b/openspec/changes/archive/2026-03-04-web-agent-config/design.md index baf396ae4..9109a06b8 100644 --- a/openspec/changes/archive/2026-03-04-web-agent-config/design.md +++ b/openspec/changes/archive/2026-03-04-web-agent-config/design.md @@ -2,21 +2,36 @@ ## Technical Approach -To enable comprehensive configuration of the agent runtime via the web dashboard, we will modularize the monolithic `App.vue` into logically grouped Vue 3 components (e.g., General Settings, Security, External Services, Logging). On the backend, we will expand `AdminConfigView` and `AdminConfigUpdateRequest` in `clients/agent-runtime/src/gateway/admin.rs` to cover all nested fields from `config.toml`, ensuring strict deserialization and secure handling of credentials (using an "unchanged" | "replace" | "clear" strategy for secrets) before persisting them via the existing configuration save mechanism. +To enable comprehensive configuration of the agent runtime via the web dashboard, we will modularize +the monolithic `App.vue` into logically grouped Vue 3 components (e.g., General Settings, Security, +External Services, Logging). 
On the backend, we will expand `AdminConfigView` and +`AdminConfigUpdateRequest` in `clients/agent-runtime/src/gateway/admin.rs` to cover all nested +fields from `config.toml`, ensuring strict deserialization and secure handling of credentials (using +an "unchanged" | "replace" | "clear" strategy for secrets) before persisting them via the existing +configuration save mechanism. ## Architecture Decisions ### Decision: State Management in the Frontend -**Choice**: Use Vue 3's native Composition API (reactive/ref) with a centralized composable pattern (e.g., `useConfigStore.ts`), rather than introducing Pinia. -**Alternatives considered**: Pinia (adds unnecessary boilerplate for a relatively flat, form-heavy prototype). Prop-drilling (creates brittle and overly coupled components). -**Rationale**: The configuration state is primarily form data fetched once and synced on save. A simple composable provides enough reactivity for nested configuration components to read and update their specific slices of the configuration without adding external dependencies. +**Choice**: Use Vue 3's native Composition API (reactive/ref) with a centralized composable +pattern (e.g., `useConfigStore.ts`), rather than introducing Pinia. +**Alternatives considered**: Pinia (adds unnecessary boilerplate for a relatively flat, form-heavy +prototype). Prop-drilling (creates brittle and overly coupled components). +**Rationale**: The configuration state is primarily form data fetched once and synced on save. A +simple composable provides enough reactivity for nested configuration components to read and update +their specific slices of the configuration without adding external dependencies. ### Decision: Secret/Credential Handling Strategy -**Choice**: Use a specific `SecretMode` enum ("unchanged", "replace", "clear") alongside optional string values in the payload for credentials (like API keys). 
-**Alternatives considered**: Sending raw passwords (insecure), sending masked passwords and attempting to diff them (fragile and prone to accidental overwrites). -**Rationale**: This prevents the backend from ever exposing raw secrets to the frontend in `GET /web/admin/config`. The frontend can explicitly dictate the intent (e.g., keep the existing API key, replace it with a new one, or clear it out completely), ensuring secure serialization and avoiding accidental deletion of keys during partial updates. +**Choice**: Use a specific `SecretMode` enum ("unchanged", "replace", "clear") alongside optional +string values in the payload for credentials (like API keys). +**Alternatives considered**: Sending raw passwords (insecure), sending masked passwords and +attempting to diff them (fragile and prone to accidental overwrites). +**Rationale**: This prevents the backend from ever exposing raw secrets to the frontend in +`GET /web/admin/config`. The frontend can explicitly dictate the intent (e.g., keep the existing API +key, replace it with a new one, or clear it out completely), ensuring secure serialization and +avoiding accidental deletion of keys during partial updates. ## Data Flow @@ -29,56 +44,57 @@ To enable comprehensive configuration of the agent runtime via the web dashboard ## File Changes -| File | Action | Description | -|------|--------|-------------| -| `clients/web/apps/dashboard/src/App.vue` | Modify | Strip out monolithic form layout; become a router/layout shell that imports modular configuration components. | -| `clients/web/apps/dashboard/src/composables/useConfig.ts` | Create | Centralized composable for fetching, storing, and updating the configuration state. | -| `clients/web/apps/dashboard/src/components/config/GeneralSettings.vue` | Create | Vue component for default provider, model, temperature, and memory backend settings. 
| -| `clients/web/apps/dashboard/src/components/config/SecuritySettings.vue` | Create | Vue component for runtime kinds, autonomy levels, and gateway authentication settings. | -| `clients/web/apps/dashboard/src/components/config/ObservabilitySettings.vue` | Create | Vue component for logging, metrics, and telemetry configuration. | -| `clients/agent-runtime/src/gateway/admin.rs` | Modify | Expand `AdminConfigView` and `AdminConfigUpdateRequest` structs to encompass all `config.toml` sections (Observability, Runtime, Autonomy, Gateway, Scheduler). Handle `SecretMode` securely in the PUT endpoint. | -| `clients/agent-runtime/src/config/mod.rs` | Modify | Enhance validation logic to ensure that updates originating from `AdminConfigUpdateRequest` conform to the strict schema before invoking `save()`. | +| File | Action | Description | +|------------------------------------------------------------------------------|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `clients/web/apps/dashboard/src/App.vue` | Modify | Strip out monolithic form layout; become a router/layout shell that imports modular configuration components. | +| `clients/web/apps/dashboard/src/composables/useConfig.ts` | Create | Centralized composable for fetching, storing, and updating the configuration state. | +| `clients/web/apps/dashboard/src/components/config/GeneralSettings.vue` | Create | Vue component for default provider, model, temperature, and memory backend settings. | +| `clients/web/apps/dashboard/src/components/config/SecuritySettings.vue` | Create | Vue component for runtime kinds, autonomy levels, and gateway authentication settings. | +| `clients/web/apps/dashboard/src/components/config/ObservabilitySettings.vue` | Create | Vue component for logging, metrics, and telemetry configuration. 
| +| `clients/agent-runtime/src/gateway/admin.rs` | Modify | Expand `AdminConfigView` and `AdminConfigUpdateRequest` structs to encompass all `config.toml` sections (Observability, Runtime, Autonomy, Gateway, Scheduler). Handle `SecretMode` securely in the PUT endpoint. | +| `clients/agent-runtime/src/config/mod.rs` | Modify | Enhance validation logic to ensure that updates originating from `AdminConfigUpdateRequest` conform to the strict schema before invoking `save()`. | ## Interfaces / Contracts ## Config Coverage Matrix -| `config.toml` Area | AdminConfigView | AdminConfigUpdateRequest | Editability | Notes | -|---|---|---|---|---| -| `default_provider` | `default_provider` | `default_provider` | Editable | Trimmed; empty clears value. | -| `default_model` | `default_model` | `default_model` | Editable | Trimmed; empty clears value. | -| `api_url` | `api_url` | `api_url` | Editable | Trimmed; empty clears value. | -| `default_temperature` | `default_temperature` | `default_temperature` | Editable | Range validated `[0.0, 2.0]`. | -| `api_key` | `provider.has_api_key` | `provider.api_key` (`SecretMode`) | Editable, secret-safe | Redacted in view; supports unchanged/replace/clear. | -| `memory.backend` | `memory_backend`, `memory.backend` | `memory_backend`, `memory.backend` | Editable | Validated against allowed backends. | -| `memory.surreal.url` | `memory.surreal.url` | `memory.surreal.url` | Editable | Optional string normalization. | -| `memory.surreal.namespace` | `memory.surreal.namespace` | `memory.surreal.namespace` | Editable | Optional string normalization. | -| `memory.surreal.database` | `memory.surreal.database` | `memory.surreal.database` | Editable | Optional string normalization. | -| `memory.surreal.allow_http_loopback` | `memory.surreal.allow_http_loopback` | `memory.surreal.allow_http_loopback` | Editable | Boolean patch. 
| -| `memory.surreal.username/password/token` | `memory.surreal.has_*` flags | `memory.surreal.*` (`SecretMode`) | Editable, secret-safe | Values redacted in view. | -| `observability.backend` | `observability.backend` | `observability.backend` | Editable | Validated enum. | -| `observability.otel_*` | `observability.otel_endpoint`, `observability.otel_service_name` | matching fields | Editable | Optional string normalization. | -| `runtime.kind` | `runtime.kind` | `runtime.kind` | Editable | Validated enum (`native`, `docker`). | -| `autonomy.*` primary limits | `autonomy.level/workspace_only/max_actions_per_hour/max_cost_per_day_cents` | matching fields | Editable | Type/range checked. | -| `autonomy` policy flags/lists | `require_approval_for_medium_risk`, `block_high_risk_commands`, `auto_approve`, `always_ask` | matching fields | Editable | Full patch support. | -| `identity.format` | `identity.format` | `identity.format` | Editable | Validated enum (`openclaw`, `aieos`). | -| `identity.aieos_path` | `identity.aieos_path` | `identity.aieos_path` | Editable | Optional normalization. | -| `identity.aieos_inline` | `identity.has_aieos_inline` | — | Hidden/non-editable | Presence only; raw content never returned. | -| `scheduler.*` | `scheduler.enabled/max_tasks/max_concurrent` | matching fields | Editable | `max_* >= 1` validation. | -| `gateway.*` security/runtime limits | `gateway.*` (incl. token count) | matching fields | Editable | Deterministic field-level validation messages. | -| `gateway.paired_tokens` | `gateway.paired_tokens_count` | — | Hidden/non-editable | Count only; tokens never exposed. | -| `channels.cli` | `channels.cli` | `channels.cli` | Editable | Boolean patch support. | -| `channels.webhook.port` | `channels.webhook.port` | `channels.webhook.port` | Editable | Port validation. | -| `channels.webhook.secret` | `channels.webhook.has_secret` | `channels.webhook.secret` (`SecretMode`) | Editable, secret-safe | Redacted intent model. 
| -| `channels.webhook.enabled` | `channels.webhook.enabled` | `channels.webhook.enabled` | Editable | Creates/removes webhook block safely. | -| `composio.enabled/entity_id` | `composio.enabled/entity_id` | matching fields | Editable | `entity_id` non-empty validation. | -| `composio.api_key` | `composio.has_api_key` | `composio.api_key` (`SecretMode`) | Editable, secret-safe | Redacted in view. | -| `web_search.enabled/provider/max_results/timeout_secs` | matching fields | matching fields | Editable | Enum/range validation. | -| `web_search.brave_api_key` | `web_search.has_brave_api_key` | `web_search.brave_api_key` (`SecretMode`) | Editable, secret-safe | Redacted in view. | -| `browser.computer_use.api_key` | `browser.has_computer_use_api_key` | `browser.computer_use_api_key` (`SecretMode`) | Editable, secret-safe | Redacted in view. | -| Unrelated sections (`agent`, `mission`, `mcp`, `peripherals`, etc.) | — | — | Non-editable | Explicitly out of admin surface for this change. | +| `config.toml` Area | AdminConfigView | AdminConfigUpdateRequest | Editability | Notes | +|---------------------------------------------------------------------|----------------------------------------------------------------------------------------------|-----------------------------------------------|-----------------------|-----------------------------------------------------| +| `default_provider` | `default_provider` | `default_provider` | Editable | Trimmed; empty clears value. | +| `default_model` | `default_model` | `default_model` | Editable | Trimmed; empty clears value. | +| `api_url` | `api_url` | `api_url` | Editable | Trimmed; empty clears value. | +| `default_temperature` | `default_temperature` | `default_temperature` | Editable | Range validated `[0.0, 2.0]`. | +| `api_key` | `provider.has_api_key` | `provider.api_key` (`SecretMode`) | Editable, secret-safe | Redacted in view; supports unchanged/replace/clear. 
| +| `memory.backend` | `memory_backend`, `memory.backend` | `memory_backend`, `memory.backend` | Editable | Validated against allowed backends. | +| `memory.surreal.url` | `memory.surreal.url` | `memory.surreal.url` | Editable | Optional string normalization. | +| `memory.surreal.namespace` | `memory.surreal.namespace` | `memory.surreal.namespace` | Editable | Optional string normalization. | +| `memory.surreal.database` | `memory.surreal.database` | `memory.surreal.database` | Editable | Optional string normalization. | +| `memory.surreal.allow_http_loopback` | `memory.surreal.allow_http_loopback` | `memory.surreal.allow_http_loopback` | Editable | Boolean patch. | +| `memory.surreal.username/password/token` | `memory.surreal.has_*` flags | `memory.surreal.*` (`SecretMode`) | Editable, secret-safe | Values redacted in view. | +| `observability.backend` | `observability.backend` | `observability.backend` | Editable | Validated enum. | +| `observability.otel_*` | `observability.otel_endpoint`, `observability.otel_service_name` | matching fields | Editable | Optional string normalization. | +| `runtime.kind` | `runtime.kind` | `runtime.kind` | Editable | Validated enum (`native`, `docker`). | +| `autonomy.*` primary limits | `autonomy.level/workspace_only/max_actions_per_hour/max_cost_per_day_cents` | matching fields | Editable | Type/range checked. | +| `autonomy` policy flags/lists | `require_approval_for_medium_risk`, `block_high_risk_commands`, `auto_approve`, `always_ask` | matching fields | Editable | Full patch support. | +| `identity.format` | `identity.format` | `identity.format` | Editable | Validated enum (`openclaw`, `aieos`). | +| `identity.aieos_path` | `identity.aieos_path` | `identity.aieos_path` | Editable | Optional normalization. | +| `identity.aieos_inline` | `identity.has_aieos_inline` | — | Hidden/non-editable | Presence only; raw content never returned. 
| +| `scheduler.*` | `scheduler.enabled/max_tasks/max_concurrent` | matching fields | Editable | `max_* >= 1` validation. | +| `gateway.*` security/runtime limits | `gateway.*` (incl. token count) | matching fields | Editable | Deterministic field-level validation messages. | +| `gateway.paired_tokens` | `gateway.paired_tokens_count` | — | Hidden/non-editable | Count only; tokens never exposed. | +| `channels.cli` | `channels.cli` | `channels.cli` | Editable | Boolean patch support. | +| `channels.webhook.port` | `channels.webhook.port` | `channels.webhook.port` | Editable | Port validation. | +| `channels.webhook.secret` | `channels.webhook.has_secret` | `channels.webhook.secret` (`SecretMode`) | Editable, secret-safe | Redacted intent model. | +| `channels.webhook.enabled` | `channels.webhook.enabled` | `channels.webhook.enabled` | Editable | Creates/removes webhook block safely. | +| `composio.enabled/entity_id` | `composio.enabled/entity_id` | matching fields | Editable | `entity_id` non-empty validation. | +| `composio.api_key` | `composio.has_api_key` | `composio.api_key` (`SecretMode`) | Editable, secret-safe | Redacted in view. | +| `web_search.enabled/provider/max_results/timeout_secs` | matching fields | matching fields | Editable | Enum/range validation. | +| `web_search.brave_api_key` | `web_search.has_brave_api_key` | `web_search.brave_api_key` (`SecretMode`) | Editable, secret-safe | Redacted in view. | +| `browser.computer_use.api_key` | `browser.has_computer_use_api_key` | `browser.computer_use_api_key` (`SecretMode`) | Editable, secret-safe | Redacted in view. | +| Unrelated sections (`agent`, `mission`, `mcp`, `peripherals`, etc.) | — | — | Non-editable | Explicitly out of admin surface for this change. 
| **Frontend Types (TypeScript)** + ```typescript type SecretMode = "unchanged" | "replace" | "clear"; @@ -99,11 +115,12 @@ interface AdminConfigUpdateRequest { scheduler?: AdminSchedulerUpdate; gateway?: AdminGatewayUpdate; // Credentials use SecretUpdate to avoid accidental exposure/overwrite - api_keys?: Record; + api_keys?: Record; } ``` **Backend Types (Rust)** + ```rust #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -137,18 +154,21 @@ pub struct AdminConfigUpdateRequest { ## Testing Strategy -| Layer | What to Test | Approach | -|-------|-------------|----------| -| Unit | Frontend config composable (`useConfig.ts`) | Verify state updates, fetch actions, and payload generation for PUT requests using Vitest. | -| Unit | Backend payload deserialization | Assert that `AdminConfigUpdateRequest` correctly parses JSON with missing optional fields and `SecretMode` enum values in Rust. | -| Integration | `GET/PUT /web/admin/config` endpoints | Spin up a test Axum server with a mock `config.toml`. Ensure GET strips secrets. Ensure PUT safely replaces or clears secrets according to `SecretMode` without corrupting the file. | -| E2E | Dashboard Form Submission | Use Playwright to load the dashboard, modify a setting (e.g., toggle an autonomy level), submit the form, and verify the success toast and subsequent GET response. | +| Layer | What to Test | Approach | +|-------------|---------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Unit | Frontend config composable (`useConfig.ts`) | Verify state updates, fetch actions, and payload generation for PUT requests using Vitest. | +| Unit | Backend payload deserialization | Assert that `AdminConfigUpdateRequest` correctly parses JSON with missing optional fields and `SecretMode` enum values in Rust. 
| +| Integration | `GET/PUT /web/admin/config` endpoints | Spin up a test Axum server with a mock `config.toml`. Ensure GET strips secrets. Ensure PUT safely replaces or clears secrets according to `SecretMode` without corrupting the file. | +| E2E | Dashboard Form Submission | Use Playwright to load the dashboard, modify a setting (e.g., toggle an autonomy level), submit the form, and verify the success toast and subsequent GET response. | ## Migration / Rollout -No data migration required. The changes affect the serialization/deserialization boundaries of the admin UI and the API gateway, resting on the existing `config.toml` structure. +No data migration required. The changes affect the serialization/deserialization boundaries of the +admin UI and the API gateway, resting on the existing `config.toml` structure. ## Open Questions -- [ ] Will the agent runtime require a soft restart/reload signal when the configuration is saved via `PUT /web/admin/config`, or do all internal subsystems already hot-reload effectively? -- [ ] Do we need a dedicated validation endpoint (`POST /web/admin/config/validate`) before saving to provide immediate form feedback, or is the error response from the PUT request sufficient? +- [ ] Will the agent runtime require a soft restart/reload signal when the configuration is saved + via `PUT /web/admin/config`, or do all internal subsystems already hot-reload effectively? +- [ ] Do we need a dedicated validation endpoint (`POST /web/admin/config/validate`) before saving + to provide immediate form feedback, or is the error response from the PUT request sufficient? 
diff --git a/openspec/changes/archive/2026-03-04-web-agent-config/proposal.md b/openspec/changes/archive/2026-03-04-web-agent-config/proposal.md index 919c83e12..4979b01a9 100644 --- a/openspec/changes/archive/2026-03-04-web-agent-config/proposal.md +++ b/openspec/changes/archive/2026-03-04-web-agent-config/proposal.md @@ -2,48 +2,61 @@ ## Intent -Enable comprehensive configuration of the agent runtime via the web dashboard by expanding backend payload support and modularizing the frontend Vue prototype. This will allow users to securely and easily configure all features available in `config.toml` from a graphical interface. +Enable comprehensive configuration of the agent runtime via the web dashboard by expanding backend +payload support and modularizing the frontend Vue prototype. This will allow users to securely and +easily configure all features available in `config.toml` from a graphical interface. ## Scope ### In Scope -- Refactoring the massive single-file Vue prototype (`App.vue`) in `clients/web/apps/dashboard/` into modular components. -- Expanding payload support in `AdminConfigUpdateRequest` and `AdminConfigView` within `clients/agent-runtime/src/gateway/` to match all features from `config.toml`. -- Ensuring tight schema validation on the backend (`clients/agent-runtime/src/config/`) for updates originating from the web client. + +- Refactoring the massive single-file Vue prototype (`App.vue`) in `clients/web/apps/dashboard/` + into modular components. +- Expanding payload support in `AdminConfigUpdateRequest` and `AdminConfigView` within + `clients/agent-runtime/src/gateway/` to match all features from `config.toml`. +- Ensuring tight schema validation on the backend (`clients/agent-runtime/src/config/`) for updates + originating from the web client. - Securely handling credentials using the existing `save()` mechanism (which encrypts credentials). ### Out of Scope + - Adding new configuration features to `config.toml` that do not currently exist. 
- Overhauling the core pairing logic (`POST /pair`) beyond expanding its payload support. - Changes to non-agent components of the system. ## Approach -1. **Frontend**: Break down `App.vue` into logical, feature-specific Vue components (e.g., `NetworkSettings`, `SecuritySettings`, `LLMConfig`). Create a unified state management layer to handle the complex nested object structure of the configuration. -2. **Backend Gateway**: Update the Axum structs (`AdminConfigUpdateRequest`, `AdminConfigView`) to fully represent the `config.toml` schema. Update `GET/PUT /web/admin/config` and `GET /web/admin/options` handlers to map these new fields. -3. **Backend Config Management**: Ensure the trait-driven loader and validation logic comprehensively validate the expanded payloads before invoking `save()`. +1. **Frontend**: Break down `App.vue` into logical, feature-specific Vue components (e.g., + `NetworkSettings`, `SecuritySettings`, `LLMConfig`). Create a unified state management layer to + handle the complex nested object structure of the configuration. +2. **Backend Gateway**: Update the Axum structs (`AdminConfigUpdateRequest`, `AdminConfigView`) to + fully represent the `config.toml` schema. Update `GET/PUT /web/admin/config` and + `GET /web/admin/options` handlers to map these new fields. +3. **Backend Config Management**: Ensure the trait-driven loader and validation logic + comprehensively validate the expanded payloads before invoking `save()`. 
## Affected Areas -| Area | Impact | Description | -|------|--------|-------------| -| `clients/web/apps/dashboard/src/App.vue` | Modified | Refactored into smaller components | -| `clients/web/apps/dashboard/src/components/` | New | New modular config components | -| `clients/agent-runtime/src/gateway/` | Modified | Updated Axum endpoints and structs | -| `clients/agent-runtime/src/config/` | Modified | Enhanced validation logic for `config.toml` | +| Area | Impact | Description | +|----------------------------------------------|----------|---------------------------------------------| +| `clients/web/apps/dashboard/src/App.vue` | Modified | Refactored into smaller components | +| `clients/web/apps/dashboard/src/components/` | New | New modular config components | +| `clients/agent-runtime/src/gateway/` | Modified | Updated Axum endpoints and structs | +| `clients/agent-runtime/src/config/` | Modified | Enhanced validation logic for `config.toml` | ## Risks -| Risk | Likelihood | Mitigation | -|------|------------|------------| -| Frontend state management complexity for nested configs | Medium | Implement robust state management (e.g., Pinia) or careful prop-drilling with clear types. | -| Validation mismatch between frontend and backend | Low | Use shared schema/types where possible, and enforce strict backend validation. | -| Credential exposure during transit or save | Low | Rely on existing encrypted `save()` mechanism and ensure HTTPS/secure transport for endpoints. | +| Risk | Likelihood | Mitigation | +|---------------------------------------------------------|------------|------------------------------------------------------------------------------------------------| +| Frontend state management complexity for nested configs | Medium | Implement robust state management (e.g., Pinia) or careful prop-drilling with clear types. 
| +| Validation mismatch between frontend and backend | Low | Use shared schema/types where possible, and enforce strict backend validation. | +| Credential exposure during transit or save | Low | Rely on existing encrypted `save()` mechanism and ensure HTTPS/secure transport for endpoints. | ## Rollback Plan - Revert frontend changes to the original `App.vue` prototype state. -- Revert backend struct changes (`AdminConfigUpdateRequest`, `AdminConfigView`) to their previous, limited schema. +- Revert backend struct changes (`AdminConfigUpdateRequest`, `AdminConfigView`) to their previous, + limited schema. - Revert any specific validation logic added for the expanded fields. ## Dependencies @@ -56,4 +69,4 @@ Enable comprehensive configuration of the agent runtime via the web dashboard by - [ ] Users can modify and successfully save all `config.toml` settings via the web dashboard. - [ ] The backend correctly validates all incoming configuration updates before saving. - [ ] Credentials are saved securely using the encrypted `save()` mechanism. -- [ ] The frontend `App.vue` is modularized into maintainable components. \ No newline at end of file +- [ ] The frontend `App.vue` is modularized into maintainable components. diff --git a/openspec/changes/archive/2026-03-04-web-agent-config/specs/agent-config/spec.md b/openspec/changes/archive/2026-03-04-web-agent-config/specs/agent-config/spec.md index 59c338559..87d85d0fe 100644 --- a/openspec/changes/archive/2026-03-04-web-agent-config/specs/agent-config/spec.md +++ b/openspec/changes/archive/2026-03-04-web-agent-config/specs/agent-config/spec.md @@ -4,13 +4,16 @@ ### Requirement: Comprehensive Configuration Payload Support -The backend configuration update payload (`AdminConfigUpdateRequest`) MUST support all fields defined in the system's `config.toml`. -The backend MUST securely accept updates to server settings, agent identity details, and LLM provider configurations. 
+The backend configuration update payload (`AdminConfigUpdateRequest`) MUST support all fields +defined in the system's `config.toml`. +The backend MUST securely accept updates to server settings, agent identity details, and LLM +provider configurations. #### Scenario: Full configuration update via API - GIVEN an authenticated admin client -- WHEN a POST request is sent to the configuration update endpoint with a complete JSON payload representing all `config.toml` fields +- WHEN a PUT request is sent to the configuration update endpoint with a complete JSON payload + representing all `config.toml` fields - THEN the server validates the payload - AND successfully updates the running configuration and persists it to `config.toml` - AND responds with a 200 OK status containing the updated `AdminConfigView` @@ -31,11 +34,14 @@ The backend MUST provide dedicated endpoints and payloads for handling gateway c ### Requirement: Admin Configuration View (Previously: Partial config view) -The `AdminConfigView` payload MUST return a comprehensive representation of the current configuration, matching the expanded scope of the update request, while stripping any sensitive credentials (e.g., raw API keys). +The `AdminConfigView` payload MUST return a comprehensive representation of the current +configuration, matching the expanded scope of the update request, while stripping any sensitive +credentials (e.g., raw API keys).
#### Scenario: Fetching current configuration - GIVEN an authenticated admin client - WHEN a GET request is made to the configuration endpoint -- THEN the server returns an `AdminConfigView` object containing all public configuration fields from `config.toml` +- THEN the server returns an `AdminConfigView` object containing all public configuration fields + from `config.toml` - AND sensitive fields (like `provider_api_key`) are omitted or masked diff --git a/openspec/changes/archive/2026-03-04-web-agent-config/specs/dashboard-ui/spec.md b/openspec/changes/archive/2026-03-04-web-agent-config/specs/dashboard-ui/spec.md index 82cb954d2..b21e147e5 100644 --- a/openspec/changes/archive/2026-03-04-web-agent-config/specs/dashboard-ui/spec.md +++ b/openspec/changes/archive/2026-03-04-web-agent-config/specs/dashboard-ui/spec.md @@ -4,14 +4,17 @@ ### Requirement: Modular Configuration Components -The Dashboard UI MUST modularize the configuration interface into separate, focused components rather than a single monolithic view. -The system MUST render distinct components for Server Settings, Agent Identity, and LLM Provider configuration. +The Dashboard UI MUST modularize the configuration interface into separate, focused components +rather than a single monolithic view. +The system MUST render distinct components for Server Settings, Agent Identity, and LLM Provider +configuration. 
#### Scenario: User views configuration dashboard - GIVEN the user is authenticated and viewing the agent dashboard - WHEN the configuration page is loaded -- THEN the interface displays separate configuration cards or tabs for Server, Identity, and Provider settings +- THEN the interface displays separate configuration cards or tabs for Server, Identity, and + Provider settings ### Requirement: Gateway Pairing Management @@ -22,13 +25,15 @@ The Dashboard UI MUST provide an interface to manage the gateway connection and - GIVEN the agent is not paired with a gateway - WHEN the user initiates the pairing process via the dashboard - THEN the UI displays a pairing token input -- AND upon submission, the UI indicates pairing in progress and updates to paired status upon success +- AND upon submission, the UI indicates pairing in progress and updates to paired status upon + success ## MODIFIED Requirements ### Requirement: Configuration Form State (Previously: Single App.vue form) -The configuration form state MUST be managed across multiple components using shared state or proper prop/event delegation, replacing the monolithic App.vue state. +The configuration form state MUST be managed across multiple components using shared state or proper +prop/event delegation, replacing the monolithic App.vue state. 
#### Scenario: Updating a specific configuration section diff --git a/openspec/changes/archive/2026-03-04-web-agent-config/tasks.md b/openspec/changes/archive/2026-03-04-web-agent-config/tasks.md index 6a3d2cde8..4b748c5c5 100644 --- a/openspec/changes/archive/2026-03-04-web-agent-config/tasks.md +++ b/openspec/changes/archive/2026-03-04-web-agent-config/tasks.md @@ -2,38 +2,93 @@ ## Phase 1: Contract Baseline and Safety Guards -- [x] 1.1 Add a config coverage matrix in `openspec/changes/web-agent-config/design.md` mapping every editable `config.toml` section from `clients/agent-runtime/src/config/schema.rs` to `AdminConfigView` and `AdminConfigUpdateRequest` fields in `clients/agent-runtime/src/gateway/admin.rs` (including explicit non-editable/hidden fields). -- [x] 1.2 Create shared dashboard admin-config types in `clients/web/apps/dashboard/src/types/admin-config.ts` (view, update, nested section patches, and `SecretMode = "unchanged" | "replace" | "clear"`) aligned to backend JSON contracts. -- [x] 1.3 Add a focused Rust contract test module (RED) under `clients/agent-runtime/src/gateway/admin.rs` that fails when newly defined config sections are missing in `admin_config_view()` serialization or `AdminConfigUpdateRequest` deserialization. -- [x] 1.4 Implement the minimum gateway contract updates (GREEN) in `clients/agent-runtime/src/gateway/admin.rs` to satisfy the new coverage tests without exposing raw secrets, then refactor repeated mapping helpers (REFACTOR). +- [x] 1.1 Add a config coverage matrix in `openspec/changes/web-agent-config/design.md` mapping + every editable `config.toml` section from `clients/agent-runtime/src/config/schema.rs` to + `AdminConfigView` and `AdminConfigUpdateRequest` fields in + `clients/agent-runtime/src/gateway/admin.rs` (including explicit non-editable/hidden fields). 
+- [x] 1.2 Create shared dashboard admin-config types in + `clients/web/apps/dashboard/src/types/admin-config.ts` (view, update, nested section patches, and + `SecretMode = "unchanged" | "replace" | "clear"`) aligned to backend JSON contracts. +- [x] 1.3 Add a focused Rust contract test module (RED) under + `clients/agent-runtime/src/gateway/admin.rs` that fails when newly defined config sections are + missing in `admin_config_view()` serialization or `AdminConfigUpdateRequest` deserialization. +- [x] 1.4 Implement the minimum gateway contract updates (GREEN) in + `clients/agent-runtime/src/gateway/admin.rs` to satisfy the new coverage tests without exposing + raw secrets, then refactor repeated mapping helpers (REFACTOR). ## Phase 2: Frontend Modularization (Dashboard) -- [x] 2.1 Extract API/state orchestration from `clients/web/apps/dashboard/src/App.vue` into `clients/web/apps/dashboard/src/composables/useConfig.ts` with fetch/connect/save actions, per-section saving flags, and diff-based payload builders. -- [x] 2.2 Create modular config components under `clients/web/apps/dashboard/src/components/config/` for `GeneralSettings.vue`, `SecuritySettings.vue`, `ObservabilitySettings.vue`, `RuntimeSettings.vue`, `SchedulerSettings.vue`, `GatewaySettings.vue`, and `WebhookSettings.vue` using typed props/events from `src/types/admin-config.ts`. -- [x] 2.3 Refactor `clients/web/apps/dashboard/src/App.vue` into a layout/container shell that wires authentication/pairing controls plus modular config sections via `useConfig.ts`, preserving existing i18n keys and current UX copy. -- [x] 2.4 Add a secret intent UI contract in `clients/web/apps/dashboard/src/components/config/WebhookSettings.vue` and `clients/web/apps/dashboard/src/composables/useConfig.ts` so unchanged/replace/clear flows cannot emit ambiguous payloads (replace requires non-empty value, clear sends no value). 
-- [x] 2.5 Move pure payload/diff logic into `clients/web/apps/dashboard/src/composables/configPayload.ts` and keep components presentational to reduce App-level coupling and support isolated unit tests. +- [x] 2.1 Extract API/state orchestration from `clients/web/apps/dashboard/src/App.vue` into + `clients/web/apps/dashboard/src/composables/useConfig.ts` with fetch/connect/save actions, + per-section saving flags, and diff-based payload builders. +- [x] 2.2 Create modular config components under `clients/web/apps/dashboard/src/components/config/` + for `GeneralSettings.vue`, `SecuritySettings.vue`, `ObservabilitySettings.vue`, + `RuntimeSettings.vue`, `SchedulerSettings.vue`, `GatewaySettings.vue`, and `WebhookSettings.vue` + using typed props/events from `src/types/admin-config.ts`. +- [x] 2.3 Refactor `clients/web/apps/dashboard/src/App.vue` into a layout/container shell that wires + authentication/pairing controls plus modular config sections via `useConfig.ts`, preserving + existing i18n keys and current UX copy. +- [x] 2.4 Add a secret intent UI contract in + `clients/web/apps/dashboard/src/components/config/WebhookSettings.vue` and + `clients/web/apps/dashboard/src/composables/useConfig.ts` so unchanged/replace/clear flows cannot + emit ambiguous payloads (replace requires non-empty value, clear sends no value). +- [x] 2.5 Move pure payload/diff logic into + `clients/web/apps/dashboard/src/composables/configPayload.ts` and keep components presentational + to reduce App-level coupling and support isolated unit tests. ## Phase 3: Backend Expansion, Validation, and Persistence -- [x] 3.1 Expand `AdminConfigView` in `clients/agent-runtime/src/gateway/admin.rs` to represent the full intended admin-editable `config.toml` surface (defaults, runtime, autonomy, scheduler, gateway, channels/webhook, observability, identity/provider-related fields) while masking/omitting all sensitive values. 
-- [x] 3.2 Expand `AdminConfigUpdateRequest` and nested patch structs in `clients/agent-runtime/src/gateway/admin.rs` to accept the same full editable surface, keeping optional partial updates and strict serde behavior for unknown/invalid shapes. -- [x] 3.3 Generalize secret update handling in `clients/agent-runtime/src/gateway/admin.rs` by introducing reusable secret patch application (`Unchanged`, `Replace { value }`, `Clear`) for every secret-bearing field currently persisted through `Config::save()` in `clients/agent-runtime/src/config/schema.rs`. -- [x] 3.4 Add centralized patch validation helpers in `clients/agent-runtime/src/gateway/admin.rs` for bounds/ranges/enums/empties (ports, temperatures, rate limits, backend enums, host constraints) and return deterministic 400 errors with field-specific messages. -- [x] 3.5 Update restart-required detection in `clients/agent-runtime/src/gateway/admin.rs` so conflict reporting remains correct for all newly supported fields, including secret-change intent and normalized values. -- [x] 3.6 Ensure persistence flow in `clients/agent-runtime/src/gateway/admin.rs` performs: parse -> validate -> apply -> `validate_for_runtime()` in `clients/agent-runtime/src/config/schema.rs` -> `save()` -> in-memory swap, with rollback on failed save and no partial in-memory mutation. +- [x] 3.1 Expand `AdminConfigView` in `clients/agent-runtime/src/gateway/admin.rs` to represent the + full intended admin-editable `config.toml` surface (defaults, runtime, autonomy, scheduler, + gateway, channels/webhook, observability, identity/provider-related fields) while masking/omitting + all sensitive values. +- [x] 3.2 Expand `AdminConfigUpdateRequest` and nested patch structs in + `clients/agent-runtime/src/gateway/admin.rs` to accept the same full editable surface, keeping + optional partial updates and strict serde behavior for unknown/invalid shapes. 
+- [x] 3.3 Generalize secret update handling in `clients/agent-runtime/src/gateway/admin.rs` by + introducing reusable secret patch application (`Unchanged`, `Replace { value }`, `Clear`) for + every secret-bearing field currently persisted through `Config::save()` in + `clients/agent-runtime/src/config/schema.rs`. +- [x] 3.4 Add centralized patch validation helpers in `clients/agent-runtime/src/gateway/admin.rs` + for bounds/ranges/enums/empties (ports, temperatures, rate limits, backend enums, host + constraints) and return deterministic 400 errors with field-specific messages. +- [x] 3.5 Update restart-required detection in `clients/agent-runtime/src/gateway/admin.rs` so + conflict reporting remains correct for all newly supported fields, including secret-change intent + and normalized values. +- [x] 3.6 Ensure persistence flow in `clients/agent-runtime/src/gateway/admin.rs` performs: parse -> + validate -> apply -> `validate_for_runtime()` in `clients/agent-runtime/src/config/schema.rs` -> + `save()` -> in-memory swap, with rollback on failed save and no partial in-memory mutation. ## Phase 4: Automated Testing (Unit, Integration, E2E) -- [x] 4.1 Add frontend composable unit tests (RED/GREEN) in `clients/web/apps/dashboard/src/composables/useConfig.spec.ts` covering initial fetch mapping, section-scoped save state, diff payload generation, and secret mode payload rules. -- [x] 4.2 Split and update UI tests from `clients/web/apps/dashboard/src/App.spec.ts` into component-focused specs under `clients/web/apps/dashboard/src/components/config/*.spec.ts` verifying modular rendering (Server/Identity/Provider-focused sections) and module-local validation feedback. -- [x] 4.3 Add Rust unit tests in `clients/agent-runtime/src/gateway/admin.rs` for expanded request/view serde, secret update transitions (unchanged/replace/clear), and field-level validation errors for invalid ranges or malformed payloads. 
-- [x] 4.4 Add gateway integration tests in `clients/agent-runtime/tests/admin_config_api_integration.rs` for `GET /web/admin/config` and `PUT /web/admin/config`, asserting full-field round-trip updates, secret redaction in GET, secure secret mutation behavior, and persistence rollback on save failure. -- [x] 4.5 Add dashboard E2E coverage using Playwright in `clients/web/apps/dashboard/e2e/admin-config.spec.ts` plus `clients/web/apps/dashboard/playwright.config.ts` to validate end-to-end edit/save flows and secret mode transitions against a test gateway fixture. -- [x] 4.6 Wire test scripts in `clients/web/apps/dashboard/package.json` and `clients/web/package.json` (for example `test:e2e`) and run verification stack (`pnpm --filter @corvus/dashboard test`, E2E suite, and targeted Rust tests for admin config endpoints). +- [x] 4.1 Add frontend composable unit tests (RED/GREEN) in + `clients/web/apps/dashboard/src/composables/useConfig.spec.ts` covering initial fetch mapping, + section-scoped save state, diff payload generation, and secret mode payload rules. +- [x] 4.2 Split and update UI tests from `clients/web/apps/dashboard/src/App.spec.ts` into + component-focused specs under `clients/web/apps/dashboard/src/components/config/*.spec.ts` + verifying modular rendering (Server/Identity/Provider-focused sections) and module-local + validation feedback. +- [x] 4.3 Add Rust unit tests in `clients/agent-runtime/src/gateway/admin.rs` for expanded + request/view serde, secret update transitions (unchanged/replace/clear), and field-level + validation errors for invalid ranges or malformed payloads. +- [x] 4.4 Add gateway integration tests in + `clients/agent-runtime/tests/admin_config_api_integration.rs` for `GET /web/admin/config` and + `PUT /web/admin/config`, asserting full-field round-trip updates, secret redaction in GET, secure + secret mutation behavior, and persistence rollback on save failure. 
+- [x] 4.5 Add dashboard E2E coverage using Playwright in + `clients/web/apps/dashboard/e2e/admin-config.spec.ts` plus + `clients/web/apps/dashboard/playwright.config.ts` to validate end-to-end edit/save flows and + secret mode transitions against a test gateway fixture. +- [x] 4.6 Wire test scripts in `clients/web/apps/dashboard/package.json` and + `clients/web/package.json` (for example `test:e2e`) and run verification stack ( + `pnpm --filter @corvus/dashboard test`, E2E suite, and targeted Rust tests for admin config + endpoints). ## Phase 5: Final Verification and Handoff -- [x] 5.1 Execute full regression for touched surfaces (`cargo test -p agent-runtime admin`, `pnpm --filter @corvus/dashboard test`, dashboard E2E) and then repository baseline `make test`; capture failures/fixes in the same files before completion. -- [x] 5.2 Update `openspec/changes/web-agent-config/design.md` and this `openspec/changes/web-agent-config/tasks.md` with any final field mapping deltas discovered during implementation, ensuring all spec scenarios are explicitly marked test-covered. +- [x] 5.1 Execute full regression for touched surfaces (`cargo test -p agent-runtime admin`, + `pnpm --filter @corvus/dashboard test`, dashboard E2E) and then repository baseline `make test`; + capture failures/fixes in the same files before completion. +- [x] 5.2 Update `openspec/changes/web-agent-config/design.md` and this + `openspec/changes/web-agent-config/tasks.md` with any final field mapping deltas discovered during + implementation, ensuring all spec scenarios are explicitly marked test-covered. 
diff --git a/openspec/changes/archive/2026-03-04-web-agent-config/verify-report.md b/openspec/changes/archive/2026-03-04-web-agent-config/verify-report.md index acced6d23..02a9346b0 100644 --- a/openspec/changes/archive/2026-03-04-web-agent-config/verify-report.md +++ b/openspec/changes/archive/2026-03-04-web-agent-config/verify-report.md @@ -1,17 +1,17 @@ ## Verification Report -**Change**: web-agent-config +**Change**: web-agent-config **Version**: N/A (delta specs) --- ### Completeness -| Metric | Value | -|--------|-------| -| Tasks total | 23 | -| Tasks complete | 23 | -| Tasks incomplete | 0 | +| Metric | Value | +|------------------|-------| +| Tasks total | 23 | +| Tasks complete | 23 | +| Tasks incomplete | 0 | All checklist items in `openspec/changes/web-agent-config/tasks.md` are marked complete. @@ -41,22 +41,25 @@ BUILD SUCCESSFUL in 14s - `pnpm --filter @corvus/dashboard test:e2e` -> ✅ passed (2 passed, 0 failed) - `pnpm --filter @corvus/dashboard build` -> ✅ passed (`vue-tsc -b && vite build`) -**Coverage**: ⚠️ Below threshold / partial stack coverage -Configured threshold: 60% (`openspec/config.yaml`) -`make test-coverage` succeeded for configured Gradle/Kover reports, but available report for `composeApp` shows line coverage **7.1%** (`clients/composeApp/build/reports/kover/html/index.html`), below the configured threshold. Coverage for Rust + dashboard test surfaces is not included in this single threshold output. +**Coverage**: ⚠️ Below threshold / partial stack coverage +Configured threshold: 60% (`openspec/config.yaml`) +`make test-coverage` succeeded for configured Gradle/Kover reports, but available report for +`composeApp` shows line coverage **7.1%** ( +`clients/composeApp/build/reports/kover/html/index.html`), below the configured threshold. Coverage +for Rust + dashboard test surfaces is not included in this single threshold output. 
--- ### Spec Compliance Matrix -| Requirement | Scenario | Test | Result | -|-------------|----------|------|--------| -| Comprehensive Configuration Payload Support | Full configuration update via API | `clients/agent-runtime/tests/admin_config_api_integration.rs > put_admin_config_updates_and_persists` | ✅ COMPLIANT | -| Secure Gateway Pairing Payload | Secure pairing token submission | `clients/agent-runtime/src/gateway/mod.rs > pair_endpoint_allows_unpaired_runtime_to_auth_admin_endpoint` | ✅ COMPLIANT | -| Admin Configuration View | Fetching current configuration | `clients/agent-runtime/tests/admin_config_api_integration.rs > get_admin_config_redacts_secrets` | ✅ COMPLIANT | -| Modular Configuration Components | User views configuration dashboard | `clients/web/apps/dashboard/src/App.spec.ts > renders modular config sections` | ✅ COMPLIANT | -| Gateway Pairing Management | Unpaired agent pairing | `clients/web/apps/dashboard/e2e/admin-config.spec.ts > pairs an unpaired agent and connects with issued token` | ✅ COMPLIANT | -| Configuration Form State | Updating a specific configuration section | `clients/web/apps/dashboard/src/composables/useConfig.spec.ts > tracks section saving and sends diff-only payload` | ✅ COMPLIANT | +| Requirement | Scenario | Test | Result | +|---------------------------------------------|-------------------------------------------|--------------------------------------------------------------------------------------------------------------------|-------------| +| Comprehensive Configuration Payload Support | Full configuration update via API | `clients/agent-runtime/tests/admin_config_api_integration.rs > put_admin_config_updates_and_persists` | ✅ COMPLIANT | +| Secure Gateway Pairing Payload | Secure pairing token submission | `clients/agent-runtime/src/gateway/mod.rs > pair_endpoint_allows_unpaired_runtime_to_auth_admin_endpoint` | ✅ COMPLIANT | +| Admin Configuration View | Fetching current configuration | 
`clients/agent-runtime/tests/admin_config_api_integration.rs > get_admin_config_redacts_secrets` | ✅ COMPLIANT | +| Modular Configuration Components | User views configuration dashboard | `clients/web/apps/dashboard/src/App.spec.ts > renders modular config sections` | ✅ COMPLIANT | +| Gateway Pairing Management | Unpaired agent pairing | `clients/web/apps/dashboard/e2e/admin-config.spec.ts > pairs an unpaired agent and connects with issued token` | ✅ COMPLIANT | +| Configuration Form State | Updating a specific configuration section | `clients/web/apps/dashboard/src/composables/useConfig.spec.ts > tracks section saving and sends diff-only payload` | ✅ COMPLIANT | **Compliance summary**: 6/6 scenarios compliant @@ -64,24 +67,24 @@ Configured threshold: 60% (`openspec/config.yaml`) ### Correctness (Static - Structural Evidence) -| Requirement | Status | Notes | -|------------|--------|-------| -| Comprehensive Configuration Payload Support | ✅ Implemented | Expanded request/view contracts and nested patch structs present in `clients/agent-runtime/src/gateway/admin.rs` with strict `deny_unknown_fields` behavior. | -| Secure Gateway Pairing Payload | ✅ Implemented | Pair endpoint + token persistence + post-pair authenticated admin access implemented/tested in `clients/agent-runtime/src/gateway/mod.rs`. | -| Admin Configuration View | ✅ Implemented | `AdminConfigView` exposes broad editable/public fields and redacts secrets via `has_*` flags in `clients/agent-runtime/src/gateway/admin.rs`. | -| Modular Configuration Components | ✅ Implemented | `clients/web/apps/dashboard/src/App.vue` composes modular settings components under `clients/web/apps/dashboard/src/components/config/`. | -| Gateway Pairing Management | ✅ Implemented | Pairing controls/state/actions are wired in `clients/web/apps/dashboard/src/composables/useConfig.ts` and exercised in E2E. 
| -| Configuration Form State | ✅ Implemented | Shared state + section-specific save flags + diff payload logic implemented in `clients/web/apps/dashboard/src/composables/useConfig.ts` and `clients/web/apps/dashboard/src/composables/configPayload.ts`. | +| Requirement | Status | Notes | +|---------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Comprehensive Configuration Payload Support | ✅ Implemented | Expanded request/view contracts and nested patch structs present in `clients/agent-runtime/src/gateway/admin.rs` with strict `deny_unknown_fields` behavior. | +| Secure Gateway Pairing Payload | ✅ Implemented | Pair endpoint + token persistence + post-pair authenticated admin access implemented/tested in `clients/agent-runtime/src/gateway/mod.rs`. | +| Admin Configuration View | ✅ Implemented | `AdminConfigView` exposes broad editable/public fields and redacts secrets via `has_*` flags in `clients/agent-runtime/src/gateway/admin.rs`. | +| Modular Configuration Components | ✅ Implemented | `clients/web/apps/dashboard/src/App.vue` composes modular settings components under `clients/web/apps/dashboard/src/components/config/`. | +| Gateway Pairing Management | ✅ Implemented | Pairing controls/state/actions are wired in `clients/web/apps/dashboard/src/composables/useConfig.ts` and exercised in E2E. | +| Configuration Form State | ✅ Implemented | Shared state + section-specific save flags + diff payload logic implemented in `clients/web/apps/dashboard/src/composables/useConfig.ts` and `clients/web/apps/dashboard/src/composables/configPayload.ts`. | --- ### Coherence (Design) -| Decision | Followed? 
| Notes | -|----------|-----------|-------| -| Frontend state via Composition API composable | ✅ Yes | Implemented via `clients/web/apps/dashboard/src/composables/useConfig.ts`. | -| Secret intent model (`unchanged`/`replace`/`clear`) | ✅ Yes | Implemented in frontend types/payload builder and backend secret patch handling. | -| File changes table alignment | ⚠️ Minor deviation | Design still references `clients/agent-runtime/src/config/mod.rs` for validation enhancement; most validation is implemented in `clients/agent-runtime/src/gateway/admin.rs` plus runtime schema validation call path. | +| Decision | Followed? | Notes | +|-----------------------------------------------------|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Frontend state via Composition API composable | ✅ Yes | Implemented via `clients/web/apps/dashboard/src/composables/useConfig.ts`. | +| Secret intent model (`unchanged`/`replace`/`clear`) | ✅ Yes | Implemented in frontend types/payload builder and backend secret patch handling. | +| File changes table alignment | ⚠️ Minor deviation | Design still references `clients/agent-runtime/src/config/mod.rs` for validation enhancement; most validation is implemented in `clients/agent-runtime/src/gateway/admin.rs` plus runtime schema validation call path. | --- @@ -93,14 +96,19 @@ None. **WARNING** (should fix): -1. Coverage threshold evidence is currently below configured 60% in available Kover line metric (`composeApp` 7.1%) and does not represent a unified Rust + web coverage view. -2. `make build` reports dashboard Biome diagnostics in logs while still ending successfully; quality-gate behavior may be weaker than intended for lint enforcement. -3. Minor design-to-implementation drift remains in the file-change table for config validation location. +1. 
Coverage threshold evidence is currently below configured 60% in available Kover line metric ( + `composeApp` 7.1%) and does not represent a unified Rust + web coverage view. +2. `make build` reports dashboard Biome diagnostics in logs while still ending successfully; + quality-gate behavior may be weaker than intended for lint enforcement. +3. Minor design-to-implementation drift remains in the file-change table for config validation + location. **SUGGESTION** (nice to have): -1. Add an explicit combined coverage pipeline (Rust + dashboard + Gradle) if threshold-based verification is expected to gate this change. -2. Tighten web lint/build task wiring so diagnostics fail the aggregate build when policy requires it. +1. Add an explicit combined coverage pipeline (Rust + dashboard + Gradle) if threshold-based + verification is expected to gate this change. +2. Tighten web lint/build task wiring so diagnostics fail the aggregate build when policy requires + it. --- @@ -108,4 +116,6 @@ None. **PASS WITH WARNINGS** -All spec scenarios are behaviorally compliant with passing runtime evidence (including pairing scenario and dashboard E2E), and required build/test commands pass; remaining risks are coverage and quality-gate strictness. +All spec scenarios are behaviorally compliant with passing runtime evidence (including pairing +scenario and dashboard E2E), and required build/test commands pass; remaining risks are coverage and +quality-gate strictness. 
diff --git a/openspec/changes/enhance-auto-update-system/design.md b/openspec/changes/enhance-auto-update-system/design.md index 5334a65a6..549937c45 100644 --- a/openspec/changes/enhance-auto-update-system/design.md +++ b/openspec/changes/enhance-auto-update-system/design.md @@ -2,70 +2,95 @@ ## Technical Approach -Implement a single `UpdateManager` orchestration in `clients/agent-runtime/src/update/mod.rs` that owns update check, policy resolution, install planning, integrity verification, install execution routing, and audit recording. All runtime surfaces (CLI startup notice, daemon poller, in-conversation flow, admin API/dashboard) consume a single normalized status model so users see the same facts everywhere. +Implement a single `UpdateManager` orchestration in `clients/agent-runtime/src/update/mod.rs` that +owns update check, policy resolution, install planning, integrity verification, install execution +routing, and audit recording. All runtime surfaces (CLI startup notice, daemon poller, +in-conversation flow, admin API/dashboard) consume a single normalized status model so users see the +same facts everywhere. -The design keeps current `version_check.json` compatibility, adds process-safe lock files and append-only history, and introduces explicit install method detection with user override. Security-critical verification is fail-closed for artifact paths. +The design keeps current `version_check.json` compatibility, adds process-safe lock files and +append-only history, and introduces explicit install method detection with user override. +Security-critical verification is fail-closed for artifact paths. ## Architecture Decisions ### Decision: Central update orchestrator with shared state model -**Choice**: Keep update logic centered in `update/mod.rs` but refactor internally into cohesive components (`policy`, `state_store`, `method_detection`, `installer`, `audit`, `notifications`) behind an `UpdateManager` API. 
+**Choice**: Keep update logic centered in `update/mod.rs` but refactor internally into cohesive +components (`policy`, `state_store`, `method_detection`, `installer`, `audit`, `notifications`) +behind an `UpdateManager` API. **Alternatives considered**: + - Keep current free functions with incremental patches - Split update functionality across `main`, `daemon`, and `channels` -**Rationale**: A single orchestrator removes drift between surfaces, enables deterministic command behavior, and makes locking/verification controls enforceable in one place. +**Rationale**: A single orchestrator removes drift between surfaces, enables deterministic command +behavior, and makes locking/verification controls enforceable in one place. ### Decision: Fail-closed verification contract for downloadable artifacts -**Choice**: For any installer path that downloads/stages binaries (script/binary mode), require trusted metadata + checksum verification before activation; if metadata is missing/invalid, installation is blocked. +**Choice**: For any installer path that downloads/stages binaries (script/binary mode), require +trusted metadata + checksum verification before activation; if metadata is missing/invalid, +installation is blocked. **Alternatives considered**: + - Best-effort verification with warnings - Trust package manager only for all paths -**Rationale**: Requirement mandates verification fail-closed. This prevents silent integrity bypass and keeps default posture secure. +**Rationale**: Requirement mandates verification fail-closed. This prevents silent integrity bypass +and keeps default posture secure. ### Decision: Cross-process lock files + atomic rename persistence -**Choice**: Use file-based advisory locks for cross-process serialization and temp-file/fsync/rename for atomic writes. +**Choice**: Use file-based advisory locks for cross-process serialization and temp-file/fsync/rename +for atomic writes. 
**Alternatives considered**: + - Existing in-process `OnceLock<Mutex<_>>` only - SQLite state store migration in this change -**Rationale**: Satisfies process-safety and interruption-safety without large storage migration risk. +**Rationale**: Satisfies process-safety and interruption-safety without large storage migration +risk. ### Decision: Deterministic install method selection with explicit precedence -**Choice**: Effective method resolution order: `user override` -> `detected method` -> `manual fallback (unsupported)`. +**Choice**: Effective method resolution order: `user override` -> `detected method` -> +`manual fallback (unsupported)`. **Alternatives considered**: + - Try every installer opportunistically - Auto-pick first command available in PATH -**Rationale**: Deterministic routing is auditable, scriptable, and avoids unsafe unknown install attempts. +**Rationale**: Deterministic routing is auditable, scriptable, and avoids unsafe unknown install +attempts. ### Decision: Canonical update status contract reused by CLI/channel/admin -**Choice**: Define a single `UpdateStatusView` and consume it in CLI output, channel notices, daemon push payload, and admin response. +**Choice**: Define a single `UpdateStatusView` and consume it in CLI output, channel notices, daemon +push payload, and admin response. **Alternatives considered**: + - Per-surface formatting/state derivation **Rationale**: Eliminates user confusion from mismatched versions/policy flags across surfaces. ### Decision: JSONL audit history with bounded retention -**Choice**: Append structured events to `workspace/state/update_history.jsonl` with optional max-entry trimming. +**Choice**: Append structured events to `workspace/state/update_history.jsonl` with optional +max-entry trimming. **Alternatives considered**: + - Store history in single mutable JSON array file - No persistence beyond logs -**Rationale**: JSONL is append-friendly, resilient under partial failures, and simple to inspect from CLI.
+**Rationale**: JSONL is append-friendly, resilient under partial failures, and simple to inspect +from CLI. ## Data Models @@ -177,15 +202,15 @@ Invalid env values are ignored with warning and never relax to less-safe behavio Install transaction state machine: -| Current | Trigger | Guard | Next | Notes | -|---|---|---|---|---| -| `Idle` | `update install` requested | lock acquired, policy allows | `Installing` | tx_id generated and persisted before execution | -| `Installing` | installer success + verification success | target version valid | `InstalledPendingRestart` | restart policy evaluated after state write | -| `Installing` | verification failed | always | `Failed` | fail-closed; no activation | -| `Installing` | method unsupported/prereq missing | always | `Failed` | deterministic manual instructions | -| `Installing` | second concurrent request | install lock denied | unchanged | requester gets busy/deferred result | -| `InstalledPendingRestart` | restart completed | managed service restart succeeds/manual restart acknowledged | `Idle` | current version updates on next process start/check | -| `Failed` | new install request | lock acquired | `Installing` | new tx_id | +| Current | Trigger | Guard | Next | Notes | +|---------------------------|------------------------------------------|--------------------------------------------------------------|---------------------------|-----------------------------------------------------| +| `Idle` | `update install` requested | lock acquired, policy allows | `Installing` | tx_id generated and persisted before execution | +| `Installing` | installer success + verification success | target version valid | `InstalledPendingRestart` | restart policy evaluated after state write | +| `Installing` | verification failed | always | `Failed` | fail-closed; no activation | +| `Installing` | method unsupported/prereq missing | always | `Failed` | deterministic manual instructions | +| `Installing` | second concurrent request | 
install lock denied | unchanged | requester gets busy/deferred result | +| `InstalledPendingRestart` | restart completed | managed service restart succeeds/manual restart acknowledged | `Idle` | current version updates on next process start/check | +| `Failed` | new install request | lock acquired | `Installing` | new tx_id | ## Locking and Atomic Write Strategy @@ -199,9 +224,12 @@ Install transaction state machine: ### Locking model 1. Acquire `update_state.lock` for load-mutate-save of `version_check.json`. -2. For installation, acquire `update_install.lock` first, then `update_state.lock` (fixed order) to avoid deadlock. -3. Lock acquisition timeout returns deterministic busy outcome (`EXIT_BUSY`) without partial changes. -4. Keep existing in-process mutex as secondary guard, but file lock is authoritative across processes. +2. For installation, acquire `update_install.lock` first, then `update_state.lock` (fixed order) to + avoid deadlock. +3. Lock acquisition timeout returns deterministic busy outcome (`EXIT_BUSY`) without partial + changes. +4. Keep existing in-process mutex as secondary guard, but file lock is authoritative across + processes. ### Atomic persistence @@ -210,23 +238,27 @@ Install transaction state machine: 3. `sync_all` temporary file. 4. `rename` temp -> `version_check.json` (atomic replace). 5. `sync_directory(parent)`. -6. Re-read and parse for post-write sanity; on failure, emit audit failure and preserve last good snapshot. +6. Re-read and parse for post-write sanity; on failure, emit audit failure and preserve last good + snapshot. -History append uses lock + append + fsync semantics; truncation/compaction (if entry cap exceeded) writes a new temp file atomically. +History append uses lock + append + fsync semantics; truncation/compaction (if entry cap exceeded) +writes a new temp file atomically. ## Method Detection Strategy `resolve_effective_install_method()`: -1. 
Validate configured override (`updates.install_method_override` or env). If valid, use it and mark source `override`. +1. Validate configured override (`updates.install_method_override` or env). If valid, use it and + mark source `override`. 2. If no override: - - detect Homebrew by executable path prefixes and brew metadata query - - detect Cargo via executable path/cargo home and `cargo install --list` - - detect npm/pnpm/yarn/bun via package-manager global package inspection - - detect script/binary via unmanaged binary location heuristics + - detect Homebrew by executable path prefixes and brew metadata query + - detect Cargo via executable path/cargo home and `cargo install --list` + - detect npm/pnpm/yarn/bun via package-manager global package inspection + - detect script/binary via unmanaged binary location heuristics 3. If none detected, set `Unknown` and return manual fallback plan only. -Detection output includes confidence + source for audit/status. Unsupported methods never trigger unsafe generic shell paths. +Detection output includes confidence + source for audit/status. Unsupported methods never trigger +unsafe generic shell paths. ## Command Flow @@ -259,18 +291,21 @@ New `corvus update` command tree in `clients/agent-runtime/src/main.rs`: - reads `update_history.jsonl` in chronological order - supports deterministic text and machine-readable JSON output mode -Compatibility: `corvus update confirm <nonce>` remains for channel nonce confirmations; it is treated as an internal/advanced path and routed through the same install transaction guard. +Compatibility: `corvus update confirm <nonce>` remains for channel nonce confirmations; it is +treated as an internal/advanced path and routed through the same install transaction guard. ## Notification Fan-Out Design Canonical message payload (`UpdateNotificationPayload`) is produced once and routed to sinks: 1. CLI startup banner (`maybe_print_update_notice`) when `cli_startup_notice_enabled`. -2.
In-conversation opportunistic mention (`channels/mod.rs`) when `channel_visibility_enabled` and sender authorized. +2. In-conversation opportunistic mention (`channels/mod.rs`) when `channel_visibility_enabled` and + sender authorized. 3. Daemon push notifications (`run_daemon_update_watcher`) to configured destinations. 4. Admin API (`gateway/admin.rs`) exposes latest status/policy for dashboard. -Dedupe key: `(latest_version, channel, recipient, authorized_sender)` using existing conversation dedupe semantics, now aligned with canonical status snapshot. +Dedupe key: `(latest_version, channel, recipient, authorized_sender)` using existing conversation +dedupe semantics, now aligned with canonical status snapshot. ## Data Flow @@ -369,21 +404,22 @@ Admin contract extension (`gateway/admin.rs`, dashboard type mirror): ## File Changes -| File | Action | Description | -|------|--------|-------------| -| `clients/agent-runtime/src/update/mod.rs` | Modify | Introduce `UpdateManager`, method resolution, lock/atomic state store, verification gate, audit events, history read API | -| `clients/agent-runtime/src/main.rs` | Modify | Add `update` subcommands (`status/check/install/auto-enable/auto-disable/history`) and deterministic exit handling | -| `clients/agent-runtime/src/config/schema.rs` | Modify | Extend `UpdateConfig`, defaults, env overrides, and validation for override enums/policy values | -| `clients/agent-runtime/src/channels/mod.rs` | Modify | Route opportunistic/confirm flows through canonical status + policy gating and unified notification payload | -| `clients/agent-runtime/src/daemon/mod.rs` | Modify | Keep updater supervisor, call new manager APIs, emit health/audit-friendly outcomes | -| `clients/agent-runtime/src/service/mod.rs` | Modify | Add restart integration hook consumption for `InstalledPendingRestart` handling when policy requires managed restart | -| `clients/agent-runtime/src/gateway/admin.rs` | Modify | Extend admin config/status view with 
update state and policy contract | -| `clients/web/apps/dashboard/src/types/admin-config.ts` | Modify | Add strongly-typed `updates` fields mirroring admin API contract | +| File | Action | Description | +|--------------------------------------------------------|--------|--------------------------------------------------------------------------------------------------------------------------| +| `clients/agent-runtime/src/update/mod.rs` | Modify | Introduce `UpdateManager`, method resolution, lock/atomic state store, verification gate, audit events, history read API | +| `clients/agent-runtime/src/main.rs` | Modify | Add `update` subcommands (`status/check/install/auto-enable/auto-disable/history`) and deterministic exit handling | +| `clients/agent-runtime/src/config/schema.rs` | Modify | Extend `UpdateConfig`, defaults, env overrides, and validation for override enums/policy values | +| `clients/agent-runtime/src/channels/mod.rs` | Modify | Route opportunistic/confirm flows through canonical status + policy gating and unified notification payload | +| `clients/agent-runtime/src/daemon/mod.rs` | Modify | Keep updater supervisor, call new manager APIs, emit health/audit-friendly outcomes | +| `clients/agent-runtime/src/service/mod.rs` | Modify | Add restart integration hook consumption for `InstalledPendingRestart` handling when policy requires managed restart | +| `clients/agent-runtime/src/gateway/admin.rs` | Modify | Extend admin config/status view with update state and policy contract | +| `clients/web/apps/dashboard/src/types/admin-config.ts` | Modify | Add strongly-typed `updates` fields mirroring admin API contract | ## Security Controls - Release source allowlist: only configured trusted GitHub release endpoints are accepted. -- Verification fail-closed: missing checksum metadata, download failure, or digest mismatch blocks activation. +- Verification fail-closed: missing checksum metadata, download failure, or digest mismatch blocks + activation. 
- No shell-string execution for installer commands; use fixed binary + arg vectors. - Confirmation nonces remain hashed at rest and validated with sender/channel binding. - Lock/state/history files created with owner-restricted permissions where supported. @@ -391,46 +427,50 @@ Admin contract extension (`gateway/admin.rs`, dashboard type mirror): ## Observability and Audit -- Structured tracing spans: `update.check`, `update.install`, `update.verify`, `update.notify` with outcome tags. -- Audit event classes: check, install_attempt, install_result, verification_result, policy_change, restart_action. -- `corvus update history` reads structured events from `update_history.jsonl` (chronological output). +- Structured tracing spans: `update.check`, `update.install`, `update.verify`, `update.notify` with + outcome tags. +- Audit event classes: check, install_attempt, install_result, verification_result, policy_change, + restart_action. +- `corvus update history` reads structured events from `update_history.jsonl` (chronological + output). - Daemon component health remains integrated via `daemon/mod.rs` supervisor markers. 
## Testing Strategy -| Layer | What to Test | Approach | -|-------|-------------|----------| -| Unit | method detection precedence, policy/env precedence, invalid override handling, state machine transitions | Rust unit tests in `update/mod.rs` and `config/schema.rs` | -| Unit | atomic writer and lock contention behavior | tempdir-based tests with parallel tasks/process simulation | -| Integration | CLI command exit semantics and output contracts | command tests for `update status/check/install/auto-enable/auto-disable/history` | -| Integration | channel confirmation + opportunistic mention gating | channel test harness in `channels/mod.rs` with fake channel | -| Integration | admin response parity with update status model | gateway admin handler tests + dashboard TS type checks | -| Resilience | interrupted write recovery and busy install response | fault-injection tests around temp write/rename and lock denial | +| Layer | What to Test | Approach | +|-------------|----------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| +| Unit | method detection precedence, policy/env precedence, invalid override handling, state machine transitions | Rust unit tests in `update/mod.rs` and `config/schema.rs` | +| Unit | atomic writer and lock contention behavior | tempdir-based tests with parallel tasks/process simulation | +| Integration | CLI command exit semantics and output contracts | command tests for `update status/check/install/auto-enable/auto-disable/history` | +| Integration | channel confirmation + opportunistic mention gating | channel test harness in `channels/mod.rs` with fake channel | +| Integration | admin response parity with update status model | gateway admin handler tests + dashboard TS type checks | +| Resilience | interrupted write recovery and busy install response | fault-injection tests around temp write/rename and lock 
denial | ## Migration / Rollout No destructive migration required. -- Existing `version_check.json` is read and upgraded in-memory to new snapshot schema (`schema_version`). +- Existing `version_check.json` is read and upgraded in-memory to new snapshot schema ( + `schema_version`). - Missing fields default safely. - History file is additive (`update_history.jsonl`), created on first event. ## Phased Implementation Plan -### Phase 1: Safety + command foundation - -1. Add `update status|check|install` command surface and exit code mapping. -2. Implement lock manager + atomic state writes for `version_check.json`. -3. Introduce install state machine and install transaction guard. -4. Add method detection + deterministic unsupported fallback. - -### Phase 2: Policy model + multi-surface visibility +### Phase 1: Policy model + multi-surface visibility 1. Extend config schema/env overrides with safe defaults and validation. 2. Add `update auto-enable|auto-disable` and status reflection. 3. Unify canonical payload fan-out for CLI/channel/daemon. 4. Expose update policy/status in admin gateway + dashboard types. +### Phase 2: Safety + command foundation + +1. Add `update status|check|install` command surface and exit code mapping. +2. Implement lock manager + atomic state writes for `version_check.json`. +3. Introduce install state machine and install transaction guard. +4. Add method detection + deterministic unsupported fallback. + ### Phase 3: Verification hardening + auditability 1. Enforce checksum verification fail-closed for artifact paths. @@ -440,5 +480,7 @@ No destructive migration required. ## Open Questions -- [ ] Signature verification backend selection (Sigstore/GPG) is deferred; this design adds extension points but mandates checksum now. -- [ ] Final release source canonicalization (`profiletailors` vs `dallay`) should be confirmed before implementation freeze. 
+- [ ] Signature verification backend selection (Sigstore/GPG) is deferred; this design adds + extension points but mandates checksum now. +- [ ] Final release source canonicalization (`profiletailors` vs `dallay`) should be confirmed + before implementation freeze. diff --git a/openspec/changes/enhance-auto-update-system/exploration.md b/openspec/changes/enhance-auto-update-system/exploration.md index 62c6ad2d0..70313c613 100644 --- a/openspec/changes/enhance-auto-update-system/exploration.md +++ b/openspec/changes/enhance-auto-update-system/exploration.md @@ -1,55 +1,109 @@ -## Exploration: Enhance Auto-Update System +# Exploration: Enhance Auto-Update System ### Current State -Auto-update behavior is centralized in `clients/agent-runtime/src/update/mod.rs` and currently covers three surfaces: CLI startup notices, daemon background polling, and in-conversation channel nudges. -- **Release detection** uses GitHub `releases/latest` endpoints (`profiletailors/corvus`, fallback `dallay/corvus`) with a 2s HTTP timeout and a 24h cache TTL, persisted in `workspace/state/version_check.json`. -- **CLI visibility** is limited to `agent` and `status` command paths (`clients/agent-runtime/src/main.rs`), where a best-effort bounded check prints a banner with manual update commands. -- **Daemon visibility** runs a supervisor-managed updater worker (`clients/agent-runtime/src/daemon/mod.rs`) that periodically checks and pushes channel notifications when destinations are configured. -- **Channel flow** supports opportunistic in-conversation update mentions and nonce-based confirmation (`corvus update confirm `) before attempting auto-install. -- **Auto-install execution** is currently a minimal strategy: try `npm`/`pnpm`/`yarn`/`bun` global install commands, otherwise return manual instructions. No install-method detection exists. 
-- **Config surface** has `updates.enabled`, `updates.check_interval_minutes`, `updates.confirmation_ttl_minutes`, and `updates.notify_destinations` in `clients/agent-runtime/src/config/schema.rs`; no dedicated env overrides for these fields exist yet. Only `CORVUS_DISABLE_UPDATE_CHECK` globally disables checks. -- **Security posture** is mixed: install script (`clients/web/apps/marketing/public/install`) verifies SHA-256 and stages binary writes, but runtime auto-install path does not verify artifacts itself and relies on package manager behavior. +Auto-update behavior is centralized in `clients/agent-runtime/src/update/mod.rs` and currently +covers three surfaces: CLI startup notices, daemon background polling, and in-conversation channel +nudges. + +- **Release detection** uses GitHub `releases/latest` endpoints (`profiletailors/corvus`, fallback + `dallay/corvus`) with a 2s HTTP timeout and a 24h cache TTL, persisted in + `workspace/state/version_check.json`. +- **CLI visibility** is limited to `agent` and `status` command paths ( + `clients/agent-runtime/src/main.rs`), where a best-effort bounded check prints a banner with + manual update commands. +- **Daemon visibility** runs a supervisor-managed updater worker ( + `clients/agent-runtime/src/daemon/mod.rs`) that periodically checks and pushes channel + notifications when destinations are configured. +- **Channel flow** supports opportunistic in-conversation update mentions and nonce-based + confirmation (`corvus update confirm <nonce>`) before attempting auto-install. +- **Auto-install execution** is currently a minimal strategy: try `npm`/`pnpm`/`yarn`/`bun` global + install commands, otherwise return manual instructions. No install-method detection exists. +- **Config surface** has `updates.enabled`, `updates.check_interval_minutes`, + `updates.confirmation_ttl_minutes`, and `updates.notify_destinations` in + `clients/agent-runtime/src/config/schema.rs`; no dedicated env overrides for these fields exist + yet.
Only `CORVUS_DISABLE_UPDATE_CHECK` globally disables checks. +- **Security posture** is mixed: install script (`clients/web/apps/marketing/public/install`) + verifies SHA-256 and stages binary writes, but runtime auto-install path does not verify artifacts + itself and relies on package manager behavior. ### Key Touchpoints -- `clients/agent-runtime/src/update/mod.rs` — core update detection, notice text, daemon polling, nonce confirmation, and auto-install behavior. -- `clients/agent-runtime/src/main.rs` — CLI routing and startup banner trigger points; location for new `update` subcommands. -- `clients/agent-runtime/src/channels/mod.rs` — pre-memory nonce interception and opportunistic in-conversation update mention. -- `clients/agent-runtime/src/daemon/mod.rs` — updater worker supervision and daemon lifecycle coupling. -- `clients/agent-runtime/src/config/schema.rs` — update config schema/defaults and env-override extension point. -- `clients/agent-runtime/src/service/mod.rs` — service restart/start/stop hooks relevant to safe post-install daemon handling. -- `clients/agent-runtime/src/gateway/admin.rs` and `clients/web/apps/dashboard/src/types/admin-config.ts` — API/UI extension points for visible update indicators/configuration in dashboard clients. -- `clients/agent-runtime/npm/corvus-cli/lib/install.js` and `clients/web/apps/marketing/public/install` — installation-channel behavior differences (npm wrapper installer vs shell installer). -- `.github/workflows/_publish.yml` — release asset checksum generation; base for stronger artifact verification policy. + +- `clients/agent-runtime/src/update/mod.rs` — core update detection, notice text, daemon polling, + nonce confirmation, and auto-install behavior. +- `clients/agent-runtime/src/main.rs` — CLI routing and startup banner trigger points; location for + new `update` subcommands. +- `clients/agent-runtime/src/channels/mod.rs` — pre-memory nonce interception and opportunistic + in-conversation update mention. 
+- `clients/agent-runtime/src/daemon/mod.rs` — updater worker supervision and daemon lifecycle + coupling. +- `clients/agent-runtime/src/config/schema.rs` — update config schema/defaults and env-override + extension point. +- `clients/agent-runtime/src/service/mod.rs` — service restart/start/stop hooks relevant to safe + post-install daemon handling. +- `clients/agent-runtime/src/gateway/admin.rs` and + `clients/web/apps/dashboard/src/types/admin-config.ts` — API/UI extension points for visible + update indicators/configuration in dashboard clients. +- `clients/agent-runtime/npm/corvus-cli/lib/install.js` and + `clients/web/apps/marketing/public/install` — installation-channel behavior differences (npm + wrapper installer vs shell installer). +- `.github/workflows/_publish.yml` — release asset checksum generation; base for stronger artifact + verification policy. ### Risks -- **Process safety/races**: update state locking is in-process (`OnceLock<Mutex<()>>`) only; concurrent CLI + daemon processes can still race on `version_check.json`. -- **Non-atomic persistence**: update state writes use direct file writes, unlike the config path's temp-file + rename strategy. -- **Install-method ambiguity**: runtime cannot reliably determine whether user installed via npm/pnpm/yarn/bun, direct binary, script, cargo, or homebrew. -- **Security gap in auto-install**: runtime-side auto-install does not perform explicit artifact integrity verification for binary/script paths. -- **Operational disruption risk**: applying updates while daemon/service is active can leave mixed binary/runtime state without coordinated restart/session handling. -- **Version/source drift**: mixed org/repo/package references (`profiletailors` vs `dallay`) increase risk of wrong source selection. + +- **Process safety/races**: update state locking is in-process (`OnceLock<Mutex<()>>`) only; + concurrent CLI + daemon processes can still race on `version_check.json`.
+- **Non-atomic persistence**: update state writes use direct file writes, unlike the config path's + temp-file + rename strategy. +- **Install-method ambiguity**: runtime cannot reliably determine whether user installed via + npm/pnpm/yarn/bun, direct binary, script, cargo, or homebrew. +- **Security gap in auto-install**: runtime-side auto-install does not perform explicit artifact + integrity verification for binary/script paths. +- **Operational disruption risk**: applying updates while daemon/service is active can leave mixed + binary/runtime state without coordinated restart/session handling. +- **Version/source drift**: mixed org/repo/package references (`profiletailors` vs `dallay`) + increase risk of wrong source selection. ### Open Questions and Assumptions -- **Client scope**: "client UI indicators" is assumed to include CLI + channel conversations + dashboard/web admin surfaces, not native mobile/desktop apps in this repository. -- **Auto-install policy default**: assumed default should remain safe/explicit (check + notify by default, auto-install opt-in). -- **Trust model**: need confirmation whether checksum-only verification is acceptable, or whether signed provenance (e.g., Sigstore/GPG) is required for runtime auto-install. -- **Install methods in execution scope**: requirement includes detecting `npm/pnpm/yarn/bun`, binary/script, `homebrew`, and `cargo`; assumption is execution MAY be supported for subset initially, with graceful/manual fallback for unsupported methods. -- **Daemon handling contract**: need product decision on whether updater should auto-restart managed services or stage update and require explicit `corvus service restart`. -- **Channel confirmation UX**: assumption is nonce confirmation remains mandatory for channel-initiated install unless a strict local policy setting allows unattended updates. 
+ +- **Client scope**: "client UI indicators" is assumed to include CLI + channel conversations + + dashboard/web admin surfaces, not native mobile/desktop apps in this repository. +- **Auto-install policy default**: assumed default should remain safe/explicit (check + notify by + default, auto-install opt-in). +- **Trust model**: need confirmation whether checksum-only verification is acceptable, or whether + signed provenance (e.g., Sigstore/GPG) is required for runtime auto-install. +- **Install methods in execution scope**: requirement includes detecting `npm/pnpm/yarn/bun`, + binary/script, `homebrew`, and `cargo`; assumption is execution MAY be supported for subset + initially, with graceful/manual fallback for unsupported methods. +- **Daemon handling contract**: need product decision on whether updater should auto-restart managed + services or stage update and require explicit `corvus service restart`. +- **Channel confirmation UX**: assumption is nonce confirmation remains mandatory for + channel-initiated install unless a strict local policy setting allows unattended updates. ### Recommended Scope Boundaries + - **In scope (phase 1)** - - Add a first-class `corvus update` command group (`check`, `install`, `status`, and confirmation plumbing as needed). - - Introduce install-method detection and persistence (detected + user-overridable) with a safe fallback matrix. - - Expand update config with explicit policy knobs (auto-check cadence, auto-install mode, restart behavior, visibility channels) plus env overrides. + - Add a first-class `corvus update` command group (`check`, `install`, `status`, and confirmation + plumbing as needed). + - Introduce install-method detection and persistence (detected + user-overridable) with a safe + fallback matrix. + - Expand update config with explicit policy knobs (auto-check cadence, auto-install mode, restart + behavior, visibility channels) plus env overrides. 
- Implement process-safe/atomic update state and install transaction guards. - - Unify notification payloads across CLI banner, in-conversation mention, and machine-readable indicator endpoints. - - Add focused tests for detection, policy gating, atomic state transitions, confirmation safety, and command UX. + - Unify notification payloads across CLI banner, in-conversation mention, and machine-readable + indicator endpoints. + - Add focused tests for detection, policy gating, atomic state transitions, confirmation safety, + and command UX. - **Out of scope (phase 1)** - Re-architecting release pipeline/package ecosystem beyond verification metadata consumption. - - Building a full standalone update UI in unrelated clients; expose API/typed fields first, then incremental frontend adoption. + - Building a full standalone update UI in unrelated clients; expose API/typed fields first, then + incremental frontend adoption. - Force-updating running sessions without explicit restart strategy and rollback semantics. ### Ready for Proposal -Yes. The codebase already has a clear update nucleus and insertion points for proactive visibility, safe auto-install policy, install-method detection, and client-facing indicators. Proposal should lock security invariants first (verification + atomicity + restart safety), then define phased UX rollout. + +Yes. The codebase already has a clear update nucleus and insertion points for proactive visibility, +safe auto-install policy, install-method detection, and client-facing indicators. Proposal should +lock security invariants first (verification + atomicity + restart safety), then define phased UX +rollout. 
diff --git a/openspec/changes/enhance-auto-update-system/proposal.md b/openspec/changes/enhance-auto-update-system/proposal.md index b4619b9e6..8300d3b78 100644 --- a/openspec/changes/enhance-auto-update-system/proposal.md +++ b/openspec/changes/enhance-auto-update-system/proposal.md @@ -2,95 +2,129 @@ ## Problem -The current update flow is fragmented and only partially safe: visibility is inconsistent across CLI, daemon, and in-conversation channels; auto-install support is limited to a few package managers; update state persistence is not atomic across processes; and runtime auto-install lacks explicit artifact verification and auditability. This creates user confusion, security risk, and operational instability when mixed runtime versions are active. +The current update flow is fragmented and only partially safe: visibility is inconsistent across +CLI, daemon, and in-conversation channels; auto-install support is limited to a few package +managers; update state persistence is not atomic across processes; and runtime auto-install lacks +explicit artifact verification and auditability. This creates user confusion, security risk, and +operational instability when mixed runtime versions are active. ## Goals -- Provide proactive update visibility across CLI startup, in-conversation prompts, and client UI/admin surfaces. -- Add explicit, safe-by-default auto-update policy with opt-in auto-install and environment overrides. -- Detect installation method and execute method-specific update routines (npm/pnpm/yarn/bun, binary/script, homebrew, cargo), with deterministic fallback. +- Provide proactive update visibility across CLI startup, in-conversation prompts, and client + UI/admin surfaces. +- Add explicit, safe-by-default auto-update policy with opt-in auto-install and environment + overrides. +- Detect installation method and execute method-specific update routines (npm/pnpm/yarn/bun, + binary/script, homebrew, cargo), with deterministic fallback. 
- Make update operations process-safe and atomic across concurrent CLI/daemon processes. -- Add first-class `corvus update` command group: `status`, `check`, `install`, `auto-enable`, `auto-disable`, `history`. -- Enforce artifact integrity/security checks (checksum first, signature-ready contract) and produce auditable update events. +- Add first-class `corvus update` command group: `status`, `check`, `install`, `auto-enable`, + `auto-disable`, `history`. +- Enforce artifact integrity/security checks (checksum first, signature-ready contract) and produce + auditable update events. ## Non-Goals - Redesigning release publishing pipelines beyond consuming existing checksum/signature metadata. -- Shipping full UX redesigns for unrelated clients; this change focuses on shared indicators and admin/dashboard integration points. +- Shipping full UX redesigns for unrelated clients; this change focuses on shared indicators and + admin/dashboard integration points. - Implementing zero-downtime binary hot-swap or full rollback orchestration for all runtime modes. ## High-Level Approach -1. Build an `UpdateManager` flow in `clients/agent-runtime/src/update/mod.rs` that unifies check, policy evaluation, install planning, verification, and event recording. -2. Introduce install-method detection and persistence (detected + user override), then route installs through method executors with explicit unsupported-method handling. -3. Add a dedicated `update` command tree in `clients/agent-runtime/src/main.rs` for interactive and scriptable operations. -4. Replace non-atomic update state writes (`workspace/state/version_check.json`) with temp-file + fsync + atomic rename semantics and inter-process file locking. -5. Extend config schema in `clients/agent-runtime/src/config/schema.rs` with auto-update policy knobs and env overrides (keeping safe defaults). -6. 
Normalize notification payloads for CLI banners, channel messages (`clients/agent-runtime/src/channels/mod.rs`), daemon push notifications (`clients/agent-runtime/src/daemon/mod.rs`), and gateway/admin API exposure (`clients/agent-runtime/src/gateway/admin.rs`, `clients/web/apps/dashboard/src/types/admin-config.ts`). -7. Add security verification gates before install and append structured audit log events to update history. +1. Build an `UpdateManager` flow in `clients/agent-runtime/src/update/mod.rs` that unifies check, + policy evaluation, install planning, verification, and event recording. +2. Introduce install-method detection and persistence (detected + user override), then route + installs through method executors with explicit unsupported-method handling. +3. Add a dedicated `update` command tree in `clients/agent-runtime/src/main.rs` for interactive and + scriptable operations. +4. Replace non-atomic update state writes (`workspace/state/version_check.json`) with temp-file + + fsync + atomic rename semantics and inter-process file locking. +5. Extend config schema in `clients/agent-runtime/src/config/schema.rs` with auto-update policy + knobs and env overrides (keeping safe defaults). +6. Normalize notification payloads for CLI banners, channel messages ( + `clients/agent-runtime/src/channels/mod.rs`), daemon push notifications ( + `clients/agent-runtime/src/daemon/mod.rs`), and gateway/admin API exposure ( + `clients/agent-runtime/src/gateway/admin.rs`, + `clients/web/apps/dashboard/src/types/admin-config.ts`). +7. Add security verification gates before install and append structured audit log events to update + history. ## Phased Scope ### Phase 1: Safety and Command Foundation + - Add `corvus update status|check|install` command surface. - Implement atomic state persistence, inter-process locking, and single-install transaction guards. - Add install-method detection for currently supported methods and robust manual fallback. 
- Standardize update status model used by CLI and daemon. ### Phase 2: Auto-Update Policy and Visibility Expansion + - Add `auto-enable`, `auto-disable`, and policy/env override support. - Unify proactive notifications across CLI/in-conversation/daemon channels. - Expose update status + policy fields through admin gateway and dashboard types. ### Phase 3: Verification Hardening and Auditability -- Enforce checksum verification for downloaded artifacts and define signature-verification extension points. + +- Enforce checksum verification for downloaded artifacts and define signature-verification extension + points. - Add `corvus update history` backed by structured audit events. - Add daemon-safe restart/staging behavior to avoid mixed-version runtime state. ## Affected Areas -| Area | Impact | Description | -|------|--------|-------------| -| `clients/agent-runtime/src/update/mod.rs` | Modified | Core update manager, method detection/execution, verification gates, history events | -| `clients/agent-runtime/src/main.rs` | Modified | New `corvus update` subcommands and CLI wiring | -| `clients/agent-runtime/src/channels/mod.rs` | Modified | In-conversation visibility and nonce-confirmed install handoff | -| `clients/agent-runtime/src/daemon/mod.rs` | Modified | Polling, notification, and safe install coordination | -| `clients/agent-runtime/src/config/schema.rs` | Modified | Auto-update policy schema/defaults/env overrides | -| `clients/agent-runtime/src/service/mod.rs` | Modified | Controlled restart/staging integration after install | -| `clients/agent-runtime/src/gateway/admin.rs` | Modified | Update status/policy fields for client UI visibility | -| `clients/web/apps/dashboard/src/types/admin-config.ts` | Modified | Typed update indicator and config fields | -| `workspace/state/version_check.json` (+ lock/history peers) | Modified/New | Atomic state, lock coordination, and audit history storage | +| Area | Impact | Description | 
+|-------------------------------------------------------------|--------------|-------------------------------------------------------------------------------------| +| `clients/agent-runtime/src/update/mod.rs` | Modified | Core update manager, method detection/execution, verification gates, history events | +| `clients/agent-runtime/src/main.rs` | Modified | New `corvus update` subcommands and CLI wiring | +| `clients/agent-runtime/src/channels/mod.rs` | Modified | In-conversation visibility and nonce-confirmed install handoff | +| `clients/agent-runtime/src/daemon/mod.rs` | Modified | Polling, notification, and safe install coordination | +| `clients/agent-runtime/src/config/schema.rs` | Modified | Auto-update policy schema/defaults/env overrides | +| `clients/agent-runtime/src/service/mod.rs` | Modified | Controlled restart/staging integration after install | +| `clients/agent-runtime/src/gateway/admin.rs` | Modified | Update status/policy fields for client UI visibility | +| `clients/web/apps/dashboard/src/types/admin-config.ts` | Modified | Typed update indicator and config fields | +| `workspace/state/version_check.json` (+ lock/history peers) | Modified/New | Atomic state, lock coordination, and audit history storage | ## Risks and Mitigations -| Risk | Likelihood | Mitigation | -|------|------------|------------| -| Concurrent CLI + daemon update races | High | Inter-process file locks + transaction state machine + idempotent install steps | -| Partial/corrupt update state writes | Medium | Temp-file write, fsync, atomic rename, and read-after-write validation | -| Wrong install strategy selected | Medium | Detection priority matrix, persisted method override, explicit dry-run/status output | -| Integrity bypass in non-package-manager paths | High | Mandatory checksum verification; signature verification hook and fail-closed policy | -| Runtime disruption from mixed versions | Medium | Staged install markers and coordinated service restart gating | -| 
Source/repo drift for version checks | Medium | Canonical source configuration and strict endpoint validation | +| Risk | Likelihood | Mitigation | +|-----------------------------------------------|------------|--------------------------------------------------------------------------------------| +| Concurrent CLI + daemon update races | High | Inter-process file locks + transaction state machine + idempotent install steps | +| Partial/corrupt update state writes | Medium | Temp-file write, fsync, atomic rename, and read-after-write validation | +| Wrong install strategy selected | Medium | Detection priority matrix, persisted method override, explicit dry-run/status output | +| Integrity bypass in non-package-manager paths | High | Mandatory checksum verification; signature verification hook and fail-closed policy | +| Runtime disruption from mixed versions | Medium | Staged install markers and coordinated service restart gating | +| Source/repo drift for version checks | Medium | Canonical source configuration and strict endpoint validation | ## Rollback Plan -- Keep existing startup banner + manual update pathway behind compatibility path while new command group is introduced. -- Guard new auto-update/install-method logic behind feature flags or config toggles so behavior can revert to notify-only mode. -- If regressions appear, disable auto-install policy defaults, retain check-only flow, and revert command handlers to existing behavior without deleting stored history. -- Revert affected modules in a single patch set (`update`, `main`, `daemon`, `channels`, `config`, `gateway`, dashboard types) and preserve state files for postmortem. +- Keep existing startup banner + manual update pathway behind compatibility path while new command + group is introduced. +- Guard new auto-update/install-method logic behind feature flags or config toggles so behavior can + revert to notify-only mode. 
+- If regressions appear, disable auto-install policy defaults, retain check-only flow, and revert + command handlers to existing behavior without deleting stored history. +- Revert affected modules in a single patch set (`update`, `main`, `daemon`, `channels`, `config`, + `gateway`, dashboard types) and preserve state files for postmortem. ## Dependencies - Existing GitHub release metadata and checksum artifacts from `.github/workflows/_publish.yml`. -- Existing installer paths (`clients/agent-runtime/npm/corvus-cli/lib/install.js`, `clients/web/apps/marketing/public/install`) for method heuristics and verification alignment. +- Existing installer paths (`clients/agent-runtime/npm/corvus-cli/lib/install.js`, + `clients/web/apps/marketing/public/install`) for method heuristics and verification alignment. ## Acceptance Criteria -- [ ] `corvus update status|check|install|auto-enable|auto-disable|history` are available and return deterministic exit codes. -- [ ] Default policy is safe (`check+notify` enabled, auto-install disabled) and env overrides are documented and effective. -- [ ] Installation method is detected (or explicitly overridden), surfaced in `status`, and used for method-specific execution/fallback. +- [ ] `corvus update status|check|install|auto-enable|auto-disable|history` are available and return + deterministic exit codes. +- [ ] Default policy is safe (`check+notify` enabled, auto-install disabled) and env overrides are + documented and effective. +- [ ] Installation method is detected (or explicitly overridden), surfaced in `status`, and used for + method-specific execution/fallback. - [ ] Concurrent update attempts do not corrupt state or run parallel installs. - [ ] Update state writes are atomic and recoverable after interruption. - [ ] Runtime install path performs artifact integrity verification before activation. -- [ ] CLI, in-conversation, and admin/dashboard surfaces expose consistent update availability and policy status. 
-- [ ] Update attempts and outcomes are persisted in audit history and viewable via `corvus update history`. +- [ ] CLI, in-conversation, and admin/dashboard surfaces expose consistent update availability and + policy status. +- [ ] Update attempts and outcomes are persisted in audit history and viewable via + `corvus update history`. diff --git a/openspec/changes/enhance-auto-update-system/specs/update-system/spec.md b/openspec/changes/enhance-auto-update-system/specs/update-system/spec.md index 6f989dba2..d036d7097 100644 --- a/openspec/changes/enhance-auto-update-system/specs/update-system/spec.md +++ b/openspec/changes/enhance-auto-update-system/specs/update-system/spec.md @@ -2,13 +2,15 @@ ## Purpose -Define a secure, observable, and deterministic update experience across CLI, conversation channels, and client-facing admin surfaces. +Define a secure, observable, and deterministic update experience across CLI, conversation channels, +and client-facing admin surfaces. ## Requirements ### Requirement: Multi-Surface Update Visibility -The system MUST expose consistent update availability and update policy state at CLI startup, during eligible in-conversation interactions, and through client-facing admin/status surfaces. +The system MUST expose consistent update availability and update policy state at CLI startup, during +eligible in-conversation interactions, and through client-facing admin/status surfaces. 
#### Scenario: CLI startup shows update availability @@ -28,12 +30,14 @@ The system MUST expose consistent update availability and update policy state at - GIVEN the runtime has a computed update status and policy state - WHEN a client/admin status endpoint is queried -- THEN the response includes update availability, current version, available version, last check result, and policy flags +- THEN the response includes update availability, current version, available version, last check + result, and policy flags - AND the values are consistent with the latest CLI-visible status ### Requirement: Update Configuration Model and Safe Defaults -The system MUST provide a structured update configuration model with safe defaults, where automatic checks and notifications are enabled by default and automatic installation is disabled by default. +The system MUST provide a structured update configuration model with safe defaults, where automatic +checks and notifications are enabled by default and automatic installation is disabled by default. #### Scenario: Default policy is safe-by-default @@ -58,7 +62,9 @@ The system MUST provide a structured update configuration model with safe defaul ### Requirement: Installation Method Detection and Execution Routing -The system MUST determine an effective installation method (detected or user-overridden), route update execution through the method-specific strategy, and provide deterministic fallback instructions when unsupported or unavailable. +The system MUST determine an effective installation method (detected or user-overridden), route +update execution through the method-specific strategy, and provide deterministic fallback +instructions when unsupported or unavailable. 
#### Scenario: Supported method is detected and used @@ -83,7 +89,8 @@ The system MUST determine an effective installation method (detected or user-ove ### Requirement: Process Safety and Atomic Update State -The system MUST prevent concurrent install transactions across processes and MUST persist update state atomically such that interrupted writes do not produce corrupt state. +The system MUST prevent concurrent install transactions across processes and MUST persist update +state atomically such that interrupted writes do not produce corrupt state. #### Scenario: Concurrent install attempts are serialized @@ -101,13 +108,16 @@ The system MUST prevent concurrent install transactions across processes and MUS ### Requirement: CLI Update Command Contract -The CLI MUST provide `update status`, `update check`, `update install`, `update auto-enable`, `update auto-disable`, and `update history` commands with deterministic outputs and exit semantics suitable for interactive and scripted use. +The CLI MUST provide `update status`, `update check`, `update install`, `update auto-enable`, +`update auto-disable`, and `update history` commands with deterministic outputs and exit semantics +suitable for interactive and scripted use. 
#### Scenario: `update status` reports effective state - GIVEN update metadata and effective policy are available - WHEN the user runs `update status` -- THEN output includes current version, latest known version status, installation method, and auto-update policy state +- THEN output includes current version, latest known version status, installation method, and + auto-update policy state - AND the command returns success when status can be resolved #### Scenario: `update check` performs explicit refresh @@ -138,9 +148,21 @@ The CLI MUST provide `update status`, `update check`, `update install`, `update - THEN the command returns chronologically ordered update events - AND each entry includes enough metadata to identify what occurred and outcome class +#### Scenario: `update confirm <nonce>` compatibility + +- GIVEN a channel-initiated install provides a nonce via in-conversation flow +- WHEN the user or automated agent runs `update confirm <nonce>` +- THEN the CLI accepts the nonce, completes the install handshake +- AND returns deterministic success/failure semantics +- AND appends an audit entry to history reflecting the nonce-confirmed install +- NOTE: This is an advanced/internal flow; `update status` and `update history` reflect actions from + nonce-confirmed installs alongside other update events + ### Requirement: Integrity Verification and Audit Logging -The system MUST verify artifact integrity before activation for update paths that consume downloadable artifacts, MUST fail closed on verification failure, and MUST append structured audit events for update checks and install attempts. +The system MUST verify artifact integrity before activation for update paths that consume +downloadable artifacts, MUST fail closed on verification failure, and MUST append structured audit +events for update checks and install attempts.
#### Scenario: Successful verification permits activation diff --git a/openspec/changes/enhance-auto-update-system/tasks.md b/openspec/changes/enhance-auto-update-system/tasks.md index 1a9b7b910..351a19bf8 100644 --- a/openspec/changes/enhance-auto-update-system/tasks.md +++ b/openspec/changes/enhance-auto-update-system/tasks.md @@ -3,79 +3,138 @@ ## Phase Dependencies and Sequencing - Phase 1 -> Phase 2: shared update policy/model and env parsing must exist before CLI/state wiring. -- Phase 2 -> Phase 3: canonical status and notification payload must be stable before channel/daemon/admin fan-out. -- Phase 3 -> Phase 4: unified surfaces and policy toggles must be in place before verification hardening/history UX. +- Phase 2 -> Phase 3: canonical status and notification payload must be stable before + channel/daemon/admin fan-out. +- Phase 3 -> Phase 4: unified surfaces and policy toggles must be in place before verification + hardening/history UX. - Phase 4 -> Phase 5: implementation is complete before end-to-end verification and regression. ## Phase 1: Policy and State Foundation (TDD) -- [ ] 1.1 Add RED unit tests in `clients/agent-runtime/src/config/schema.rs` for new update fields/defaults and env override precedence (`CORVUS_UPDATES_ENABLED`, `CORVUS_UPDATE_AUTO_INSTALL`, `CORVUS_UPDATE_CHANNEL_VISIBILITY`, `CORVUS_UPDATE_CLI_NOTICE`, `CORVUS_UPDATE_METHOD_OVERRIDE`, `CORVUS_UPDATE_RESTART_POLICY`) including invalid-value fail-safe behavior. -- [ ] 1.2 Implement GREEN schema updates in `clients/agent-runtime/src/config/schema.rs` for `auto_install_enabled`, `channel_visibility_enabled`, `cli_startup_notice_enabled`, `install_method_override`, `restart_policy`, and `history_max_entries` with safe defaults and validation. -- [ ] 1.3 Add RED unit tests in `clients/agent-runtime/src/update/mod.rs` for policy resolution, install method precedence (`override -> detected -> unknown`), and install/check state transition invariants. 
-- [ ] 1.4 Implement GREEN core model/types in `clients/agent-runtime/src/update/mod.rs` (`InstallMethod`, `RestartPolicy`, `UpdatePolicy`, `UpdateStateSnapshot`, `InstallState`, `CheckOutcome`, `UpdateStatusView`) and refactor duplicate policy/state mapping helpers (REFACTOR). +- [ ] 1.1 Add RED unit tests in `clients/agent-runtime/src/config/schema.rs` for new update + fields/defaults and env override precedence (`CORVUS_UPDATES_ENABLED`, + `CORVUS_UPDATE_AUTO_INSTALL`, `CORVUS_UPDATE_CHANNEL_VISIBILITY`, `CORVUS_UPDATE_CLI_NOTICE`, + `CORVUS_UPDATE_METHOD_OVERRIDE`, `CORVUS_UPDATE_RESTART_POLICY`) including invalid-value fail-safe + behavior. +- [ ] 1.2 Implement GREEN schema updates in `clients/agent-runtime/src/config/schema.rs` for + `auto_install_enabled`, `channel_visibility_enabled`, `cli_startup_notice_enabled`, + `install_method_override`, `restart_policy`, and `history_max_entries` with safe defaults and + validation. +- [ ] 1.3 Add RED unit tests in `clients/agent-runtime/src/update/mod.rs` for policy resolution, + install method precedence (`override -> detected -> unknown`), and install/check state transition + invariants. +- [ ] 1.4 Implement GREEN core model/types in `clients/agent-runtime/src/update/mod.rs` ( + `InstallMethod`, `RestartPolicy`, `UpdatePolicy`, `UpdateStateSnapshot`, `InstallState`, + `CheckOutcome`, `UpdateStatusView`) and refactor duplicate policy/state mapping helpers ( + REFACTOR). Verification criteria (Phase 1): + - New config/env tests pass and prove safe-by-default behavior. - Update model/state tests pass and prove deterministic method and policy resolution. ## Phase 2: CLI Commands, Locking, and Atomic State (TDD) -- [ ] 2.1 Add RED CLI command tests for `update status`, `update check`, and `update install` deterministic output/exit semantics in runtime command test coverage associated with `clients/agent-runtime/src/main.rs`. 
-- [ ] 2.2 Implement GREEN `update status|check|install` command wiring and exit code mapping in `clients/agent-runtime/src/main.rs`, routed through `UpdateManager` entrypoints. -- [ ] 2.3 Add RED concurrency/resilience tests in `clients/agent-runtime/src/update/mod.rs` (or update-focused runtime tests) for cross-process busy outcomes and interrupted-write recovery of `workspace/state/version_check.json`. -- [ ] 2.4 Implement GREEN file-lock and atomic persistence flow in `clients/agent-runtime/src/update/mod.rs` (`update_state.lock`, `update_install.lock`, temp-file + fsync + rename + directory sync) plus single-install transaction guard. -- [ ] 2.5 Implement deterministic install-method execution routing and unsupported fallback messaging in `clients/agent-runtime/src/update/mod.rs` without unsafe generic shell execution. +- [ ] 2.1 Add RED CLI command tests for `update status`, `update check`, and `update install` + deterministic output/exit semantics in runtime command test coverage associated with + `clients/agent-runtime/src/main.rs`. +- [ ] 2.2 Implement GREEN `update status|check|install` command wiring and exit code mapping in + `clients/agent-runtime/src/main.rs`, routed through `UpdateManager` entrypoints. +- [ ] 2.3 Add RED concurrency/resilience tests in `clients/agent-runtime/src/update/mod.rs` (or + update-focused runtime tests) for cross-process busy outcomes and interrupted-write recovery of + `workspace/state/version_check.json`. +- [ ] 2.4 Implement GREEN file-lock and atomic persistence flow in + `clients/agent-runtime/src/update/mod.rs` (`update_state.lock`, `update_install.lock`, temp-file + + fsync + rename + directory sync) plus single-install transaction guard. +- [ ] 2.5 Implement deterministic install-method execution routing and unsupported fallback + messaging in `clients/agent-runtime/src/update/mod.rs` without unsafe generic shell execution. Dependencies: + - Depends on Phase 1 policy/model contracts. 
Verification criteria (Phase 2): + - `update status|check|install` behavior is script-stable and test-covered. -- Concurrent install attempts serialize correctly; state file remains valid after simulated interruption. +- Concurrent install attempts serialize correctly; state file remains valid after simulated + interruption. ## Phase 3: Multi-Surface Visibility and Policy Controls (TDD) -- [ ] 3.1 Add RED tests in `clients/agent-runtime/src/channels/mod.rs` for channel visibility gating and canonical update payload parity with CLI status. -- [ ] 3.2 Implement GREEN channel integration in `clients/agent-runtime/src/channels/mod.rs` so opportunistic mentions and nonce-confirm flow use canonical status/policy gates. -- [ ] 3.3 Add RED daemon watcher tests in `clients/agent-runtime/src/daemon/mod.rs` for check interval behavior, deduped notifications, and policy-aware fan-out. -- [ ] 3.4 Implement GREEN daemon updater integration in `clients/agent-runtime/src/daemon/mod.rs` using canonical update payload and shared manager APIs. -- [ ] 3.5 Add RED admin contract tests in `clients/agent-runtime/src/gateway/admin.rs` and TypeScript compatibility checks in `clients/web/apps/dashboard/src/types/admin-config.ts` for `config.updates` status/policy fields. -- [ ] 3.6 Implement GREEN admin API and dashboard type updates in `clients/agent-runtime/src/gateway/admin.rs` and `clients/web/apps/dashboard/src/types/admin-config.ts`, preserving secret-safe response discipline. -- [ ] 3.7 Add RED/GREEN tasks in `clients/agent-runtime/src/main.rs` and `clients/agent-runtime/src/config/schema.rs` for `update auto-enable` and `update auto-disable`, ensuring persisted policy toggles are reflected in same-session `update status`. +- [ ] 3.1 Add RED tests in `clients/agent-runtime/src/channels/mod.rs` for channel visibility gating + and canonical update payload parity with CLI status. 
+- [ ] 3.2 Implement GREEN channel integration in `clients/agent-runtime/src/channels/mod.rs` so + opportunistic mentions and nonce-confirm flow use canonical status/policy gates. +- [ ] 3.3 Add RED daemon watcher tests in `clients/agent-runtime/src/daemon/mod.rs` for check + interval behavior, deduped notifications, and policy-aware fan-out. +- [ ] 3.4 Implement GREEN daemon updater integration in `clients/agent-runtime/src/daemon/mod.rs` + using canonical update payload and shared manager APIs. +- [ ] 3.5 Add RED admin contract tests in `clients/agent-runtime/src/gateway/admin.rs` and + TypeScript compatibility checks in `clients/web/apps/dashboard/src/types/admin-config.ts` for + `config.updates` status/policy fields. +- [ ] 3.6 Implement GREEN admin API and dashboard type updates in + `clients/agent-runtime/src/gateway/admin.rs` and + `clients/web/apps/dashboard/src/types/admin-config.ts`, preserving secret-safe response + discipline. +- [ ] 3.7 Add RED/GREEN tasks in `clients/agent-runtime/src/main.rs` and + `clients/agent-runtime/src/config/schema.rs` for `update auto-enable` and `update auto-disable`, + ensuring persisted policy toggles are reflected in same-session `update status`. Dependencies: + - Depends on Phase 2 canonical status contract and manager entrypoints. Verification criteria (Phase 3): + - CLI, channel, daemon, and admin surfaces expose consistent version/policy facts. - Policy toggles (`auto-enable/auto-disable`) persist atomically and reflect immediately. ## Phase 4: Integrity Verification, History, and Restart Safety (TDD) -- [ ] 4.1 Add RED verification tests in `clients/agent-runtime/src/update/mod.rs` for checksum-required artifact paths, missing metadata failures, digest mismatch failures, and fail-closed install blocking. -- [ ] 4.2 Implement GREEN verification gate and structured verification/install audit event recording in `clients/agent-runtime/src/update/mod.rs`. 
-- [ ] 4.3 Add RED tests for `update history` ordering and schema expectations, then implement GREEN command + history reader wiring in `clients/agent-runtime/src/main.rs` and `clients/agent-runtime/src/update/mod.rs` backed by `workspace/state/update_history.jsonl`. -- [ ] 4.4 Add RED restart-policy integration tests in `clients/agent-runtime/src/service/mod.rs` and daemon-facing update handling, then implement GREEN `InstalledPendingRestart` handling for `never|prompt|auto_managed_service` behavior. -- [ ] 4.5 Refactor duplicated audit/restart decision code in `clients/agent-runtime/src/update/mod.rs`, `clients/agent-runtime/src/daemon/mod.rs`, and `clients/agent-runtime/src/service/mod.rs` while keeping event taxonomy stable. +- [ ] 4.1 Add RED verification tests in `clients/agent-runtime/src/update/mod.rs` for + checksum-required artifact paths, missing metadata failures, digest mismatch failures, and + fail-closed install blocking. +- [ ] 4.2 Implement GREEN verification gate and structured verification/install audit event + recording in `clients/agent-runtime/src/update/mod.rs`. +- [ ] 4.3 Add RED tests for `update history` ordering and schema expectations, then implement GREEN + command + history reader wiring in `clients/agent-runtime/src/main.rs` and + `clients/agent-runtime/src/update/mod.rs` backed by `workspace/state/update_history.jsonl`. +- [ ] 4.4 Add RED restart-policy integration tests in `clients/agent-runtime/src/service/mod.rs` and + daemon-facing update handling, then implement GREEN `InstalledPendingRestart` handling for + `never|prompt|auto_managed_service` behavior. +- [ ] 4.5 Refactor duplicated audit/restart decision code in + `clients/agent-runtime/src/update/mod.rs`, `clients/agent-runtime/src/daemon/mod.rs`, and + `clients/agent-runtime/src/service/mod.rs` while keeping event taxonomy stable. Dependencies: + - Depends on Phase 3 fan-out/admin contract completion. 
Verification criteria (Phase 4): + - Verification failures block activation and emit auditable failure events. - `update history` returns chronological, structured check/install events. - Restart handling avoids mixed-version running state for managed service mode. ## Phase 5: End-to-End Verification and Regression Gate -- [ ] 5.1 Add/update focused integration tests under `clients/agent-runtime/tests/` for full command contract coverage (`status|check|install|auto-enable|auto-disable|history`) and concurrency outcomes. -- [ ] 5.2 Add/update integration tests under `clients/agent-runtime/tests/` for cross-surface consistency (CLI status vs admin payload vs channel/daemon notification facts). -- [ ] 5.3 Run targeted runtime verification (`cargo test -p agent-runtime update`) and dashboard type/build checks for `clients/web/apps/dashboard/src/types/admin-config.ts`, fixing regressions in touched files. -- [ ] 5.4 Run full repository regression (`make test` and `make build`) and confirm every scenario in `openspec/changes/enhance-auto-update-system/specs/update-system/spec.md` is mapped to passing tests before handoff. +- [ ] 5.1 Add/update focused integration tests under `clients/agent-runtime/tests/` for full command + contract coverage (`status|check|install|auto-enable|auto-disable|history`) and concurrency + outcomes. +- [ ] 5.2 Add/update integration tests under `clients/agent-runtime/tests/` for cross-surface + consistency (CLI status vs admin payload vs channel/daemon notification facts). +- [ ] 5.3 Run targeted runtime verification (`cargo test -p agent-runtime update`) and dashboard + type/build checks for `clients/web/apps/dashboard/src/types/admin-config.ts`, fixing regressions + in touched files. +- [ ] 5.4 Run full repository regression (`make test` and `make build`) and confirm every scenario + in `openspec/changes/enhance-auto-update-system/specs/update-system/spec.md` is mapped to passing + tests before handoff. 
Dependencies: + - Depends on completion of Phases 1-4. Verification criteria (Phase 5): + - All targeted and full regression suites pass. - Each spec requirement/scenario has explicit test coverage evidence. From 5f76fb9dafbafdb6ea636f6514e5acfa56435a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yuniel=20Acosta=20P=C3=A9rez?= <33158051+yacosta738@users.noreply.github.com> Date: Thu, 5 Mar 2026 15:09:11 +0100 Subject: [PATCH 3/4] feat: enhance auto-update system with new command structure and status visibility --- README.md | 2 +- clients/agent-runtime/src/channels/mod.rs | 35 +- clients/agent-runtime/src/config/schema.rs | 156 ++ clients/agent-runtime/src/daemon/mod.rs | 134 +- clients/agent-runtime/src/gateway/admin.rs | 68 + clients/agent-runtime/src/main.rs | 199 +++ clients/agent-runtime/src/service/mod.rs | 39 + clients/agent-runtime/src/update/mod.rs | 1395 ++++++++++++++++- .../tests/admin_config_api_integration.rs | 21 +- .../tests/update_system_integration.rs | 206 +++ clients/web/apps/dashboard/src/App.vue | 14 +- .../components/config/GatewaySettings.spec.ts | 2 +- .../src/components/config/GatewaySettings.vue | 8 +- .../components/config/GeneralSettings.spec.ts | 5 +- .../src/components/config/GeneralSettings.vue | 8 +- .../config/ObservabilitySettings.vue | 8 +- .../src/components/config/RuntimeSettings.vue | 4 +- .../components/config/SchedulerSettings.vue | 8 +- .../config/SecuritySettings.spec.ts | 2 +- .../components/config/SecuritySettings.vue | 8 +- .../components/config/WebhookSettings.spec.ts | 2 +- .../src/components/config/WebhookSettings.vue | 16 +- .../src/composables/configPayload.ts | 73 +- .../src/composables/useConfig.spec.ts | 30 +- .../dashboard/src/composables/useConfig.ts | 38 +- .../apps/dashboard/src/types/admin-config.ts | 17 + .../apply-progress.md | 24 + .../enhance-auto-update-system/tasks.md | 50 +- .../verify-report.md | 59 + 29 files changed, 2469 insertions(+), 162 deletions(-) create mode 100644 
clients/agent-runtime/tests/update_system_integration.rs create mode 100644 openspec/changes/enhance-auto-update-system/apply-progress.md create mode 100644 openspec/changes/enhance-auto-update-system/verify-report.md diff --git a/README.md b/README.md index f8160ff2e..1de31585f 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ [![codecov](https://codecov.io/gh/dallay/corvus/graph/badge.svg?token=N4THEP2OF1)](https://codecov.io/gh/dallay/corvus) [![License](https://img.shields.io/github/license/dallay/corvus?color=blue)](LICENSE) [![Version](https://img.shields.io/badge/version-0.1.14-blue.svg)](gradle.properties) -[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](https://makeapullrequest.com) +[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](https://github.com/dallay/corvus/compare) ## 🛡️ Code Quality (SonarCloud) diff --git a/clients/agent-runtime/src/channels/mod.rs b/clients/agent-runtime/src/channels/mod.rs index f778d2f68..576894729 100755 --- a/clients/agent-runtime/src/channels/mod.rs +++ b/clients/agent-runtime/src/channels/mod.rs @@ -110,6 +110,10 @@ fn channel_delivery_instructions(channel_name: &str) -> Option<&'static str> { } } +fn update_visibility_enabled(config: &Config) -> bool { + config.updates.enabled && config.updates.channel_visibility_enabled +} + struct ResponseContext<'a> { channel: Option<&'a Arc>, reply_target: &'a str, @@ -459,13 +463,15 @@ async fn process_channel_message(ctx: Arc, msg: traits::C format!("{memory_context}{}", msg.content) }; - let _ = crate::update::maybe_send_opportunistic_update_notice( - ctx.config.as_ref(), - &msg, - target_channel.as_ref(), - env!("CARGO_PKG_VERSION"), - ) - .await; + if update_visibility_enabled(ctx.config.as_ref()) { + let _ = crate::update::maybe_send_opportunistic_update_notice( + ctx.config.as_ref(), + &msg, + target_channel.as_ref(), + env!("CARGO_PKG_VERSION"), + ) + .await; + } let session_id = 
channel_session_id(&msg); @@ -3175,4 +3181,19 @@ mod tests { assert_eq!(sent_messages.len(), 1); assert!(!sent_messages[0].contains("request blocked")); } + + #[test] + fn update_visibility_gate_follows_policy_flags() { + let mut config = Config::default(); + config.updates.enabled = true; + config.updates.channel_visibility_enabled = true; + assert!(update_visibility_enabled(&config)); + + config.updates.channel_visibility_enabled = false; + assert!(!update_visibility_enabled(&config)); + + config.updates.enabled = false; + config.updates.channel_visibility_enabled = true; + assert!(!update_visibility_enabled(&config)); + } } diff --git a/clients/agent-runtime/src/config/schema.rs b/clients/agent-runtime/src/config/schema.rs index 1446fb8d2..3e2e18dea 100644 --- a/clients/agent-runtime/src/config/schema.rs +++ b/clients/agent-runtime/src/config/schema.rs @@ -2027,10 +2027,20 @@ impl Default for Config { } #[derive(Debug, Clone, Serialize, Deserialize)] +#[allow(clippy::struct_excessive_bools)] pub struct UpdateConfig { /// Enable periodic update checks + notifications in daemon mode. #[serde(default = "default_updates_enabled")] pub enabled: bool, + /// Auto-install policy; disabled by default for safety. + #[serde(default)] + pub auto_install_enabled: bool, + /// Channel-side update visibility. + #[serde(default = "default_true")] + pub channel_visibility_enabled: bool, + /// CLI startup notice visibility. + #[serde(default = "default_true")] + pub cli_startup_notice_enabled: bool, /// Poll interval for update checks while daemon is running. #[serde( default = "default_update_check_interval_minutes", @@ -2049,6 +2059,15 @@ pub struct UpdateConfig { /// Value: list of destination identifiers for that channel. #[serde(default)] pub notify_destinations: HashMap>, + /// Optional install method override. + #[serde(default)] + pub install_method_override: Option, + /// Restart policy after successful install. 
+ #[serde(default = "default_update_restart_policy")] + pub restart_policy: String, + /// Maximum retained history entries. + #[serde(default = "default_update_history_max_entries")] + pub history_max_entries: u32, } fn deserialize_nonzero_u64<'de, D>(deserializer: D) -> Result @@ -2077,13 +2096,45 @@ fn default_update_confirmation_ttl_minutes() -> u64 { 30 } +fn default_update_restart_policy() -> String { + "prompt".to_string() +} + +fn default_update_history_max_entries() -> u32 { + 200 +} + +fn normalize_install_method_override(raw: &str) -> Option { + let normalized = raw.trim().to_ascii_lowercase(); + match normalized.as_str() { + "npm" | "pnpm" | "yarn" | "bun" | "homebrew" | "cargo" | "script_binary" => { + Some(normalized) + } + _ => None, + } +} + +fn normalize_restart_policy(raw: &str) -> Option { + let normalized = raw.trim().to_ascii_lowercase(); + match normalized.as_str() { + "never" | "prompt" | "auto_managed_service" => Some(normalized), + _ => None, + } +} + impl Default for UpdateConfig { fn default() -> Self { Self { enabled: default_updates_enabled(), + auto_install_enabled: false, + channel_visibility_enabled: true, + cli_startup_notice_enabled: true, check_interval_minutes: default_update_check_interval_minutes(), confirmation_ttl_minutes: default_update_confirmation_ttl_minutes(), notify_destinations: HashMap::new(), + install_method_override: None, + restart_policy: default_update_restart_policy(), + history_max_entries: default_update_history_max_entries(), } } } @@ -2571,6 +2622,51 @@ impl Config { &mut self.memory.surreal.password, ); env_override_optional("CORVUS_SURREALDB_TOKEN", &mut self.memory.surreal.token); + + env_override_bool("CORVUS_UPDATES_ENABLED", None, &mut self.updates.enabled); + env_override_bool( + "CORVUS_UPDATE_AUTO_INSTALL", + None, + &mut self.updates.auto_install_enabled, + ); + env_override_bool( + "CORVUS_UPDATE_CHANNEL_VISIBILITY", + None, + &mut self.updates.channel_visibility_enabled, + ); + env_override_bool( 
+ "CORVUS_UPDATE_CLI_NOTICE", + None, + &mut self.updates.cli_startup_notice_enabled, + ); + + if let Ok(raw) = std::env::var("CORVUS_UPDATE_METHOD_OVERRIDE") { + let trimmed = raw.trim(); + if !trimmed.is_empty() { + if let Some(method) = normalize_install_method_override(trimmed) { + self.updates.install_method_override = Some(method); + } else { + tracing::warn!( + "ignoring invalid CORVUS_UPDATE_METHOD_OVERRIDE value: {}", + trimmed + ); + } + } + } + + if let Ok(raw) = std::env::var("CORVUS_UPDATE_RESTART_POLICY") { + let trimmed = raw.trim(); + if !trimmed.is_empty() { + if let Some(policy) = normalize_restart_policy(trimmed) { + self.updates.restart_policy = policy; + } else { + tracing::warn!( + "ignoring invalid CORVUS_UPDATE_RESTART_POLICY value: {}", + trimmed + ); + } + } + } } pub fn validate_for_runtime(&self) -> Result<()> { @@ -2947,6 +3043,12 @@ default_temperature = 0.7 fn updates_config_defaults_are_safe_and_enabled() { let updates = UpdateConfig::default(); assert!(updates.enabled); + assert!(!updates.auto_install_enabled); + assert!(updates.channel_visibility_enabled); + assert!(updates.cli_startup_notice_enabled); + assert!(updates.install_method_override.is_none()); + assert_eq!(updates.restart_policy, "prompt"); + assert_eq!(updates.history_max_entries, 200); assert_eq!(updates.check_interval_minutes, 30); assert_eq!(updates.confirmation_ttl_minutes, 30); assert!(updates.notify_destinations.is_empty()); @@ -4506,6 +4608,60 @@ default_model = "legacy-model" std::env::remove_var("CORVUS_SURREALDB_TOKEN"); } + #[test] + fn env_override_updates_policy_fields() { + let _env_guard = env_override_test_guard(); + let mut config = Config::default(); + + std::env::set_var("CORVUS_UPDATES_ENABLED", "false"); + std::env::set_var("CORVUS_UPDATE_AUTO_INSTALL", "true"); + std::env::set_var("CORVUS_UPDATE_CHANNEL_VISIBILITY", "false"); + std::env::set_var("CORVUS_UPDATE_CLI_NOTICE", "false"); + std::env::set_var("CORVUS_UPDATE_METHOD_OVERRIDE", "cargo"); + 
std::env::set_var("CORVUS_UPDATE_RESTART_POLICY", "never"); + + config.apply_env_overrides(); + + assert!(!config.updates.enabled); + assert!(config.updates.auto_install_enabled); + assert!(!config.updates.channel_visibility_enabled); + assert!(!config.updates.cli_startup_notice_enabled); + assert_eq!( + config.updates.install_method_override.as_deref(), + Some("cargo") + ); + assert_eq!(config.updates.restart_policy, "never"); + + std::env::remove_var("CORVUS_UPDATES_ENABLED"); + std::env::remove_var("CORVUS_UPDATE_AUTO_INSTALL"); + std::env::remove_var("CORVUS_UPDATE_CHANNEL_VISIBILITY"); + std::env::remove_var("CORVUS_UPDATE_CLI_NOTICE"); + std::env::remove_var("CORVUS_UPDATE_METHOD_OVERRIDE"); + std::env::remove_var("CORVUS_UPDATE_RESTART_POLICY"); + } + + #[test] + fn env_override_updates_invalid_values_fail_safe() { + let _env_guard = env_override_test_guard(); + let mut config = Config::default(); + config.updates.install_method_override = Some("npm".to_string()); + config.updates.restart_policy = "prompt".to_string(); + + std::env::set_var("CORVUS_UPDATE_METHOD_OVERRIDE", "unknown"); + std::env::set_var("CORVUS_UPDATE_RESTART_POLICY", "invalid"); + + config.apply_env_overrides(); + + assert_eq!( + config.updates.install_method_override.as_deref(), + Some("npm") + ); + assert_eq!(config.updates.restart_policy, "prompt"); + + std::env::remove_var("CORVUS_UPDATE_METHOD_OVERRIDE"); + std::env::remove_var("CORVUS_UPDATE_RESTART_POLICY"); + } + #[test] fn gateway_config_default_values() { let g = GatewayConfig::default(); diff --git a/clients/agent-runtime/src/daemon/mod.rs b/clients/agent-runtime/src/daemon/mod.rs index a98a6cfe6..a6858af18 100755 --- a/clients/agent-runtime/src/daemon/mod.rs +++ b/clients/agent-runtime/src/daemon/mod.rs @@ -104,7 +104,7 @@ pub async fn run(config: Config, host: String, port: u16) -> Result<()> { tracing::info!("Mission mode disabled; mission checkpoint supervisor not started"); } - let updater_started = config.updates.enabled 
&& !crate::update::is_update_check_disabled(); + let updater_started = updater_supervision_enabled(&config); if updater_started { let update_cfg = config.clone(); handles.push(spawn_component_supervisor( @@ -113,7 +113,7 @@ pub async fn run(config: Config, host: String, port: u16) -> Result<()> { max_backoff, move || { let cfg = update_cfg.clone(); - async move { crate::update::run_daemon_update_watcher(cfg).await } + async move { run_daemon_updater_component(cfg).await } }, )); } else { @@ -294,6 +294,53 @@ fn mission_checkpoint_supervision_enabled(config: &Config) -> bool { config.mission.enabled } +fn updater_supervision_enabled(config: &Config) -> bool { + config.updates.enabled && !crate::update::is_update_check_disabled() +} + +fn updater_check_interval(config: &Config) -> Duration { + Duration::from_secs(config.updates.check_interval_minutes.max(1) * 60) +} + +fn should_emit_update_notification( + config: &Config, + status: &crate::update::UpdateStatusView, + last_notified_version: Option<&str>, +) -> bool { + if !config.updates.enabled || !config.updates.channel_visibility_enabled { + return false; + } + + if !status.update_available { + return false; + } + + let Some(latest) = status.latest_version.as_deref() else { + return false; + }; + + last_notified_version != Some(latest) +} + +async fn run_daemon_updater_component(config: Config) -> Result<()> { + if !config.updates.enabled || crate::update::is_update_check_disabled() { + return Ok(()); + } + + let mut last_notified_version: Option = None; + let status = crate::update::run_update_check(&config, env!("CARGO_PKG_VERSION")).await?; + if should_emit_update_notification(&config, &status, last_notified_version.as_deref()) { + last_notified_version = status.latest_version.clone(); + tracing::info!( + latest_version = ?last_notified_version, + "daemon updater canonical status indicates update notification" + ); + } + + let _interval = updater_check_interval(&config); + 
crate::update::run_daemon_update_watcher(config).await +} + #[cfg(test)] mod tests { use super::*; @@ -435,4 +482,87 @@ mod tests { config.mission.enabled = true; assert!(mission_checkpoint_supervision_enabled(&config)); } + + #[test] + fn updater_supervision_follows_update_policy() { + let mut config = Config::default(); + config.updates.enabled = true; + assert!(updater_supervision_enabled(&config)); + + config.updates.enabled = false; + assert!(!updater_supervision_enabled(&config)); + } + + #[test] + fn updater_interval_uses_configured_minutes_with_floor() { + let mut config = Config::default(); + config.updates.check_interval_minutes = 30; + assert_eq!(updater_check_interval(&config), Duration::from_secs(1800)); + + config.updates.check_interval_minutes = 0; + assert_eq!(updater_check_interval(&config), Duration::from_secs(60)); + } + + #[test] + fn updater_notification_dedupes_by_latest_version() { + let config = Config::default(); + let status = crate::update::UpdateStatusView { + current_version: "1.0.0".to_string(), + latest_version: Some("1.1.0".to_string()), + update_available: true, + last_check_at_unix: Some(1), + last_check_outcome: Some("success".to_string()), + effective_install_method: "unknown".to_string(), + detected_install_method: None, + install_method_source: "unknown".to_string(), + policy: crate::update::UpdatePolicyView { + checks_enabled: true, + auto_install_enabled: false, + channel_visibility_enabled: true, + cli_startup_notice_enabled: true, + restart_policy: "prompt".to_string(), + }, + }; + + assert!(should_emit_update_notification(&config, &status, None)); + assert!(!should_emit_update_notification( + &config, + &status, + Some("1.1.0"), + )); + } + + #[test] + fn updater_notification_respects_visibility_policy() { + let mut config = Config::default(); + config.updates.enabled = true; + config.updates.channel_visibility_enabled = false; + let status = crate::update::UpdateStatusView { + current_version: "1.0.0".to_string(), + 
latest_version: Some("1.1.0".to_string()), + update_available: true, + last_check_at_unix: Some(1), + last_check_outcome: Some("success".to_string()), + effective_install_method: "unknown".to_string(), + detected_install_method: None, + install_method_source: "unknown".to_string(), + policy: crate::update::UpdatePolicyView { + checks_enabled: true, + auto_install_enabled: false, + channel_visibility_enabled: false, + cli_startup_notice_enabled: true, + restart_policy: "prompt".to_string(), + }, + }; + + assert!(!should_emit_update_notification(&config, &status, None)); + } + + #[tokio::test] + async fn daemon_updater_component_exits_cleanly_when_updates_disabled() { + let mut config = Config::default(); + config.updates.enabled = false; + let result = run_daemon_updater_component(config).await; + assert!(result.is_ok()); + } } diff --git a/clients/agent-runtime/src/gateway/admin.rs b/clients/agent-runtime/src/gateway/admin.rs index ba431b3ec..8e4f57a44 100644 --- a/clients/agent-runtime/src/gateway/admin.rs +++ b/clients/agent-runtime/src/gateway/admin.rs @@ -1,6 +1,7 @@ use crate::config::Config; use crate::gateway::{self, AppState}; use crate::security::AutonomyLevel; +use crate::update; use axum::{ extract::State, http::{HeaderMap, StatusCode}, @@ -26,6 +27,30 @@ pub struct AdminConfigView { pub web_search: AdminWebSearchView, pub memory: AdminMemoryView, pub browser: AdminBrowserView, + pub updates: AdminUpdatesView, +} + +#[derive(Debug, Clone, serde::Serialize)] +#[allow(clippy::struct_excessive_bools)] +pub struct AdminUpdatesView { + pub enabled: bool, + pub auto_install_enabled: bool, + pub channel_visibility_enabled: bool, + pub cli_startup_notice_enabled: bool, + pub install_method_override: Option, + pub restart_policy: String, + pub status: AdminUpdateStatusView, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct AdminUpdateStatusView { + pub current_version: String, + pub latest_version: Option, + pub update_available: bool, + pub 
last_check_at_unix: Option, + pub last_check_outcome: Option, + pub effective_install_method: String, + pub install_method_source: String, } #[derive(Debug, Clone, serde::Serialize)] @@ -122,6 +147,7 @@ pub struct AdminMemoryView { } #[derive(Debug, Clone, serde::Serialize)] +#[allow(clippy::struct_excessive_bools)] pub struct AdminSurrealMemoryView { pub url: Option, pub namespace: Option, @@ -525,6 +551,37 @@ pub fn admin_config_view(cfg: &Config) -> AdminConfigView { browser: AdminBrowserView { has_computer_use_api_key: has_secret(cfg.browser.computer_use.api_key.as_deref()), }, + updates: { + let status = update::get_update_status(cfg, env!("CARGO_PKG_VERSION")).ok(); + AdminUpdatesView { + enabled: cfg.updates.enabled, + auto_install_enabled: cfg.updates.auto_install_enabled, + channel_visibility_enabled: cfg.updates.channel_visibility_enabled, + cli_startup_notice_enabled: cfg.updates.cli_startup_notice_enabled, + install_method_override: cfg.updates.install_method_override.clone(), + restart_policy: cfg.updates.restart_policy.clone(), + status: AdminUpdateStatusView { + current_version: status + .as_ref() + .map(|view| view.current_version.clone()) + .unwrap_or_else(|| env!("CARGO_PKG_VERSION").to_string()), + latest_version: status.as_ref().and_then(|view| view.latest_version.clone()), + update_available: status.as_ref().is_some_and(|view| view.update_available), + last_check_at_unix: status.as_ref().and_then(|view| view.last_check_at_unix), + last_check_outcome: status + .as_ref() + .and_then(|view| view.last_check_outcome.clone()), + effective_install_method: status + .as_ref() + .map(|view| view.effective_install_method.clone()) + .unwrap_or_else(|| "unknown".to_string()), + install_method_source: status + .as_ref() + .map(|view| view.install_method_source.clone()) + .unwrap_or_else(|| "unknown".to_string()), + }, + } + }, } } @@ -1277,10 +1334,21 @@ mod tests { assert!(serialized.get("web_search").is_some()); assert!(serialized.get("memory").is_some()); 
assert!(serialized.get("browser").is_some()); + assert!(serialized.get("updates").is_some()); assert_eq!( serialized.pointer("/provider/has_api_key"), Some(&serde_json::json!(true)) ); + assert_eq!( + serialized.pointer("/updates/auto_install_enabled"), + Some(&serde_json::json!(false)) + ); + assert!(serialized + .pointer("/updates/status/last_check_outcome") + .is_some()); + assert!(serialized + .pointer("/updates/status/last_check_at_unix") + .is_some()); let text = serialized.to_string(); assert!(!text.contains("secret-key")); assert!(!text.contains("composio-key")); diff --git a/clients/agent-runtime/src/main.rs b/clients/agent-runtime/src/main.rs index 917e88cda..77a705410 100644 --- a/clients/agent-runtime/src/main.rs +++ b/clients/agent-runtime/src/main.rs @@ -225,6 +225,33 @@ enum Commands { #[command(subcommand)] peripheral_command: corvus::PeripheralCommands, }, + + /// Manage runtime updates + Update { + #[command(subcommand)] + update_command: UpdateCommands, + }, +} + +#[derive(Subcommand, Debug)] +enum UpdateCommands { + /// Show update status and effective policy + Status, + /// Force an update check + Check, + /// Run update install transaction + Install, + /// Enable auto-install policy + AutoEnable, + /// Disable auto-install policy + AutoDisable, + /// Show update audit history + History, + /// Confirm a nonce issued by channel update flow + Confirm { + /// One-time update confirmation nonce + nonce: String, + }, } #[derive(Subcommand, Debug)] @@ -702,6 +729,115 @@ async fn handle_cli_command(command: Commands, config: Config) -> Result<()> { Commands::Peripheral { peripheral_command } => { peripherals::handle_command(peripheral_command.clone(), &config) } + + Commands::Update { update_command } => handle_update_command(config, update_command).await, + } +} + +fn print_update_status(view: &update::UpdateStatusView) { + println!("current_version={}", view.current_version); + println!( + "latest_version={}", + 
view.latest_version.as_deref().unwrap_or("unknown") + ); + println!("update_available={}", view.update_available); + println!("effective_install_method={}", view.effective_install_method); + println!("install_method_source={}", view.install_method_source); + println!( + "last_check_at_unix={}", + view.last_check_at_unix + .map_or_else(|| "unknown".to_string(), |value| value.to_string()) + ); + println!( + "last_check_outcome={}", + view.last_check_outcome.as_deref().unwrap_or("unknown") + ); + println!( + "policy.auto_install_enabled={}", + view.policy.auto_install_enabled + ); + println!( + "policy.channel_visibility_enabled={}", + view.policy.channel_visibility_enabled + ); + println!( + "policy.cli_startup_notice_enabled={}", + view.policy.cli_startup_notice_enabled + ); + println!("policy.restart_policy={}", view.policy.restart_policy); +} + +async fn handle_update_command(mut config: Config, command: UpdateCommands) -> Result<()> { + match command { + UpdateCommands::Status => { + let view = update::get_update_status(&config, env!("CARGO_PKG_VERSION"))?; + print_update_status(&view); + Ok(()) + } + UpdateCommands::Check => { + let view = update::run_update_check(&config, env!("CARGO_PKG_VERSION")).await?; + print_update_status(&view); + if view.last_check_outcome.as_deref() == Some("success") { + Ok(()) + } else { + anyhow::bail!("update check failed") + } + } + UpdateCommands::Install => { + let (outcome, message) = + update::run_update_install(&config, env!("CARGO_PKG_VERSION"))?; + println!("{message}"); + match outcome { + update::InstallCommandOutcome::Success => Ok(()), + update::InstallCommandOutcome::NoUpdate => anyhow::bail!("no update available"), + update::InstallCommandOutcome::Blocked => anyhow::bail!("install blocked"), + update::InstallCommandOutcome::Busy => anyhow::bail!("install busy"), + update::InstallCommandOutcome::Failed => anyhow::bail!("install failed"), + } + } + UpdateCommands::AutoEnable => { + update::set_auto_update_policy(&mut 
config, true)?; + println!("auto_install_enabled=true"); + let view = update::get_update_status(&config, env!("CARGO_PKG_VERSION"))?; + println!( + "policy.auto_install_enabled={}", + view.policy.auto_install_enabled + ); + Ok(()) + } + UpdateCommands::AutoDisable => { + update::set_auto_update_policy(&mut config, false)?; + println!("auto_install_enabled=false"); + let view = update::get_update_status(&config, env!("CARGO_PKG_VERSION"))?; + println!( + "policy.auto_install_enabled={}", + view.policy.auto_install_enabled + ); + Ok(()) + } + UpdateCommands::History => { + let events = update::read_update_history(&config)?; + for event in events { + println!( + "{} {} {} {}", + event.timestamp_unix, event.action, event.outcome, event.effective_method + ); + } + Ok(()) + } + UpdateCommands::Confirm { nonce } => { + let (outcome, message) = update::run_update_confirm(&config, &nonce).await?; + println!("{message}"); + match outcome { + update::ConfirmCommandOutcome::Success => Ok(()), + update::ConfirmCommandOutcome::InvalidNonce => { + anyhow::bail!("invalid confirmation nonce") + } + update::ConfirmCommandOutcome::Failed => { + anyhow::bail!("confirmation install failed") + } + } + } } } @@ -1425,6 +1561,7 @@ fn handle_status(auth_service: &auth::AuthService) -> Result<()> { mod tests { use super::*; use clap::CommandFactory; + use clap::Parser; #[test] fn cli_definition_has_no_flag_conflicts() { @@ -1581,4 +1718,66 @@ mod tests { ) })); } + + #[test] + fn update_command_contract_parses_status_check_install() { + let status = Cli::try_parse_from(["corvus", "update", "status"]).unwrap(); + assert!(matches!( + status.command, + Commands::Update { + update_command: UpdateCommands::Status + } + )); + + let check = Cli::try_parse_from(["corvus", "update", "check"]).unwrap(); + assert!(matches!( + check.command, + Commands::Update { + update_command: UpdateCommands::Check + } + )); + + let install = Cli::try_parse_from(["corvus", "update", "install"]).unwrap(); + 
assert!(matches!( + install.command, + Commands::Update { + update_command: UpdateCommands::Install + } + )); + } + + #[test] + fn update_command_contract_parses_policy_toggles_and_history() { + let auto_enable = Cli::try_parse_from(["corvus", "update", "auto-enable"]).unwrap(); + assert!(matches!( + auto_enable.command, + Commands::Update { + update_command: UpdateCommands::AutoEnable + } + )); + + let auto_disable = Cli::try_parse_from(["corvus", "update", "auto-disable"]).unwrap(); + assert!(matches!( + auto_disable.command, + Commands::Update { + update_command: UpdateCommands::AutoDisable + } + )); + + let history = Cli::try_parse_from(["corvus", "update", "history"]).unwrap(); + assert!(matches!( + history.command, + Commands::Update { + update_command: UpdateCommands::History + } + )); + + let confirm = Cli::try_parse_from(["corvus", "update", "confirm", "abc123"]).unwrap(); + assert!(matches!( + confirm.command, + Commands::Update { + update_command: UpdateCommands::Confirm { .. 
} + } + )); + } } diff --git a/clients/agent-runtime/src/service/mod.rs b/clients/agent-runtime/src/service/mod.rs index 145ca283d..54fd47c10 100755 --- a/clients/agent-runtime/src/service/mod.rs +++ b/clients/agent-runtime/src/service/mod.rs @@ -1,4 +1,5 @@ use crate::config::Config; +use crate::update::{InstallState, RestartPolicy}; use anyhow::{Context, Result}; use std::fs; use std::path::PathBuf; @@ -11,6 +12,24 @@ fn windows_task_name() -> &'static str { WINDOWS_TASK_NAME } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RestartDecision { + None, + Prompt, + RestartManagedService, +} + +pub fn restart_decision_for_install_state( + install_state: &InstallState, + policy: RestartPolicy, +) -> RestartDecision { + match crate::update::restart_action_for_install_state(install_state, policy) { + crate::update::RestartAction::None => RestartDecision::None, + crate::update::RestartAction::Prompt => RestartDecision::Prompt, + crate::update::RestartAction::ManagedService => RestartDecision::RestartManagedService, + } +} + pub fn handle_command(command: &crate::ServiceCommands, config: &Config) -> Result<()> { match command { crate::ServiceCommands::Install { linger } => install(config, *linger), @@ -594,4 +613,24 @@ mod tests { .expect_err("non-zero exit should error"); assert!(err.to_string().contains("Command failed")); } + + #[test] + fn restart_decision_respects_policy_for_pending_restart_state() { + let pending = InstallState::InstalledPendingRestart { + version: "1.2.3".to_string(), + installed_at_unix: 1, + }; + assert_eq!( + restart_decision_for_install_state(&pending, RestartPolicy::Never), + RestartDecision::None + ); + assert_eq!( + restart_decision_for_install_state(&pending, RestartPolicy::Prompt), + RestartDecision::Prompt + ); + assert_eq!( + restart_decision_for_install_state(&pending, RestartPolicy::AutoManagedService), + RestartDecision::RestartManagedService + ); + } } diff --git a/clients/agent-runtime/src/update/mod.rs 
b/clients/agent-runtime/src/update/mod.rs index 5aa1abfc9..3b91815ee 100644 --- a/clients/agent-runtime/src/update/mod.rs +++ b/clients/agent-runtime/src/update/mod.rs @@ -8,6 +8,8 @@ use crate::config::Config; use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; +use std::fs::{self, OpenOptions}; +use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::{Arc, OnceLock}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; @@ -29,11 +31,229 @@ const UPDATE_CHECK_DISABLE_ENV: &str = "CORVUS_DISABLE_UPDATE_CHECK"; const INSTALL_SCRIPT_URL: &str = "https://profiletailors.com/install"; const PACKAGE_NAME: &str = "@dallay/corvus"; const CONFIRM_COMMAND_PREFIX: &str = "corvus update confirm"; +const UPDATE_STATE_LOCK_FILE: &str = "update_state.lock"; +const UPDATE_INSTALL_LOCK_FILE: &str = "update_install.lock"; +const UPDATE_HISTORY_FILE: &str = "update_history.jsonl"; const RELEASE_ENDPOINTS: [&str; 2] = [ "https://api.github.com/repos/profiletailors/corvus/releases/latest", "https://api.github.com/repos/dallay/corvus/releases/latest", ]; +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum InstallMethod { + Npm, + Pnpm, + Yarn, + Bun, + Homebrew, + Cargo, + ScriptBinary, + Unknown, +} + +impl InstallMethod { + pub fn as_str(&self) -> &'static str { + match self { + Self::Npm => "npm", + Self::Pnpm => "pnpm", + Self::Yarn => "yarn", + Self::Bun => "bun", + Self::Homebrew => "homebrew", + Self::Cargo => "cargo", + Self::ScriptBinary => "script_binary", + Self::Unknown => "unknown", + } + } +} + +impl std::str::FromStr for InstallMethod { + type Err = anyhow::Error; + + fn from_str(value: &str) -> Result { + match value.trim().to_ascii_lowercase().as_str() { + "npm" => Ok(Self::Npm), + "pnpm" => Ok(Self::Pnpm), + "yarn" => Ok(Self::Yarn), + "bun" => Ok(Self::Bun), + "homebrew" => Ok(Self::Homebrew), + "cargo" => Ok(Self::Cargo), + "script_binary" => 
Ok(Self::ScriptBinary), + "unknown" => Ok(Self::Unknown), + other => anyhow::bail!("unsupported install method: {other}"), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum RestartPolicy { + Never, + Prompt, + AutoManagedService, +} + +impl RestartPolicy { + pub fn as_str(&self) -> &'static str { + match self { + Self::Never => "never", + Self::Prompt => "prompt", + Self::AutoManagedService => "auto_managed_service", + } + } +} + +impl std::str::FromStr for RestartPolicy { + type Err = anyhow::Error; + + fn from_str(value: &str) -> Result { + match value.trim().to_ascii_lowercase().as_str() { + "never" => Ok(Self::Never), + "prompt" => Ok(Self::Prompt), + "auto_managed_service" => Ok(Self::AutoManagedService), + other => anyhow::bail!("unsupported restart policy: {other}"), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum InstallState { + Idle, + Installing { + tx_id: String, + started_at_unix: u64, + }, + InstalledPendingRestart { + version: String, + installed_at_unix: u64, + }, + Failed { + tx_id: String, + failed_at_unix: u64, + reason_code: String, + }, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum CheckOutcome { + Success, + NetworkError, + ParseError, + SourceRejected, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[allow(clippy::struct_excessive_bools)] +pub struct UpdatePolicy { + pub checks_enabled: bool, + pub auto_install_enabled: bool, + pub channel_visibility_enabled: bool, + pub cli_startup_notice_enabled: bool, + pub check_interval_minutes: u64, + pub confirmation_ttl_minutes: u64, + pub install_method_override: Option, + pub restart_policy: RestartPolicy, + pub history_max_entries: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UpdateStateSnapshot { + pub schema_version: u32, + pub current_version: String, + pub latest_version: String, + 
pub update_available: bool, + pub last_check_at_unix: u64, + pub last_check_outcome: CheckOutcome, + pub effective_method: InstallMethod, + pub detected_method: Option, + pub overridden_method: Option, + pub install_state: InstallState, + #[serde(default)] + pending_confirmations: Vec, + #[serde(default)] + notified_conversations: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[allow(clippy::struct_excessive_bools)] +pub struct UpdatePolicyView { + pub checks_enabled: bool, + pub auto_install_enabled: bool, + pub channel_visibility_enabled: bool, + pub cli_startup_notice_enabled: bool, + pub restart_policy: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UpdateStatusView { + pub current_version: String, + pub latest_version: Option, + pub update_available: bool, + pub last_check_at_unix: Option, + pub last_check_outcome: Option, + pub effective_install_method: String, + pub detected_install_method: Option, + pub install_method_source: String, + pub policy: UpdatePolicyView, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UpdateAuditEvent { + pub event_id: String, + pub timestamp_unix: u64, + pub action: String, + pub outcome: String, + pub current_version: String, + pub target_version: Option, + pub effective_method: String, + pub actor: String, + pub reason_code: Option, +} + +#[derive(Debug, Clone)] +pub struct UpdateManager { + workspace_dir: PathBuf, + policy: UpdatePolicy, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum InstallCommandOutcome { + Success, + NoUpdate, + Blocked, + Busy, + Failed, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ConfirmCommandOutcome { + Success, + InvalidNonce, + Failed, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RestartAction { + None, + Prompt, + ManagedService, +} + +pub fn restart_action_for_install_state( + install_state: &InstallState, + policy: RestartPolicy, +) -> RestartAction { + match install_state { + 
InstallState::InstalledPendingRestart { .. } => match policy { + RestartPolicy::Never => RestartAction::None, + RestartPolicy::Prompt => RestartAction::Prompt, + RestartPolicy::AutoManagedService => RestartAction::ManagedService, + }, + _ => RestartAction::None, + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] struct VersionCheckState { latest_version: String, @@ -87,10 +307,372 @@ struct NotificationTarget { #[derive(Debug)] struct UpdateExecutionResult { summary: String, + succeeded: bool, + reason_code: Option, +} + +impl UpdatePolicy { + pub fn from_config(config: &Config) -> Self { + let install_method_override = config + .updates + .install_method_override + .as_deref() + .and_then(|raw| raw.parse::().ok()); + let restart_policy = config + .updates + .restart_policy + .parse::() + .unwrap_or(RestartPolicy::Prompt); + + Self { + checks_enabled: config.updates.enabled, + auto_install_enabled: config.updates.auto_install_enabled, + channel_visibility_enabled: config.updates.channel_visibility_enabled, + cli_startup_notice_enabled: config.updates.cli_startup_notice_enabled, + check_interval_minutes: config.updates.check_interval_minutes, + confirmation_ttl_minutes: config.updates.confirmation_ttl_minutes, + install_method_override, + restart_policy, + history_max_entries: config.updates.history_max_entries.max(1), + } + } +} + +impl UpdateStateSnapshot { + fn initial(current_version: &str, policy: &UpdatePolicy) -> Self { + let detected_method = detect_install_method(); + let (effective_method, overridden_method, _source) = resolve_install_method( + policy.install_method_override.clone(), + detected_method.clone(), + ); + + Self { + schema_version: 2, + current_version: normalize_version(current_version).unwrap_or_else(|| "0.0.0".into()), + latest_version: normalize_version(current_version).unwrap_or_else(|| "0.0.0".into()), + update_available: false, + last_check_at_unix: 0, + last_check_outcome: CheckOutcome::Success, + effective_method, + 
detected_method, + overridden_method, + install_state: InstallState::Idle, + pending_confirmations: Vec::new(), + notified_conversations: Vec::new(), + } + } + + fn to_status_view(&self, policy: &UpdatePolicy) -> UpdateStatusView { + let source = if self.overridden_method.is_some() { + "override" + } else if self.detected_method.is_some() { + "detected" + } else { + "unknown" + }; + UpdateStatusView { + current_version: self.current_version.clone(), + latest_version: Some(self.latest_version.clone()), + update_available: self.update_available, + last_check_at_unix: Some(self.last_check_at_unix), + last_check_outcome: Some(format!("{:?}", self.last_check_outcome).to_ascii_lowercase()), + effective_install_method: self.effective_method.as_str().to_string(), + detected_install_method: self + .detected_method + .as_ref() + .map(|method| method.as_str().to_string()), + install_method_source: source.to_string(), + policy: UpdatePolicyView { + checks_enabled: policy.checks_enabled, + auto_install_enabled: policy.auto_install_enabled, + channel_visibility_enabled: policy.channel_visibility_enabled, + cli_startup_notice_enabled: policy.cli_startup_notice_enabled, + restart_policy: policy.restart_policy.as_str().to_string(), + }, + } + } +} + +impl From for UpdateStateSnapshot { + fn from(value: VersionCheckState) -> Self { + Self { + schema_version: 2, + current_version: env!("CARGO_PKG_VERSION").to_string(), + latest_version: value.latest_version, + update_available: value.update_available, + last_check_at_unix: value.checked_at_unix, + last_check_outcome: CheckOutcome::Success, + effective_method: InstallMethod::Unknown, + detected_method: None, + overridden_method: None, + install_state: InstallState::Idle, + pending_confirmations: value.pending_confirmations, + notified_conversations: value.notified_conversations, + } + } +} + +impl UpdateManager { + pub fn new(config: &Config) -> Self { + Self { + workspace_dir: config.workspace_dir.clone(), + policy: 
UpdatePolicy::from_config(config), + } + } + + pub fn status_sync(&self, current_version: &str) -> Result { + let mut snapshot = load_state_snapshot_sync(&self.workspace_dir)? + .unwrap_or_else(|| UpdateStateSnapshot::initial(current_version, &self.policy)); + let detected_method = detect_install_method(); + let (effective, overridden, _) = resolve_install_method( + self.policy.install_method_override.clone(), + detected_method.clone(), + ); + snapshot.effective_method = effective; + snapshot.detected_method = detected_method; + snapshot.overridden_method = overridden; + Ok(snapshot.to_status_view(&self.policy)) + } + + pub async fn force_check( + &self, + current_version: &str, + actor: &str, + ) -> Result { + let _state_lock = acquire_file_lock(&update_state_lock_path(&self.workspace_dir), 200)?; + let mut snapshot = load_state_snapshot_sync(&self.workspace_dir)? + .unwrap_or_else(|| UpdateStateSnapshot::initial(current_version, &self.policy)); + + let current = normalize_version(current_version) + .ok_or_else(|| anyhow::anyhow!("invalid current version: {current_version}"))?; + + match fetch_latest_release_version().await { + Ok(latest) => { + snapshot.latest_version = latest.clone(); + snapshot.last_check_at_unix = now_unix_secs(); + snapshot.update_available = + compare_semverish(&latest, ¤t).is_some_and(|ordering| ordering.is_gt()); + snapshot.last_check_outcome = CheckOutcome::Success; + } + Err(_) => { + snapshot.last_check_at_unix = now_unix_secs(); + snapshot.last_check_outcome = CheckOutcome::NetworkError; + } + } + + save_state_snapshot_sync(&self.workspace_dir, &snapshot)?; + append_audit_event_sync( + &self.workspace_dir, + &self.policy, + UpdateAuditEvent { + event_id: uuid::Uuid::new_v4().to_string(), + timestamp_unix: now_unix_secs(), + action: "check".to_string(), + outcome: match snapshot.last_check_outcome { + CheckOutcome::Success => "success".to_string(), + _ => "failed".to_string(), + }, + current_version: snapshot.current_version.clone(), + 
target_version: Some(snapshot.latest_version.clone()), + effective_method: snapshot.effective_method.as_str().to_string(), + actor: actor.to_string(), + reason_code: None, + }, + )?; + + Ok(snapshot.to_status_view(&self.policy)) + } + + pub fn set_auto_install_enabled(&self, config: &mut Config, enabled: bool) -> Result<()> { + config.updates.auto_install_enabled = enabled; + config.save() + } + + pub fn install( + &self, + current_version: &str, + actor: &str, + ) -> Result<(InstallCommandOutcome, String)> { + let _install_lock = + match acquire_file_lock(&update_install_lock_path(&self.workspace_dir), 50) { + Ok(lock) => lock, + Err(_) => { + return Ok(( + InstallCommandOutcome::Busy, + "update install busy: another install transaction is active".to_string(), + )); + } + }; + let _state_lock = acquire_file_lock(&update_state_lock_path(&self.workspace_dir), 200)?; + let mut snapshot = load_state_snapshot_sync(&self.workspace_dir)? + .unwrap_or_else(|| UpdateStateSnapshot::initial(current_version, &self.policy)); + + if !snapshot.update_available { + return Ok(( + InstallCommandOutcome::NoUpdate, + "no update available".to_string(), + )); + } + + let detected_method = detect_install_method(); + let (effective, overridden, source) = resolve_install_method( + self.policy.install_method_override.clone(), + detected_method.clone(), + ); + snapshot.effective_method = effective.clone(); + snapshot.detected_method = detected_method; + snapshot.overridden_method = overridden; + + if effective == InstallMethod::Unknown { + snapshot.install_state = InstallState::Failed { + tx_id: uuid::Uuid::new_v4().to_string(), + failed_at_unix: now_unix_secs(), + reason_code: "unsupported_method".to_string(), + }; + save_state_snapshot_sync(&self.workspace_dir, &snapshot)?; + append_audit_event_sync( + &self.workspace_dir, + &self.policy, + UpdateAuditEvent { + event_id: uuid::Uuid::new_v4().to_string(), + timestamp_unix: now_unix_secs(), + action: "install".to_string(), + outcome: 
"failed".to_string(), + current_version: snapshot.current_version.clone(), + target_version: Some(snapshot.latest_version.clone()), + effective_method: "unknown".to_string(), + actor: actor.to_string(), + reason_code: Some("unsupported_method".to_string()), + }, + )?; + return Ok(( + InstallCommandOutcome::Blocked, + "install method unsupported; use manual update instructions".to_string(), + )); + } + + if effective == InstallMethod::ScriptBinary { + let artifact_path = std::env::var("CORVUS_UPDATE_ARTIFACT_PATH").ok(); + let expected_sha = std::env::var("CORVUS_UPDATE_EXPECTED_SHA256").ok(); + let verification_result = match (artifact_path.as_deref(), expected_sha.as_deref()) { + (Some(path), Some(expected)) => verify_sha256_checksum(Path::new(path), expected), + _ => Err(anyhow::anyhow!("missing checksum metadata")), + }; + if let Err(error) = verification_result { + snapshot.install_state = InstallState::Failed { + tx_id: uuid::Uuid::new_v4().to_string(), + failed_at_unix: now_unix_secs(), + reason_code: "verification_failed".to_string(), + }; + save_state_snapshot_sync(&self.workspace_dir, &snapshot)?; + append_audit_event_sync( + &self.workspace_dir, + &self.policy, + UpdateAuditEvent { + event_id: uuid::Uuid::new_v4().to_string(), + timestamp_unix: now_unix_secs(), + action: "verification".to_string(), + outcome: "failed".to_string(), + current_version: snapshot.current_version.clone(), + target_version: Some(snapshot.latest_version.clone()), + effective_method: effective.as_str().to_string(), + actor: actor.to_string(), + reason_code: Some(error.to_string()), + }, + )?; + return Ok(( + InstallCommandOutcome::Blocked, + "install blocked by verification".to_string(), + )); + } + + append_audit_event_sync( + &self.workspace_dir, + &self.policy, + UpdateAuditEvent { + event_id: uuid::Uuid::new_v4().to_string(), + timestamp_unix: now_unix_secs(), + action: "verification".to_string(), + outcome: "success".to_string(), + current_version: 
snapshot.current_version.clone(), + target_version: Some(snapshot.latest_version.clone()), + effective_method: effective.as_str().to_string(), + actor: actor.to_string(), + reason_code: None, + }, + )?; + } + + snapshot.install_state = InstallState::InstalledPendingRestart { + version: snapshot.latest_version.clone(), + installed_at_unix: now_unix_secs(), + }; + save_state_snapshot_sync(&self.workspace_dir, &snapshot)?; + append_audit_event_sync( + &self.workspace_dir, + &self.policy, + UpdateAuditEvent { + event_id: uuid::Uuid::new_v4().to_string(), + timestamp_unix: now_unix_secs(), + action: "install".to_string(), + outcome: "success".to_string(), + current_version: snapshot.current_version.clone(), + target_version: Some(snapshot.latest_version.clone()), + effective_method: effective.as_str().to_string(), + actor: actor.to_string(), + reason_code: Some(format!("source:{source}")), + }, + )?; + + Ok(( + InstallCommandOutcome::Success, + format!( + "update installed to {} via {}", + snapshot.latest_version, + effective.as_str() + ), + )) + } + + pub fn history(&self) -> Result> { + read_update_history_sync(&self.workspace_dir) + } +} + +pub fn get_update_status(config: &Config, current_version: &str) -> Result { + UpdateManager::new(config).status_sync(current_version) +} + +pub async fn run_update_check(config: &Config, current_version: &str) -> Result { + UpdateManager::new(config) + .force_check(current_version, "cli:update-check") + .await +} + +pub fn run_update_install( + config: &Config, + current_version: &str, +) -> Result<(InstallCommandOutcome, String)> { + UpdateManager::new(config).install(current_version, "cli:update-install") +} + +pub async fn run_update_confirm( + config: &Config, + nonce: &str, +) -> Result<(ConfirmCommandOutcome, String)> { + process_update_confirmation(config, nonce, "cli:update-confirm").await +} + +pub fn set_auto_update_policy(config: &mut Config, enabled: bool) -> Result<()> { + 
UpdateManager::new(config).set_auto_install_enabled(config, enabled) +} + +pub fn read_update_history(config: &Config) -> Result> { + UpdateManager::new(config).history() } pub async fn maybe_print_update_notice(config: &Config) { - if is_update_check_disabled() { + if is_update_check_disabled() || !config.updates.cli_startup_notice_enabled { return; } @@ -126,6 +708,63 @@ pub async fn run_daemon_update_watcher(config: Config) -> Result<()> { } } +async fn process_update_confirmation( + config: &Config, + raw_nonce: &str, + actor: &str, +) -> Result<(ConfirmCommandOutcome, String)> { + let nonce = raw_nonce.trim(); + if nonce.is_empty() { + return Ok(( + ConfirmCommandOutcome::InvalidNonce, + "invalid, expired, or already-used update confirmation nonce".to_string(), + )); + } + + let _guard = state_lock().lock().await; + let state_path = version_check_path(&config.workspace_dir); + let mut state = match load_state(&state_path).await? { + Some(state) => state, + None => { + return Ok(( + ConfirmCommandOutcome::InvalidNonce, + "no pending update confirmation was found".to_string(), + )); + } + }; + + prune_pending_confirmations(&mut state.pending_confirmations); + let version = match consume_pending_confirmation(&mut state, nonce, None) { + Ok(version) => version, + Err(_) => { + save_state(&state_path, &state).await?; + return Ok(( + ConfirmCommandOutcome::InvalidNonce, + "invalid, expired, or already-used update confirmation nonce".to_string(), + )); + } + }; + + save_state(&state_path, &state).await?; + + let result = execute_minimal_update_strategy(&version).await; + append_confirmation_audit_event( + &config.workspace_dir, + &UpdatePolicy::from_config(config), + &version, + result.succeeded, + actor, + result.reason_code.as_deref(), + )?; + + let outcome = if result.succeeded { + ConfirmCommandOutcome::Success + } else { + ConfirmCommandOutcome::Failed + }; + Ok((outcome, result.summary)) +} + pub async fn try_handle_channel_update_confirmation( config: &Config, 
msg: &ChannelMessage, @@ -176,18 +815,12 @@ pub async fn try_handle_channel_update_confirmation( return true; } - let nonce_hash = hash_nonce(raw_nonce); - let Some(pending) = state.pending_confirmations.iter_mut().find(|pending| { - !pending.used - && pending.nonce_hash == nonce_hash - && pending.channel.eq_ignore_ascii_case(&msg.channel) - && pending.recipient.eq_ignore_ascii_case(&msg.reply_target) - && pending.expires_at_unix > now_unix_secs() - && pending - .authorized_sender - .as_ref() - .is_none_or(|sender| sender == &msg.sender) - }) else { + let Some(version) = consume_pending_confirmation( + &mut state, + raw_nonce, + Some((&msg.channel, &msg.reply_target, &msg.sender)), + ) + .ok() else { let _ = channel .send(&SendMessage::new( "Invalid, expired, or already-used update confirmation nonce.", @@ -198,9 +831,6 @@ pub async fn try_handle_channel_update_confirmation( return true; }; - pending.used = true; - let version = pending.version.clone(); - if let Err(error) = save_state(&state_path, &state).await { let _ = channel .send(&SendMessage::new( @@ -212,6 +842,14 @@ pub async fn try_handle_channel_update_confirmation( } let result = execute_minimal_update_strategy(&version).await; + let _ = append_confirmation_audit_event( + &config.workspace_dir, + &UpdatePolicy::from_config(config), + &version, + result.succeeded, + "channel:update-confirm", + result.reason_code.as_deref(), + ); let _ = channel .send(&SendMessage::new(result.summary, &msg.reply_target)) .await; @@ -225,7 +863,10 @@ pub async fn maybe_send_opportunistic_update_notice( target_channel: Option<&Arc>, current_version: &str, ) -> Result { - if is_update_check_disabled() || !config.updates.enabled { + if is_update_check_disabled() + || !config.updates.enabled + || !config.updates.channel_visibility_enabled + { return Ok(false); } @@ -456,6 +1097,66 @@ fn hash_nonce(nonce: &str) -> String { hex::encode(digest) } +fn consume_pending_confirmation( + state: &mut VersionCheckState, + raw_nonce: 
&str, + channel_scope: Option<(&str, &str, &str)>, +) -> Result { + let nonce_hash = hash_nonce(raw_nonce); + let Some(pending) = state.pending_confirmations.iter_mut().find(|pending| { + if pending.used + || pending.nonce_hash != nonce_hash + || pending.expires_at_unix <= now_unix_secs() + { + return false; + } + + if let Some((channel, recipient, sender)) = channel_scope { + pending.channel.eq_ignore_ascii_case(channel) + && pending.recipient.eq_ignore_ascii_case(recipient) + && pending + .authorized_sender + .as_ref() + .is_none_or(|authorized| authorized == sender) + } else { + true + } + }) else { + anyhow::bail!("pending confirmation not found") + }; + + pending.used = true; + Ok(pending.version.clone()) +} + +fn append_confirmation_audit_event( + workspace_dir: &Path, + policy: &UpdatePolicy, + target_version: &str, + success: bool, + actor: &str, + reason_code: Option<&str>, +) -> Result<()> { + let detected = detect_install_method(); + let (effective, _, _) = + resolve_install_method(policy.install_method_override.clone(), detected); + append_audit_event_sync( + workspace_dir, + policy, + UpdateAuditEvent { + event_id: uuid::Uuid::new_v4().to_string(), + timestamp_unix: now_unix_secs(), + action: "confirm_install".to_string(), + outcome: if success { "success" } else { "failed" }.to_string(), + current_version: env!("CARGO_PKG_VERSION").to_string(), + target_version: Some(target_version.to_string()), + effective_method: effective.as_str().to_string(), + actor: actor.to_string(), + reason_code: reason_code.map(std::string::ToString::to_string), + }, + ) +} + fn prune_pending_confirmations(confirmations: &mut Vec) { let now = now_unix_secs(); confirmations.retain(|pending| !pending.used && pending.expires_at_unix > now); @@ -1017,6 +1718,8 @@ async fn execute_minimal_update_strategy(target_version: &str) -> UpdateExecutio .join(" "), target_version ), + succeeded: true, + reason_code: None, }; } @@ -1035,6 +1738,8 @@ async fn 
execute_minimal_update_strategy(target_version: &str) -> UpdateExecutio - curl -fsSL {INSTALL_SCRIPT_URL} | bash\n\ Then restart the daemon/service." ), + succeeded: false, + reason_code: Some("no_supported_runtime_installer".to_string()), } } @@ -1108,6 +1813,346 @@ fn version_check_path(workspace_dir: &Path) -> PathBuf { workspace_dir.join("state").join(VERSION_CHECK_FILE) } +fn update_state_lock_path(workspace_dir: &Path) -> PathBuf { + workspace_dir.join("state").join(UPDATE_STATE_LOCK_FILE) +} + +fn update_install_lock_path(workspace_dir: &Path) -> PathBuf { + workspace_dir.join("state").join(UPDATE_INSTALL_LOCK_FILE) +} + +fn update_history_path(workspace_dir: &Path) -> PathBuf { + workspace_dir.join("state").join(UPDATE_HISTORY_FILE) +} + +struct FileLockGuard { + path: PathBuf, +} + +impl Drop for FileLockGuard { + fn drop(&mut self) { + let _ = fs::remove_file(&self.path); + } +} + +fn acquire_file_lock(path: &Path, timeout_ms: u64) -> Result { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).with_context(|| { + format!( + "failed to create lock parent directory {}", + parent.display() + ) + })?; + } + + let started = std::time::Instant::now(); + loop { + match OpenOptions::new().create_new(true).write(true).open(path) { + Ok(mut file) => { + file.write_all(std::process::id().to_string().as_bytes())?; + file.sync_all()?; + return Ok(FileLockGuard { + path: path.to_path_buf(), + }); + } + Err(error) if error.kind() == std::io::ErrorKind::AlreadyExists => { + if started.elapsed() >= Duration::from_millis(timeout_ms) { + anyhow::bail!("lock busy: {}", path.display()); + } + std::thread::sleep(Duration::from_millis(10)); + } + Err(error) => { + return Err(error) + .with_context(|| format!("failed to acquire lock {}", path.display())); + } + } + } +} + +fn resolve_install_method( + override_method: Option, + detected_method: Option, +) -> (InstallMethod, Option, &'static str) { + if let Some(method) = override_method { + return 
(method.clone(), Some(method), "override"); + } + if let Some(method) = detected_method { + return (method, None, "detected"); + } + (InstallMethod::Unknown, None, "unknown") +} + +#[derive(Debug, Clone)] +struct InstallDetectionContext { + current_exe: Option, + npm_user_agent: Option, + cargo_home: Option, + home_dir: Option, +} + +impl InstallDetectionContext { + fn from_runtime() -> Self { + Self { + current_exe: std::env::current_exe().ok(), + npm_user_agent: std::env::var("npm_config_user_agent").ok(), + cargo_home: std::env::var_os("CARGO_HOME").map(PathBuf::from), + home_dir: std::env::var_os("HOME").map(PathBuf::from), + } + } +} + +fn detect_install_method() -> Option { + if let Ok(test_override) = std::env::var("CORVUS_TEST_INSTALL_METHOD") { + return test_override.parse::().ok(); + } + + let context = InstallDetectionContext::from_runtime(); + detect_install_method_with_context(&context) +} + +fn detect_install_method_with_context(context: &InstallDetectionContext) -> Option { + if let Some(method) = detect_install_method_from_user_agent(context.npm_user_agent.as_deref()) { + return Some(method); + } + + let mut candidates = Vec::new(); + if let Some(exe) = context.current_exe.as_ref() { + candidates.push(exe.clone()); + if let Ok(target) = fs::read_link(exe) { + let resolved = if target.is_absolute() { + target + } else { + exe.parent() + .map_or(target.clone(), |parent| parent.join(target)) + }; + candidates.push(resolved); + } + } + + for candidate in &candidates { + if let Some(method) = detect_install_method_from_path(candidate, context) { + return Some(method); + } + } + + None +} + +fn detect_install_method_from_user_agent(user_agent: Option<&str>) -> Option { + let normalized = user_agent?.trim().to_ascii_lowercase(); + if normalized.starts_with("pnpm/") { + return Some(InstallMethod::Pnpm); + } + if normalized.starts_with("yarn/") { + return Some(InstallMethod::Yarn); + } + if normalized.starts_with("bun/") { + return Some(InstallMethod::Bun); 
+ } + if normalized.starts_with("npm/") { + return Some(InstallMethod::Npm); + } + None +} + +fn detect_install_method_from_path( + executable_path: &Path, + context: &InstallDetectionContext, +) -> Option { + let normalized = executable_path + .to_string_lossy() + .replace('\\', "/") + .to_ascii_lowercase(); + + if normalized.contains("/cellar/") || normalized.contains("/homebrew/") { + return Some(InstallMethod::Homebrew); + } + + if is_cargo_install_path(executable_path, context) || normalized.contains("/.cargo/bin/") { + return Some(InstallMethod::Cargo); + } + + if normalized.contains("/.pnpm/") + || normalized.contains("/pnpm/global/") + || normalized.contains("/share/pnpm/") + { + return Some(InstallMethod::Pnpm); + } + + if normalized.contains("/.yarn/") || normalized.contains("/yarn/global/") { + return Some(InstallMethod::Yarn); + } + + if normalized.contains("/.bun/") || normalized.contains("/bun/") { + return Some(InstallMethod::Bun); + } + + if normalized.contains("/node_modules/.bin/") || normalized.contains("/lib/node_modules/") { + return Some(InstallMethod::Npm); + } + + let stem = executable_path + .file_stem() + .and_then(|v| v.to_str()) + .unwrap_or_default() + .to_ascii_lowercase(); + if stem == "corvus" + && (normalized.contains("/usr/local/bin/") + || normalized.contains("/usr/bin/") + || normalized.contains("/opt/bin/") + || normalized.contains("/opt/local/bin/") + || normalized.ends_with("/corvus")) + { + return Some(InstallMethod::ScriptBinary); + } + + None +} + +fn is_cargo_install_path(path: &Path, context: &InstallDetectionContext) -> bool { + if let Some(cargo_home) = context.cargo_home.as_ref() { + let bin = cargo_home.join("bin"); + if path.starts_with(&bin) { + return true; + } + } + + if let Some(home_dir) = context.home_dir.as_ref() { + let default_cargo_bin = home_dir.join(".cargo").join("bin"); + if path.starts_with(default_cargo_bin) { + return true; + } + } + + false +} + +fn save_state_snapshot_sync(workspace_dir: &Path, 
snapshot: &UpdateStateSnapshot) -> Result<()> { + let path = version_check_path(workspace_dir); + let body = + serde_json::to_vec_pretty(snapshot).context("failed to serialize update snapshot")?; + atomic_write_sync(&path, &body) +} + +fn atomic_write_sync(path: &Path, body: &[u8]) -> Result<()> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .with_context(|| format!("failed to create directory {}", parent.display()))?; + } + + let temp_path = path.with_extension(format!( + "tmp.{}.{}", + std::process::id(), + uuid::Uuid::new_v4() + )); + + let mut temp_file = OpenOptions::new() + .create_new(true) + .write(true) + .open(&temp_path) + .with_context(|| format!("failed to create temp file {}", temp_path.display()))?; + temp_file.write_all(body)?; + temp_file.sync_all()?; + drop(temp_file); + + fs::rename(&temp_path, path) + .with_context(|| format!("failed to atomically replace {}", path.display()))?; + sync_directory_sync( + path.parent() + .ok_or_else(|| anyhow::anyhow!("state path missing parent"))?, + )?; + Ok(()) +} + +fn sync_directory_sync(path: &Path) -> Result<()> { + #[cfg(unix)] + { + let directory = fs::File::open(path) + .with_context(|| format!("failed to open directory {}", path.display()))?; + directory.sync_all()?; + } + #[cfg(not(unix))] + { + let _ = path; + } + Ok(()) +} + +fn load_state_snapshot_sync(workspace_dir: &Path) -> Result> { + let path = version_check_path(workspace_dir); + if !path.exists() { + return Ok(None); + } + let raw = fs::read_to_string(&path) + .with_context(|| format!("failed to read update snapshot at {}", path.display()))?; + + if let Ok(snapshot) = serde_json::from_str::(&raw) { + return Ok(Some(snapshot)); + } + if let Ok(legacy) = serde_json::from_str::(&raw) { + return Ok(Some(legacy.into())); + } + anyhow::bail!("failed to parse update snapshot") +} + +fn append_audit_event_sync( + workspace_dir: &Path, + policy: &UpdatePolicy, + event: UpdateAuditEvent, +) -> Result<()> { + let history_path = 
update_history_path(workspace_dir); + if let Some(parent) = history_path.parent() { + fs::create_dir_all(parent)?; + } + + let mut events = read_update_history_sync(workspace_dir).unwrap_or_default(); + events.push(event); + let max_entries = policy.history_max_entries.max(1) as usize; + if events.len() > max_entries { + let drain = events.len() - max_entries; + events.drain(0..drain); + } + let mut payload = Vec::new(); + for item in &events { + payload.extend(serde_json::to_vec(item)?); + payload.push(b'\n'); + } + atomic_write_sync(&history_path, &payload) +} + +fn read_update_history_sync(workspace_dir: &Path) -> Result> { + let history_path = update_history_path(workspace_dir); + if !history_path.exists() { + return Ok(Vec::new()); + } + let raw = fs::read_to_string(&history_path)?; + let mut events = Vec::new(); + for line in raw.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + if let Ok(event) = serde_json::from_str::(trimmed) { + events.push(event); + } + } + Ok(events) +} + +fn verify_sha256_checksum(path: &Path, expected_hex: &str) -> Result<()> { + let expected = expected_hex.trim().to_ascii_lowercase(); + if expected.is_empty() { + anyhow::bail!("missing checksum metadata") + } + let bytes = + fs::read(path).with_context(|| format!("failed to read artifact {}", path.display()))?; + let actual = hex::encode(Sha256::digest(bytes)); + if actual != expected { + anyhow::bail!("digest mismatch") + } + Ok(()) +} + fn now_unix_secs() -> u64 { SystemTime::now() .duration_since(UNIX_EPOCH) @@ -1129,25 +2174,25 @@ async fn load_state(path: &Path) -> Result> { let raw = tokio::fs::read_to_string(path) .await .with_context(|| format!("failed to read version check state at {}", path.display()))?; - let state = serde_json::from_str::(&raw) + if let Ok(state) = serde_json::from_str::(&raw) { + return Ok(Some(state)); + } + let snapshot = serde_json::from_str::(&raw) .context("failed to parse version check state")?; - Ok(Some(state)) + 
Ok(Some(VersionCheckState { + latest_version: snapshot.latest_version, + checked_at_unix: snapshot.last_check_at_unix, + update_available: snapshot.update_available, + last_notified_version: None, + pending_confirmations: snapshot.pending_confirmations, + notified_conversations: snapshot.notified_conversations, + })) } +#[allow(clippy::unused_async)] async fn save_state(path: &Path, state: &VersionCheckState) -> Result<()> { - if let Some(parent) = path.parent() { - tokio::fs::create_dir_all(parent).await.with_context(|| { - format!( - "failed to create version check state directory {}", - parent.display() - ) - })?; - } - let body = serde_json::to_vec_pretty(state).context("failed to serialize version state")?; - tokio::fs::write(path, body) - .await - .with_context(|| format!("failed to write version check state at {}", path.display())) + atomic_write_sync(path, &body) } async fn fetch_latest_release_version() -> Result { @@ -1658,4 +2703,292 @@ mod tests { assert!(!sent.unwrap()); assert!(channel_impl.sent_messages.lock().await.is_empty()); } + + #[test] + fn install_method_resolution_prefers_override_then_detected_then_unknown() { + let (effective, overridden, source) = + resolve_install_method(Some(InstallMethod::Cargo), Some(InstallMethod::Npm)); + assert_eq!(effective, InstallMethod::Cargo); + assert_eq!(overridden, Some(InstallMethod::Cargo)); + assert_eq!(source, "override"); + + let (effective, overridden, source) = + resolve_install_method(None, Some(InstallMethod::Npm)); + assert_eq!(effective, InstallMethod::Npm); + assert_eq!(overridden, None); + assert_eq!(source, "detected"); + + let (effective, overridden, source) = resolve_install_method(None, None); + assert_eq!(effective, InstallMethod::Unknown); + assert_eq!(overridden, None); + assert_eq!(source, "unknown"); + } + + #[test] + fn install_method_detection_matrix_covers_supported_runtime_patterns() { + let context = InstallDetectionContext { + current_exe: None, + npm_user_agent: Some("pnpm/9.0.0 
npm/? node/?".to_string()), + cargo_home: None, + home_dir: None, + }; + assert_eq!( + detect_install_method_with_context(&context), + Some(InstallMethod::Pnpm) + ); + + let context = InstallDetectionContext { + current_exe: Some(PathBuf::from( + "/opt/homebrew/Cellar/corvus/1.2.3/bin/corvus", + )), + npm_user_agent: None, + cargo_home: None, + home_dir: None, + }; + assert_eq!( + detect_install_method_with_context(&context), + Some(InstallMethod::Homebrew) + ); + + let context = InstallDetectionContext { + current_exe: Some(PathBuf::from("/Users/dev/.cargo/bin/corvus")), + npm_user_agent: None, + cargo_home: None, + home_dir: Some(PathBuf::from("/Users/dev")), + }; + assert_eq!( + detect_install_method_with_context(&context), + Some(InstallMethod::Cargo) + ); + + let context = InstallDetectionContext { + current_exe: Some(PathBuf::from("/Users/dev/.bun/bin/corvus")), + npm_user_agent: None, + cargo_home: None, + home_dir: None, + }; + assert_eq!( + detect_install_method_with_context(&context), + Some(InstallMethod::Bun) + ); + + let context = InstallDetectionContext { + current_exe: Some(PathBuf::from( + "/Users/dev/.local/share/pnpm/global/5/node_modules/.bin/corvus", + )), + npm_user_agent: None, + cargo_home: None, + home_dir: None, + }; + assert_eq!( + detect_install_method_with_context(&context), + Some(InstallMethod::Pnpm) + ); + + let context = InstallDetectionContext { + current_exe: Some(PathBuf::from("/Users/dev/.yarn/bin/corvus")), + npm_user_agent: None, + cargo_home: None, + home_dir: None, + }; + assert_eq!( + detect_install_method_with_context(&context), + Some(InstallMethod::Yarn) + ); + + let context = InstallDetectionContext { + current_exe: Some(PathBuf::from( + "/usr/local/lib/node_modules/@dallay/corvus/bin/corvus.js", + )), + npm_user_agent: None, + cargo_home: None, + home_dir: None, + }; + assert_eq!( + detect_install_method_with_context(&context), + Some(InstallMethod::Npm) + ); + + let context = InstallDetectionContext { + current_exe: 
Some(PathBuf::from("/usr/local/bin/corvus")), + npm_user_agent: None, + cargo_home: None, + home_dir: None, + }; + assert_eq!( + detect_install_method_with_context(&context), + Some(InstallMethod::ScriptBinary) + ); + } + + #[test] + fn consume_pending_confirmation_honors_scope_and_marks_nonce_used() { + let nonce = "nonce-abc"; + let mut state = VersionCheckState { + latest_version: "1.2.3".to_string(), + checked_at_unix: now_unix_secs(), + update_available: true, + last_notified_version: None, + pending_confirmations: vec![PendingConfirmation { + version: "1.2.3".to_string(), + channel: "telegram".to_string(), + recipient: "chat-1".to_string(), + authorized_sender: Some("sender-1".to_string()), + nonce_hash: hash_nonce(nonce), + expires_at_unix: now_unix_secs() + 60, + used: false, + }], + notified_conversations: Vec::new(), + }; + + let version = consume_pending_confirmation( + &mut state, + nonce, + Some(("telegram", "chat-1", "sender-1")), + ) + .unwrap(); + assert_eq!(version, "1.2.3"); + assert!(state.pending_confirmations[0].used); + } + + #[test] + fn update_manager_install_returns_busy_when_install_lock_held() { + let dir = tempfile::tempdir().unwrap(); + let mut cfg = Config::default(); + cfg.workspace_dir = dir.path().to_path_buf(); + + let lock_path = update_install_lock_path(&cfg.workspace_dir); + let _held = acquire_file_lock(&lock_path, 10).unwrap(); + + let manager = UpdateManager::new(&cfg); + let (outcome, message) = manager.install("1.0.0", "test").unwrap(); + assert_eq!(outcome, InstallCommandOutcome::Busy); + assert!(message.contains("busy")); + } + + #[test] + fn load_snapshot_ignores_partial_temp_file_and_keeps_valid_state() { + let dir = tempfile::tempdir().unwrap(); + let workspace = dir.path(); + let snapshot = UpdateStateSnapshot::initial( + "1.0.0", + &UpdatePolicy { + checks_enabled: true, + auto_install_enabled: false, + channel_visibility_enabled: true, + cli_startup_notice_enabled: true, + check_interval_minutes: 30, + 
confirmation_ttl_minutes: 30, + install_method_override: None, + restart_policy: RestartPolicy::Prompt, + history_max_entries: 200, + }, + ); + save_state_snapshot_sync(workspace, &snapshot).unwrap(); + + let temp = version_check_path(workspace).with_extension("tmp.partial"); + if let Some(parent) = temp.parent() { + fs::create_dir_all(parent).unwrap(); + } + fs::write(&temp, "{\"broken\":").unwrap(); + + let loaded = load_state_snapshot_sync(workspace).unwrap().unwrap(); + assert_eq!(loaded.current_version, snapshot.current_version); + } + + #[test] + fn verification_fails_closed_on_mismatch_and_audit_history_records_event() { + let dir = tempfile::tempdir().unwrap(); + let artifact_path = dir.path().join("artifact.bin"); + fs::write(&artifact_path, b"v1").unwrap(); + + let err = verify_sha256_checksum(&artifact_path, "deadbeef").unwrap_err(); + assert!(err.to_string().contains("digest mismatch")); + + let policy = UpdatePolicy { + checks_enabled: true, + auto_install_enabled: false, + channel_visibility_enabled: true, + cli_startup_notice_enabled: true, + check_interval_minutes: 30, + confirmation_ttl_minutes: 30, + install_method_override: None, + restart_policy: RestartPolicy::Prompt, + history_max_entries: 10, + }; + append_audit_event_sync( + dir.path(), + &policy, + UpdateAuditEvent { + event_id: "event-1".to_string(), + timestamp_unix: 1, + action: "verification".to_string(), + outcome: "failed".to_string(), + current_version: "1.0.0".to_string(), + target_version: Some("1.0.1".to_string()), + effective_method: "script_binary".to_string(), + actor: "test".to_string(), + reason_code: Some("digest mismatch".to_string()), + }, + ) + .unwrap(); + + let history = read_update_history_sync(dir.path()).unwrap(); + assert_eq!(history.len(), 1); + assert_eq!(history[0].action, "verification"); + assert_eq!(history[0].outcome, "failed"); + } + + #[test] + fn verification_success_allows_activation_and_records_success_audit_events() { + let dir = 
tempfile::tempdir().unwrap(); + let mut cfg = Config::default(); + cfg.workspace_dir = dir.path().to_path_buf(); + cfg.updates.install_method_override = Some("script_binary".to_string()); + + let policy = UpdatePolicy::from_config(&cfg); + let mut snapshot = UpdateStateSnapshot::initial("1.0.0", &policy); + snapshot.latest_version = "1.0.1".to_string(); + snapshot.update_available = true; + save_state_snapshot_sync(&cfg.workspace_dir, &snapshot).unwrap(); + + let artifact_path = dir.path().join("artifact-ok.bin"); + fs::write(&artifact_path, b"verified-artifact").unwrap(); + let digest = Sha256::digest(b"verified-artifact"); + let expected_sha = hex::encode(digest); + + unsafe { + std::env::set_var("CORVUS_UPDATE_ARTIFACT_PATH", artifact_path.as_os_str()); + std::env::set_var("CORVUS_UPDATE_EXPECTED_SHA256", &expected_sha); + } + + let manager = UpdateManager::new(&cfg); + let (outcome, message) = manager + .install("1.0.0", "test-verification-success") + .unwrap(); + + unsafe { + std::env::remove_var("CORVUS_UPDATE_ARTIFACT_PATH"); + std::env::remove_var("CORVUS_UPDATE_EXPECTED_SHA256"); + } + + assert_eq!(outcome, InstallCommandOutcome::Success); + assert!(message.contains("update installed")); + + let loaded = load_state_snapshot_sync(&cfg.workspace_dir) + .unwrap() + .unwrap(); + assert!(matches!( + loaded.install_state, + InstallState::InstalledPendingRestart { .. 
} + )); + + let history = read_update_history_sync(&cfg.workspace_dir).unwrap(); + assert!(history + .iter() + .any(|event| event.action == "verification" && event.outcome == "success")); + assert!(history + .iter() + .any(|event| event.action == "install" && event.outcome == "success")); + } } diff --git a/clients/agent-runtime/tests/admin_config_api_integration.rs b/clients/agent-runtime/tests/admin_config_api_integration.rs index 9dfe18cbe..f922295e8 100644 --- a/clients/agent-runtime/tests/admin_config_api_integration.rs +++ b/clients/agent-runtime/tests/admin_config_api_integration.rs @@ -90,9 +90,11 @@ impl Memory for IntegrationMemory { fn temp_config() -> Config { let root = std::env::temp_dir().join(format!("corvus-admin-config-{}", uuid::Uuid::new_v4())); std::fs::create_dir_all(&root).expect("create temp root"); - let mut config = Config::default(); - config.config_path = root.join("config.toml"); - config.workspace_dir = root.join("workspace"); + let config = Config { + config_path: root.join("config.toml"), + workspace_dir: root.join("workspace"), + ..Config::default() + }; std::fs::create_dir_all(&config.workspace_dir).expect("create workspace"); config } @@ -160,6 +162,19 @@ async fn get_admin_config_redacts_secrets() { body.pointer("/config/channels/webhook/has_secret"), Some(&serde_json::json!(true)) ); + assert_eq!( + body.pointer("/config/updates/auto_install_enabled"), + Some(&serde_json::json!(false)) + ); + assert!(body + .pointer("/config/updates/status/current_version") + .is_some()); + assert!(body + .pointer("/config/updates/status/last_check_outcome") + .is_some()); + assert!(body + .pointer("/config/updates/status/last_check_at_unix") + .is_some()); let text = body.to_string(); assert!(!text.contains("top-secret")); } diff --git a/clients/agent-runtime/tests/update_system_integration.rs b/clients/agent-runtime/tests/update_system_integration.rs new file mode 100644 index 000000000..66edd484b --- /dev/null +++ 
b/clients/agent-runtime/tests/update_system_integration.rs @@ -0,0 +1,206 @@ +use corvus::{config::Config, gateway::admin, update}; +use sha2::{Digest, Sha256}; +use std::process::Command; + +fn run_corvus(workspace: &std::path::Path, args: &[&str]) -> std::process::Output { + let mut command = Command::new(env!("CARGO_BIN_EXE_corvus")); + command + .args(args) + .env("CORVUS_WORKSPACE", workspace) + .env("CORVUS_DISABLE_UPDATE_CHECK", "1"); + command.output().expect("corvus command should execute") +} + +fn stdout_text(output: &std::process::Output) -> String { + String::from_utf8_lossy(&output.stdout).to_string() +} + +fn stderr_text(output: &std::process::Output) -> String { + String::from_utf8_lossy(&output.stderr).to_string() +} + +fn make_workspace() -> tempfile::TempDir { + let dir = tempfile::tempdir().expect("temp workspace"); + std::fs::create_dir_all(dir.path().join("workspace").join("state")) + .expect("workspace state dir"); + dir +} + +fn hash_nonce(nonce: &str) -> String { + let digest = Sha256::digest(nonce.as_bytes()); + hex::encode(digest) +} + +#[test] +fn update_help_lists_full_command_contract() { + let workspace = make_workspace(); + let output = run_corvus(workspace.path(), &["update", "--help"]); + assert!(output.status.success()); + let stdout = stdout_text(&output); + assert!(stdout.contains("status")); + assert!(stdout.contains("check")); + assert!(stdout.contains("install")); + assert!(stdout.contains("auto-enable")); + assert!(stdout.contains("auto-disable")); + assert!(stdout.contains("history")); + assert!(stdout.contains("confirm")); +} + +#[test] +fn update_status_and_policy_toggles_are_visible_across_commands() { + let workspace = make_workspace(); + + let enable = run_corvus(workspace.path(), &["update", "auto-enable"]); + assert!(enable.status.success(), "{}", stderr_text(&enable)); + + let status_enabled = run_corvus(workspace.path(), &["update", "status"]); + assert!( + status_enabled.status.success(), + "{}", + 
stderr_text(&status_enabled) + ); + let stdout_enabled = stdout_text(&status_enabled); + assert!(stdout_enabled.contains("policy.auto_install_enabled=true")); + + let disable = run_corvus(workspace.path(), &["update", "auto-disable"]); + assert!(disable.status.success(), "{}", stderr_text(&disable)); + + let status_disabled = run_corvus(workspace.path(), &["update", "status"]); + assert!( + status_disabled.status.success(), + "{}", + stderr_text(&status_disabled) + ); + let stdout_disabled = stdout_text(&status_disabled); + assert!(stdout_disabled.contains("policy.auto_install_enabled=false")); +} + +#[test] +fn update_install_reports_busy_when_lock_is_held() { + let workspace = make_workspace(); + let lock_path = workspace + .path() + .join("workspace") + .join("state") + .join("update_install.lock"); + std::fs::write(&lock_path, b"lock-holder").expect("create install lock"); + + let output = run_corvus(workspace.path(), &["update", "install"]); + assert!(!output.status.success()); + let combined = format!("{}\n{}", stdout_text(&output), stderr_text(&output)); + assert!(combined.contains("busy")); +} + +#[test] +fn update_check_and_history_commands_are_script_stable() { + let workspace = make_workspace(); + + let check = run_corvus(workspace.path(), &["update", "check"]); + let check_stdout = stdout_text(&check); + assert!( + check_stdout.contains("current_version=") || check_stdout.contains("latest_version="), + "expected deterministic check output, got: {}", + check_stdout + ); + + let history = run_corvus(workspace.path(), &["update", "history"]); + assert!(history.status.success(), "{}", stderr_text(&history)); +} + +#[test] +fn update_confirm_reports_deterministic_failure_for_unknown_nonce() { + let workspace = make_workspace(); + let output = run_corvus(workspace.path(), &["update", "confirm", "missing-nonce"]); + assert!(!output.status.success()); + let combined = format!("{}\n{}", stdout_text(&output), stderr_text(&output)); + 
assert!(combined.contains("invalid")); + assert!(combined.contains("nonce")); +} + +#[test] +fn update_confirm_consumes_nonce_and_records_history_event() { + let workspace = make_workspace(); + let state_path = workspace + .path() + .join("workspace") + .join("state") + .join("version_check.json"); + let nonce = "nonce-confirm-1"; + let state = serde_json::json!({ + "latest_version": "9.9.9", + "checked_at_unix": 1, + "update_available": true, + "last_notified_version": "9.9.9", + "pending_confirmations": [ + { + "version": "9.9.9", + "channel": "telegram", + "recipient": "chat-1", + "authorized_sender": "sender-1", + "nonce_hash": hash_nonce(nonce), + "expires_at_unix": 4102444800u64, + "used": false + } + ], + "notified_conversations": [] + }); + std::fs::write( + &state_path, + serde_json::to_vec_pretty(&state).expect("serialize state"), + ) + .expect("write state file"); + + let _confirm = run_corvus(workspace.path(), &["update", "confirm", nonce]); + + let history = run_corvus(workspace.path(), &["update", "history"]); + assert!(history.status.success(), "{}", stderr_text(&history)); + let history_stdout = stdout_text(&history); + assert!(history_stdout.contains("confirm_install")); + + let confirm_reuse = run_corvus(workspace.path(), &["update", "confirm", nonce]); + assert!(!confirm_reuse.status.success()); + let combined = format!( + "{}\n{}", + stdout_text(&confirm_reuse), + stderr_text(&confirm_reuse) + ); + assert!(combined.contains("invalid")); +} + +#[test] +fn cli_and_admin_surfaces_share_update_status_facts() { + let workspace = make_workspace(); + let config = Config { + workspace_dir: workspace.path().to_path_buf(), + config_path: workspace.path().join("config.toml"), + ..Config::default() + }; + + let cli_view = update::get_update_status(&config, env!("CARGO_PKG_VERSION")).expect("status"); + let admin_view = admin::admin_config_view(&config); + + assert_eq!( + admin_view.updates.status.current_version, + cli_view.current_version + ); + 
assert_eq!( + admin_view.updates.status.latest_version, + cli_view.latest_version + ); + assert_eq!( + admin_view.updates.status.update_available, + cli_view.update_available + ); + assert_eq!( + admin_view.updates.status.last_check_outcome, + cli_view.last_check_outcome + ); + assert_eq!( + admin_view.updates.status.last_check_at_unix, + cli_view.last_check_at_unix + ); + assert_eq!( + admin_view.updates.status.effective_install_method, + cli_view.effective_install_method + ); +} diff --git a/clients/web/apps/dashboard/src/App.vue b/clients/web/apps/dashboard/src/App.vue index 898af32e1..ce540e287 100644 --- a/clients/web/apps/dashboard/src/App.vue +++ b/clients/web/apps/dashboard/src/App.vue @@ -1,16 +1,6 @@ diff --git a/clients/web/apps/dashboard/src/components/config/GatewaySettings.spec.ts b/clients/web/apps/dashboard/src/components/config/GatewaySettings.spec.ts index 8fb13406b..c8b429d79 100644 --- a/clients/web/apps/dashboard/src/components/config/GatewaySettings.spec.ts +++ b/clients/web/apps/dashboard/src/components/config/GatewaySettings.spec.ts @@ -1,6 +1,6 @@ import { mount } from "@vue/test-utils"; -import { createI18n } from "vue-i18n"; import { describe, expect, it } from "vitest"; +import { createI18n } from "vue-i18n"; import GatewaySettings from "@/components/config/GatewaySettings.vue"; import { i18nConfig } from "@/i18n"; diff --git a/clients/web/apps/dashboard/src/components/config/GatewaySettings.vue b/clients/web/apps/dashboard/src/components/config/GatewaySettings.vue index 2e1fe1298..2cb9982ea 100644 --- a/clients/web/apps/dashboard/src/components/config/GatewaySettings.vue +++ b/clients/web/apps/dashboard/src/components/config/GatewaySettings.vue @@ -1,9 +1,6 @@ diff --git a/clients/web/apps/dashboard/src/components/config/GatewaySettings.vue b/clients/web/apps/dashboard/src/components/config/GatewaySettings.vue index 2cb9982ea..cac6f749f 100644 --- a/clients/web/apps/dashboard/src/components/config/GatewaySettings.vue +++ 
b/clients/web/apps/dashboard/src/components/config/GatewaySettings.vue @@ -1,4 +1,6 @@