Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 175 additions & 0 deletions .github/workflows/load-precheck.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
name: Load — Precheck baseline

on:
  schedule:
    # Weekly, Mondays 07:00 UTC.
    - cron: '0 7 * * 1'
  workflow_dispatch:
    inputs:
      commit_baseline:
        description: 'Commit the run as a new baseline under load/baselines/'
        type: boolean
        default: false
      precheck_ref:
        description: 'governs-ai/precheck git ref to build from'
        required: false
        default: 'main'

# contents: write for pushing the baseline branch; pull-requests: write for `gh pr create`.
permissions:
  contents: write
  pull-requests: write

jobs:
  load-precheck:
    name: k6 load test (ramp 10→1000 RPS over 5m)
    runs-on: ubuntu-latest
    timeout-minutes: 30

    services:
      postgres:
        image: pgvector/pgvector:pg15
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: governs_ci
        ports:
          # Quoted: digit:digit values are a classic YAML 1.1 typing trap.
          - "5432:5432"
        options: >-
          --health-cmd "pg_isready -U postgres"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 10
      redis:
        image: redis:7-alpine
        ports:
          - "6379:6379"
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 10

    env:
      PRECHECK_API_KEY: ci-load-test-key
      PRECHECK_BASE_URL: http://localhost:8080
      DATABASE_URL: postgresql://postgres:postgres@localhost:5432/governs_ci
      REDIS_URL: redis://localhost:6379/0

    steps:
      - name: Checkout tests repo
        uses: actions/checkout@v4
        with:
          path: tests

      - name: Checkout precheck service
        uses: actions/checkout@v4
        with:
          repository: governs-ai/precheck
          # On scheduled runs there are no inputs; fall back to main.
          ref: ${{ github.event.inputs.precheck_ref || 'main' }}
          path: precheck
          token: ${{ secrets.GOVERNSAI_CI_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build precheck image
        uses: docker/build-push-action@v6
        with:
          context: precheck
          # load: true makes the built image available to `docker run` below.
          load: true
          tags: governs-ai/precheck:ci
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Run precheck container
        # --network host so the container reaches the service containers on localhost.
        run: |
          docker run -d --name precheck \
            --network host \
            -e APP_BIND=0.0.0.0:8080 \
            -e DATABASE_URL="$DATABASE_URL" \
            -e REDIS_URL="$REDIS_URL" \
            -e PRECHECK_API_KEY="$PRECHECK_API_KEY" \
            -e PII_DETECTION_ENABLED=true \
            governs-ai/precheck:ci

      - name: Wait for precheck /api/v1/health
        run: |
          for i in $(seq 1 60); do
            if curl -fsS http://localhost:8080/api/v1/health >/dev/null 2>&1; then
              # Each failed attempt sleeps 2s, so elapsed time is ~2*(i-1) seconds.
              echo "precheck ready after $(( (i - 1) * 2 ))s"
              exit 0
            fi
            sleep 2
          done
          echo "precheck did not become healthy in time"
          docker logs precheck || true
          exit 1

      - name: Install k6
        run: |
          sudo gpg -k
          sudo gpg --no-default-keyring --keyring /usr/share/keyrings/k6-archive-keyring.gpg \
            --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69
          echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" \
            | sudo tee /etc/apt/sources.list.d/k6.list
          sudo apt-get update
          sudo apt-get install -y k6

      - name: Run k6 load test
        working-directory: tests
        env:
          SUMMARY_PATH: load/baselines/precheck-latest.json
        run: |
          mkdir -p load/baselines
          k6 run load/precheck-baseline.js

      - name: Regression check vs previous baseline
        id: regression
        working-directory: tests
        run: node load/compare-baseline.mjs --summary load/baselines/precheck-latest.json

      - name: Archive k6 summary
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: k6-precheck-summary
          path: tests/load/baselines/precheck-latest.json
          if-no-files-found: error

      - name: Commit new baseline (manual dispatch only)
        # github.event.inputs.* are strings even for boolean-typed inputs,
        # so the string comparison against 'true' is intentional.
        if: >-
          success() &&
          github.event_name == 'workflow_dispatch' &&
          github.event.inputs.commit_baseline == 'true'
        working-directory: tests
        env:
          GH_TOKEN: ${{ secrets.GOVERNSAI_CI_TOKEN }}
        run: |
          DATE=$(date -u +%Y-%m-%d)
          DEST="load/baselines/precheck-${DATE}.json"
          if [ -e "$DEST" ]; then
            echo "Baseline $DEST already exists — baselines are append-only. Re-run after midnight UTC or pick a new label."
            exit 1
          fi
          cp load/baselines/precheck-latest.json "$DEST"
          git config user.name "governsai-ci"
          git config user.email "ci@governs.ai"
          git checkout -b "chore/baseline-${DATE}-${GITHUB_RUN_ID}"
          git add "$DEST"
          git commit -m "chore(load): record precheck baseline ${DATE}

          Recorded from scheduled k6 run. Refs GOV-566."
          git push --set-upstream origin "chore/baseline-${DATE}-${GITHUB_RUN_ID}"
          gh pr create \
            --base dev \
            --title "chore(load): precheck baseline ${DATE}" \
            --body "Automated baseline capture from load-precheck workflow run ${GITHUB_RUN_ID}. Tag Forge for review."

      - name: Notify on regression
        if: failure() && steps.regression.outcome == 'failure'
        run: |
          echo "::error::Precheck load test regressed beyond the 20% P99 threshold or crossed the 0.1% error-rate SLO. See artifact k6-precheck-summary."

      - name: Collect precheck logs on failure
        if: failure()
        run: docker logs precheck || true
27 changes: 24 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,27 @@ npx playwright test e2e/smoke.spec.ts
```bash
npm run test:load
# or directly:
PRECHECK_BASE_URL=http://localhost:8080 k6 run load/precheck-baseline.js
PRECHECK_BASE_URL=http://localhost:3080 k6 run load/precheck-baseline.js
```

Record a new baseline under `load/baselines/YYYY-MM-DD-<label>.json`. Baselines are append-only — never overwrite existing files.
The `precheck-baseline.js` script ramps from 10 to 1000 RPS over 5 minutes
against `POST /api/v1/precheck`, asserting P99 < 50ms and error rate < 0.1%
(TASKS.md §2.5g). The k6 summary is written to `load/baselines/precheck-latest.json`
by default — override with `SUMMARY_PATH=…`.

Record a new baseline under `load/baselines/precheck-YYYY-MM-DD.json`.
Baselines are append-only — never overwrite existing files. The
`load-precheck` workflow publishes new baselines automatically on its weekly
run (or on a `workflow_dispatch` with `commit_baseline=true`).

Regression gate:

```bash
node load/compare-baseline.mjs --summary load/baselines/precheck-latest.json
```

Fails if P99 latency regressed more than 20% vs the most recent committed
baseline, or the error rate crossed the 0.1% SLO.

## Required environment variables

Expand All @@ -56,4 +73,8 @@ See `.env.example` for the full list.

## CI

E2E runs nightly and on every release tag; load tests run weekly. Contract checks run on every PR to `precheck`, `typescript-sdk`, `python-sdk`, and `docs`. Pipelines live in `.github/workflows/` in each consuming repo.
E2E runs nightly and on every release tag; load tests run weekly via
`.github/workflows/load-precheck.yml` (Mondays 07:00 UTC) with a
`workflow_dispatch` override. Contract checks run on every PR to `precheck`,
`typescript-sdk`, `python-sdk`, and `docs`. Pipelines live in
`.github/workflows/` in each consuming repo.
43 changes: 43 additions & 0 deletions load/baselines/precheck-2026-04-23.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"_meta": {
"source": "seed",
"note": "Seed baseline — not a measured run. Mirrors the SLO thresholds from TASKS.md §2.5g (P99 < 50ms, error rate < 0.1%) so the compare-baseline.mjs regression gate has an anchor on its first CI execution. The next successful weekly run of `.github/workflows/load-precheck.yml` will append a real baseline at load/baselines/precheck-YYYY-MM-DD.json alongside this file; baselines are append-only — never overwrite.",
"recorded_at": "2026-04-23T00:00:00Z",
"recorded_by": "Forge",
"issue": "GOV-566",
"scenario": "ramp 10 -> 1000 RPS over 5 minutes, POST /api/v1/precheck"
},
"metrics": {
"http_req_duration": {
"values": {
"avg": 18.0,
"min": 2.0,
"med": 15.0,
"max": 120.0,
"p(90)": 35.0,
"p(95)": 42.0,
"p(99)": 50.0
}
},
"http_req_failed": {
"values": {
"rate": 0.0005,
"passes": 0,
"fails": 0
}
},
"http_reqs": {
"values": {
"count": 0,
"rate": 0
}
},
"checks": {
"values": {
"rate": 0.999,
"passes": 0,
"fails": 0
}
}
}
}
95 changes: 95 additions & 0 deletions load/compare-baseline.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env node
// Regression gate for the weekly precheck load test (TASKS.md §2.5g).
//
// Reads the latest k6 summary and the most recent committed baseline under
// load/baselines/. Fails if P99 latency has regressed by more than 20% or the
// error rate crossed the 0.1% threshold.
//
// Usage:
// node load/compare-baseline.mjs [--summary <path>] [--baselines-dir <dir>] [--threshold 0.20]

import fs from 'node:fs';
import path from 'node:path';

/**
 * Parse CLI options, defaulting to the paths the weekly CI job uses.
 *
 * @param {string[]} argv - process.argv (options start at index 2).
 * @returns {{summary: string, baselinesDir: string, threshold: number}}
 * @throws {Error} if --threshold is not a non-negative finite number.
 */
function parseArgs(argv) {
  const args = {
    summary: 'load/baselines/precheck-latest.json',
    baselinesDir: 'load/baselines',
    threshold: 0.20,
  };
  for (let i = 2; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--summary') args.summary = argv[++i];
    else if (a === '--baselines-dir') args.baselinesDir = argv[++i];
    else if (a === '--threshold') {
      args.threshold = Number(argv[++i]);
      // NaN compares false against everything, so a typo'd threshold would
      // silently disable the P99 regression gate — fail loudly instead.
      if (!Number.isFinite(args.threshold) || args.threshold < 0) {
        throw new Error(`invalid --threshold value: ${argv[i]}`);
      }
    }
  }
  return args;
}

/**
 * Pull the two SLO-relevant numbers out of a k6 end-of-test summary:
 * P99 request duration (ms) and the failed-request rate (0..1).
 *
 * @param {object} summary - parsed k6 summary JSON.
 * @returns {{p99: number, errorRate: number}}
 * @throws {Error} when either metric is absent from the summary.
 */
function extractMetrics(summary) {
  const metrics = summary.metrics ?? {};
  const durationValues = metrics.http_req_duration?.values ?? {};
  const failedValues = metrics.http_req_failed?.values ?? {};
  // k6 spells the percentile key "p(99)"; tolerate a plain "p99" variant.
  const p99 = durationValues['p(99)'] ?? durationValues.p99;
  const errorRate = failedValues.rate;
  if (p99 == null || errorRate == null) {
    throw new Error('Could not find http_req_duration p(99) / http_req_failed rate in summary.');
  }
  return { p99, errorRate };
}

/**
 * Locate the newest committed baseline (precheck-YYYY-MM-DD*.json) in
 * baselinesDir, skipping the file currently being compared.
 *
 * ISO date prefixes sort lexically in chronological order, so a plain
 * string sort is sufficient to pick the most recent file.
 *
 * @param {string} baselinesDir - directory holding baseline JSON files.
 * @param {string} excludeName - basename to skip (the current summary).
 * @returns {{name: string, content: object}|null} newest baseline, or null.
 */
function findPreviousBaseline(baselinesDir, excludeName) {
  if (!fs.existsSync(baselinesDir)) return null;
  const datedBaseline = /^precheck-\d{4}-\d{2}-\d{2}.*\.json$/;
  const newestFirst = fs
    .readdirSync(baselinesDir)
    .filter((entry) => datedBaseline.test(entry) && entry !== excludeName)
    .sort((a, b) => (a < b ? 1 : a > b ? -1 : 0));
  if (newestFirst.length === 0) return null;
  const [name] = newestFirst;
  const raw = fs.readFileSync(path.join(baselinesDir, name), 'utf8');
  return { name, content: JSON.parse(raw) };
}

/**
 * Entry point: compare the latest k6 summary against the most recent
 * committed baseline. Exit codes: 0 = no regression (or no baseline yet),
 * 1 = regression detected, 2 = summary file missing.
 */
function main() {
  const { summary: summaryPath, baselinesDir, threshold } = parseArgs(process.argv);
  // Error-rate SLO from TASKS.md §2.5g (0.1%).
  const ERROR_RATE_SLO = 0.001;
  if (!fs.existsSync(summaryPath)) {
    console.error(`summary file not found: ${summaryPath}`);
    process.exit(2);
  }
  const summary = JSON.parse(fs.readFileSync(summaryPath, 'utf8'));
  const current = extractMetrics(summary);

  const previous = findPreviousBaseline(baselinesDir, path.basename(summaryPath));
  if (!previous) {
    // Nothing to compare against. This script is read-only: the workflow's
    // commit step is what actually records a baseline, so say that instead
    // of claiming we recorded one here.
    console.log(`No prior baseline found in ${baselinesDir} — skipping the regression gate; commit this run to establish the first baseline.`);
    console.log(`current: P99=${current.p99.toFixed(2)}ms error_rate=${(current.errorRate * 100).toFixed(4)}%`);
    return;
  }

  const prior = extractMetrics(previous.content);
  // Guard against a degenerate zero-latency baseline to avoid 0/0.
  const p99Delta = prior.p99 === 0 ? Infinity : (current.p99 - prior.p99) / prior.p99;

  console.log(`previous baseline: ${previous.name}`);
  console.log(`  P99=${prior.p99.toFixed(2)}ms error_rate=${(prior.errorRate * 100).toFixed(4)}%`);
  console.log('current run:');
  console.log(`  P99=${current.p99.toFixed(2)}ms error_rate=${(current.errorRate * 100).toFixed(4)}%`);
  console.log(`P99 delta: ${(p99Delta * 100).toFixed(2)}% (regression threshold: ${(threshold * 100).toFixed(0)}%)`);

  const problems = [];
  if (p99Delta > threshold) {
    problems.push(`P99 regressed by ${(p99Delta * 100).toFixed(2)}% (> ${(threshold * 100).toFixed(0)}%)`);
  }
  if (current.errorRate >= ERROR_RATE_SLO) {
    problems.push(`error rate ${(current.errorRate * 100).toFixed(4)}% exceeds 0.1% SLO`);
  }

  if (problems.length > 0) {
    console.error('\nREGRESSION DETECTED:');
    for (const p of problems) console.error(`  - ${p}`);
    process.exit(1);
  }
  console.log('\nNo regression detected.');
}

main();
Loading