Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion jest.config.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
roots: ['<rootDir>/src'],
roots: ['<rootDir>/src', '<rootDir>/scripts'],
testMatch: ['**/__tests__/**/*.ts', '**/*.test.ts'],
collectCoverageFrom: [
'src/**/*.ts',
Expand Down
54 changes: 2 additions & 52 deletions scripts/ci/benchmark-performance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/

import { execSync, ExecSyncOptions, spawn, ChildProcess } from "child_process";
import { stats, parseMb, checkRegressions, BenchmarkResult, BenchmarkReport } from "./benchmark-utils";

// ── Configuration ──────────────────────────────────────────────────

Expand All @@ -24,25 +25,6 @@ const AWF_CMD = "sudo awf";
const ALLOWED_DOMAIN = "api.github.com";
const CLEANUP_CMD = "sudo docker compose down -v 2>/dev/null; sudo docker rm -f awf-squid awf-agent 2>/dev/null; sudo docker network prune -f 2>/dev/null";

interface BenchmarkResult {
metric: string;
unit: string;
values: number[];
mean: number;
median: number;
p95: number;
p99: number;
}

interface BenchmarkReport {
timestamp: string;
commitSha: string;
iterations: number;
results: BenchmarkResult[];
thresholds: Record<string, { target: number; critical: number }>;
regressions: string[];
}

// ── Thresholds (milliseconds or MB) ───────────────────────────────

const THRESHOLDS: Record<string, { target: number; critical: number }> = {
Expand All @@ -65,17 +47,6 @@ function timeMs(fn: () => void): number {
return Math.round(performance.now() - start);
}

function stats(values: number[]): Pick<BenchmarkResult, "mean" | "median" | "p95" | "p99"> {
const sorted = [...values].sort((a, b) => a - b);
const n = sorted.length;
return {
mean: Math.round(sorted.reduce((a, b) => a + b, 0) / n),
median: sorted[Math.floor(n / 2)],
p95: sorted[Math.min(Math.floor(n * 0.95), n - 1)],
p99: sorted[Math.min(Math.floor(n * 0.99), n - 1)],
};
}

function cleanup(): void {
try {
execSync(CLEANUP_CMD, { stdio: "ignore", timeout: 30_000 });
Expand Down Expand Up @@ -202,19 +173,6 @@ function waitForContainers(containerNames: string[], timeoutMs: number): Promise
});
}

/**
* Parse a Docker memory usage string like "123.4MiB / 7.773GiB" into MB.
*/
function parseMb(s: string): number {
const match = s.match(/([\d.]+)\s*(MiB|GiB|KiB)/i);
if (!match) return 0;
const val = parseFloat(match[1]);
const unit = match[2].toLowerCase();
if (unit === "gib") return val * 1024;
if (unit === "kib") return val / 1024;
return val;
}

/**
* Kill a spawned background process and its entire process group, best-effort.
* Sends SIGTERM then SIGKILL to the process group so descendant processes
Expand Down Expand Up @@ -343,15 +301,7 @@ async function main(): Promise<void> {
cleanup();

// Check for regressions against critical thresholds
const regressions: string[] = [];
for (const r of results) {
const threshold = THRESHOLDS[r.metric];
if (threshold && r.p95 > threshold.critical) {
regressions.push(
`${r.metric}: p95=${r.p95}${r.unit} exceeds critical threshold of ${threshold.critical}${r.unit}`
);
}
}
const regressions = checkRegressions(results, THRESHOLDS);

const report: BenchmarkReport = {
timestamp: new Date().toISOString(),
Expand Down
192 changes: 192 additions & 0 deletions scripts/ci/benchmark-utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import { stats, parseMb, checkRegressions, BenchmarkResult } from "./benchmark-utils";

// ── stats() ──────────────────────────────────────────────────────

describe("stats()", () => {
  it("throws on empty array", () => {
    expect(() => stats([])).toThrow("stats() requires at least one value");
  });

  it("handles single element", () => {
    expect(stats([42])).toEqual({ mean: 42, median: 42, p95: 42, p99: 42 });
  });

  it("handles two elements", () => {
    const summary = stats([10, 20]);
    expect(summary.mean).toBe(15);
    expect(summary.median).toBe(20); // index floor(2/2) = 1, so the larger value
    expect(summary.p95).toBe(20);
    expect(summary.p99).toBe(20);
  });

  it("handles odd count", () => {
    // input sorts to [1, 2, 3]
    const summary = stats([3, 1, 2]);
    expect(summary.mean).toBe(2);
    expect(summary.median).toBe(2); // index floor(3/2) = 1
    expect(summary.p95).toBe(3); // index floor(3*0.95) = 2
    expect(summary.p99).toBe(3); // index floor(3*0.99) = 2
  });

  it("handles even count", () => {
    // input sorts to [1, 2, 3, 4]
    const summary = stats([4, 2, 1, 3]);
    expect(summary.mean).toBe(3); // 10/4 = 2.5, Math.round gives 3
    expect(summary.median).toBe(3); // index floor(4/2) = 2
    expect(summary.p95).toBe(4); // index floor(4*0.95) = 3
    expect(summary.p99).toBe(4); // index floor(4*0.99) = 3
  });

  it("handles all same values", () => {
    expect(stats([7, 7, 7, 7, 7])).toEqual({ mean: 7, median: 7, p95: 7, p99: 7 });
  });

  it("rounds mean correctly", () => {
    expect(stats([1, 2, 3]).mean).toBe(2); // 6/3 divides exactly
    expect(stats([1, 2]).mean).toBe(2); // 3/2 = 1.5 rounds up
    expect(stats([1, 2, 4]).mean).toBe(2); // 7/3 ≈ 2.33 rounds down
  });

  it("does not mutate input array", () => {
    const samples = [5, 3, 1, 4, 2];
    const snapshot = [...samples];
    stats(samples);
    expect(samples).toEqual(snapshot);
  });

  it("handles large array with correct percentiles", () => {
    // 100 values: 1..100
    const oneThroughHundred = Array.from({ length: 100 }, (_, i) => i + 1);
    const summary = stats(oneThroughHundred);
    expect(summary.mean).toBe(51); // Math.round(5050/100)
    expect(summary.median).toBe(51); // index floor(100/2) = 50 holds 51
    expect(summary.p95).toBe(96); // index floor(100*0.95) = 95 holds 96
    expect(summary.p99).toBe(100); // index floor(100*0.99) = 99 holds 100
  });

  it("handles negative values", () => {
    const summary = stats([-10, -5, 0, 5, 10]);
    expect(summary.mean).toBe(0);
    expect(summary.median).toBe(0);
  });
});

// ── parseMb() ────────────────────────────────────────────────────

describe("parseMb()", () => {
  it("parses MiB values", () => {
    expect(parseMb("123.4MiB / 7.773GiB")).toBe(123.4);
  });

  it("parses GiB values", () => {
    expect(parseMb("2GiB / 8GiB")).toBe(2048);
  });

  it("parses KiB values", () => {
    expect(parseMb("512KiB / 8GiB")).toBe(0.5);
  });

  it("parses zero-valued MiB input", () => {
    expect(parseMb("0MiB")).toBe(0);
  });

  it("returns 0 for unrecognized or empty format", () => {
    for (const bad of ["unknown", ""]) {
      expect(parseMb(bad)).toBe(0);
    }
  });

  it("is case insensitive", () => {
    const lowercased: Array<[string, number]> = [
      ["100mib", 100],
      ["1gib", 1024],
      ["1024kib", 1],
    ];
    for (const [input, expected] of lowercased) {
      expect(parseMb(input)).toBe(expected);
    }
  });

  it("handles decimal values", () => {
    const fractional: Array<[string, number]> = [
      ["1.5GiB / 8GiB", 1536],
      ["0.5MiB / 8GiB", 0.5],
    ];
    for (const [input, expected] of fractional) {
      expect(parseMb(input)).toBe(expected);
    }
  });
});

// ── checkRegressions() ──────────────────────────────────────────

describe("checkRegressions()", () => {
  const limits: Record<string, { target: number; critical: number }> = {
    container_startup_cold: { target: 15000, critical: 20000 },
    squid_https_latency: { target: 100, critical: 200 },
    memory_footprint_mb: { target: 500, critical: 1024 },
  };

  // Build a result whose summary stats all equal the given p95 value.
  function buildResult(metric: string, p95: number, unit = "ms"): BenchmarkResult {
    return { metric, unit, values: [p95], mean: p95, median: p95, p95, p99: p95 };
  }

  it("returns empty array when all within thresholds", () => {
    const healthy = [
      buildResult("container_startup_cold", 19000),
      buildResult("squid_https_latency", 150),
      buildResult("memory_footprint_mb", 800, "MB"),
    ];
    expect(checkRegressions(healthy, limits)).toEqual([]);
  });

  it("detects single regression", () => {
    const flagged = checkRegressions([buildResult("container_startup_cold", 25000)], limits);
    expect(flagged).toHaveLength(1);
    expect(flagged[0]).toContain("container_startup_cold");
    expect(flagged[0]).toContain("p95=25000");
    expect(flagged[0]).toContain("critical threshold of 20000");
  });

  it("detects multiple regressions", () => {
    const slowResults = [
      buildResult("container_startup_cold", 25000),
      buildResult("squid_https_latency", 300),
    ];
    expect(checkRegressions(slowResults, limits)).toHaveLength(2);
  });

  it("ignores metrics without thresholds", () => {
    expect(checkRegressions([buildResult("unknown_metric", 999999)], limits)).toEqual([]);
  });

  it("p95 exactly at critical is not a regression", () => {
    expect(checkRegressions([buildResult("container_startup_cold", 20000)], limits)).toEqual([]);
  });

  it("p95 one unit above critical is a regression", () => {
    expect(checkRegressions([buildResult("container_startup_cold", 20001)], limits)).toHaveLength(1);
  });

  it("returns empty array for empty results", () => {
    expect(checkRegressions([], limits)).toEqual([]);
  });

  it("returns empty array for empty thresholds", () => {
    expect(checkRegressions([buildResult("container_startup_cold", 99999)], {})).toEqual([]);
  });

  it("includes unit in regression message", () => {
    const flagged = checkRegressions([buildResult("memory_footprint_mb", 2000, "MB")], limits);
    expect(flagged[0]).toContain("MB");
  });
});
87 changes: 87 additions & 0 deletions scripts/ci/benchmark-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/**
* Pure utility functions extracted from benchmark-performance.ts
* for testability. No Docker/exec dependencies.
*/

// ── Types ─────────────────────────────────────────────────────────

/**
 * One benchmark metric's raw per-iteration samples together with the
 * summary statistics derived from them (see stats()).
 */
export interface BenchmarkResult {
  metric: string;   // metric identifier; looked up in the thresholds map by checkRegressions()
  unit: string;     // display unit appended to values in messages, e.g. "ms" or "MB"
  values: number[]; // raw per-iteration samples the summary stats were computed from
  mean: number;     // rounded arithmetic mean of values
  median: number;   // median of values (floor-index on the sorted samples)
  p95: number;      // 95th-percentile sample; compared against critical thresholds
  p99: number;      // 99th-percentile sample
}

/**
 * Top-level report emitted after a benchmark run: every metric's result,
 * the thresholds they were judged against, and any detected regressions.
 */
export interface BenchmarkReport {
  timestamp: string;  // ISO-8601 generation time (new Date().toISOString() at the call site)
  commitSha: string;  // commit the benchmark ran against
  iterations: number; // number of iterations each metric was sampled for
  results: BenchmarkResult[];
  thresholds: Record<string, { target: number; critical: number }>; // per-metric limits (ms or MB)
  regressions: string[]; // human-readable messages produced by checkRegressions()
}

// ── Statistics ────────────────────────────────────────────────────

/**
 * Summarize a non-empty list of numeric samples.
 *
 * Produces the rounded arithmetic mean plus the median, p95, and p99
 * picked from the ascending-sorted samples using floor-based indices
 * clamped to the last element. The input array is never mutated.
 *
 * @throws Error when called with an empty array (caller must guard).
 */
export function stats(values: number[]): Pick<BenchmarkResult, "mean" | "median" | "p95" | "p99"> {
  if (values.length === 0) {
    throw new Error("stats() requires at least one value");
  }
  const ordered = values.slice().sort((lhs, rhs) => lhs - rhs);
  const count = ordered.length;
  // Floor-based percentile pick, clamped so the index never runs past the end.
  const pick = (index: number): number => ordered[Math.min(index, count - 1)];
  const total = ordered.reduce((sum, sample) => sum + sample, 0);
  return {
    mean: Math.round(total / count),
    median: pick(Math.floor(count / 2)),
    p95: pick(Math.floor(count * 0.95)),
    p99: pick(Math.floor(count * 0.99)),
  };
}

// ── Memory parsing ───────────────────────────────────────────────

/**
* Parse a Docker memory usage string like "123.4MiB / 7.773GiB"
* and return the used amount in MiB (first number only).
* Note: GiB values are converted to MiB (GiB * 1024), KiB to MiB (KiB / 1024).
*/
export function parseMb(s: string): number {
const match = s.match(/([\d.]+)\s*(MiB|GiB|KiB)/i);
if (!match) return 0;
const val = parseFloat(match[1]);
const unit = match[2].toLowerCase();
if (unit === "gib") return val * 1024;
if (unit === "kib") return val / 1024;
return val;
Comment on lines +52 to +64
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parseMb() docstring says it returns the used amount in "MB", but the implementation is operating on binary units (MiB/GiB/KiB) and returns MiB-equivalent values (e.g., GiB * 1024). Please either (a) update the documentation to say MiB (or “MiB treated as MB”), or (b) convert to true MB/GB (decimal) so the doc matches behavior.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Updated docstring to say MiB and added a note about the GiB/KiB conversions.

}

// ── Threshold checking ───────────────────────────────────────────

/**
 * Flag every result whose p95 is strictly above its metric's critical
 * threshold. Metrics with no configured threshold are skipped.
 *
 * @param results    Benchmark summaries to inspect.
 * @param thresholds Per-metric limits keyed by metric name.
 * @returns Human-readable regression descriptions, one per offending metric.
 */
export function checkRegressions(
  results: BenchmarkResult[],
  thresholds: Record<string, { target: number; critical: number }>,
): string[] {
  const failures: string[] = [];
  for (const result of results) {
    const limit = thresholds[result.metric];
    // Only a strict exceedance of the critical limit counts as a regression.
    if (!limit || result.p95 <= limit.critical) continue;
    failures.push(
      `${result.metric}: p95=${result.p95}${result.unit} exceeds critical threshold of ${limit.critical}${result.unit}`,
    );
  }
  return failures;
}
Loading