diff --git a/package.json b/package.json index 83b06ca..5a0e8b2 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,7 @@ "test:unit": "vitest run test/unit/**/*.test.ts", "lint": "eslint --color src/ test/", "prepublishOnly": "yarn build", - "benchmark": "node --loader ts-node/esm ./src/cli/cli.ts 'test/perf/**/*.test.ts'", + "benchmark": "node --loader ts-node/esm ./src/cli/cli.ts 'test/perf/**/@(!(errors)).test.ts'", "writeDocs": "node --loader ts-node/esm scripts/writeOptionsMd.ts" }, "devDependencies": { diff --git a/src/benchmark/benchmarkFn.ts b/src/benchmark/benchmarkFn.ts index 7c6f705..decd330 100644 --- a/src/benchmark/benchmarkFn.ts +++ b/src/benchmark/benchmarkFn.ts @@ -5,29 +5,19 @@ import {createChainable} from "@vitest/runner/utils"; import {store} from "./globalState.js"; import {BenchApi, BenchmarkOpts, BenchmarkRunOptsWithFn, PartialBy} from "../types.js"; import {runBenchFn} from "./runBenchmarkFn.js"; -import {optionsDefault} from "../cli/options.js"; +import {getBenchmarkOptionsWithDefaults} from "./options.js"; export const bench: BenchApi = createBenchmarkFunction(function ( this: Record<"skip" | "only", boolean | undefined>, idOrOpts: string | PartialBy, "fn">, fn?: (arg: T) => void | Promise ) { - const {fn: benchTask, ...opts} = coerceToOptsObj(idOrOpts, fn); + const {fn: benchTask, before, beforeEach, ...opts} = coerceToOptsObj(idOrOpts, fn); const currentSuite = getCurrentSuite(); const globalOptions = store.getGlobalOptions() ?? {}; - const parentOptions = store.getOptions(getCurrentSuite()) ?? {}; - const options = {...globalOptions, ...parentOptions, ...opts}; - const {timeoutBench, maxMs, minMs} = options; - - let timeout = timeoutBench ?? optionsDefault.timeoutBench; - if (maxMs && maxMs > timeout) { - timeout = maxMs * 1.5; - } - - if (minMs && minMs > timeout) { - timeout = minMs * 1.5; - } + const parentOptions = store.getOptions(currentSuite) ?? 
{}; + const options = getBenchmarkOptionsWithDefaults({...globalOptions, ...parentOptions, ...opts}); async function handler(): Promise { // Ensure bench id is unique @@ -35,17 +25,20 @@ export const bench: BenchApi = createBenchmarkFunction(function ( throw Error(`test titles must be unique, duplicated: '${opts.id}'`); } - // Persist full results if requested. dir is created in `beforeAll` - const benchmarkResultsCsvDir = process.env.BENCHMARK_RESULTS_CSV_DIR; - const persistRunsNs = Boolean(benchmarkResultsCsvDir); - - const {result, runsNs} = await runBenchFn({...options, fn: benchTask}, persistRunsNs); + const {result, runsNs} = await runBenchFn({ + ...options, + fn: benchTask, + before, + beforeEach, + } as BenchmarkRunOptsWithFn); // Store result for: // - to persist benchmark data latter // - to render with the custom reporter store.setResult(opts.id, result); + // Persist full results if requested. dir is created in `beforeAll` + const benchmarkResultsCsvDir = process.env.BENCHMARK_RESULTS_CSV_DIR; if (benchmarkResultsCsvDir) { fs.mkdirSync(benchmarkResultsCsvDir, {recursive: true}); const filename = `${result.id}.csv`; @@ -59,27 +52,25 @@ export const bench: BenchApi = createBenchmarkFunction(function ( only: opts.only ?? 
this.only, sequential: true, concurrent: false, - timeout, + timeout: options.timeoutBench, meta: { "chainsafe/benchmark": true, }, }); - const {id: _, ...optionsWithoutId} = opts; setFn(task, handler); - store.setOptions(task, optionsWithoutId); - - task.onFinished = [ - () => { - store.removeOptions(task); - }, - () => { - // Clear up the assigned handler to clean the memory - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-expect-error - setFn(task, null); - }, - ]; + store.setOptions(task, opts); + + const cleanup = (): void => { + store.removeOptions(task); + // Clear up the assigned handler to clean the memory + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-expect-error + setFn(task, null); + }; + + task.onFailed = [cleanup]; + task.onFinished = [cleanup]; }); function createBenchmarkFunction( diff --git a/src/benchmark/options.ts b/src/benchmark/options.ts new file mode 100644 index 0000000..937b5b7 --- /dev/null +++ b/src/benchmark/options.ts @@ -0,0 +1,40 @@ +import {BenchmarkOpts} from "../types.js"; + +export const defaultBenchmarkOptions: Required = { + minRuns: 1, + maxRuns: Infinity, + minMs: 100, + maxMs: Infinity, + maxWarmUpRuns: 1000, + maxWarmUpMs: 500, + convergeFactor: 0.5 / 100, // 0.5% + runsFactor: 1, + yieldEventLoopAfterEach: false, + timeoutBench: 10_000, + noThreshold: false, + triggerGC: false, + setupFiles: [], + skip: false, + only: false, + threshold: 2, + convergence: "linear", + averageCalculation: "simple", +}; + +export function getBenchmarkOptionsWithDefaults(opts: BenchmarkOpts): Required { + const options = Object.assign({}, defaultBenchmarkOptions, opts); + + if (options.noThreshold) { + options.threshold = Infinity; + } + + if (options.maxMs && options.maxMs > options.timeoutBench) { + options.timeoutBench = options.maxMs * 1.5; + } + + if (options.minMs && options.minMs > options.timeoutBench) { + options.timeoutBench = options.minMs * 1.5; + } + + return options; +} diff 
--git a/src/benchmark/reporter.ts b/src/benchmark/reporter.ts index 36c5b0e..c3f6a2f 100644 --- a/src/benchmark/reporter.ts +++ b/src/benchmark/reporter.ts @@ -3,7 +3,7 @@ import {color, consoleLog, symbols} from "../utils/output.js"; import {store} from "./globalState.js"; import {Benchmark, BenchmarkOpts, BenchmarkResult} from "../types.js"; import {formatResultRow} from "./format.js"; -import {optionsDefault} from "../cli/options.js"; +import {defaultBenchmarkOptions} from "./options.js"; export class BenchmarkReporter { indents = 0; @@ -16,7 +16,7 @@ export class BenchmarkReporter { constructor({prevBench, benchmarkOpts}: {prevBench: Benchmark | null; benchmarkOpts: BenchmarkOpts}) { this.prevResults = new Map(); - this.threshold = benchmarkOpts.threshold ?? optionsDefault.threshold; + this.threshold = benchmarkOpts.threshold ?? defaultBenchmarkOptions.threshold; if (prevBench) { for (const bench of prevBench.results) { @@ -25,9 +25,14 @@ export class BenchmarkReporter { } } - // eslint-disable-next-line @typescript-eslint/no-unused-vars - onTestStarted(_task: Task): void { - // this.log(task.name, "started"); + onTestStarted(task: Task): void { + if (task.mode === "skip") { + this.skipped++; + consoleLog(`${this.indent()}${color("pending", " - %s")}`, task.name); + } else if (task.mode === "todo") { + this.skipped++; + consoleLog(`${this.indent()}${color("pending", " - %s")}`, task.name); + } } onTestFinished(task: Task): void { @@ -46,8 +51,9 @@ export class BenchmarkReporter { } case "fail": { this.failed++; - consoleLog(this.indent() + color("fail", " %d) %s"), ++this.failed, task.name); - consoleLog(task.result?.errors); + const fmt = this.indent() + color("fail", " " + symbols.err) + color("fail", " %s"); + consoleLog(fmt, task.name); + consoleLog(task.result?.errors?.map((e) => e.stackStr).join("\n")); break; } case "pass": { diff --git a/src/benchmark/runBenchmarkFn.ts b/src/benchmark/runBenchmarkFn.ts index 9e5adb0..b093633 100644 --- 
a/src/benchmark/runBenchmarkFn.ts +++ b/src/benchmark/runBenchmarkFn.ts @@ -1,4 +1,12 @@ import {BenchmarkResult, BenchmarkOpts} from "../types.js"; +import {calcSum, filterOutliers, OutlierSensitivity} from "../utils/math.js"; +import {getBenchmarkOptionsWithDefaults} from "./options.js"; +import {createCVConvergenceCriteria, createLinearConvergenceCriteria} from "./termination.js"; + +const convergenceCriteria = { + ["linear"]: createLinearConvergenceCriteria, + ["cv"]: createCVConvergenceCriteria, +}; export type BenchmarkRunOpts = BenchmarkOpts & { id: string; @@ -12,49 +20,54 @@ export type BenchmarkRunOptsWithFn = BenchmarkOpts & { }; export async function runBenchFn( - opts: BenchmarkRunOptsWithFn, - persistRunsNs?: boolean + opts: BenchmarkRunOptsWithFn ): Promise<{result: BenchmarkResult; runsNs: bigint[]}> { - const minRuns = opts.minRuns || 1; - const maxRuns = opts.maxRuns || Infinity; - const maxMs = opts.maxMs || Infinity; - const minMs = opts.minMs || 100; - const maxWarmUpMs = opts.maxWarmUpMs !== undefined ? opts.maxWarmUpMs : 500; - const maxWarmUpRuns = opts.maxWarmUpRuns !== undefined ? opts.maxWarmUpRuns : 1000; - // Ratio of maxMs that the warmup is allow to take from ellapsedMs + const {id, before, beforeEach, fn, ...rest} = opts; + const benchOptions = getBenchmarkOptionsWithDefaults(rest); + const {maxMs, maxRuns, maxWarmUpMs, maxWarmUpRuns, runsFactor, threshold, convergence, averageCalculation} = + benchOptions; + + if (maxWarmUpMs >= maxMs) { + throw new Error(`Warmup time must be lower than max run time. maxWarmUpMs: ${maxWarmUpMs}, maxMs: ${maxMs}`); + } + + if (maxWarmUpRuns >= maxRuns) { + throw new Error(`Warmup runs must be lower than max runs. maxWarmUpRuns: ${maxWarmUpRuns}, maxRuns: ${maxRuns}`); + } + + if (averageCalculation !== "simple" && averageCalculation !== "clean-outliers") { + throw new Error(`Average calculation logic is not defined. 
${averageCalculation}`); + } + + if (convergence !== "linear" && convergence !== "cv") { + throw new Error(`Unknown convergence value ${convergence}`); + } + + // Ratio of maxMs that the warmup is allow to take from elapsedMs const maxWarmUpRatio = 0.5; - const convergeFactor = opts.convergeFactor || 0.5 / 100; // 0.5% - const runsFactor = opts.runsFactor || 1; - const maxWarmUpNs = BigInt(maxWarmUpMs) * BigInt(1e6); - const sampleEveryMs = 100; + const maxWarmUpNs = BigInt(benchOptions.maxWarmUpMs) * BigInt(1e6); const runsNs: bigint[] = []; const startRunMs = Date.now(); + const shouldTerminate = convergenceCriteria[convergence](startRunMs, benchOptions); + let runIdx = 0; let totalNs = BigInt(0); + let totalWarmUpNs = BigInt(0); let totalWarmUpRuns = 0; - let prevAvg0 = 0; - let prevAvg1 = 0; - let lastConvergenceSample = startRunMs; - let isWarmUp = maxWarmUpNs > 0 && maxWarmUpRuns > 0; + let isWarmUpPhase = maxWarmUpNs > 0 && maxWarmUpRuns > 0; - const inputAll = opts.before ? await opts.before() : (undefined as unknown as T2); + const inputAll = before ? await before() : (undefined as unknown as T2); while (true) { - const ellapsedMs = Date.now() - startRunMs; - const mustStop = ellapsedMs >= maxMs || runIdx >= maxRuns; - const mayStop = ellapsedMs > minMs && runIdx > minRuns; - // Exceeds limits, must stop now - if (mustStop) { - break; - } + const elapsedMs = Date.now() - startRunMs; - const input = opts.beforeEach ? await opts.beforeEach(inputAll, runIdx) : (undefined as unknown as T); + const input = beforeEach ? 
await beforeEach(inputAll, runIdx) : (undefined as unknown as T); const startNs = process.hrtime.bigint(); - await opts.fn(input); + await fn(input); const endNs = process.hrtime.bigint(); const runNs = endNs - startNs; @@ -64,54 +77,26 @@ export async function runBenchFn( await new Promise((r) => setTimeout(r, 0)); } - if (isWarmUp) { + if (isWarmUpPhase) { // Warm-up, do not count towards results totalWarmUpRuns += 1; totalWarmUpNs += runNs; // On any warm-up finish condition, mark isWarmUp = true to prevent having to check them again - if (totalWarmUpNs >= maxWarmUpNs || totalWarmUpRuns >= maxWarmUpRuns || ellapsedMs / maxMs >= maxWarmUpRatio) { - isWarmUp = false; - } - } else { - // Persist results - runIdx += 1; - totalNs += runNs; - // If the caller wants the exact times of all runs, persist them - if (persistRunsNs) runsNs.push(runNs); - - // When is a good time to stop a benchmark? A naive answer is after N miliseconds or M runs. - // This code aims to stop the benchmark when the average fn run time has converged at a value - // within a given convergence factor. To prevent doing expensive math to often for fast fn, - // it only takes samples every `sampleEveryMs`. It stores two past values to be able to compute - // a very rough linear and quadratic convergence. 
- if (Date.now() - lastConvergenceSample > sampleEveryMs) { - lastConvergenceSample = Date.now(); - const avg = Number(totalNs / BigInt(runIdx)); - - // Compute convergence (1st order + 2nd order) - const a = prevAvg0; - const b = prevAvg1; - const c = avg; - - // Only do convergence math if it may stop - if (mayStop) { - // Aprox linear convergence - const convergence1 = Math.abs(c - a); - // Aprox quadratic convergence - const convergence2 = Math.abs(b - (a + c) / 2); - // Take the greater of both to enfore linear and quadratic are below convergeFactor - const convergence = Math.max(convergence1, convergence2) / a; - - // Okay to stop + has converged, stop now - if (convergence < convergeFactor) { - break; - } - } - - prevAvg0 = prevAvg1; - prevAvg1 = avg; + if (totalWarmUpNs >= maxWarmUpNs || totalWarmUpRuns >= maxWarmUpRuns || elapsedMs / maxMs >= maxWarmUpRatio) { + isWarmUpPhase = false; } + + continue; + } + + // Persist results + runIdx += 1; + totalNs += runNs; + runsNs.push(runNs); + + if (shouldTerminate(runIdx, totalNs, runsNs)) { + break; } } @@ -135,15 +120,24 @@ either the before(), beforeEach() or fn() functions are too slow. } } - const averageNs = Number(totalNs / BigInt(runIdx)) / runsFactor; + let averageNs!: number; + + if (averageCalculation === "simple") { + averageNs = Number(totalNs / BigInt(runIdx)) / runsFactor; + } + + if (averageCalculation === "clean-outliers") { + const cleanData = filterOutliers(runsNs, false, OutlierSensitivity.Mild); + averageNs = Number(calcSum(cleanData) / BigInt(cleanData.length)) / runsFactor; + } return { result: { - id: opts.id, + id: id, averageNs, runsDone: runIdx, totalMs: Date.now() - startRunMs, - threshold: opts.noThreshold === true ? 
Infinity : opts.threshold, + threshold, }, runsNs, }; diff --git a/src/benchmark/runner.ts b/src/benchmark/runner.ts index 9973cb0..15e8f9e 100644 --- a/src/benchmark/runner.ts +++ b/src/benchmark/runner.ts @@ -99,11 +99,7 @@ export class BenchmarkRunner implements VitestRunner { debug("finished tests. passed: %i, skipped: %i, failed: %i", passed.length, skipped.length, failed.length); - if (failed.length > 0) { - throw failed[0].result?.errors; - } - - if (passed.length + skipped.length === res.length) { + if (passed.length + skipped.length + failed.length === res.length) { return store.getAllResults(); } diff --git a/src/benchmark/termination.ts b/src/benchmark/termination.ts new file mode 100644 index 0000000..6c94145 --- /dev/null +++ b/src/benchmark/termination.ts @@ -0,0 +1,101 @@ +import {BenchmarkOpts} from "../types.js"; +import {calcMean, calcMedian, calcVariance, filterOutliers, OutlierSensitivity, sortData} from "../utils/math.js"; + +export type TerminationCriteria = (runIdx: number, totalNs: bigint, runNs: bigint[]) => boolean; + +export function createLinearConvergenceCriteria( + startMs: number, + {maxMs, maxRuns, minRuns, minMs, convergeFactor}: Required +): TerminationCriteria { + let prevAvg0 = 0; + let prevAvg1 = 0; + let lastConvergenceSample = startMs; + const sampleEveryMs = 100; + + // eslint-disable-next-line @typescript-eslint/no-unused-vars + return function canTerminate(runIdx: number, totalNs: bigint, _runNs: bigint[]): boolean { + const currentMs = Date.now(); + const elapsedMs = currentMs - startMs; + const mustStop = elapsedMs >= maxMs || runIdx >= maxRuns; + const mayStop = elapsedMs >= minMs && runIdx >= minRuns; + + // Must stop + if (mustStop) return true; + + // When is a good time to stop a benchmark? A naive answer is after N milliseconds or M runs. + // This code aims to stop the benchmark when the average fn run time has converged at a value + // within a given convergence factor. 
To prevent doing expensive math to often for fast fn, + // it only takes samples every `sampleEveryMs`. It stores two past values to be able to compute + // a very rough linear and quadratic convergence.a + if (Date.now() - lastConvergenceSample <= sampleEveryMs) return false; + + lastConvergenceSample = currentMs; + const avg = Number(totalNs / BigInt(runIdx)); + + // Compute convergence (1st order + 2nd order) + const a = prevAvg0; + const b = prevAvg1; + const c = avg; + + if (mayStop) { + // Approx linear convergence + const convergence1 = Math.abs(c - a); + // Approx quadratic convergence + const convergence2 = Math.abs(b - (a + c) / 2); + // Take the greater of both to enforce linear and quadratic are below convergeFactor + const convergence = Math.max(convergence1, convergence2) / a; + + // Okay to stop + has converged, stop now + if (convergence < convergeFactor) return true; + } + + prevAvg0 = prevAvg1; + prevAvg1 = avg; + return false; + }; +} + +export function createCVConvergenceCriteria( + startMs: number, + {maxMs, maxRuns, minRuns, minMs, convergeFactor}: Required +): TerminationCriteria { + let lastConvergenceSample = startMs; + const sampleEveryMs = 100; + const minSamples = minRuns > 5 ? 
minRuns : 5; + const maxSamplesForCV = 1000; + + return function canTerminate(runIdx: number, totalNs: bigint, runsNs: bigint[]): boolean { + const currentMs = Date.now(); + const elapsedMs = currentMs - startMs; + const mustStop = elapsedMs >= maxMs || runIdx >= maxRuns; + const mayStop = elapsedMs >= minMs && runIdx >= minRuns && runIdx > minSamples; + + // Must stop + if (mustStop) return true; + + if (Date.now() - lastConvergenceSample <= sampleEveryMs) return false; + + if (mayStop) { + lastConvergenceSample = currentMs; + + const mean = calcMean(runsNs); + const variance = calcVariance(runsNs, mean); + const cv = Math.sqrt(Number(variance)) / Number(mean); + + if (cv < convergeFactor) return true; + + // If CV does not stabilize we fallback to the median approach + if (runsNs.length > maxSamplesForCV) { + const sorted = sortData(runsNs); + const cleanedRunsNs = filterOutliers(sorted, true, OutlierSensitivity.Mild); + const median = calcMedian(cleanedRunsNs, true); + const mean = calcMean(cleanedRunsNs); + const medianFactor = Math.abs(Number(mean - median)) / Number(median); + + if (medianFactor < convergeFactor) return true; + } + } + + return false; + }; +} diff --git a/src/cli/options.ts b/src/cli/options.ts index 5fb06c8..056e9ba 100644 --- a/src/cli/options.ts +++ b/src/cli/options.ts @@ -1,9 +1,8 @@ import {Options} from "yargs"; import {StorageOptions, BenchmarkOpts, FileCollectionOptions} from "../types.js"; +import {defaultBenchmarkOptions} from "../benchmark/options.js"; export const optionsDefault = { - threshold: 2, - timeoutBench: 10_000, historyLocalPath: "./benchmark_data", historyCacheKey: "benchmark_data", }; @@ -127,74 +126,97 @@ export const benchmarkOptions: ICliCommandOptions = { description: "Ratio of new average time per run vs previos time per run to consider a failure. 
Set to 'Infinity' to disable it.", type: "number", - default: optionsDefault.threshold, + default: defaultBenchmarkOptions.threshold, group: benchmarkGroup, }, maxRuns: { type: "number", description: "Max number of fn() runs, after which the benchmark stops", + default: defaultBenchmarkOptions.maxRuns, group: benchmarkGroup, }, minRuns: { type: "number", description: "Min number of fn() runs before considering stopping the benchmark after converging", + default: defaultBenchmarkOptions.minRuns, group: benchmarkGroup, }, maxMs: { type: "number", description: "Max total miliseconds of runs, after which the benchmark stops", + default: defaultBenchmarkOptions.maxMs, group: benchmarkGroup, }, minMs: { type: "number", description: "Min total miiliseconds of runs before considering stopping the benchmark after converging", + default: defaultBenchmarkOptions.minMs, group: benchmarkGroup, }, maxWarmUpMs: { type: "number", description: "Maximum real benchmark function run time before starting to count towards results. Set to 0 to not warm-up. May warm up for less ms if the `maxWarmUpRuns` condition is met first.", + default: defaultBenchmarkOptions.maxWarmUpMs, group: benchmarkGroup, }, maxWarmUpRuns: { type: "number", description: "Maximum benchmark function runs before starting to count towards results. Set to 0 to not warm-up. May warm up for less ms if the `maxWarmUpMs` condition is met first.", + default: defaultBenchmarkOptions.maxWarmUpRuns, group: benchmarkGroup, }, convergeFactor: { type: "number", description: "Convergance factor (0,1) at which the benchmark automatically stops. 
Set to 1 to disable", + default: defaultBenchmarkOptions.convergeFactor, group: benchmarkGroup, }, runsFactor: { type: "number", description: "If fn() contains a foor loop repeating a task N times, you may set runsFactor = N to scale down the results.", + default: defaultBenchmarkOptions.runsFactor, group: benchmarkGroup, }, yieldEventLoopAfterEach: { type: "boolean", description: "Run `sleep(0)` after each fn() call. Use when the event loop needs to tick to free resources created by fn()", + default: defaultBenchmarkOptions.yieldEventLoopAfterEach, group: benchmarkGroup, }, timeoutBench: { type: "number", description: "Hard timeout for each benchmark", - default: optionsDefault.timeoutBench, + default: defaultBenchmarkOptions.timeoutBench, group: benchmarkGroup, }, setupFiles: { type: "array", description: "List of setup files to load before the tests", - default: [], + default: defaultBenchmarkOptions.setupFiles, group: benchmarkGroup, }, triggerGC: { type: "boolean", description: "Trigger GC (if available) after every benchmark", - default: false, + default: defaultBenchmarkOptions.triggerGC, + group: benchmarkGroup, + }, + convergence: { + type: "string", + description: "The algorithm used to detect the convergence to stop benchmark runs", + default: defaultBenchmarkOptions.convergence, + choices: ["linear", "cv"], + group: benchmarkGroup, + }, + averageCalculation: { + type: "string", + description: "Use simple average of all runs or clean the outliers before calculating average", + default: defaultBenchmarkOptions.averageCalculation, + choices: ["simple", "clean-outliers"], group: benchmarkGroup, }, }; diff --git a/src/cli/run.ts b/src/cli/run.ts index f5068ca..4691a4e 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -22,6 +22,7 @@ import {consoleLog} from "../utils/output.js"; import {HistoryProviderType} from "../history/provider.js"; import {performanceReportComment} from "../github/comments/performanceReportComment.js"; import {GithubCommentTag} from 
"../github/octokit.js"; +import {defaultBenchmarkOptions} from "../benchmark/options.js"; const debug = Debug("@chainsafe/benchmark/cli"); @@ -89,7 +90,11 @@ export async function run(opts_: FileCollectionOptions & StorageOptions & Benchm await historyProvider.writeToHistory(currBench); } - const resultsComp = computePerformanceReport(currBench, prevBench, opts.threshold); + const resultsComp = computePerformanceReport( + currBench, + prevBench, + opts.threshold ?? defaultBenchmarkOptions.threshold + ); debug("detecting to post comment. skipPostComment: %o, isGaRun: %o", !opts.skipPostComment, isGaRun()); if (!opts.skipPostComment && isGaRun()) { diff --git a/src/types.ts b/src/types.ts index b6fd508..e11c970 100644 --- a/src/types.ts +++ b/src/types.ts @@ -65,6 +65,14 @@ export type BenchmarkOpts = { setupFiles?: string[]; /** Trigger GC cleanup every test to have consistent memory usage */ triggerGC?: boolean; + /** + * The algorithm to detect the convergence to stop the benchmark function runs. + * linear - Calculate the moving average among last 3 runs average and compare through quadratic formula + * cv - Coefficient Variance is a statistical tool which calculates data pattern on all runs and calculate median + * */ + convergence?: "linear" | "cv"; + /** Use simple average of all runs or clean the outliers before calculating average */ + averageCalculation?: "simple" | "clean-outliers"; }; // Create partial only for specific keys diff --git a/src/utils/math.ts b/src/utils/math.ts new file mode 100644 index 0000000..34ee6ec --- /dev/null +++ b/src/utils/math.ts @@ -0,0 +1,127 @@ +/** + * Computes the total of all values in the array by sequentially adding each element. + * Handles both positive and negative BigInt values without precision loss. 
+ */ +export function calcSum(arr: bigint[]): bigint { + let s = BigInt(0); + + for (const n of arr) { + s += n; + } + return s; +} + +/** + * Determines the central tendency by dividing the total sum by the number of elements. + * Uses integer division that naturally truncates decimal remainders. + */ +export function calcMean(arr: bigint[]): bigint { + return BigInt(calcSum(arr) / BigInt(arr.length)); +} + +/** + * Quantifies data spread by averaging squared deviations from the mean. + * A value of zero indicates identical values, larger values show greater dispersion. + */ +export function calcVariance(arr: bigint[], mean: bigint): bigint { + let base = BigInt(0); + + for (const n of arr) { + const diff = n - mean; + base += diff * diff; + } + + return base / BigInt(arr.length); +} + +/** + * Organizes values from smallest to largest while preserving the original array. + * Essential for percentile-based calculations like median and quartiles. + */ +export function sortData(arr: bigint[]): bigint[] { + return [...arr].sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); +} + +/** + * Identifies the middle value that separates higher and lower halves of the dataset. + * For even-sized arrays, averages the two central values to find the midpoint. + */ +export function calcMedian(arr: bigint[], sorted: boolean): bigint { + // 1. Sort the BigInt array + const data = sorted ? arr : sortData(arr); + + // 3. Calculate median + const mid = Math.floor(data.length / 2); + if (data.length % 2 === 0) { + return (data[mid - 1] + data[mid]) / BigInt(2); // Average two middle values + } else { + return data[mid]; // Single middle value + } +} + +/** + * Determines cutoff points that divide data into four equal-frequency segments. + * Uses linear interpolation to estimate values between actual data points. + */ +export function calcQuartile(arr: bigint[], sorted: boolean, percentile: number): number { + const sortedData = sorted ? 
arr : sortData(arr); + + const index = (sortedData.length - 1) * percentile; + const floor = Math.floor(index); + const fraction = index - floor; + + if (sortedData[floor + 1] !== undefined) { + return Number(sortedData[floor]) + fraction * Number(sortedData[floor + 1] - sortedData[floor]); + } + + return Number(sortedData[floor]); +} + +/** + * Configures how aggressively outlier detection removes edge values. + * - Mild: Removes typical anomalies (e.g., temporary CPU spikes) + * - Strict: Only filters extreme deviations (e.g., measurement errors) + */ +export enum OutlierSensitivity { + /** + * A standard multiplier for detecting mild outliers. Captures ~99.3% of normally distributed data. + */ + Mild = 1.5, + /** + * A stricter multiplier for detecting extreme outliers. Captures ~99.99% of normally distributed data. + */ + Strict = 3.0, +} + +/** + * Isolates the core dataset by excluding values far from the central cluster. + * Uses quartile ranges to establish inclusion boundaries, preserving data integrity + * while eliminating measurement noise. Sorting can be bypassed for pre-processed data. + * + * We use the `IQR` Interquartile Range method to detect the outliers. IQR is distribution + * of difference of Q3 - Q1 and represents the middle 50% of the data.: + * - Q1 (First Quartile): The 25th percentile (25% of the data is below this value). + * - Q3 (Third Quartile): The 75th percentile (75% of the data is below this value). + * + * The `OutlierSensitivity` is scaling factors applied to the IQR to determine how far data points + * can deviate from the quartiles before being considered outliers. + */ +export function filterOutliers(arr: bigint[], sorted: boolean, sensitivity: OutlierSensitivity): bigint[] { + if (arr.length < 4) return arr; // Too few data points + + const data = sorted ? 
arr : sortData(arr); + + // Calculate quartiles and IQR + const q1 = calcQuartile(data, true, 0.25); + const q3 = calcQuartile(data, true, 0.75); + const iqr = q3 - q1; + + // Define outlier bounds (adjust multiplier for sensitivity) + const lowerBound = q1 - sensitivity * iqr; + const upperBound = q3 + sensitivity * iqr; + + // Filter original BigInt values + return data.filter((n) => { + return n >= lowerBound && n <= upperBound; + }); +} diff --git a/test/perf/errors.test.ts b/test/perf/errors.test.ts new file mode 100644 index 0000000..c6dfb98 --- /dev/null +++ b/test/perf/errors.test.ts @@ -0,0 +1,49 @@ +import {bench, describe} from "../../src/index.js"; + +// This test file is to validate the error cases manually +// should not be included into actual benchmarks as there are cases +// in this file which will always fail. +describe("Hooks", () => { + bench("normal benchmark", () => { + const arr = Array.from({length: 10}, (_, i) => i); + arr.reduce((total, curr) => total + curr, 0); + }); + + bench.skip("normal skipped", () => { + const arr = Array.from({length: 10}, (_, i) => i); + arr.reduce((total, curr) => total + curr, 0); + }); + + describe("before", () => { + bench({ + id: "before failed", + before: () => { + throw new Error("Failed in before"); + }, + fn: () => { + const arr = Array.from({length: 10}, (_, i) => i); + arr.reduce((total, curr) => total + curr, 0); + }, + }); + }); + + describe("beforeEach", () => { + bench({ + id: "beforeEach failed", + beforeEach: () => { + throw new Error("Failed in beforeEach"); + }, + fn: () => { + const arr = Array.from({length: 10}, (_, i) => i); + arr.reduce((total, curr) => total + curr, 0); + }, + }); + }); + + bench({ + id: "error during fn", + fn: () => { + throw new Error("Failed in fn"); + }, + }); +}); diff --git a/test/perf/iteration.test.ts b/test/perf/iteration.test.ts index f5740b7..ac6f0db 100644 --- a/test/perf/iteration.test.ts +++ b/test/perf/iteration.test.ts @@ -8,7 +8,7 @@ import {bench, 
describe, setBenchOpts} from "../../src/index.js"; // byteArrayEquals with valueOf() 853971.0 ops/s 1.171000 us/op 9963051 runs 16.07 s describe("Array iteration", () => { - setBenchOpts({maxMs: 60 * 1000, convergeFactor: 0.1 / 100}); + setBenchOpts({maxMs: 60 * 1000, convergeFactor: 1 / 100}); // nonce = 5 const n = 1e6; diff --git a/test/unit/utils/math.test.ts b/test/unit/utils/math.test.ts new file mode 100644 index 0000000..8fc2915 --- /dev/null +++ b/test/unit/utils/math.test.ts @@ -0,0 +1,225 @@ +import {describe, it, expect} from "vitest"; +import { + calcSum, + calcMean, + calcVariance, + sortData, + calcMedian, + calcQuartile, + OutlierSensitivity, + filterOutliers, +} from "../../../src/utils/math.js"; + +describe("math utility functions", () => { + describe("calcSum", () => { + it("should return 0n for an empty array", () => { + expect(calcSum([])).toBe(BigInt(0)); + }); + + it("should correctly sum an array of positive BigInts", () => { + const arr = [1n, 2n, 3n, 4n]; + expect(calcSum(arr)).toBe(10n); + }); + + it("should correctly sum an array with negative BigInts", () => { + const arr = [-1n, 2n, -3n, 4n]; + // -1 + 2 = 1; 1 - 3 = -2; -2 + 4 = 2 + expect(calcSum(arr)).toBe(2n); + }); + + it("should handle large BigInt values without overflow", () => { + const big1 = BigInt("9007199254740991"); // ~ Number.MAX_SAFE_INTEGER + const big2 = BigInt("9007199254740992"); + expect(calcSum([big1, big2])).toBe(big1 + big2); + }); + }); + + describe("calcMean", () => { + it("should throw or behave predictably for an empty array", () => { + // By default, dividing by BigInt(0) will throw in JavaScript. + // If you want a different behavior, you can wrap your function or catch errors here. 
+ expect(() => calcMean([])).toThrow(); + }); + + it("should correctly calculate the mean of a single-element array", () => { + const arr = [5n]; + expect(calcMean(arr)).toBe(5n); + }); + + it("should correctly calculate the mean of multiple BigInts", () => { + const arr = [2n, 4n, 6n]; + // sum=12, length=3 => mean=4 + expect(calcMean(arr)).toBe(4n); + }); + + it("should handle negative values correctly", () => { + const arr = [-5n, -15n, 10n]; + // sum=-10, length=3 => mean=-3.333..., but truncated to BigInt => -3n if using integer division + expect(calcMean(arr)).toBe(-3n); + }); + }); + + describe("calcVariance", () => { + it("should compute variance for a small sample of integers", () => { + const arr = [2n, 4n, 4n, 6n, 8n]; + // mean = (2+4+4+6+8)/5 = 24/5 = 4.8 => truncated to 4n if using integer division + // If mean=4n, diffs = (-2,0,0,2,4), squares = (4,0,0,4,16), sum=24 => var=24/5=4.8 => truncated to 4n + const meanBigInt = calcMean(arr); + const varianceBigInt = calcVariance(arr, meanBigInt); + expect(varianceBigInt).toBe(4n); + }); + + it("should handle a single-element array (variance=0)", () => { + const arr = [100n]; + const mean = calcMean(arr); // 100n + const variance = calcVariance(arr, mean); + expect(variance).toBe(0n); + }); + + it("should handle negative values", () => { + const arr = [-10n, -4n, -2n]; + // sum = -16, length=3 => mean = floor(-16/3) = -5n + // diffs = (-5,1,3), squares=(25,1,9)=35 => var=35/3=11 => 11n + const mean = calcMean(arr); + const variance = calcVariance(arr, mean); + expect(variance).toBe(11n); + }); + + it("should return 0 for an array of identical values", () => { + const arr = [5n, 5n, 5n]; + const mean = calcMean(arr); + const variance = calcVariance(arr, mean); + expect(variance).toBe(0n); + }); + }); + + describe("sortData", () => { + it("should return a new sorted array without mutating the original", () => { + const arr = [5n, 1n, 3n]; + const sorted = sortData(arr); + expect(sorted).toEqual([1n, 3n, 5n]); 
+ // Ensure original is unchanged + expect(arr).toEqual([5n, 1n, 3n]); + }); + + it("should handle negative and positive numbers", () => { + const arr = [0n, -1n, 10n, -5n, 2n]; + const sorted = sortData(arr); + expect(sorted).toEqual([-5n, -1n, 0n, 2n, 10n]); + }); + + it("should handle an empty array", () => { + expect(sortData([])).toEqual([]); + }); + + it("should handle an already sorted array", () => { + expect(sortData([1n, 2n, 3n, 4n])).toEqual([1n, 2n, 3n, 4n]); + }); + }); + + describe("calcMedian", () => { + it("should throw or handle empty array (no median)", () => { + expect(() => calcMedian([], false)).toThrow(); + }); + + it("should return the middle element when the array length is odd", () => { + const arr = [3n, 1n, 2n]; + // sorted = [1n, 2n, 3n], median = 2n + expect(calcMedian(arr, false)).toBe(2n); + }); + + it("should return the average of two middle elements when the array length is even", () => { + const arr = [3n, 1n, 2n, 4n]; + // sorted = [1n, 2n, 3n, 4n] + // middle indices = 1,2 => average => (2n+3n)/2n=2n + expect(calcMedian(arr, false)).toBe(2n); + }); + + it("should skip re-sorting if 'sorted=true' is provided", () => { + // already sorted + const arr = [1n, 2n, 3n, 4n]; + expect(calcMedian(arr, true)).toBe(2n); // middle indices => 1n,2n => average=2n + }); + }); + + describe("calcQuartile", () => { + const sortedData = sortData([1n, 2n, 4n, 10n, 20n, 100n]); + + it("should return the first quartile (Q1) => percentile=0.25", () => { + // sorted array = [1n, 2n, 4n, 10n, 20n, 100n] + // length=6 => index = (6-1)*0.25=1.25 => floor=1 => fraction=0.25 + // base=2n, next=4n => difference=2n => fraction=0.25 => 2 + 0.25*2=2.5 => ~ BigInt(2.5) + // Because we must do BigInt arithmetic carefully, the function does Number(...) 
inside
+      // => the result = 2 + 0.25*(4-2) = 2.5, returned as a plain number (not truncated to a BigInt)
+      // i.e. base + fraction*(next - base) => 2 + 0.25*2 => 2.5
+      const q1 = calcQuartile(sortedData, true, 0.25);
+      expect(q1).toBe(2.5);
+    });
+
+    it("should return the third quartile (Q3) => percentile=0.75", () => {
+      // index=(6-1)*0.75=3.75 => floor=3 => fraction=0.75
+      // base=10n, next=20n => difference=10 => 10 + 0.75*10 = 17.5 (returned as a number, no rounding)
+      const q3 = calcQuartile(sortedData, true, 0.75);
+      expect(q3).toBe(17.5);
+    });
+
+    it("should gracefully handle the highest index boundary (percentile=1.0)", () => {
+      // index=(6-1)*1.0=5 => floor=5 => fraction=0 => returns Number(data[5]) => 100
+      const maxVal = calcQuartile(sortedData, true, 1.0);
+      expect(maxVal).toBe(100);
+    });
+
+    it("should gracefully handle the lowest index boundary (percentile=0.0)", () => {
+      // index=(6-1)*0.0=0 => floor=0 => fraction=0 => returns Number(data[0]) => 1
+      const minVal = calcQuartile(sortedData, true, 0.0);
+      expect(minVal).toBe(1);
+    });
+
+    it("should handle a single-element array => always that element", () => {
+      const arr = [42n];
+      expect(calcQuartile(arr, true, 0.25)).toBe(42);
+      expect(calcQuartile(arr, true, 0.75)).toBe(42);
+    });
+  });
+
+  describe("filterOutliers", () => {
+    it("should return the same array if length < 4", () => {
+      const arr = [1n, 100n];
+      expect(filterOutliers(arr, false, OutlierSensitivity.Mild)).toEqual([1n, 100n]);
+    });
+
+    it("should remove outliers using the Mild (1.5x IQR) approach", () => {
+      // Example: [1n, 2n, 4n, 10n, 20n, 100n]
+      // sorted => [1n,2n,4n,10n,20n,100n]
+      // Q1=2n, Q3=20n => iqr=18 => mild => +/- 1.5*18=27 => lower=2-27=-25 => upper=20+27=47
+      // So any element outside -25..47 is out => 100n is out
+      const arr = [20n, 100n, 2n, 10n, 1n, 4n];
+      const filtered = filterOutliers(arr, false, OutlierSensitivity.Mild);
+      expect(filtered).toEqual([1n, 2n, 4n, 10n, 20n]);
+    });
+
+    it("should remove outliers using the Strict (3.0x IQR) 
approach", () => { + // same array => Q1=2n, Q3=20n => iqr=18 => strict => +/- 3.0*18=54 => lower=-52 => upper=74 + // 100 is outside => filter it out + const arr = [20n, 100n, 2n, 10n, 1n, 4n]; + const filtered = filterOutliers(arr, false, OutlierSensitivity.Strict); + expect(filtered).toEqual([1n, 2n, 4n, 10n, 20n]); + }); + + it("should handle negative values correctly", () => { + // e.g. [-100n, -10n, -5n, -2n, -1n, 0n, 1n, 5n, 6n] + // We'll skip the exact math here, but we test that they are sorted and outliers removed + const arr = [-10n, 6n, -2n, -100n, -5n, 1n, -1n, 5n, 0n]; + const filtered = filterOutliers(arr, false, OutlierSensitivity.Mild); + // We can check that -100n is probably an outlier + expect(filtered).not.toContain(-100n); + }); + + it("should not filter anything if all values are within the mild IQR range", () => { + const arr = [10n, 12n, 11n, 9n, 8n, 10n, 10n]; + const filtered = filterOutliers(arr, false, OutlierSensitivity.Mild); + // all within a small range => no outliers + expect(filtered).toEqual(sortData(arr)); + }); + }); +});