From 0c7e0eb58f10e6e3200e14458369f7005518a59e Mon Sep 17 00:00:00 2001 From: lodekeeper Date: Sat, 28 Mar 2026 15:24:16 +0000 Subject: [PATCH] fix: log benchmark failures and fail CI instead of silently dropping results The process() method counted failed tests but returned store.getAllResults() (which only contains passed results) whenever passed + skipped + failed === total. This meant benchmark failures were silently dropped as long as at least one other benchmark in the run passed. Now: - Runner logs each failure via consoleError (summary first, then per-benchmark) - Runner exposes failedCount so callers can check - Passed results still flow through persistence + reporting pipeline - run.ts checks failedCount after persistence and throws (respecting noThrow) This ensures CI fails on benchmark errors while still persisting/reporting results for benchmarks that did pass. Ref: ChainSafe/lodestar#7484 --- src/benchmark/runner.ts | 18 +++++++++++++++--- src/cli/run.ts | 4 ++++ src/utils/output.ts | 4 ++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/benchmark/runner.ts b/src/benchmark/runner.ts index ba1ff75..de643ce 100644 --- a/src/benchmark/runner.ts +++ b/src/benchmark/runner.ts @@ -10,6 +10,7 @@ import { } from "@vitest/runner"; import Debug from "debug"; import {Benchmark, BenchmarkOpts, BenchmarkResults} from "../types.ts"; +import {consoleError} from "../utils/output.ts"; import {store} from "./globalState.ts"; import {BenchmarkReporter} from "./reporter.ts"; @@ -17,6 +18,7 @@ const debug = Debug("@chainsafe/benchmark/runner"); export class BenchmarkRunner implements VitestRunner { readonly triggerGC: boolean; + failedCount = 0; readonly config: VitestRunnerConfig; readonly reporter: BenchmarkReporter; readonly prevBench: Benchmark | null; @@ -98,10 +100,20 @@ export class BenchmarkRunner implements VitestRunner { debug("finished tests. passed: %i, skipped: %i, failed: %i", passed.length, skipped.length, failed.length); - if (passed.length + skipped.length + failed.length === res.length) { - return store.getAllResults(); + if (passed.length + skipped.length + failed.length !== res.length) { + throw new Error("Some tests returned with unknown state"); } - throw new Error("Some tests cause returned with unknown state"); + if (failed.length > 0) { + this.failedCount = failed.length; + consoleError(`${failed.length} benchmark(s) failed:\n`); + for (const f of failed) { + const error = f.result?.errors?.[0]; + const errorMsg = error?.message ?? error?.toString() ?? "unknown error"; + consoleError(` ✖ ${f.name}: ${errorMsg}`); + } + } + + return store.getAllResults(); } } diff --git a/src/cli/run.ts b/src/cli/run.ts index 65cac0f..f727fbb 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -114,6 +114,10 @@ export async function run(opts_: FileCollectionOptions & StorageOptions & Benchm if (resultsComp.someFailed && !opts.noThrow) { throw Error("Performance regression"); } + + if (runner.failedCount > 0 && !opts.noThrow) { + throw Error(`${runner.failedCount} benchmark(s) failed with errors`); + } } catch (err) { consoleLog(`Error processing benchmark files. ${(err as Error).message}`); throw err; diff --git a/src/utils/output.ts b/src/utils/output.ts index ddc0a69..9fe68d0 100644 --- a/src/utils/output.ts +++ b/src/utils/output.ts @@ -40,3 +40,7 @@ export function consoleLog(...args: unknown[]): void { // biome-ignore lint/suspicious/noConsoleLog: We explicitly need to log some output console.log(...args); } + +export function consoleError(...args: unknown[]): void { + console.error(...args); +}