From 0c7e0eb58f10e6e3200e14458369f7005518a59e Mon Sep 17 00:00:00 2001
From: lodekeeper <lodekeeper@users.noreply.github.com>
Date: Sat, 28 Mar 2026 15:24:16 +0000
Subject: [PATCH] fix: log benchmark failures and fail CI instead of silently
 dropping results

The process() method counted failed tests but returned store.getAllResults()
(which only contains passed results) whenever passed + skipped + failed === total.
This meant benchmark failures were silently dropped as long as at least one other
benchmark in the run passed.

Now:
- Runner logs each failure via consoleError (summary first, then per-benchmark)
- Runner exposes failedCount so callers can check
- Passed results still flow through persistence + reporting pipeline
- run.ts checks failedCount after persistence and throws (respecting noThrow)

This ensures CI fails on benchmark errors while still persisting/reporting
results for benchmarks that did pass.

Ref: ChainSafe/lodestar#7484
---
 src/benchmark/runner.ts | 18 +++++++++++++++---
 src/cli/run.ts          |  4 ++++
 src/utils/output.ts     |  4 ++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/benchmark/runner.ts b/src/benchmark/runner.ts
index ba1ff75..de643ce 100644
--- a/src/benchmark/runner.ts
+++ b/src/benchmark/runner.ts
@@ -10,6 +10,7 @@ import {
 } from "@vitest/runner";
 import Debug from "debug";
 import {Benchmark, BenchmarkOpts, BenchmarkResults} from "../types.ts";
+import {consoleError} from "../utils/output.ts";
 import {store} from "./globalState.ts";
 import {BenchmarkReporter} from "./reporter.ts";
 
@@ -17,6 +18,7 @@ const debug = Debug("@chainsafe/benchmark/runner");
 
 export class BenchmarkRunner implements VitestRunner {
   readonly triggerGC: boolean;
+  failedCount = 0;
   readonly config: VitestRunnerConfig;
   readonly reporter: BenchmarkReporter;
   readonly prevBench: Benchmark | null;
@@ -98,10 +100,20 @@ export class BenchmarkRunner implements VitestRunner {
 
     debug("finished tests. passed: %i, skipped: %i, failed: %i", passed.length, skipped.length, failed.length);
 
-    if (passed.length + skipped.length + failed.length === res.length) {
-      return store.getAllResults();
+    if (passed.length + skipped.length + failed.length !== res.length) {
+      throw new Error("Some tests returned with unknown state");
     }
 
-    throw new Error("Some tests cause returned with unknown state");
+    if (failed.length > 0) {
+      this.failedCount = failed.length;
+      consoleError(`${failed.length} benchmark(s) failed:\n`);
+      for (const f of failed) {
+        const error = f.result?.errors?.[0];
+        const errorMsg = error?.message ?? error?.toString() ?? "unknown error";
+        consoleError(`  ✖ ${f.name}: ${errorMsg}`);
+      }
+    }
+
+    return store.getAllResults();
   }
 }
diff --git a/src/cli/run.ts b/src/cli/run.ts
index 65cac0f..f727fbb 100644
--- a/src/cli/run.ts
+++ b/src/cli/run.ts
@@ -114,6 +114,10 @@ export async function run(opts_: FileCollectionOptions & StorageOptions & Benchm
     if (resultsComp.someFailed && !opts.noThrow) {
       throw Error("Performance regression");
     }
+
+    if (runner.failedCount > 0 && !opts.noThrow) {
+      throw Error(`${runner.failedCount} benchmark(s) failed with errors`);
+    }
   } catch (err) {
     consoleLog(`Error processing benchmark files. ${(err as Error).message}`);
     throw err;
diff --git a/src/utils/output.ts b/src/utils/output.ts
index ddc0a69..9fe68d0 100644
--- a/src/utils/output.ts
+++ b/src/utils/output.ts
@@ -40,3 +40,7 @@ export function consoleLog(...args: unknown[]): void {
   // biome-ignore lint/suspicious/noConsoleLog: We explicitly need to log some output
   console.log(...args);
 }
+
+export function consoleError(...args: unknown[]): void {
+  console.error(...args);
+}