diff --git a/package.json b/package.json index 83b06ca..5a0e8b2 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,7 @@ "test:unit": "vitest run test/unit/**/*.test.ts", "lint": "eslint --color src/ test/", "prepublishOnly": "yarn build", - "benchmark": "node --loader ts-node/esm ./src/cli/cli.ts 'test/perf/**/*.test.ts'", + "benchmark": "node --loader ts-node/esm ./src/cli/cli.ts 'test/perf/**/@(!(errors)).test.ts'", "writeDocs": "node --loader ts-node/esm scripts/writeOptionsMd.ts" }, "devDependencies": { diff --git a/src/benchmark/benchmarkFn.ts b/src/benchmark/benchmarkFn.ts index 7c6f705..decd330 100644 --- a/src/benchmark/benchmarkFn.ts +++ b/src/benchmark/benchmarkFn.ts @@ -5,29 +5,19 @@ import {createChainable} from "@vitest/runner/utils"; import {store} from "./globalState.js"; import {BenchApi, BenchmarkOpts, BenchmarkRunOptsWithFn, PartialBy} from "../types.js"; import {runBenchFn} from "./runBenchmarkFn.js"; -import {optionsDefault} from "../cli/options.js"; +import {getBenchmarkOptionsWithDefaults} from "./options.js"; export const bench: BenchApi = createBenchmarkFunction(function ( this: Record<"skip" | "only", boolean | undefined>, idOrOpts: string | PartialBy, "fn">, fn?: (arg: T) => void | Promise ) { - const {fn: benchTask, ...opts} = coerceToOptsObj(idOrOpts, fn); + const {fn: benchTask, before, beforeEach, ...opts} = coerceToOptsObj(idOrOpts, fn); const currentSuite = getCurrentSuite(); const globalOptions = store.getGlobalOptions() ?? {}; - const parentOptions = store.getOptions(getCurrentSuite()) ?? {}; - const options = {...globalOptions, ...parentOptions, ...opts}; - const {timeoutBench, maxMs, minMs} = options; - - let timeout = timeoutBench ?? optionsDefault.timeoutBench; - if (maxMs && maxMs > timeout) { - timeout = maxMs * 1.5; - } - - if (minMs && minMs > timeout) { - timeout = minMs * 1.5; - } + const parentOptions = store.getOptions(currentSuite) ?? 
{}; + const options = getBenchmarkOptionsWithDefaults({...globalOptions, ...parentOptions, ...opts}); async function handler(): Promise { // Ensure bench id is unique @@ -35,17 +25,20 @@ export const bench: BenchApi = createBenchmarkFunction(function ( throw Error(`test titles must be unique, duplicated: '${opts.id}'`); } - // Persist full results if requested. dir is created in `beforeAll` - const benchmarkResultsCsvDir = process.env.BENCHMARK_RESULTS_CSV_DIR; - const persistRunsNs = Boolean(benchmarkResultsCsvDir); - - const {result, runsNs} = await runBenchFn({...options, fn: benchTask}, persistRunsNs); + const {result, runsNs} = await runBenchFn({ + ...options, + fn: benchTask, + before, + beforeEach, + } as BenchmarkRunOptsWithFn); // Store result for: // - to persist benchmark data latter // - to render with the custom reporter store.setResult(opts.id, result); + // Persist full results if requested. dir is created in `beforeAll` + const benchmarkResultsCsvDir = process.env.BENCHMARK_RESULTS_CSV_DIR; if (benchmarkResultsCsvDir) { fs.mkdirSync(benchmarkResultsCsvDir, {recursive: true}); const filename = `${result.id}.csv`; @@ -59,27 +52,25 @@ export const bench: BenchApi = createBenchmarkFunction(function ( only: opts.only ?? 
this.only, sequential: true, concurrent: false, - timeout, + timeout: options.timeoutBench, meta: { "chainsafe/benchmark": true, }, }); - const {id: _, ...optionsWithoutId} = opts; setFn(task, handler); - store.setOptions(task, optionsWithoutId); - - task.onFinished = [ - () => { - store.removeOptions(task); - }, - () => { - // Clear up the assigned handler to clean the memory - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-expect-error - setFn(task, null); - }, - ]; + store.setOptions(task, opts); + + const cleanup = (): void => { + store.removeOptions(task); + // Clear up the assigned handler to clean the memory + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-expect-error + setFn(task, null); + }; + + task.onFailed = [cleanup]; + task.onFinished = [cleanup]; }); function createBenchmarkFunction( diff --git a/src/benchmark/options.ts b/src/benchmark/options.ts new file mode 100644 index 0000000..937b5b7 --- /dev/null +++ b/src/benchmark/options.ts @@ -0,0 +1,40 @@ +import {BenchmarkOpts} from "../types.js"; + +export const defaultBenchmarkOptions: Required = { + minRuns: 1, + maxRuns: Infinity, + minMs: 100, + maxMs: Infinity, + maxWarmUpRuns: 1000, + maxWarmUpMs: 500, + convergeFactor: 0.5 / 100, // 0.5% + runsFactor: 1, + yieldEventLoopAfterEach: false, + timeoutBench: 10_000, + noThreshold: false, + triggerGC: false, + setupFiles: [], + skip: false, + only: false, + threshold: 2, + convergence: "linear", + averageCalculation: "simple", +}; + +export function getBenchmarkOptionsWithDefaults(opts: BenchmarkOpts): Required { + const options = Object.assign({}, defaultBenchmarkOptions, opts); + + if (options.noThreshold) { + options.threshold = Infinity; + } + + if (options.maxMs && options.maxMs > options.timeoutBench) { + options.timeoutBench = options.maxMs * 1.5; + } + + if (options.minMs && options.minMs > options.timeoutBench) { + options.timeoutBench = options.minMs * 1.5; + } + + return options; +} diff 
--git a/src/benchmark/reporter.ts b/src/benchmark/reporter.ts index 36c5b0e..c3f6a2f 100644 --- a/src/benchmark/reporter.ts +++ b/src/benchmark/reporter.ts @@ -3,7 +3,7 @@ import {color, consoleLog, symbols} from "../utils/output.js"; import {store} from "./globalState.js"; import {Benchmark, BenchmarkOpts, BenchmarkResult} from "../types.js"; import {formatResultRow} from "./format.js"; -import {optionsDefault} from "../cli/options.js"; +import {defaultBenchmarkOptions} from "./options.js"; export class BenchmarkReporter { indents = 0; @@ -16,7 +16,7 @@ export class BenchmarkReporter { constructor({prevBench, benchmarkOpts}: {prevBench: Benchmark | null; benchmarkOpts: BenchmarkOpts}) { this.prevResults = new Map(); - this.threshold = benchmarkOpts.threshold ?? optionsDefault.threshold; + this.threshold = benchmarkOpts.threshold ?? defaultBenchmarkOptions.threshold; if (prevBench) { for (const bench of prevBench.results) { @@ -25,9 +25,14 @@ export class BenchmarkReporter { } } - // eslint-disable-next-line @typescript-eslint/no-unused-vars - onTestStarted(_task: Task): void { - // this.log(task.name, "started"); + onTestStarted(task: Task): void { + if (task.mode === "skip") { + this.skipped++; + consoleLog(`${this.indent()}${color("pending", " - %s")}`, task.name); + } else if (task.mode === "todo") { + this.skipped++; + consoleLog(`${this.indent()}${color("pending", " - %s")}`, task.name); + } } onTestFinished(task: Task): void { @@ -46,8 +51,9 @@ export class BenchmarkReporter { } case "fail": { this.failed++; - consoleLog(this.indent() + color("fail", " %d) %s"), ++this.failed, task.name); - consoleLog(task.result?.errors); + const fmt = this.indent() + color("fail", " " + symbols.err) + color("fail", " %s"); + consoleLog(fmt, task.name); + consoleLog(task.result?.errors?.map((e) => e.stackStr).join("\n")); break; } case "pass": { diff --git a/src/benchmark/runBenchmarkFn.ts b/src/benchmark/runBenchmarkFn.ts index 9e5adb0..b093633 100644 --- 
a/src/benchmark/runBenchmarkFn.ts +++ b/src/benchmark/runBenchmarkFn.ts @@ -1,4 +1,12 @@ import {BenchmarkResult, BenchmarkOpts} from "../types.js"; +import {calcSum, filterOutliers, OutlierSensitivity} from "../utils/math.js"; +import {getBenchmarkOptionsWithDefaults} from "./options.js"; +import {createCVConvergenceCriteria, createLinearConvergenceCriteria} from "./termination.js"; + +const convergenceCriteria = { + ["linear"]: createLinearConvergenceCriteria, + ["cv"]: createCVConvergenceCriteria, +}; export type BenchmarkRunOpts = BenchmarkOpts & { id: string; @@ -12,49 +20,54 @@ export type BenchmarkRunOptsWithFn = BenchmarkOpts & { }; export async function runBenchFn( - opts: BenchmarkRunOptsWithFn, - persistRunsNs?: boolean + opts: BenchmarkRunOptsWithFn ): Promise<{result: BenchmarkResult; runsNs: bigint[]}> { - const minRuns = opts.minRuns || 1; - const maxRuns = opts.maxRuns || Infinity; - const maxMs = opts.maxMs || Infinity; - const minMs = opts.minMs || 100; - const maxWarmUpMs = opts.maxWarmUpMs !== undefined ? opts.maxWarmUpMs : 500; - const maxWarmUpRuns = opts.maxWarmUpRuns !== undefined ? opts.maxWarmUpRuns : 1000; - // Ratio of maxMs that the warmup is allow to take from ellapsedMs + const {id, before, beforeEach, fn, ...rest} = opts; + const benchOptions = getBenchmarkOptionsWithDefaults(rest); + const {maxMs, maxRuns, maxWarmUpMs, maxWarmUpRuns, runsFactor, threshold, convergence, averageCalculation} = + benchOptions; + + if (maxWarmUpMs >= maxMs) { + throw new Error(`Warmup time must be lower than max run time. maxWarmUpMs: ${maxWarmUpMs}, maxMs: ${maxMs}`); + } + + if (maxWarmUpRuns >= maxRuns) { + throw new Error(`Warmup runs must be lower than max runs. maxWarmUpRuns: ${maxWarmUpRuns}, maxRuns: ${maxRuns}`); + } + + if (averageCalculation !== "simple" && averageCalculation !== "clean-outliers") { + throw new Error(`Average calculation logic is not defined. 
${averageCalculation}`); + } + + if (convergence !== "linear" && convergence !== "cv") { + throw new Error(`Unknown convergence value ${convergence}`); + } + + // Ratio of maxMs that the warmup is allow to take from elapsedMs const maxWarmUpRatio = 0.5; - const convergeFactor = opts.convergeFactor || 0.5 / 100; // 0.5% - const runsFactor = opts.runsFactor || 1; - const maxWarmUpNs = BigInt(maxWarmUpMs) * BigInt(1e6); - const sampleEveryMs = 100; + const maxWarmUpNs = BigInt(benchOptions.maxWarmUpMs) * BigInt(1e6); const runsNs: bigint[] = []; const startRunMs = Date.now(); + const shouldTerminate = convergenceCriteria[convergence](startRunMs, benchOptions); + let runIdx = 0; let totalNs = BigInt(0); + let totalWarmUpNs = BigInt(0); let totalWarmUpRuns = 0; - let prevAvg0 = 0; - let prevAvg1 = 0; - let lastConvergenceSample = startRunMs; - let isWarmUp = maxWarmUpNs > 0 && maxWarmUpRuns > 0; + let isWarmUpPhase = maxWarmUpNs > 0 && maxWarmUpRuns > 0; - const inputAll = opts.before ? await opts.before() : (undefined as unknown as T2); + const inputAll = before ? await before() : (undefined as unknown as T2); while (true) { - const ellapsedMs = Date.now() - startRunMs; - const mustStop = ellapsedMs >= maxMs || runIdx >= maxRuns; - const mayStop = ellapsedMs > minMs && runIdx > minRuns; - // Exceeds limits, must stop now - if (mustStop) { - break; - } + const elapsedMs = Date.now() - startRunMs; - const input = opts.beforeEach ? await opts.beforeEach(inputAll, runIdx) : (undefined as unknown as T); + const input = beforeEach ? 
await beforeEach(inputAll, runIdx) : (undefined as unknown as T); const startNs = process.hrtime.bigint(); - await opts.fn(input); + await fn(input); const endNs = process.hrtime.bigint(); const runNs = endNs - startNs; @@ -64,54 +77,26 @@ export async function runBenchFn( await new Promise((r) => setTimeout(r, 0)); } - if (isWarmUp) { + if (isWarmUpPhase) { // Warm-up, do not count towards results totalWarmUpRuns += 1; totalWarmUpNs += runNs; // On any warm-up finish condition, mark isWarmUp = true to prevent having to check them again - if (totalWarmUpNs >= maxWarmUpNs || totalWarmUpRuns >= maxWarmUpRuns || ellapsedMs / maxMs >= maxWarmUpRatio) { - isWarmUp = false; - } - } else { - // Persist results - runIdx += 1; - totalNs += runNs; - // If the caller wants the exact times of all runs, persist them - if (persistRunsNs) runsNs.push(runNs); - - // When is a good time to stop a benchmark? A naive answer is after N miliseconds or M runs. - // This code aims to stop the benchmark when the average fn run time has converged at a value - // within a given convergence factor. To prevent doing expensive math to often for fast fn, - // it only takes samples every `sampleEveryMs`. It stores two past values to be able to compute - // a very rough linear and quadratic convergence. 
- if (Date.now() - lastConvergenceSample > sampleEveryMs) { - lastConvergenceSample = Date.now(); - const avg = Number(totalNs / BigInt(runIdx)); - - // Compute convergence (1st order + 2nd order) - const a = prevAvg0; - const b = prevAvg1; - const c = avg; - - // Only do convergence math if it may stop - if (mayStop) { - // Aprox linear convergence - const convergence1 = Math.abs(c - a); - // Aprox quadratic convergence - const convergence2 = Math.abs(b - (a + c) / 2); - // Take the greater of both to enfore linear and quadratic are below convergeFactor - const convergence = Math.max(convergence1, convergence2) / a; - - // Okay to stop + has converged, stop now - if (convergence < convergeFactor) { - break; - } - } - - prevAvg0 = prevAvg1; - prevAvg1 = avg; + if (totalWarmUpNs >= maxWarmUpNs || totalWarmUpRuns >= maxWarmUpRuns || elapsedMs / maxMs >= maxWarmUpRatio) { + isWarmUpPhase = false; } + + continue; + } + + // Persist results + runIdx += 1; + totalNs += runNs; + runsNs.push(runNs); + + if (shouldTerminate(runIdx, totalNs, runsNs)) { + break; } } @@ -135,15 +120,24 @@ either the before(), beforeEach() or fn() functions are too slow. } } - const averageNs = Number(totalNs / BigInt(runIdx)) / runsFactor; + let averageNs!: number; + + if (averageCalculation === "simple") { + averageNs = Number(totalNs / BigInt(runIdx)) / runsFactor; + } + + if (averageCalculation === "clean-outliers") { + const cleanData = filterOutliers(runsNs, false, OutlierSensitivity.Mild); + averageNs = Number(calcSum(cleanData) / BigInt(cleanData.length)) / runsFactor; + } return { result: { - id: opts.id, + id: id, averageNs, runsDone: runIdx, totalMs: Date.now() - startRunMs, - threshold: opts.noThreshold === true ? 
Infinity : opts.threshold, + threshold, }, runsNs, }; diff --git a/src/benchmark/runner.ts b/src/benchmark/runner.ts index 9973cb0..15e8f9e 100644 --- a/src/benchmark/runner.ts +++ b/src/benchmark/runner.ts @@ -99,11 +99,7 @@ export class BenchmarkRunner implements VitestRunner { debug("finished tests. passed: %i, skipped: %i, failed: %i", passed.length, skipped.length, failed.length); - if (failed.length > 0) { - throw failed[0].result?.errors; - } - - if (passed.length + skipped.length === res.length) { + if (passed.length + skipped.length + failed.length === res.length) { return store.getAllResults(); } diff --git a/src/benchmark/termination.ts b/src/benchmark/termination.ts new file mode 100644 index 0000000..6c94145 --- /dev/null +++ b/src/benchmark/termination.ts @@ -0,0 +1,101 @@ +import {BenchmarkOpts} from "../types.js"; +import {calcMean, calcMedian, calcVariance, filterOutliers, OutlierSensitivity, sortData} from "../utils/math.js"; + +export type TerminationCriteria = (runIdx: number, totalNs: bigint, runNs: bigint[]) => boolean; + +export function createLinearConvergenceCriteria( + startMs: number, + {maxMs, maxRuns, minRuns, minMs, convergeFactor}: Required +): TerminationCriteria { + let prevAvg0 = 0; + let prevAvg1 = 0; + let lastConvergenceSample = startMs; + const sampleEveryMs = 100; + + // eslint-disable-next-line @typescript-eslint/no-unused-vars + return function canTerminate(runIdx: number, totalNs: bigint, _runNs: bigint[]): boolean { + const currentMs = Date.now(); + const elapsedMs = currentMs - startMs; + const mustStop = elapsedMs >= maxMs || runIdx >= maxRuns; + const mayStop = elapsedMs >= minMs && runIdx >= minRuns; + + // Must stop + if (mustStop) return true; + + // When is a good time to stop a benchmark? A naive answer is after N milliseconds or M runs. + // This code aims to stop the benchmark when the average fn run time has converged at a value + // within a given convergence factor. 
To prevent doing expensive math to often for fast fn, + // it only takes samples every `sampleEveryMs`. It stores two past values to be able to compute + // a very rough linear and quadratic convergence.a + if (Date.now() - lastConvergenceSample <= sampleEveryMs) return false; + + lastConvergenceSample = currentMs; + const avg = Number(totalNs / BigInt(runIdx)); + + // Compute convergence (1st order + 2nd order) + const a = prevAvg0; + const b = prevAvg1; + const c = avg; + + if (mayStop) { + // Approx linear convergence + const convergence1 = Math.abs(c - a); + // Approx quadratic convergence + const convergence2 = Math.abs(b - (a + c) / 2); + // Take the greater of both to enforce linear and quadratic are below convergeFactor + const convergence = Math.max(convergence1, convergence2) / a; + + // Okay to stop + has converged, stop now + if (convergence < convergeFactor) return true; + } + + prevAvg0 = prevAvg1; + prevAvg1 = avg; + return false; + }; +} + +export function createCVConvergenceCriteria( + startMs: number, + {maxMs, maxRuns, minRuns, minMs, convergeFactor}: Required +): TerminationCriteria { + let lastConvergenceSample = startMs; + const sampleEveryMs = 100; + const minSamples = minRuns > 5 ? 
minRuns : 5; + const maxSamplesForCV = 1000; + + return function canTerminate(runIdx: number, totalNs: bigint, runsNs: bigint[]): boolean { + const currentMs = Date.now(); + const elapsedMs = currentMs - startMs; + const mustStop = elapsedMs >= maxMs || runIdx >= maxRuns; + const mayStop = elapsedMs >= minMs && runIdx >= minRuns && runIdx > minSamples; + + // Must stop + if (mustStop) return true; + + if (Date.now() - lastConvergenceSample <= sampleEveryMs) return false; + + if (mayStop) { + lastConvergenceSample = currentMs; + + const mean = calcMean(runsNs); + const variance = calcVariance(runsNs, mean); + const cv = Math.sqrt(Number(variance)) / Number(mean); + + if (cv < convergeFactor) return true; + + // If CV does not stabilize we fallback to the median approach + if (runsNs.length > maxSamplesForCV) { + const sorted = sortData(runsNs); + const cleanedRunsNs = filterOutliers(sorted, true, OutlierSensitivity.Mild); + const median = calcMedian(cleanedRunsNs, true); + const mean = calcMean(cleanedRunsNs); + const medianFactor = Math.abs(Number(mean - median)) / Number(median); + + if (medianFactor < convergeFactor) return true; + } + } + + return false; + }; +} diff --git a/src/cli/options.ts b/src/cli/options.ts index 5fb06c8..056e9ba 100644 --- a/src/cli/options.ts +++ b/src/cli/options.ts @@ -1,9 +1,8 @@ import {Options} from "yargs"; import {StorageOptions, BenchmarkOpts, FileCollectionOptions} from "../types.js"; +import {defaultBenchmarkOptions} from "../benchmark/options.js"; export const optionsDefault = { - threshold: 2, - timeoutBench: 10_000, historyLocalPath: "./benchmark_data", historyCacheKey: "benchmark_data", }; @@ -127,74 +126,97 @@ export const benchmarkOptions: ICliCommandOptions = { description: "Ratio of new average time per run vs previos time per run to consider a failure. 
Set to 'Infinity' to disable it.", type: "number", - default: optionsDefault.threshold, + default: defaultBenchmarkOptions.threshold, group: benchmarkGroup, }, maxRuns: { type: "number", description: "Max number of fn() runs, after which the benchmark stops", + default: defaultBenchmarkOptions.maxRuns, group: benchmarkGroup, }, minRuns: { type: "number", description: "Min number of fn() runs before considering stopping the benchmark after converging", + default: defaultBenchmarkOptions.minRuns, group: benchmarkGroup, }, maxMs: { type: "number", description: "Max total miliseconds of runs, after which the benchmark stops", + default: defaultBenchmarkOptions.maxMs, group: benchmarkGroup, }, minMs: { type: "number", description: "Min total miiliseconds of runs before considering stopping the benchmark after converging", + default: defaultBenchmarkOptions.minMs, group: benchmarkGroup, }, maxWarmUpMs: { type: "number", description: "Maximum real benchmark function run time before starting to count towards results. Set to 0 to not warm-up. May warm up for less ms if the `maxWarmUpRuns` condition is met first.", + default: defaultBenchmarkOptions.maxWarmUpMs, group: benchmarkGroup, }, maxWarmUpRuns: { type: "number", description: "Maximum benchmark function runs before starting to count towards results. Set to 0 to not warm-up. May warm up for less ms if the `maxWarmUpMs` condition is met first.", + default: defaultBenchmarkOptions.maxWarmUpRuns, group: benchmarkGroup, }, convergeFactor: { type: "number", description: "Convergance factor (0,1) at which the benchmark automatically stops. 
Set to 1 to disable", + default: defaultBenchmarkOptions.convergeFactor, group: benchmarkGroup, }, runsFactor: { type: "number", description: "If fn() contains a foor loop repeating a task N times, you may set runsFactor = N to scale down the results.", + default: defaultBenchmarkOptions.runsFactor, group: benchmarkGroup, }, yieldEventLoopAfterEach: { type: "boolean", description: "Run `sleep(0)` after each fn() call. Use when the event loop needs to tick to free resources created by fn()", + default: defaultBenchmarkOptions.yieldEventLoopAfterEach, group: benchmarkGroup, }, timeoutBench: { type: "number", description: "Hard timeout for each benchmark", - default: optionsDefault.timeoutBench, + default: defaultBenchmarkOptions.timeoutBench, group: benchmarkGroup, }, setupFiles: { type: "array", description: "List of setup files to load before the tests", - default: [], + default: defaultBenchmarkOptions.setupFiles, group: benchmarkGroup, }, triggerGC: { type: "boolean", description: "Trigger GC (if available) after every benchmark", - default: false, + default: defaultBenchmarkOptions.triggerGC, + group: benchmarkGroup, + }, + convergence: { + type: "string", + description: "The algorithm used to detect the convergence to stop benchmark runs", + default: defaultBenchmarkOptions.convergence, + choices: ["linear", "cv"], + group: benchmarkGroup, + }, + averageCalculation: { + type: "string", + description: "Use simple average of all runs or clean the outliers before calculating average", + default: defaultBenchmarkOptions.averageCalculation, + choices: ["simple", "clean-outliers"], group: benchmarkGroup, }, }; diff --git a/src/cli/run.ts b/src/cli/run.ts index f5068ca..4691a4e 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -22,6 +22,7 @@ import {consoleLog} from "../utils/output.js"; import {HistoryProviderType} from "../history/provider.js"; import {performanceReportComment} from "../github/comments/performanceReportComment.js"; import {GithubCommentTag} from 
"../github/octokit.js"; +import {defaultBenchmarkOptions} from "../benchmark/options.js"; const debug = Debug("@chainsafe/benchmark/cli"); @@ -89,7 +90,11 @@ export async function run(opts_: FileCollectionOptions & StorageOptions & Benchm await historyProvider.writeToHistory(currBench); } - const resultsComp = computePerformanceReport(currBench, prevBench, opts.threshold); + const resultsComp = computePerformanceReport( + currBench, + prevBench, + opts.threshold ?? defaultBenchmarkOptions.threshold + ); debug("detecting to post comment. skipPostComment: %o, isGaRun: %o", !opts.skipPostComment, isGaRun()); if (!opts.skipPostComment && isGaRun()) { diff --git a/src/types.ts b/src/types.ts index b6fd508..e11c970 100644 --- a/src/types.ts +++ b/src/types.ts @@ -65,6 +65,14 @@ export type BenchmarkOpts = { setupFiles?: string[]; /** Trigger GC cleanup every test to have consistent memory usage */ triggerGC?: boolean; + /** + * The algorithm to detect the convergence to stop the benchmark function runs. + * linear - Calculate the moving average among last 3 runs average and compare through quadratic formula + * cv - Coefficient Variance is a statistical tool which calculates data pattern on all runs and calculate median + * */ + convergence?: "linear" | "cv"; + /** Use simple average of all runs or clean the outliers before calculating average */ + averageCalculation?: "simple" | "clean-outliers"; }; // Create partial only for specific keys diff --git a/src/utils/math.ts b/src/utils/math.ts new file mode 100644 index 0000000..34ee6ec --- /dev/null +++ b/src/utils/math.ts @@ -0,0 +1,127 @@ +/** + * Computes the total of all values in the array by sequentially adding each element. + * Handles both positive and negative BigInt values without precision loss. 
+ */ +export function calcSum(arr: bigint[]): bigint { + let s = BigInt(0); + + for (const n of arr) { + s += n; + } + return s; +} + +/** + * Determines the central tendency by dividing the total sum by the number of elements. + * Uses integer division that naturally truncates decimal remainders. + */ +export function calcMean(arr: bigint[]): bigint { + return BigInt(calcSum(arr) / BigInt(arr.length)); +} + +/** + * Quantifies data spread by averaging squared deviations from the mean. + * A value of zero indicates identical values, larger values show greater dispersion. + */ +export function calcVariance(arr: bigint[], mean: bigint): bigint { + let base = BigInt(0); + + for (const n of arr) { + const diff = n - mean; + base += diff * diff; + } + + return base / BigInt(arr.length); +} + +/** + * Organizes values from smallest to largest while preserving the original array. + * Essential for percentile-based calculations like median and quartiles. + */ +export function sortData(arr: bigint[]): bigint[] { + return [...arr].sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); +} + +/** + * Identifies the middle value that separates higher and lower halves of the dataset. + * For even-sized arrays, averages the two central values to find the midpoint. + */ +export function calcMedian(arr: bigint[], sorted: boolean): bigint { + // 1. Sort the BigInt array + const data = sorted ? arr : sortData(arr); + + // 3. Calculate median + const mid = Math.floor(data.length / 2); + if (data.length % 2 === 0) { + return (data[mid - 1] + data[mid]) / BigInt(2); // Average two middle values + } else { + return data[mid]; // Single middle value + } +} + +/** + * Determines cutoff points that divide data into four equal-frequency segments. + * Uses linear interpolation to estimate values between actual data points. + */ +export function calcQuartile(arr: bigint[], sorted: boolean, percentile: number): number { + const sortedData = sorted ? 
arr : sortData(arr); + + const index = (sortedData.length - 1) * percentile; + const floor = Math.floor(index); + const fraction = index - floor; + + if (sortedData[floor + 1] !== undefined) { + return Number(sortedData[floor]) + fraction * Number(sortedData[floor + 1] - sortedData[floor]); + } + + return Number(sortedData[floor]); +} + +/** + * Configures how aggressively outlier detection removes edge values. + * - Mild: Removes typical anomalies (e.g., temporary CPU spikes) + * - Strict: Only filters extreme deviations (e.g., measurement errors) + */ +export enum OutlierSensitivity { + /** + * A standard multiplier for detecting mild outliers. Captures ~99.3% of normally distributed data. + */ + Mild = 1.5, + /** + * A stricter multiplier for detecting extreme outliers. Captures ~99.99% of normally distributed data. + */ + Strict = 3.0, +} + +/** + * Isolates the core dataset by excluding values far from the central cluster. + * Uses quartile ranges to establish inclusion boundaries, preserving data integrity + * while eliminating measurement noise. Sorting can be bypassed for pre-processed data. + * + * We use the `IQR` Interquartile Range method to detect the outliers. IQR is distribution + * of difference of Q3 - Q1 and represents the middle 50% of the data.: + * - Q1 (First Quartile): The 25th percentile (25% of the data is below this value). + * - Q3 (Third Quartile): The 75th percentile (75% of the data is below this value). + * + * The `OutlierSensitivity` is scaling factors applied to the IQR to determine how far data points + * can deviate from the quartiles before being considered outliers. + */ +export function filterOutliers(arr: bigint[], sorted: boolean, sensitivity: OutlierSensitivity): bigint[] { + if (arr.length < 4) return arr; // Too few data points + + const data = sorted ? 
arr : sortData(arr); + + // Calculate quartiles and IQR + const q1 = calcQuartile(data, true, 0.25); + const q3 = calcQuartile(data, true, 0.75); + const iqr = q3 - q1; + + // Define outlier bounds (adjust multiplier for sensitivity) + const lowerBound = q1 - sensitivity * iqr; + const upperBound = q3 + sensitivity * iqr; + + // Filter original BigInt values + return data.filter((n) => { + return n >= lowerBound && n <= upperBound; + }); +} diff --git a/test/perf/errors.test.ts b/test/perf/errors.test.ts new file mode 100644 index 0000000..c6dfb98 --- /dev/null +++ b/test/perf/errors.test.ts @@ -0,0 +1,49 @@ +import {bench, describe} from "../../src/index.js"; + +// This test file is to validate the error cases manually +// should not be included into actual benchmarks as there are cases +// in this file which will always fail. +describe("Hooks", () => { + bench("normal benchmark", () => { + const arr = Array.from({length: 10}, (_, i) => i); + arr.reduce((total, curr) => total + curr, 0); + }); + + bench.skip("normal skipped", () => { + const arr = Array.from({length: 10}, (_, i) => i); + arr.reduce((total, curr) => total + curr, 0); + }); + + describe("before", () => { + bench({ + id: "before failed", + before: () => { + throw new Error("Failed in before"); + }, + fn: () => { + const arr = Array.from({length: 10}, (_, i) => i); + arr.reduce((total, curr) => total + curr, 0); + }, + }); + }); + + describe("beforeEach", () => { + bench({ + id: "beforeEach failed", + beforeEach: () => { + throw new Error("Failed in beforeEach"); + }, + fn: () => { + const arr = Array.from({length: 10}, (_, i) => i); + arr.reduce((total, curr) => total + curr, 0); + }, + }); + }); + + bench({ + id: "error during fn", + fn: () => { + throw new Error("Failed in fn"); + }, + }); +}); diff --git a/test/perf/iteration.test.ts b/test/perf/iteration.test.ts index f5740b7..ac6f0db 100644 --- a/test/perf/iteration.test.ts +++ b/test/perf/iteration.test.ts @@ -8,7 +8,7 @@ import {bench, 
describe, setBenchOpts} from "../../src/index.js"; // byteArrayEquals with valueOf() 853971.0 ops/s 1.171000 us/op 9963051 runs 16.07 s describe("Array iteration", () => { - setBenchOpts({maxMs: 60 * 1000, convergeFactor: 0.1 / 100}); + setBenchOpts({maxMs: 60 * 1000, convergeFactor: 1 / 100}); // nonce = 5 const n = 1e6; diff --git a/test/unit/utils/math.test.ts b/test/unit/utils/math.test.ts new file mode 100644 index 0000000..8fc2915 --- /dev/null +++ b/test/unit/utils/math.test.ts @@ -0,0 +1,225 @@ +import {describe, it, expect} from "vitest"; +import { + calcSum, + calcMean, + calcVariance, + sortData, + calcMedian, + calcQuartile, + OutlierSensitivity, + filterOutliers, +} from "../../../src/utils/math.js"; + +describe("math utility functions", () => { + describe("calcSum", () => { + it("should return 0n for an empty array", () => { + expect(calcSum([])).toBe(BigInt(0)); + }); + + it("should correctly sum an array of positive BigInts", () => { + const arr = [1n, 2n, 3n, 4n]; + expect(calcSum(arr)).toBe(10n); + }); + + it("should correctly sum an array with negative BigInts", () => { + const arr = [-1n, 2n, -3n, 4n]; + // -1 + 2 = 1; 1 - 3 = -2; -2 + 4 = 2 + expect(calcSum(arr)).toBe(2n); + }); + + it("should handle large BigInt values without overflow", () => { + const big1 = BigInt("9007199254740991"); // ~ Number.MAX_SAFE_INTEGER + const big2 = BigInt("9007199254740992"); + expect(calcSum([big1, big2])).toBe(big1 + big2); + }); + }); + + describe("calcMean", () => { + it("should throw or behave predictably for an empty array", () => { + // By default, dividing by BigInt(0) will throw in JavaScript. + // If you want a different behavior, you can wrap your function or catch errors here. 
+ expect(() => calcMean([])).toThrow(); + }); + + it("should correctly calculate the mean of a single-element array", () => { + const arr = [5n]; + expect(calcMean(arr)).toBe(5n); + }); + + it("should correctly calculate the mean of multiple BigInts", () => { + const arr = [2n, 4n, 6n]; + // sum=12, length=3 => mean=4 + expect(calcMean(arr)).toBe(4n); + }); + + it("should handle negative values correctly", () => { + const arr = [-5n, -15n, 10n]; + // sum=-10, length=3 => mean=-3.333..., but truncated to BigInt => -3n if using integer division + expect(calcMean(arr)).toBe(-3n); + }); + }); + + describe("calcVariance", () => { + it("should compute variance for a small sample of integers", () => { + const arr = [2n, 4n, 4n, 6n, 8n]; + // mean = (2+4+4+6+8)/5 = 24/5 = 4.8 => truncated to 4n if using integer division + // If mean=4n, diffs = (-2,0,0,2,4), squares = (4,0,0,4,16), sum=24 => var=24/5=4.8 => truncated to 4n + const meanBigInt = calcMean(arr); + const varianceBigInt = calcVariance(arr, meanBigInt); + expect(varianceBigInt).toBe(4n); + }); + + it("should handle a single-element array (variance=0)", () => { + const arr = [100n]; + const mean = calcMean(arr); // 100n + const variance = calcVariance(arr, mean); + expect(variance).toBe(0n); + }); + + it("should handle negative values", () => { + const arr = [-10n, -4n, -2n]; + // sum = -16, length=3 => mean = floor(-16/3) = -5n + // diffs = (-5,1,3), squares=(25,1,9)=35 => var=35/3=11 => 11n + const mean = calcMean(arr); + const variance = calcVariance(arr, mean); + expect(variance).toBe(11n); + }); + + it("should return 0 for an array of identical values", () => { + const arr = [5n, 5n, 5n]; + const mean = calcMean(arr); + const variance = calcVariance(arr, mean); + expect(variance).toBe(0n); + }); + }); + + describe("sortData", () => { + it("should return a new sorted array without mutating the original", () => { + const arr = [5n, 1n, 3n]; + const sorted = sortData(arr); + expect(sorted).toEqual([1n, 3n, 5n]); 
+ // Ensure original is unchanged + expect(arr).toEqual([5n, 1n, 3n]); + }); + + it("should handle negative and positive numbers", () => { + const arr = [0n, -1n, 10n, -5n, 2n]; + const sorted = sortData(arr); + expect(sorted).toEqual([-5n, -1n, 0n, 2n, 10n]); + }); + + it("should handle an empty array", () => { + expect(sortData([])).toEqual([]); + }); + + it("should handle an already sorted array", () => { + expect(sortData([1n, 2n, 3n, 4n])).toEqual([1n, 2n, 3n, 4n]); + }); + }); + + describe("calcMedian", () => { + it("should throw or handle empty array (no median)", () => { + expect(() => calcMedian([], false)).toThrow(); + }); + + it("should return the middle element when the array length is odd", () => { + const arr = [3n, 1n, 2n]; + // sorted = [1n, 2n, 3n], median = 2n + expect(calcMedian(arr, false)).toBe(2n); + }); + + it("should return the average of two middle elements when the array length is even", () => { + const arr = [3n, 1n, 2n, 4n]; + // sorted = [1n, 2n, 3n, 4n] + // middle indices = 1,2 => average => (2n+3n)/2n=2n + expect(calcMedian(arr, false)).toBe(2n); + }); + + it("should skip re-sorting if 'sorted=true' is provided", () => { + // already sorted + const arr = [1n, 2n, 3n, 4n]; + expect(calcMedian(arr, true)).toBe(2n); // middle indices => 1n,2n => average=2n + }); + }); + + describe("calcQuartile", () => { + const sortedData = sortData([1n, 2n, 4n, 10n, 20n, 100n]); + + it("should return the first quartile (Q1) => percentile=0.25", () => { + // sorted array = [1n, 2n, 4n, 10n, 20n, 100n] + // length=6 => index = (6-1)*0.25=1.25 => floor=1 => fraction=0.25 + // base=2n, next=4n => difference=2n => fraction=0.25 => 2 + 0.25*2=2.5 => ~ BigInt(2.5) + // Because we must do BigInt arithmetic carefully, the function does Number(...) 
inside
+      // => the result = 2 + 0.25*(4-2) = 2.5, returned as a plain number (not truncated to a BigInt)
+      // i.e. base + fraction*(next - base) => 2 + 0.25*2 => 2.5
+      const q1 = calcQuartile(sortedData, true, 0.25);
+      expect(q1).toBe(2.5);
+    });
+
+    it("should return the third quartile (Q3) => percentile=0.75", () => {
+      // index=(6-1)*0.75=3.75 => floor=3 => fraction=0.75
+      // base=10n, next=20n => difference=10 => 10 + 0.75*10 = 17.5 (returned as a number, no rounding)
+      const q3 = calcQuartile(sortedData, true, 0.75);
+      expect(q3).toBe(17.5);
+    });
+
+    it("should gracefully handle the highest index boundary (percentile=1.0)", () => {
+      // index=(6-1)*1.0=5 => floor=5 => fraction=0 => returns Number(data[5]) => 100
+      const maxVal = calcQuartile(sortedData, true, 1.0);
+      expect(maxVal).toBe(100);
+    });
+
+    it("should gracefully handle the lowest index boundary (percentile=0.0)", () => {
+      // index=(6-1)*0.0=0 => floor=0 => fraction=0 => returns Number(data[0]) => 1
+      const minVal = calcQuartile(sortedData, true, 0.0);
+      expect(minVal).toBe(1);
+    });
+
+    it("should handle a single-element array => always that element", () => {
+      const arr = [42n];
+      expect(calcQuartile(arr, true, 0.25)).toBe(42);
+      expect(calcQuartile(arr, true, 0.75)).toBe(42);
+    });
+  });
+
+  describe("filterOutliers", () => {
+    it("should return the same array if length < 4", () => {
+      const arr = [1n, 100n];
+      expect(filterOutliers(arr, false, OutlierSensitivity.Mild)).toEqual([1n, 100n]);
+    });
+
+    it("should remove outliers using the Mild (1.5x IQR) approach", () => {
+      // Example: [1n, 2n, 4n, 10n, 20n, 100n]
+      // sorted => [1n,2n,4n,10n,20n,100n]
+      // Q1=2n, Q3=20n => iqr=18 => mild => +/- 1.5*18=27 => lower=2-27=-25 => upper=20+27=47
+      // So any element outside -25..47 is out => 100n is out
+      const arr = [20n, 100n, 2n, 10n, 1n, 4n];
+      const filtered = filterOutliers(arr, false, OutlierSensitivity.Mild);
+      expect(filtered).toEqual([1n, 2n, 4n, 10n, 20n]);
+    });
+
+    it("should remove outliers using the Strict (3.0x IQR) 
approach", () => { + // same array => Q1=2n, Q3=20n => iqr=18 => strict => +/- 3.0*18=54 => lower=-52 => upper=74 + // 100 is outside => filter it out + const arr = [20n, 100n, 2n, 10n, 1n, 4n]; + const filtered = filterOutliers(arr, false, OutlierSensitivity.Strict); + expect(filtered).toEqual([1n, 2n, 4n, 10n, 20n]); + }); + + it("should handle negative values correctly", () => { + // e.g. [-100n, -10n, -5n, -2n, -1n, 0n, 1n, 5n, 6n] + // We'll skip the exact math here, but we test that they are sorted and outliers removed + const arr = [-10n, 6n, -2n, -100n, -5n, 1n, -1n, 5n, 0n]; + const filtered = filterOutliers(arr, false, OutlierSensitivity.Mild); + // We can check that -100n is probably an outlier + expect(filtered).not.toContain(-100n); + }); + + it("should not filter anything if all values are within the mild IQR range", () => { + const arr = [10n, 12n, 11n, 9n, 8n, 10n, 10n]; + const filtered = filterOutliers(arr, false, OutlierSensitivity.Mild); + // all within a small range => no outliers + expect(filtered).toEqual(sortData(arr)); + }); + }); +});