From 944a0d87f601904d2c53e3063c79093f445a9eca Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 16 Apr 2026 22:25:49 +0000
Subject: [PATCH 1/6] feat: add AI gateway benchmark mode and CI workflow

---
 .github/workflows/ai-gateway-benchmarks.yml | 167 +++++++++++
 package.json                                |   3 +
 src/ai-gateway/benchmark.ts                 | 301 ++++++++++++++++++++
 src/ai-gateway/providers.ts                 |  29 ++
 src/ai-gateway/scoring.ts                   |  66 +++++
 src/ai-gateway/types.ts                     |  40 +++
 src/merge-results.ts                        | 108 ++++++-
 src/run.ts                                  |  81 +++++-
 8 files changed, 790 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/ai-gateway-benchmarks.yml
 create mode 100644 src/ai-gateway/benchmark.ts
 create mode 100644 src/ai-gateway/providers.ts
 create mode 100644 src/ai-gateway/scoring.ts
 create mode 100644 src/ai-gateway/types.ts

diff --git a/.github/workflows/ai-gateway-benchmarks.yml b/.github/workflows/ai-gateway-benchmarks.yml
new file mode 100644
index 0000000..db4d455
--- /dev/null
+++ b/.github/workflows/ai-gateway-benchmarks.yml
@@ -0,0 +1,167 @@
+name: AI Gateway Benchmark
+
+on:
+  pull_request:
+    paths:
+      - 'src/ai-gateway/**'
+      - 'src/util/**'
+      - 'src/run.ts'
+      - 'src/merge-results.ts'
+      - 'package.json'
+  workflow_dispatch:
+    inputs:
+      iterations:
+        description: 'Iterations per provider'
+        required: false
+        default: '50'
+
+concurrency: # Scoped per PR (or ref for manual runs) so a new run only cancels older runs of the same PR.
+  group: ai-gateway-benchmarks-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  bench:
+    name: Bench ${{ matrix.provider }} ${{ matrix.scenario }}
+    runs-on: namespace-profile-default
+    timeout-minutes: 60
+    strategy:
+      fail-fast: false
+      matrix:
+        provider:
+          - openrouter
+          - vercel-ai-gateway
+          - cloudflare-ai-gateway
+        scenario:
+          - short-nonstream
+          - short-stream
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 24
+          cache: 'npm'
+      - run: npm ci
+      - name: Clear stale results from checkout
+        run: rm -rf results/ai_gateway/
+      - name: Run AI gateway benchmark
+        env:
+          # Passed via env (not inlined into the script) so the dispatch input cannot inject shell syntax.
+          ITERATIONS: ${{ github.event.inputs.iterations || '50' }}
+          AI_GATEWAY_MODEL: ${{ secrets.AI_GATEWAY_MODEL }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          OPENROUTER_BASE_URL: ${{ secrets.OPENROUTER_BASE_URL }}
+          OPENROUTER_HTTP_REFERER: ${{ secrets.OPENROUTER_HTTP_REFERER }}
+          OPENROUTER_X_TITLE: ${{ secrets.OPENROUTER_X_TITLE }}
+          VERCEL_AI_GATEWAY_BASE_URL: ${{ secrets.VERCEL_AI_GATEWAY_BASE_URL }}
+          VERCEL_AI_GATEWAY_API_KEY: ${{ secrets.VERCEL_AI_GATEWAY_API_KEY }}
+          CLOUDFLARE_AI_GATEWAY_BASE_URL: ${{ secrets.CLOUDFLARE_AI_GATEWAY_BASE_URL }}
+          CLOUDFLARE_AI_GATEWAY_API_KEY: ${{ secrets.CLOUDFLARE_AI_GATEWAY_API_KEY }}
+        run: |
+          npm run bench -- --mode ai-gateway \
+            --provider ${{ matrix.provider }} --ai-gateway-scenario ${{ matrix.scenario }} \
+            --iterations "$ITERATIONS"
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ai-gateway-results-${{ matrix.provider }}-${{ matrix.scenario }}
+          path: results/ai_gateway/
+          if-no-files-found: ignore
+          retention-days: 7
+
+  collect:
+    name: Collect Results
+    runs-on: namespace-profile-default
+    needs: bench
+    if: always()
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 24
+          cache: 'npm'
+      - run: npm ci
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: artifacts/
+          pattern: ai-gateway-results-*
+      - name: Merge results
+        run: npx tsx src/merge-results.ts --input artifacts --mode ai-gateway
+      - name: Post results to PR
+        if: github.event_name == 'pull_request'
+        continue-on-error: true
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // Builds one Markdown table per scenario from the merged latest.json files and
+            // upserts it as a single PR comment, located again by its heading marker.
+            const fs = require('fs');
+            const path = require('path');
+
+            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+            const scenarios = ['short_nonstream', 'short_stream'];
+            const marker = '## AI Gateway Benchmark Results';
+            let body = `${marker}\n\n`;
+
+            let hasResults = false;
+            for (const scenario of scenarios) {
+              const latestPath = path.join('results', 'ai_gateway', scenario, 'latest.json');
+              if (!fs.existsSync(latestPath)) continue;
+
+              const data = JSON.parse(fs.readFileSync(latestPath, 'utf-8'));
+              const results = data.results
+                .filter(r => !r.skipped)
+                .sort((a, b) => (b.compositeScore || 0) - (a.compositeScore || 0));
+
+              if (results.length === 0) continue;
+              hasResults = true;
+
+              body += `### ${scenario.replaceAll('_', ' ').toUpperCase()}\n\n`;
+              body += '| # | Provider | Score | First Token | Total | Tok/sec | Status |\n';
+              body += '|---|----------|-------|-------------|-------|---------|--------|\n';
+
+              results.forEach((r, i) => {
+                const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--';
+                const first = (r.summary.firstTokenMs.median / 1000).toFixed(2) + 's';
+                const total = (r.summary.totalMs.median / 1000).toFixed(2) + 's';
+                const tps = r.summary.outputTokensPerSec.median.toFixed(1);
+                const ok = r.iterations.filter(it => !it.error).length;
+                const count = r.iterations.length;
+                body += `| ${i + 1} | ${r.provider} | ${score} | ${first} | ${total} | ${tps} | ${ok}/${count} |\n`;
+              });
+
+              body += '\n';
+            }
+
+            if (!hasResults) {
+              body += '> No AI gateway benchmark results were generated.\n\n';
+            }
+
+            body += `---\n*[View full run](${runUrl})*`;
+
+            // Paginate: a single listComments call returns only the first page (30 comments), so on
+            // a busy PR the existing comment would be missed and a duplicate posted on every run.
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              per_page: 100,
+            });
+
+            const existing = comments.find(c => c.body?.startsWith(marker));
+
+            const repoRef = { owner: context.repo.owner, repo: context.repo.repo };
+            if (existing) {
+              await github.rest.issues.updateComment({
+                ...repoRef, comment_id: existing.id, body,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                ...repoRef, issue_number: context.issue.number, body,
+              });
+            }
diff --git a/package.json b/package.json
index a3c95f5..2ee194e 100644
--- a/package.json
+++ b/package.json
@@ -23,6 +23,9 @@
     "bench:sprites": "tsx src/run.ts --provider sprites",
     "bench:browser": "tsx src/run.ts --mode browser",
     "bench:browser:browserbase": "tsx src/run.ts --mode browser --provider browserbase",
+    "bench:ai-gateway": "tsx src/run.ts --mode ai-gateway",
+    "bench:ai-gateway:nonstream": "tsx src/run.ts --mode ai-gateway --ai-gateway-scenario short-nonstream",
+    "bench:ai-gateway:stream": "tsx src/run.ts --mode ai-gateway --ai-gateway-scenario short-stream",
     "bench:storage": "tsx src/run.ts --mode storage",
     "bench:storage:s3": "tsx src/run.ts --mode storage --provider aws-s3",
     "bench:storage:r2": "tsx src/run.ts --mode storage --provider cloudflare-r2",
diff --git a/src/ai-gateway/benchmark.ts b/src/ai-gateway/benchmark.ts
new file mode 100644
index 0000000..9fec610
--- /dev/null
+++ b/src/ai-gateway/benchmark.ts
@@ -0,0 +1,301 @@
+import { computeStats } from '../util/stats.js';
+import { withTimeout } from '../util/timeout.js';
+import type {
+  AIGatewayProviderConfig,
+  AIGatewayScenario,
+  AIGatewayTimingResult,
+  AIGatewayBenchmarkResult,
+} from './types.js';
+
+function round(n: number): number { // Rounds `n` to two decimal places.
+  return Math.round(n * 100) / 100;
+}
+
+const SCENARIO_PROMPTS: Record<AIGatewayScenario, { prompt: string; maxTokens: number; stream: boolean }> = { // Request settings per scenario; `stream` selects the streaming or non-streaming runner.
+  'short-nonstream': {
+    prompt: 'Reply with exactly: ok',
+    maxTokens: 16,
+    stream: false,
+  },
+  'short-stream': {
+    prompt: 'Write one short sentence about distributed systems.',
+    maxTokens: 64,
+    stream: true,
+  },
+};
+
+/** Returns the first numeric completion-token field found in a response payload, or 0 when none is present. */
+function extractCompletionTokens(payload: any): number {
+  const candidates = [payload?.usage?.completion_tokens, payload?.usage?.output_tokens, payload?.completion_tokens];
+  const found = candidates.find(value => typeof value === 'number');
+  return found ?? 0;
+}
+
+function normalizeBaseUrl(baseUrl: string): string { // Strips ALL trailing slashes so appending a path never yields "//".
+  return baseUrl.replace(/\/+$/, '');
+}
+
+/**
+ * Runs one non-streaming chat-completion request and times it.
+ * `totalMs` spans the whole round trip including the body download; with no stream to observe,
+ * `firstTokenMs` is reported equal to `totalMs`. Failures (HTTP error, timeout, invalid JSON)
+ * are returned as a result with `error` set rather than thrown.
+ */
+async function runNonStreamingIteration(
+  provider: AIGatewayProviderConfig,
+  timeout: number,
+  scenario: AIGatewayScenario,
+): Promise<AIGatewayTimingResult> {
+  const request = SCENARIO_PROMPTS[scenario];
+  const start = performance.now();
+
+  try {
+    const response = await withTimeout(fetch(`${normalizeBaseUrl(provider.baseUrl)}/chat/completions`, {
+      method: 'POST',
+      headers: {
+        'content-type': 'application/json',
+        authorization: `Bearer ${provider.apiKey}`,
+        ...provider.defaultHeaders,
+      },
+      body: JSON.stringify({
+        model: provider.model,
+        messages: [{ role: 'user', content: request.prompt }],
+        temperature: 0,
+        max_tokens: request.maxTokens,
+        stream: false,
+      }),
+    }), timeout, 'Gateway request timed out');
+
+    // fetch() resolves once the headers arrive, so read the body BEFORE stopping the clock.
+    const statusCode = response.status;
+    const bodyText = await withTimeout(response.text(), timeout, 'Gateway response body timed out');
+    const totalMs = performance.now() - start;
+
+    if (!response.ok) {
+      const error = `HTTP ${statusCode}: ${bodyText.slice(0, 200)}`;
+      return { firstTokenMs: 0, totalMs, outputTokens: 0, outputTokensPerSec: 0, statusCode, error };
+    }
+
+    const outputTokens = extractCompletionTokens(JSON.parse(bodyText));
+    const seconds = Math.max(totalMs / 1000, 0.001);
+
+    return {
+      firstTokenMs: totalMs,
+      totalMs,
+      outputTokens,
+      outputTokensPerSec: outputTokens > 0 ? outputTokens / seconds : 0,
+      statusCode,
+    };
+  } catch (err) {
+    return {
+      firstTokenMs: 0,
+      totalMs: performance.now() - start,
+      outputTokens: 0,
+      outputTokensPerSec: 0,
+      error: err instanceof Error ? err.message : String(err),
+    };
+  }
+}
+
+/**
+ * Runs one streaming (SSE) chat-completion request and times it.
+ *
+ * `firstTokenMs` is the time until the first complete `data:` line arrives. `outputTokens` counts
+ * events with non-empty `delta.content` (an approximation of the token count), and throughput is
+ * measured over the window after the first event. Failures are returned with `error` set.
+ */
+async function runStreamingIteration(
+  provider: AIGatewayProviderConfig,
+  timeout: number,
+  scenario: AIGatewayScenario,
+): Promise<AIGatewayTimingResult> {
+  const request = SCENARIO_PROMPTS[scenario];
+  const start = performance.now();
+
+  try {
+    const response = await withTimeout(fetch(`${normalizeBaseUrl(provider.baseUrl)}/chat/completions`, {
+      method: 'POST',
+      headers: {
+        'content-type': 'application/json',
+        authorization: `Bearer ${provider.apiKey}`,
+        ...provider.defaultHeaders,
+      },
+      body: JSON.stringify({
+        model: provider.model,
+        messages: [{ role: 'user', content: request.prompt }],
+        temperature: 0,
+        max_tokens: request.maxTokens,
+        stream: true,
+      }),
+    }), timeout, 'Gateway request timed out');
+
+    const statusCode = response.status;
+    if (!response.ok || !response.body) {
+      const errorText = await response.text().catch(() => '');
+      const error = `HTTP ${statusCode}: ${errorText.slice(0, 200)}`;
+      return { firstTokenMs: 0, totalMs: performance.now() - start, outputTokens: 0, outputTokensPerSec: 0, statusCode, error };
+    }
+
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let firstTokenMs = 0;
+    let outputTokens = 0;
+    let pending = ''; // Unterminated tail of the previous chunk; network chunks can split an SSE line.
+
+    // Counts content deltas in every complete line of `text` and returns the unterminated remainder.
+    const consume = (text: string): string => {
+      const lines = text.split('\n');
+      const rest = lines.pop() ?? '';
+      for (const line of lines) {
+        if (!line.startsWith('data:')) continue;
+        if (firstTokenMs === 0) firstTokenMs = performance.now() - start;
+        const data = line.slice(5).trim();
+        if (!data || data === '[DONE]') continue;
+        try {
+          const tokenText = JSON.parse(data)?.choices?.[0]?.delta?.content;
+          if (typeof tokenText === 'string' && tokenText.length > 0) outputTokens += 1;
+        } catch {
+          // Ignore events whose payload is not valid JSON.
+        }
+      }
+      return rest;
+    };
+
+    try {
+      while (true) {
+        const readResult = await withTimeout(reader.read(), timeout, 'Stream read timed out');
+        if (readResult.done) break;
+        pending = consume(pending + decoder.decode(readResult.value, { stream: true }));
+      }
+      consume(`${pending}${decoder.decode()}\n`); // Flush a final line that lacks a trailing newline.
+    } finally {
+      reader.cancel().catch(() => {}); // Best-effort release of the connection, notably after a read timeout.
+    }
+
+    const totalMs = performance.now() - start;
+    const effectiveStart = firstTokenMs > 0 ? firstTokenMs : totalMs;
+    const generationSeconds = Math.max((totalMs - effectiveStart) / 1000, 0.001);
+
+    return {
+      firstTokenMs: effectiveStart,
+      totalMs,
+      outputTokens,
+      outputTokensPerSec: outputTokens > 0 ? outputTokens / generationSeconds : 0,
+      statusCode,
+    };
+  } catch (err) {
+    const error = err instanceof Error ? err.message : String(err);
+    return { firstTokenMs: 0, totalMs: performance.now() - start, outputTokens: 0, outputTokensPerSec: 0, error };
+  }
+}
+
+/**
+ * Benchmarks one provider for one scenario, running the iterations sequentially.
+ *
+ * Returns a skipped result (no requests made) when any required env var is unset;
+ * otherwise summary stats are computed over the iterations that finished without an error.
+ */
+export async function runAIGatewayBenchmark(
+  config: AIGatewayProviderConfig,
+  scenario: AIGatewayScenario,
+): Promise<AIGatewayBenchmarkResult> {
+  const { name, requiredEnvVars, iterations = 100, timeout = 45_000 } = config;
+  const model = config.model || process.env.AI_GATEWAY_MODEL || '';
+
+  const missingVars = requiredEnvVars.filter(v => !process.env[v]);
+  if (missingVars.length > 0) {
+    const zero = () => ({ median: 0, p95: 0, p99: 0 });
+    return {
+      provider: name,
+      mode: 'ai-gateway',
+      scenario,
+      model,
+      iterations: [],
+      summary: { firstTokenMs: zero(), totalMs: zero(), outputTokensPerSec: zero() },
+      skipped: true,
+      skipReason: `Missing: ${missingVars.join(', ')}`,
+    };
+  }
+
+  const resolved: AIGatewayProviderConfig = { ...config, model, apiKey: config.apiKey || '', baseUrl: config.baseUrl || '' };
+  // Pick the iteration runner once; every iteration of a scenario uses the same transport.
+  const runOnce = SCENARIO_PROMPTS[scenario].stream ? runStreamingIteration : runNonStreamingIteration;
+  const results: AIGatewayTimingResult[] = [];
+
+  console.log(`\n--- AI Gateway Benchmarking: ${name} (${scenario}, ${iterations} iterations) ---`);
+
+  for (let i = 0; i < iterations; i++) {
+    const result = await runOnce(resolved, timeout, scenario);
+    results.push(result);
+
+    const status = result.error ? `FAILED: ${result.error}` : `${(result.totalMs / 1000).toFixed(2)}s`;
+    const first = result.firstTokenMs > 0 ? `${(result.firstTokenMs / 1000).toFixed(2)}s` : '--';
+    console.log(`  Iteration ${i + 1}/${iterations}: total ${status}, first ${first}`);
+  }
+
+  const successful = results.filter(r => !r.error);
+  const firstTokenSamples = successful.map(r => r.firstTokenMs).filter(v => v > 0);
+  return {
+    provider: name,
+    mode: 'ai-gateway',
+    scenario,
+    model: resolved.model,
+    iterations: results,
+    summary: {
+      firstTokenMs: computeStats(firstTokenSamples),
+      totalMs: computeStats(successful.map(r => r.totalMs)),
+      outputTokensPerSec: computeStats(successful.map(r => r.outputTokensPerSec)),
+    },
+  };
+}
+
+function roundStats({ median, p95, p99 }: { median: number; p95: number; p99: number }) {
+  return { median: round(median), p95: round(p95), p99: round(p99) }; // Each percentile rounded to two decimals.
+}
+
+/**
+ * Serialises benchmark results (values rounded to two decimals) plus run metadata to `outPath`.
+ * `config` is taken from the first non-skipped result, so a skipped provider listed first does not report 0 iterations.
+ */
+export async function writeAIGatewayResultsJson(results: AIGatewayBenchmarkResult[], outPath: string): Promise<void> {
+  const [fs, os] = await Promise.all([import('fs'), import('os')]);
+
+  const cleanResults = results.map(r => ({
+    provider: r.provider,
+    mode: r.mode,
+    scenario: r.scenario,
+    model: r.model,
+    iterations: r.iterations.map(i => ({
+      firstTokenMs: round(i.firstTokenMs),
+      totalMs: round(i.totalMs),
+      outputTokens: i.outputTokens,
+      outputTokensPerSec: round(i.outputTokensPerSec),
+      ...(i.statusCode !== undefined ? { statusCode: i.statusCode } : {}),
+      ...(i.error ? { error: i.error } : {}),
+    })),
+    summary: {
+      firstTokenMs: roundStats(r.summary.firstTokenMs),
+      totalMs: roundStats(r.summary.totalMs),
+      outputTokensPerSec: roundStats(r.summary.outputTokensPerSec),
+    },
+    ...(r.compositeScore !== undefined ? { compositeScore: round(r.compositeScore) } : {}),
+    ...(r.successRate !== undefined ? { successRate: round(r.successRate) } : {}),
+    ...(r.skipped ? { skipped: r.skipped, skipReason: r.skipReason } : {}),
+  }));
+
+  const representative = results.find(r => !r.skipped) ?? results[0];
+  const output = {
+    version: '1.0',
+    timestamp: new Date().toISOString(),
+    environment: { node: process.version, platform: os.platform(), arch: os.arch() },
+    config: {
+      iterations: representative?.iterations.length ?? 0,
+      timeoutMs: 45000, // Same value as the default timeout in runAIGatewayBenchmark.
+      scenario: representative?.scenario ?? null,
+    },
+    results: cleanResults,
+  };
+
+  fs.writeFileSync(outPath, JSON.stringify(output, null, 2));
+  console.log(`Results written to ${outPath}`);
+}
diff --git a/src/ai-gateway/providers.ts b/src/ai-gateway/providers.ts
new file mode 100644
index 0000000..537b7bd
--- /dev/null
+++ b/src/ai-gateway/providers.ts
@@ -0,0 +1,29 @@
+import type { AIGatewayProviderConfig } from './types.js';
+
+export const aiGatewayProviders: AIGatewayProviderConfig[] = [ // Provider table; every value is read from process.env when this module is evaluated.
+  {
+    name: 'openrouter',
+    requiredEnvVars: ['OPENROUTER_API_KEY', 'AI_GATEWAY_MODEL'],
+    baseUrl: process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1', // Only provider with a built-in default URL.
+    apiKey: process.env.OPENROUTER_API_KEY || '',
+    model: process.env.AI_GATEWAY_MODEL || '',
+    defaultHeaders: { // Optional headers, each included only when its env var is set.
+      ...(process.env.OPENROUTER_HTTP_REFERER ? { 'HTTP-Referer': process.env.OPENROUTER_HTTP_REFERER } : {}),
+      ...(process.env.OPENROUTER_X_TITLE ? { 'X-Title': process.env.OPENROUTER_X_TITLE } : {}),
+    },
+  },
+  {
+    name: 'vercel-ai-gateway',
+    requiredEnvVars: ['VERCEL_AI_GATEWAY_BASE_URL', 'VERCEL_AI_GATEWAY_API_KEY', 'AI_GATEWAY_MODEL'],
+    baseUrl: process.env.VERCEL_AI_GATEWAY_BASE_URL || '',
+    apiKey: process.env.VERCEL_AI_GATEWAY_API_KEY || '',
+    model: process.env.AI_GATEWAY_MODEL || '',
+  },
+  {
+    name: 'cloudflare-ai-gateway',
+    requiredEnvVars: ['CLOUDFLARE_AI_GATEWAY_BASE_URL', 'CLOUDFLARE_AI_GATEWAY_API_KEY', 'AI_GATEWAY_MODEL'],
+    baseUrl: process.env.CLOUDFLARE_AI_GATEWAY_BASE_URL || '',
+    apiKey: process.env.CLOUDFLARE_AI_GATEWAY_API_KEY || '',
+    model: process.env.AI_GATEWAY_MODEL || '',
+  },
+];
diff --git a/src/ai-gateway/scoring.ts b/src/ai-gateway/scoring.ts
new file mode 100644
index 0000000..9cd7bbb
--- /dev/null
+++ b/src/ai-gateway/scoring.ts
@@ -0,0 +1,66 @@
+import type { AIGatewayBenchmarkResult } from './types.js';
+
+export interface AIGatewayScoringWeights { // Multipliers applied to the per-metric latency scores.
+  totalMedian: number; // weight of the total-latency median
+  totalP95: number; // weight of the total-latency p95
+  totalP99: number; // weight of the total-latency p99
+  firstTokenMedian: number; // weight of the first-token latency median
+}
+
+export const DEFAULT_AI_GATEWAY_WEIGHTS: AIGatewayScoringWeights = { // Default weights; they sum to 1.0.
+  totalMedian: 0.35,
+  totalP95: 0.20,
+  totalP99: 0.10,
+  firstTokenMedian: 0.35,
+};
+
+const LATENCY_CEILING_MS = 60000;
+
+function scoreLatency(valueMs: number): number { // Linear score: 100 at 0 ms, 0 at LATENCY_CEILING_MS, never below 0.
+  return Math.max(0, 100 * (1 - valueMs / LATENCY_CEILING_MS));
+}
+
+/** Fraction (0 to 1) of iterations that finished without an error; 0 for skipped or empty results. */
+export function computeAIGatewaySuccessRate({ skipped, iterations }: AIGatewayBenchmarkResult): number {
+  if (skipped || iterations.length === 0) return 0;
+  return iterations.filter(i => !i.error).length / iterations.length;
+}
+
+/** Weighted sum of the per-metric latency scores (total median/p95/p99 and first-token median) for one result. */
+function computeAIGatewayScore(
+  result: AIGatewayBenchmarkResult,
+  weights: AIGatewayScoringWeights = DEFAULT_AI_GATEWAY_WEIGHTS,
+): number {
+  const { totalMs, firstTokenMs } = result.summary;
+  const medianPart = weights.totalMedian * scoreLatency(totalMs.median);
+  const p95Part = weights.totalP95 * scoreLatency(totalMs.p95);
+  const p99Part = weights.totalP99 * scoreLatency(totalMs.p99);
+  return medianPart + p95Part + p99Part + weights.firstTokenMedian * scoreLatency(firstTokenMs.median);
+}
+
+/** Sets `successRate` and `compositeScore` on every result in place; the score is 0 when skipped or nothing succeeded. */
+export function computeAIGatewayCompositeScores(
+  results: AIGatewayBenchmarkResult[],
+  weights: AIGatewayScoringWeights = DEFAULT_AI_GATEWAY_WEIGHTS,
+): void {
+  results.forEach(result => {
+    const successRate = computeAIGatewaySuccessRate(result);
+    result.successRate = successRate;
+
+    if (result.skipped || successRate === 0) {
+      result.compositeScore = 0;
+    } else {
+      // Latency score scaled by the success rate, rounded to two decimals.
+      result.compositeScore = Math.round(computeAIGatewayScore(result, weights) * successRate * 100) / 100;
+    }
+  });
+}
+
+/** Returns a sorted copy: scored results by descending composite score, skipped results last. */
+export function sortAIGatewayByCompositeScore(results: AIGatewayBenchmarkResult[]): AIGatewayBenchmarkResult[] {
+  const skipRank = (r: AIGatewayBenchmarkResult) => (r.skipped ? 1 : 0);
+  return [...results].sort((a, b) => {
+    if (a.skipped || b.skipped) return skipRank(a) - skipRank(b);
+    return (b.compositeScore ?? 0) - (a.compositeScore ?? 0);
+  });
+}
diff --git a/src/ai-gateway/types.ts b/src/ai-gateway/types.ts
new file mode 100644
index 0000000..937b807
--- /dev/null
+++ b/src/ai-gateway/types.ts
@@ -0,0 +1,40 @@
+export type AIGatewayScenario = 'short-nonstream' | 'short-stream';
+
+export interface AIGatewayProviderConfig { // Connection and run settings for one gateway provider.
+  name: string;
+  requiredEnvVars: string[]; // env var names that must be set; otherwise the provider is reported as skipped
+  baseUrl: string; // "/chat/completions" is appended to this when requests are sent
+  apiKey: string; // sent as a Bearer token in the authorization header
+  model: string;
+  defaultHeaders?: Record<string, string>; // extra headers merged into every request
+  timeout?: number; // milliseconds; the benchmark runner defaults to 45_000
+  iterations?: number; // the benchmark runner defaults to 100
+}
+
+export interface AIGatewayTimingResult { // Measurements for a single request.
+  firstTokenMs: number; // 0 when the iteration failed
+  totalMs: number; // elapsed time, also recorded for failed iterations
+  outputTokens: number;
+  outputTokensPerSec: number; // 0 when no output tokens were counted
+  statusCode?: number; // absent when the request failed before a response arrived
+  error?: string; // set only when the iteration failed
+}
+
+export interface AIGatewayStats { // Median/p95/p99 summary for each measured metric.
+  firstTokenMs: { median: number; p95: number; p99: number };
+  totalMs: { median: number; p95: number; p99: number };
+  outputTokensPerSec: { median: number; p95: number; p99: number };
+}
+
+export interface AIGatewayBenchmarkResult { // Outcome of benchmarking one provider for one scenario.
+  provider: string;
+  mode: 'ai-gateway';
+  scenario: AIGatewayScenario;
+  model: string;
+  iterations: AIGatewayTimingResult[]; // empty when the provider was skipped
+  summary: AIGatewayStats;
+  compositeScore?: number; // filled in by computeAIGatewayCompositeScores
+  successRate?: number; // filled in by computeAIGatewayCompositeScores (0 to 1)
+  skipped?: boolean;
+  skipReason?: string;
+}
diff --git a/src/merge-results.ts b/src/merge-results.ts
index a75d651..e75d1c8 100644
--- a/src/merge-results.ts
+++ b/src/merge-results.ts
@@ -1,7 +1,7 @@
 /**
  * Merge per-provider benchmark results into combined result files.
  *
- * Usage: tsx src/merge-results.ts --input <artifacts-dir> [--mode storage|browser]
+ * Usage: tsx src/merge-results.ts --input <artifacts-dir> [--mode storage|browser|ai-gateway]
  *
  * By default, merges sandbox benchmark results: reads latest.json files from
  * the input directory, groups by mode (sequential/staggered/burst), computes
@@ -14,6 +14,10 @@
  * With --mode browser, merges browser benchmark results: deduplicates by
  * provider, computes browser-specific composite scores, and writes combined
  * files to results/browser/latest.json.
+ *
+ * With --mode ai-gateway, merges AI gateway benchmark results grouped by
+ * scenario, computes gateway-specific composite scores, and writes combined
+ * files to results/ai_gateway/<scenario>/latest.json.
  */
 import fs from 'fs';
 import path from 'path';
@@ -21,10 +25,12 @@ import { fileURLToPath } from 'url';
 import { computeCompositeScores } from './sandbox/scoring.js';
 import { computeStorageCompositeScores, sortStorageByCompositeScore } from './storage/scoring.js';
 import { computeBrowserCompositeScores, sortBrowserByCompositeScore } from './browser/scoring.js';
+import { computeAIGatewayCompositeScores, sortAIGatewayByCompositeScore } from './ai-gateway/scoring.js';
 import { printResultsTable, writeResultsJson } from './sandbox/table.js';
 import type { BenchmarkResult } from './sandbox/types.js';
 import type { StorageBenchmarkResult } from './storage/types.js';
 import type { BrowserBenchmarkResult } from './browser/types.js';
+import type { AIGatewayBenchmarkResult } from './ai-gateway/types.js';
 
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const ROOT = path.resolve(__dirname, '..');
@@ -38,7 +44,7 @@ function getArgValue(flag: string): string | undefined {
 const inputDir = getArgValue('--input');
 const mergeMode = getArgValue('--mode');
 if (!inputDir) {
-  console.error('Usage: tsx src/merge-results.ts --input <artifacts-dir> [--mode storage|browser]');
+  console.error('Usage: tsx src/merge-results.ts --input <artifacts-dir> [--mode storage|browser|ai-gateway]');
   process.exit(1);
 }
 
@@ -307,6 +313,35 @@ function printBrowserResultsTable(results: BrowserBenchmarkResult[]): void {
   console.log('='.repeat(110));
 }
 
+/** Print a fixed-width console table of AI gateway results for one scenario, best composite score first. */
+function printAIGatewayResultsTable(results: AIGatewayBenchmarkResult[], scenario: string): void {
+  const sorted = sortAIGatewayByCompositeScore(results);
+  const widths = [24, 8, 14, 14, 14, 12];
+  const rule = '='.repeat(108);
+  const row = (cells: string[]) => cells.map((cell, i) => cell.padEnd(widths[i])).join(' | ');
+
+  console.log(`\n${rule}`);
+  console.log(`  AI GATEWAY BENCHMARK RESULTS - ${scenario.toUpperCase()}`);
+  console.log(rule);
+  console.log(row(['Provider', 'Score', 'First Token', 'Total', 'Tok/sec', 'Status']));
+  console.log(widths.map(w => '-'.repeat(w)).join('-+-'));
+
+  for (const r of sorted) {
+    if (r.skipped) {
+      console.log(row([r.provider, '--', '--', '--', '--', 'SKIPPED']));
+      continue;
+    }
+    const ok = r.iterations.filter(i => !i.error).length;
+    const total = r.iterations.length;
+    const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--';
+    const first = (r.summary.firstTokenMs.median / 1000).toFixed(2) + 's';
+    const tot = (r.summary.totalMs.median / 1000).toFixed(2) + 's';
+    const tps = r.summary.outputTokensPerSec.median.toFixed(1);
+    console.log(row([r.provider, score, first, tot, tps, `${ok}/${total} OK`]));
+  }
+  console.log(rule);
+}
+
 /**
  * Merge browser benchmark results.
  */
@@ -366,7 +401,74 @@ async function mainBrowser() {
   console.log(`Copied latest: ${latestPath}`);
 }
 
-const runner = mergeMode === 'storage' ? mainStorage : mergeMode === 'browser' ? mainBrowser : main;
+/** Merge AI gateway benchmark results, grouped by scenario (the parent directory name of each latest.json). */
+async function mainAIGateway() {
+  type Entry = { result: AIGatewayBenchmarkResult; fromSingleProvider: boolean };
+  // Recursively collect every latest.json below the input directory.
+  const jsonFiles: string[] = [];
+  const walk = (dir: string): void => {
+    if (!fs.existsSync(dir)) return;
+    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+      const full = path.join(dir, entry.name);
+      if (entry.isDirectory()) walk(full);
+      else if (entry.name === 'latest.json') jsonFiles.push(full);
+    }
+  };
+  walk(inputDir!);
+
+  if (jsonFiles.length === 0) {
+    console.error(`No latest.json files found in ${inputDir}`);
+    process.exit(1);
+  }
+
+  console.log(`Found ${jsonFiles.length} result files`);
+
+  const byScenario: Record<string, { results: Entry[] }> = {};
+  for (const file of jsonFiles) {
+    const raw = JSON.parse(fs.readFileSync(file, 'utf-8')) as { results: AIGatewayBenchmarkResult[] };
+    const fromSingleProvider = raw.results.length === 1;
+    const scenario = path.basename(path.dirname(file));
+
+    if (!byScenario[scenario]) byScenario[scenario] = { results: [] };
+    byScenario[scenario].results.push(...raw.results.map(result => ({ result, fromSingleProvider })));
+  }
+
+  for (const [scenario, { results }] of Object.entries(byScenario)) {
+    // Keep one entry per provider; a single-provider file wins over a combined one.
+    const seen = new Map<string, Entry>();
+    for (const entry of results) {
+      const existing = seen.get(entry.result.provider);
+      const replaces = !existing || (entry.fromSingleProvider && !existing.fromSingleProvider);
+      if (replaces) seen.set(entry.result.provider, entry);
+    }
+
+    const deduped = Array.from(seen.values()).map(e => e.result);
+    console.log(`\nMerging ${deduped.length} provider results for mode: ai-gateway/${scenario}`);
+
+    computeAIGatewayCompositeScores(deduped);
+    printAIGatewayResultsTable(deduped, scenario);
+
+    const { writeAIGatewayResultsJson } = await import('./ai-gateway/benchmark.js');
+    const day = new Date().toISOString().slice(0, 10);
+    const resultsDir = path.resolve(ROOT, `results/ai_gateway/${scenario}`);
+    fs.mkdirSync(resultsDir, { recursive: true });
+
+    const outPath = path.join(resultsDir, `${day}.json`);
+    await writeAIGatewayResultsJson(deduped, outPath);
+
+    const latestPath = path.join(resultsDir, 'latest.json');
+    fs.copyFileSync(outPath, latestPath);
+    console.log(`Copied latest: ${latestPath}`);
+  }
+}
+
+const runner = mergeMode === 'storage'
+  ? mainStorage
+  : mergeMode === 'browser'
+    ? mainBrowser
+    : mergeMode === 'ai-gateway'
+      ? mainAIGateway
+      : main;
 runner().catch(err => {
   console.error('Merge failed:', err);
   process.exit(1);
diff --git a/src/run.ts b/src/run.ts
index fdd5962..ff76320 100644
--- a/src/run.ts
+++ b/src/run.ts
@@ -9,16 +9,20 @@ import { runConcurrentBenchmark } from './sandbox/concurrent.js';
 import { runStaggeredBenchmark } from './sandbox/staggered.js';
 import { runStorageBenchmark, writeStorageResultsJson } from './storage/benchmark.js';
 import { runBrowserBenchmark, writeBrowserResultsJson } from './browser/benchmark.js';
+import { runAIGatewayBenchmark, writeAIGatewayResultsJson } from './ai-gateway/benchmark.js';
 import { printResultsTable, writeResultsJson } from './sandbox/table.js';
 import { providers } from './sandbox/providers.js';
 import { storageProviders } from './storage/providers.js';
 import { browserProviders } from './browser/providers.js';
+import { aiGatewayProviders } from './ai-gateway/providers.js';
 import { computeCompositeScores } from './sandbox/scoring.js';
 import { computeStorageCompositeScores } from './storage/scoring.js';
 import { computeBrowserCompositeScores } from './browser/scoring.js';
+import { computeAIGatewayCompositeScores } from './ai-gateway/scoring.js';
 import type { BenchmarkResult, BenchmarkMode } from './sandbox/types.js';
 import type { StorageBenchmarkResult } from './storage/types.js';
 import type { BrowserBenchmarkResult } from './browser/types.js';
+import type { AIGatewayBenchmarkResult, AIGatewayScenario } from './ai-gateway/types.js';
 
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 
@@ -31,6 +35,7 @@ const concurrency = parseInt(getArgValue(args, '--concurrency') || '100', 10);
 const storageConcurrency = parseInt(getArgValue(args, '--storage-concurrency') || '1', 10);
 const staggerDelay = parseInt(getArgValue(args, '--stagger-delay') || '200', 10);
 const fileSizeArg = getArgValue(args, '--file-size') || '10MB';
+const aiGatewayScenario = (getArgValue(args, '--ai-gateway-scenario') || 'short-nonstream') as AIGatewayScenario;
 
 function getArgValue(args: string[], flag: string): string | undefined {
   const idx = args.indexOf(flag);
@@ -38,22 +43,24 @@ function getArgValue(args: string[], flag: string): string | undefined {
 }
 
 /** Resolve which modes to run */
-function getModesToRun(): BenchmarkMode[] | ['storage'] | ['browser'] {
+function getModesToRun(): BenchmarkMode[] | ['storage'] | ['browser'] | ['ai-gateway'] {
   if (!rawMode) return ['sequential', 'staggered', 'burst'];
   if (rawMode === 'storage') return ['storage'];
   if (rawMode === 'browser') return ['browser'];
+  if (rawMode === 'ai-gateway') return ['ai-gateway'];
   const m = rawMode === 'concurrent' ? 'burst' : rawMode as BenchmarkMode;
   return [m];
 }
 
 /** Map mode to results subdirectory name */
-function modeToDir(m: BenchmarkMode | 'storage'): string {
+function modeToDir(m: BenchmarkMode | 'storage' | 'ai-gateway'): string {
   switch (m) {
     case 'sequential': return 'sequential_tti';
     case 'staggered': return 'staggered_tti';
     case 'burst':
     case 'concurrent': return 'burst_tti';
     case 'storage': return 'storage';
+    case 'ai-gateway': return 'ai_gateway';
     default: return `${m}_tti`;
   }
 }
@@ -218,6 +225,51 @@ async function runBrowser(toRun: typeof browserProviders): Promise<void> {
   console.log(`Copied latest: ${latestPath}`);
 }
 
+async function runAIGateway(
+  toRun: typeof aiGatewayProviders,
+  scenario: AIGatewayScenario,
+): Promise<void> {
+  console.log('\n' + '='.repeat(70));
+  console.log('  MODE: AI GATEWAY');
+  console.log(`  Scenario: ${scenario}`);
+  console.log(`  Iterations per provider: ${iterations}`);
+  console.log('='.repeat(70));
+
+  const results: AIGatewayBenchmarkResult[] = [];
+
+  for (const providerConfig of toRun) {
+    const result = await runAIGatewayBenchmark({ ...providerConfig, iterations }, scenario);
+    results.push(result);
+  }
+
+  computeAIGatewayCompositeScores(results);
+
+  console.log('\n--- AI Gateway Benchmark Results ---');
+  for (const r of results) {
+    if (r.skipped) {
+      console.log(`${r.provider}: SKIPPED (${r.skipReason})`);
+      continue;
+    }
+    const ok = r.iterations.filter(i => !i.error).length;
+    const total = r.iterations.length;
+    console.log(`${r.provider}:`);
+    console.log(`  Total: ${(r.summary.totalMs.median / 1000).toFixed(2)}s (median), First token: ${(r.summary.firstTokenMs.median / 1000).toFixed(2)}s`);
+    console.log(`  Score: ${r.compositeScore?.toFixed(1) || '--'} (${ok}/${total} OK)`);
+  }
+
+  const timestamp = new Date().toISOString().slice(0, 10);
+  const subDir = modeToDir('ai-gateway');
+  const scenarioDir = path.resolve(__dirname, `../results/${subDir}/${scenario.replace(/-/g, '_')}`);
+  fs.mkdirSync(scenarioDir, { recursive: true });
+
+  const outPath = path.join(scenarioDir, `${timestamp}.json`);
+  await writeAIGatewayResultsJson(results, outPath);
+
+  const latestPath = path.join(scenarioDir, 'latest.json');
+  fs.copyFileSync(outPath, latestPath);
+  console.log(`Copied latest: ${latestPath}`);
+}
+
 async function main() {
   const modes = getModesToRun();
 
@@ -246,6 +298,31 @@ async function main() {
     return;
   }
 
+  // Handle AI gateway mode separately
+  if (modes[0] === 'ai-gateway') {
+    console.log('ComputeSDK AI Gateway Benchmarks');
+    console.log(`Scenario: ${aiGatewayScenario}`);
+    console.log(`Date: ${new Date().toISOString()}\n`);
+
+    const toRun = providerFilter
+      ? aiGatewayProviders.filter(p => p.name === providerFilter)
+      : aiGatewayProviders;
+
+    if (toRun.length === 0) {
+      if (providerFilter) {
+        console.error(`Unknown AI gateway provider: ${providerFilter}`);
+        console.error(`Available: ${aiGatewayProviders.map(p => p.name).join(', ')}`);
+      } else {
+        console.error('No AI gateway providers configured. Add entries to src/ai-gateway/providers.ts.');
+      }
+      process.exit(1);
+    }
+
+    await runAIGateway(toRun, aiGatewayScenario);
+    console.log('\nAll AI gateway tests complete.');
+    return;
+  }
+
   // Handle storage mode separately
   if (modes[0] === 'storage') {
     console.log('ComputeSDK Storage Provider Benchmarks');

From a7289b4f3d1df392a72716ce8989dd765e13a99e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 01:25:21 +0000
Subject: [PATCH 2/6] fix: use Cloudflare-compatible max completion token param

---
 src/ai-gateway/benchmark.ts | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/src/ai-gateway/benchmark.ts b/src/ai-gateway/benchmark.ts
index 9fec610..fc3157d 100644
--- a/src/ai-gateway/benchmark.ts
+++ b/src/ai-gateway/benchmark.ts
@@ -35,6 +35,26 @@ function normalizeBaseUrl(baseUrl: string): string {
   return baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl;
 }
 
+function buildCompletionBody(
+  provider: AIGatewayProviderConfig,
+  request: { prompt: string; maxTokens: number; stream: boolean },
+) {
+  const body: Record<string, unknown> = {
+    model: provider.model,
+    messages: [{ role: 'user', content: request.prompt }],
+    temperature: 0,
+    stream: request.stream,
+  };
+
+  if (provider.name === 'cloudflare-ai-gateway') {
+    body.max_completion_tokens = request.maxTokens;
+  } else {
+    body.max_tokens = request.maxTokens;
+  }
+
+  return body;
+}
+
 async function runNonStreamingIteration(
   provider: AIGatewayProviderConfig,
   timeout: number,
@@ -51,13 +71,7 @@ async function runNonStreamingIteration(
         authorization: `Bearer ${provider.apiKey}`,
         ...provider.defaultHeaders,
       },
-      body: JSON.stringify({
-        model: provider.model,
-        messages: [{ role: 'user', content: request.prompt }],
-        temperature: 0,
-        max_tokens: request.maxTokens,
-        stream: false,
-      }),
+      body: JSON.stringify(buildCompletionBody(provider, { ...request, stream: false })),
     }), timeout, 'Gateway request timed out');
 
     const totalMs = performance.now() - start;
@@ -113,13 +127,7 @@ async function runStreamingIteration(
         authorization: `Bearer ${provider.apiKey}`,
         ...provider.defaultHeaders,
       },
-      body: JSON.stringify({
-        model: provider.model,
-        messages: [{ role: 'user', content: request.prompt }],
-        temperature: 0,
-        max_tokens: request.maxTokens,
-        stream: true,
-      }),
+      body: JSON.stringify(buildCompletionBody(provider, { ...request, stream: true })),
     }), timeout, 'Gateway request timed out');
 
     const statusCode = response.status;

From bf4d241daecb151cb73eb78f1f86f0c74d37afe0 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 01:28:11 +0000
Subject: [PATCH 3/6] refactor: always send max_completion_tokens to AI gateways

---
 src/ai-gateway/benchmark.ts | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/ai-gateway/benchmark.ts b/src/ai-gateway/benchmark.ts
index fc3157d..fe8a71e 100644
--- a/src/ai-gateway/benchmark.ts
+++ b/src/ai-gateway/benchmark.ts
@@ -44,14 +44,9 @@ function buildCompletionBody(
     messages: [{ role: 'user', content: request.prompt }],
     temperature: 0,
     stream: request.stream,
+    max_completion_tokens: request.maxTokens,
   };
 
-  if (provider.name === 'cloudflare-ai-gateway') {
-    body.max_completion_tokens = request.maxTokens;
-  } else {
-    body.max_tokens = request.maxTokens;
-  }
-
   return body;
 }
 

From 86e754d4aeeb1976759add1648ebcfd65ab642d1 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 01:32:14 +0000
Subject: [PATCH 4/6] fix: scope ai gateway workflow concurrency by event and
 ref

---
 .github/workflows/ai-gateway-benchmarks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ai-gateway-benchmarks.yml b/.github/workflows/ai-gateway-benchmarks.yml
index db4d455..7c86680 100644
--- a/.github/workflows/ai-gateway-benchmarks.yml
+++ b/.github/workflows/ai-gateway-benchmarks.yml
@@ -16,7 +16,7 @@ on:
         default: '50'
 
 concurrency:
-  group: ai-gateway-benchmarks
+  group: ai-gateway-benchmarks-${{ github.event_name }}-${{ github.ref }}
   cancel-in-progress: true
 
 permissions:

From 250fce84c612a23145040b48e3f412945bb661fd Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 01:42:54 +0000
Subject: [PATCH 5/6] fix: compute streaming throughput from usage tokens

---
 .github/workflows/ai-gateway-benchmarks.yml |  2 +-
 src/ai-gateway/benchmark.ts                 | 44 +++++++++++++--------
 src/ai-gateway/types.ts                     |  3 +-
 src/merge-results.ts                        |  2 +-
 4 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/ai-gateway-benchmarks.yml b/.github/workflows/ai-gateway-benchmarks.yml
index 7c86680..ccc8dc6 100644
--- a/.github/workflows/ai-gateway-benchmarks.yml
+++ b/.github/workflows/ai-gateway-benchmarks.yml
@@ -126,7 +126,7 @@ jobs:
                 const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--';
                 const first = (r.summary.firstTokenMs.median / 1000).toFixed(2) + 's';
                 const total = (r.summary.totalMs.median / 1000).toFixed(2) + 's';
-                const tps = r.summary.outputTokensPerSec.median.toFixed(1);
+                const tps = r.throughputAvailable ? r.summary.outputTokensPerSec.median.toFixed(1) : '--';
                 const ok = r.iterations.filter(it => !it.error).length;
                 const count = r.iterations.length;
                 body += `| ${i + 1} | ${r.provider} | ${score} | ${first} | ${total} | ${tps} | ${ok}/${count} |\n`;
diff --git a/src/ai-gateway/benchmark.ts b/src/ai-gateway/benchmark.ts
index fe8a71e..06a6b41 100644
--- a/src/ai-gateway/benchmark.ts
+++ b/src/ai-gateway/benchmark.ts
@@ -47,6 +47,10 @@ function buildCompletionBody(
     max_completion_tokens: request.maxTokens,
   };
 
+  if (request.stream) {
+    body.stream_options = { include_usage: true };
+  }
+
   return body;
 }
 
@@ -78,7 +82,7 @@ async function runNonStreamingIteration(
         firstTokenMs: 0,
         totalMs,
         outputTokens: 0,
-        outputTokensPerSec: 0,
+        outputTokensPerSec: undefined,
         statusCode,
         error: `HTTP ${statusCode}: ${bodyText.slice(0, 200)}`,
       };
@@ -92,7 +96,7 @@ async function runNonStreamingIteration(
       firstTokenMs: totalMs,
       totalMs,
       outputTokens,
-      outputTokensPerSec: outputTokens > 0 ? outputTokens / seconds : 0,
+      outputTokensPerSec: outputTokens > 0 ? outputTokens / seconds : undefined,
       statusCode,
     };
   } catch (err) {
@@ -100,7 +104,7 @@ async function runNonStreamingIteration(
       firstTokenMs: 0,
       totalMs: performance.now() - start,
       outputTokens: 0,
-      outputTokensPerSec: 0,
+      outputTokensPerSec: undefined,
       error: err instanceof Error ? err.message : String(err),
     };
   }
@@ -132,7 +136,7 @@ async function runStreamingIteration(
         firstTokenMs: 0,
         totalMs: performance.now() - start,
         outputTokens: 0,
-        outputTokensPerSec: 0,
+        outputTokensPerSec: undefined,
         statusCode,
         error: `HTTP ${statusCode}: ${errorText.slice(0, 200)}`,
       };
@@ -141,7 +145,7 @@ async function runStreamingIteration(
     const reader = response.body.getReader();
     const decoder = new TextDecoder();
     let firstTokenMs = 0;
-    let outputTokens = 0;
+    let outputTokens: number | undefined;
     let done = false;
 
     while (!done) {
@@ -150,19 +154,20 @@ async function runStreamingIteration(
       if (done) break;
       const chunk = decoder.decode(readResult.value, { stream: true });
 
-      if (chunk.includes('data:') && firstTokenMs === 0) {
-        firstTokenMs = performance.now() - start;
-      }
-
       const lines = chunk.split('\n').filter(line => line.startsWith('data: '));
       for (const line of lines) {
         const data = line.slice(6).trim();
         if (!data || data === '[DONE]') continue;
         try {
           const payload = JSON.parse(data);
+          const usageTokens = extractCompletionTokens(payload);
+          if (usageTokens > 0) {
+            outputTokens = usageTokens;
+          }
+
           const tokenText = payload?.choices?.[0]?.delta?.content;
-          if (typeof tokenText === 'string' && tokenText.length > 0) {
-            outputTokens += 1;
+          if (firstTokenMs === 0 && typeof tokenText === 'string' && tokenText.length > 0) {
+            firstTokenMs = performance.now() - start;
           }
         } catch {
           // Ignore malformed partial SSE chunks.
@@ -177,8 +182,8 @@ async function runStreamingIteration(
     return {
       firstTokenMs: effectiveStart,
       totalMs,
-      outputTokens,
-      outputTokensPerSec: outputTokens > 0 ? outputTokens / generationSeconds : 0,
+      outputTokens: outputTokens ?? 0,
+      outputTokensPerSec: outputTokens && outputTokens > 0 ? outputTokens / generationSeconds : undefined,
       statusCode,
     };
   } catch (err) {
@@ -186,7 +191,7 @@ async function runStreamingIteration(
       firstTokenMs: 0,
       totalMs: performance.now() - start,
       outputTokens: 0,
-      outputTokensPerSec: 0,
+      outputTokensPerSec: undefined,
       error: err instanceof Error ? err.message : String(err),
     };
   }
@@ -210,6 +215,7 @@ export async function runAIGatewayBenchmark(
         totalMs: { median: 0, p95: 0, p99: 0 },
         outputTokensPerSec: { median: 0, p95: 0, p99: 0 },
       },
+      throughputAvailable: false,
       skipped: true,
       skipReason: `Missing: ${missingVars.join(', ')}`,
     };
@@ -238,6 +244,10 @@ export async function runAIGatewayBenchmark(
   }
 
   const successful = results.filter(r => !r.error);
+  const throughputValues = successful
+    .map(r => r.outputTokensPerSec)
+    .filter((v): v is number => typeof v === 'number' && Number.isFinite(v));
+
   return {
     provider: name,
     mode: 'ai-gateway',
@@ -247,8 +257,9 @@ export async function runAIGatewayBenchmark(
     summary: {
       firstTokenMs: computeStats(successful.map(r => r.firstTokenMs).filter(v => v > 0)),
       totalMs: computeStats(successful.map(r => r.totalMs)),
-      outputTokensPerSec: computeStats(successful.map(r => r.outputTokensPerSec)),
+      outputTokensPerSec: computeStats(throughputValues),
     },
+    throughputAvailable: throughputValues.length > 0,
   };
 }
 
@@ -269,7 +280,7 @@ export async function writeAIGatewayResultsJson(results: AIGatewayBenchmarkResul
       firstTokenMs: round(i.firstTokenMs),
       totalMs: round(i.totalMs),
       outputTokens: i.outputTokens,
-      outputTokensPerSec: round(i.outputTokensPerSec),
+      ...(i.outputTokensPerSec !== undefined ? { outputTokensPerSec: round(i.outputTokensPerSec) } : {}),
       ...(i.statusCode !== undefined ? { statusCode: i.statusCode } : {}),
       ...(i.error ? { error: i.error } : {}),
     })),
@@ -278,6 +289,7 @@ export async function writeAIGatewayResultsJson(results: AIGatewayBenchmarkResul
       totalMs: roundStats(r.summary.totalMs),
       outputTokensPerSec: roundStats(r.summary.outputTokensPerSec),
     },
+    ...(r.throughputAvailable !== undefined ? { throughputAvailable: r.throughputAvailable } : {}),
     ...(r.compositeScore !== undefined ? { compositeScore: round(r.compositeScore) } : {}),
     ...(r.successRate !== undefined ? { successRate: round(r.successRate) } : {}),
     ...(r.skipped ? { skipped: r.skipped, skipReason: r.skipReason } : {}),
diff --git a/src/ai-gateway/types.ts b/src/ai-gateway/types.ts
index 937b807..c65c573 100644
--- a/src/ai-gateway/types.ts
+++ b/src/ai-gateway/types.ts
@@ -15,7 +15,7 @@ export interface AIGatewayTimingResult {
   firstTokenMs: number;
   totalMs: number;
   outputTokens: number;
-  outputTokensPerSec: number;
+  outputTokensPerSec?: number;
   statusCode?: number;
   error?: string;
 }
@@ -33,6 +33,7 @@ export interface AIGatewayBenchmarkResult {
   model: string;
   iterations: AIGatewayTimingResult[];
   summary: AIGatewayStats;
+  throughputAvailable?: boolean;
   compositeScore?: number;
   successRate?: number;
   skipped?: boolean;
diff --git a/src/merge-results.ts b/src/merge-results.ts
index e75d1c8..26dbc5f 100644
--- a/src/merge-results.ts
+++ b/src/merge-results.ts
@@ -336,7 +336,7 @@ function printAIGatewayResultsTable(results: AIGatewayBenchmarkResult[], scenari
     const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--';
     const first = (r.summary.firstTokenMs.median / 1000).toFixed(2) + 's';
     const tot = (r.summary.totalMs.median / 1000).toFixed(2) + 's';
-    const tps = r.summary.outputTokensPerSec.median.toFixed(1);
+    const tps = r.throughputAvailable ? r.summary.outputTokensPerSec.median.toFixed(1) : '--';
     console.log([r.provider.padEnd(24), score.padEnd(8), first.padEnd(14), tot.padEnd(14), tps.padEnd(14), `${ok}/${total} OK`.padEnd(12)].join(' | '));
   }
   console.log('='.repeat(108));

From 2d92b4b4159d1070cc92399ca3e1978829c49df8 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 02:05:12 +0000
Subject: [PATCH 6/6] feat: include model in AI gateway benchmark output

---
 .github/workflows/ai-gateway-benchmarks.yml | 5 +++++
 src/run.ts                                  | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/.github/workflows/ai-gateway-benchmarks.yml b/.github/workflows/ai-gateway-benchmarks.yml
index ccc8dc6..42a0594 100644
--- a/.github/workflows/ai-gateway-benchmarks.yml
+++ b/.github/workflows/ai-gateway-benchmarks.yml
@@ -118,7 +118,12 @@ jobs:
               if (results.length === 0) continue;
               hasResults = true;
 
+              const models = [...new Set(results.map(r => r.model).filter(Boolean))];
+
               body += `### ${scenario.replace('_', ' ').toUpperCase()}\n\n`;
+              if (models.length > 0) {
+                body += `Model${models.length > 1 ? 's' : ''}: ${models.map(m => `\`${m}\``).join(', ')}\n\n`;
+              }
               body += '| # | Provider | Score | First Token | Total | Tok/sec | Status |\n';
               body += '|---|----------|-------|-------------|-------|---------|--------|\n';
 
diff --git a/src/run.ts b/src/run.ts
index ff76320..416332e 100644
--- a/src/run.ts
+++ b/src/run.ts
@@ -244,7 +244,12 @@ async function runAIGateway(
 
   computeAIGatewayCompositeScores(results);
 
+  const models = Array.from(new Set(results.map(r => r.model).filter(Boolean)));
+
   console.log('\n--- AI Gateway Benchmark Results ---');
+  if (models.length > 0) {
+    console.log(`Model${models.length > 1 ? 's' : ''}: ${models.join(', ')}`);
+  }
   for (const r of results) {
     if (r.skipped) {
       console.log(`${r.provider}: SKIPPED (${r.skipReason})`);