From 944a0d87f601904d2c53e3063c79093f445a9eca Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 16 Apr 2026 22:25:49 +0000 Subject: [PATCH 1/6] feat: add AI gateway benchmark mode and CI workflow --- .github/workflows/ai-gateway-benchmarks.yml | 167 +++++++++++ package.json | 3 + src/ai-gateway/benchmark.ts | 301 ++++++++++++++++++++ src/ai-gateway/providers.ts | 29 ++ src/ai-gateway/scoring.ts | 66 +++++ src/ai-gateway/types.ts | 40 +++ src/merge-results.ts | 108 ++++++- src/run.ts | 81 +++++- 8 files changed, 790 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/ai-gateway-benchmarks.yml create mode 100644 src/ai-gateway/benchmark.ts create mode 100644 src/ai-gateway/providers.ts create mode 100644 src/ai-gateway/scoring.ts create mode 100644 src/ai-gateway/types.ts diff --git a/.github/workflows/ai-gateway-benchmarks.yml b/.github/workflows/ai-gateway-benchmarks.yml new file mode 100644 index 0000000..db4d455 --- /dev/null +++ b/.github/workflows/ai-gateway-benchmarks.yml @@ -0,0 +1,167 @@ +name: AI Gateway Benchmark + +on: + pull_request: + paths: + - 'src/ai-gateway/**' + - 'src/util/**' + - 'src/run.ts' + - 'src/merge-results.ts' + - 'package.json' + workflow_dispatch: + inputs: + iterations: + description: 'Iterations per provider' + required: false + default: '50' + +concurrency: + group: ai-gateway-benchmarks + cancel-in-progress: true + +permissions: + contents: read + pull-requests: write + +jobs: + bench: + name: Bench ${{ matrix.provider }} ${{ matrix.scenario }} + runs-on: namespace-profile-default + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + provider: + - openrouter + - vercel-ai-gateway + - cloudflare-ai-gateway + scenario: + - short-nonstream + - short-stream + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 24 + cache: 'npm' + - run: npm ci + - name: Clear stale results from checkout + run: rm -rf results/ai_gateway/ + - name: Run AI gateway benchmark + env: + AI_GATEWAY_MODEL: ${{ secrets.AI_GATEWAY_MODEL }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OPENROUTER_BASE_URL: ${{ secrets.OPENROUTER_BASE_URL }} + OPENROUTER_HTTP_REFERER: ${{ secrets.OPENROUTER_HTTP_REFERER }} + OPENROUTER_X_TITLE: ${{ secrets.OPENROUTER_X_TITLE }} + VERCEL_AI_GATEWAY_BASE_URL: ${{ secrets.VERCEL_AI_GATEWAY_BASE_URL }} + VERCEL_AI_GATEWAY_API_KEY: ${{ secrets.VERCEL_AI_GATEWAY_API_KEY }} + CLOUDFLARE_AI_GATEWAY_BASE_URL: ${{ secrets.CLOUDFLARE_AI_GATEWAY_BASE_URL }} + CLOUDFLARE_AI_GATEWAY_API_KEY: ${{ secrets.CLOUDFLARE_AI_GATEWAY_API_KEY }} + run: | + npm run bench -- \ + --mode ai-gateway \ + --provider ${{ matrix.provider }} \ + --ai-gateway-scenario ${{ matrix.scenario }} \ + --iterations ${{ github.event.inputs.iterations || '50' }} + - name: Upload results + if: always() + uses: actions/upload-artifact@v4 + with: + name: ai-gateway-results-${{ matrix.provider }}-${{ matrix.scenario }} + path: results/ai_gateway/ + if-no-files-found: ignore + retention-days: 7 + + collect: + name: Collect Results + runs-on: namespace-profile-default + needs: bench + if: always() + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 24 + cache: 'npm' + - run: npm ci + - name: Download all artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts/ + pattern: ai-gateway-results-* + - name: Merge results + run: npx tsx src/merge-results.ts --input artifacts --mode ai-gateway + - name: Post results to PR + if: github.event_name == 'pull_request' + continue-on-error: true + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const path = require('path'); + + const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + const scenarios = ['short_nonstream', 'short_stream']; + let body = '## AI Gateway Benchmark Results\n\n'; + + let hasResults = false; + for (const scenario of scenarios) { + const latestPath = path.join('results', 'ai_gateway', scenario, 'latest.json'); + if (!fs.existsSync(latestPath)) continue; + + const data = JSON.parse(fs.readFileSync(latestPath, 'utf-8')); + const results = data.results + .filter(r => !r.skipped) + .sort((a, b) => (b.compositeScore || 0) - (a.compositeScore || 0)); + + if (results.length === 0) continue; + hasResults = true; + + body += `### ${scenario.replace('_', ' ').toUpperCase()}\n\n`; + body += '| # | Provider | Score | First Token | Total | Tok/sec | Status |\n'; + body += '|---|----------|-------|-------------|-------|---------|--------|\n'; + + results.forEach((r, i) => { + const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--'; + const first = (r.summary.firstTokenMs.median / 1000).toFixed(2) + 's'; + const total = (r.summary.totalMs.median / 1000).toFixed(2) + 's'; + const tps = r.summary.outputTokensPerSec.median.toFixed(1); + const ok = r.iterations.filter(it => !it.error).length; + const count = r.iterations.length; + body += `| ${i + 1} | ${r.provider} | ${score} | ${first} | ${total} | ${tps} | ${ok}/${count} |\n`; + }); + + body += '\n'; + } + + if (!hasResults) { + body += '> No AI gateway benchmark results were generated.\n\n'; + } + + body += `---\n*[View full run](${runUrl})*`; + + const marker = '## AI Gateway Benchmark Results'; + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + const existing = comments.find(c => c.body.startsWith(marker)); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body, + }); + } diff --git a/package.json b/package.json index a3c95f5..2ee194e 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,9 @@ "bench:sprites": "tsx src/run.ts --provider sprites", "bench:browser": "tsx src/run.ts --mode browser", "bench:browser:browserbase": "tsx src/run.ts --mode browser --provider browserbase", + "bench:ai-gateway": "tsx src/run.ts --mode ai-gateway", + "bench:ai-gateway:nonstream": "tsx src/run.ts --mode ai-gateway --ai-gateway-scenario short-nonstream", + "bench:ai-gateway:stream": "tsx src/run.ts --mode ai-gateway --ai-gateway-scenario short-stream", "bench:storage": "tsx src/run.ts --mode storage", "bench:storage:s3": "tsx src/run.ts --mode storage --provider aws-s3", "bench:storage:r2": "tsx src/run.ts --mode storage --provider cloudflare-r2", diff --git a/src/ai-gateway/benchmark.ts b/src/ai-gateway/benchmark.ts new file mode 100644 index 0000000..9fec610 --- /dev/null +++ b/src/ai-gateway/benchmark.ts @@ -0,0 +1,301 @@ +import { computeStats } from '../util/stats.js'; +import { withTimeout } from '../util/timeout.js'; +import type { + AIGatewayProviderConfig, + AIGatewayScenario, + AIGatewayTimingResult, + AIGatewayBenchmarkResult, +} from './types.js'; + +function round(n: number): number { + return Math.round(n * 100) / 100; +} + +const SCENARIO_PROMPTS: Record = { + 'short-nonstream': { + prompt: 'Reply with exactly: ok', + maxTokens: 16, + stream: false, + }, + 'short-stream': { + prompt: 'Write one short sentence about distributed systems.', + maxTokens: 64, + stream: true, + }, +}; + +function extractCompletionTokens(payload: any): number { + if (typeof payload?.usage?.completion_tokens === 'number') return payload.usage.completion_tokens; + if (typeof payload?.usage?.output_tokens === 'number') return payload.usage.output_tokens; + if (typeof payload?.completion_tokens === 'number') return payload.completion_tokens; + return 0; +} + +function normalizeBaseUrl(baseUrl: string): string { + return baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl; +} + +async function runNonStreamingIteration( + provider: AIGatewayProviderConfig, + timeout: number, + scenario: AIGatewayScenario, +): Promise { + const request = SCENARIO_PROMPTS[scenario]; + const start = performance.now(); + + try { + const response = await withTimeout(fetch(`${normalizeBaseUrl(provider.baseUrl)}/chat/completions`, { + method: 'POST', + headers: { + 'content-type': 'application/json', + authorization: `Bearer ${provider.apiKey}`, + ...provider.defaultHeaders, + }, + body: JSON.stringify({ + model: provider.model, + messages: [{ role: 'user', content: request.prompt }], + temperature: 0, + max_tokens: request.maxTokens, + stream: false, + }), + }), timeout, 'Gateway request timed out'); + + const totalMs = performance.now() - start; + const statusCode = response.status; + const bodyText = await response.text(); + + if (!response.ok) { + return { + firstTokenMs: 0, + totalMs, + outputTokens: 0, + outputTokensPerSec: 0, + statusCode, + error: `HTTP ${statusCode}: ${bodyText.slice(0, 200)}`, + }; + } + + const payload = JSON.parse(bodyText); + const outputTokens = extractCompletionTokens(payload); + const seconds = Math.max(totalMs / 1000, 0.001); + + return { + firstTokenMs: totalMs, + totalMs, + outputTokens, + outputTokensPerSec: outputTokens > 0 ? outputTokens / seconds : 0, + statusCode, + }; + } catch (err) { + return { + firstTokenMs: 0, + totalMs: performance.now() - start, + outputTokens: 0, + outputTokensPerSec: 0, + error: err instanceof Error ? err.message : String(err), + }; + } +} + +async function runStreamingIteration( + provider: AIGatewayProviderConfig, + timeout: number, + scenario: AIGatewayScenario, +): Promise { + const request = SCENARIO_PROMPTS[scenario]; + const start = performance.now(); + + try { + const response = await withTimeout(fetch(`${normalizeBaseUrl(provider.baseUrl)}/chat/completions`, { + method: 'POST', + headers: { + 'content-type': 'application/json', + authorization: `Bearer ${provider.apiKey}`, + ...provider.defaultHeaders, + }, + body: JSON.stringify({ + model: provider.model, + messages: [{ role: 'user', content: request.prompt }], + temperature: 0, + max_tokens: request.maxTokens, + stream: true, + }), + }), timeout, 'Gateway request timed out'); + + const statusCode = response.status; + if (!response.ok || !response.body) { + const errorText = await response.text().catch(() => ''); + return { + firstTokenMs: 0, + totalMs: performance.now() - start, + outputTokens: 0, + outputTokensPerSec: 0, + statusCode, + error: `HTTP ${statusCode}: ${errorText.slice(0, 200)}`, + }; + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let firstTokenMs = 0; + let outputTokens = 0; + let done = false; + + while (!done) { + const readResult = await withTimeout(reader.read(), timeout, 'Stream read timed out'); + done = readResult.done; + if (done) break; + const chunk = decoder.decode(readResult.value, { stream: true }); + + if (chunk.includes('data:') && firstTokenMs === 0) { + firstTokenMs = performance.now() - start; + } + + const lines = chunk.split('\n').filter(line => line.startsWith('data: ')); + for (const line of lines) { + const data = line.slice(6).trim(); + if (!data || data === '[DONE]') continue; + try { + const payload = JSON.parse(data); + const tokenText = payload?.choices?.[0]?.delta?.content; + if (typeof tokenText === 'string' && tokenText.length > 0) { + outputTokens += 1; + } + } catch { + // Ignore malformed partial SSE chunks. + } + } + } + + const totalMs = performance.now() - start; + const effectiveStart = firstTokenMs > 0 ? firstTokenMs : totalMs; + const generationSeconds = Math.max((totalMs - effectiveStart) / 1000, 0.001); + + return { + firstTokenMs: effectiveStart, + totalMs, + outputTokens, + outputTokensPerSec: outputTokens > 0 ? outputTokens / generationSeconds : 0, + statusCode, + }; + } catch (err) { + return { + firstTokenMs: 0, + totalMs: performance.now() - start, + outputTokens: 0, + outputTokensPerSec: 0, + error: err instanceof Error ? err.message : String(err), + }; + } +} + +export async function runAIGatewayBenchmark( + config: AIGatewayProviderConfig, + scenario: AIGatewayScenario, +): Promise { + const { name, requiredEnvVars, iterations = 100, timeout = 45_000 } = config; + const missingVars = requiredEnvVars.filter(v => !process.env[v]); + if (missingVars.length > 0) { + return { + provider: name, + mode: 'ai-gateway', + scenario, + model: config.model || process.env.AI_GATEWAY_MODEL || '', + iterations: [], + summary: { + firstTokenMs: { median: 0, p95: 0, p99: 0 }, + totalMs: { median: 0, p95: 0, p99: 0 }, + outputTokensPerSec: { median: 0, p95: 0, p99: 0 }, + }, + skipped: true, + skipReason: `Missing: ${missingVars.join(', ')}`, + }; + } + + const resolved: AIGatewayProviderConfig = { + ...config, + model: config.model || process.env.AI_GATEWAY_MODEL || '', + apiKey: config.apiKey || '', + baseUrl: config.baseUrl || '', + }; + + const results: AIGatewayTimingResult[] = []; + const isStreaming = SCENARIO_PROMPTS[scenario].stream; + + console.log(`\n--- AI Gateway Benchmarking: ${name} (${scenario}, ${iterations} iterations) ---`); + + for (let i = 0; i < iterations; i++) { + const run = isStreaming ? runStreamingIteration : runNonStreamingIteration; + const result = await run(resolved, timeout, scenario); + results.push(result); + + const status = result.error ? `FAILED: ${result.error}` : `${(result.totalMs / 1000).toFixed(2)}s`; + const first = result.firstTokenMs > 0 ? `${(result.firstTokenMs / 1000).toFixed(2)}s` : '--'; + console.log(` Iteration ${i + 1}/${iterations}: total ${status}, first ${first}`); + } + + const successful = results.filter(r => !r.error); + return { + provider: name, + mode: 'ai-gateway', + scenario, + model: resolved.model, + iterations: results, + summary: { + firstTokenMs: computeStats(successful.map(r => r.firstTokenMs).filter(v => v > 0)), + totalMs: computeStats(successful.map(r => r.totalMs)), + outputTokensPerSec: computeStats(successful.map(r => r.outputTokensPerSec)), + }, + }; +} + +function roundStats(s: { median: number; p95: number; p99: number }) { + return { median: round(s.median), p95: round(s.p95), p99: round(s.p99) }; +} + +export async function writeAIGatewayResultsJson(results: AIGatewayBenchmarkResult[], outPath: string): Promise { + const fs = await import('fs'); + const os = await import('os'); + + const cleanResults = results.map(r => ({ + provider: r.provider, + mode: r.mode, + scenario: r.scenario, + model: r.model, + iterations: r.iterations.map(i => ({ + firstTokenMs: round(i.firstTokenMs), + totalMs: round(i.totalMs), + outputTokens: i.outputTokens, + outputTokensPerSec: round(i.outputTokensPerSec), + ...(i.statusCode !== undefined ? { statusCode: i.statusCode } : {}), + ...(i.error ? { error: i.error } : {}), + })), + summary: { + firstTokenMs: roundStats(r.summary.firstTokenMs), + totalMs: roundStats(r.summary.totalMs), + outputTokensPerSec: roundStats(r.summary.outputTokensPerSec), + }, + ...(r.compositeScore !== undefined ? { compositeScore: round(r.compositeScore) } : {}), + ...(r.successRate !== undefined ? { successRate: round(r.successRate) } : {}), + ...(r.skipped ? { skipped: r.skipped, skipReason: r.skipReason } : {}), + })); + + const output = { + version: '1.0', + timestamp: new Date().toISOString(), + environment: { + node: process.version, + platform: os.platform(), + arch: os.arch(), + }, + config: { + iterations: results[0]?.iterations.length || 0, + timeoutMs: 45000, + scenario: results[0]?.scenario || null, + }, + results: cleanResults, + }; + + fs.writeFileSync(outPath, JSON.stringify(output, null, 2)); + console.log(`Results written to ${outPath}`); +} diff --git a/src/ai-gateway/providers.ts b/src/ai-gateway/providers.ts new file mode 100644 index 0000000..537b7bd --- /dev/null +++ b/src/ai-gateway/providers.ts @@ -0,0 +1,29 @@ +import type { AIGatewayProviderConfig } from './types.js'; + +export const aiGatewayProviders: AIGatewayProviderConfig[] = [ + { + name: 'openrouter', + requiredEnvVars: ['OPENROUTER_API_KEY', 'AI_GATEWAY_MODEL'], + baseUrl: process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1', + apiKey: process.env.OPENROUTER_API_KEY || '', + model: process.env.AI_GATEWAY_MODEL || '', + defaultHeaders: { + ...(process.env.OPENROUTER_HTTP_REFERER ? { 'HTTP-Referer': process.env.OPENROUTER_HTTP_REFERER } : {}), + ...(process.env.OPENROUTER_X_TITLE ? { 'X-Title': process.env.OPENROUTER_X_TITLE } : {}), + }, + }, + { + name: 'vercel-ai-gateway', + requiredEnvVars: ['VERCEL_AI_GATEWAY_BASE_URL', 'VERCEL_AI_GATEWAY_API_KEY', 'AI_GATEWAY_MODEL'], + baseUrl: process.env.VERCEL_AI_GATEWAY_BASE_URL || '', + apiKey: process.env.VERCEL_AI_GATEWAY_API_KEY || '', + model: process.env.AI_GATEWAY_MODEL || '', + }, + { + name: 'cloudflare-ai-gateway', + requiredEnvVars: ['CLOUDFLARE_AI_GATEWAY_BASE_URL', 'CLOUDFLARE_AI_GATEWAY_API_KEY', 'AI_GATEWAY_MODEL'], + baseUrl: process.env.CLOUDFLARE_AI_GATEWAY_BASE_URL || '', + apiKey: process.env.CLOUDFLARE_AI_GATEWAY_API_KEY || '', + model: process.env.AI_GATEWAY_MODEL || '', + }, +]; diff --git a/src/ai-gateway/scoring.ts b/src/ai-gateway/scoring.ts new file mode 100644 index 0000000..9cd7bbb --- /dev/null +++ b/src/ai-gateway/scoring.ts @@ -0,0 +1,66 @@ +import type { AIGatewayBenchmarkResult } from './types.js'; + +export interface AIGatewayScoringWeights { + totalMedian: number; + totalP95: number; + totalP99: number; + firstTokenMedian: number; +} + +export const DEFAULT_AI_GATEWAY_WEIGHTS: AIGatewayScoringWeights = { + totalMedian: 0.35, + totalP95: 0.20, + totalP99: 0.10, + firstTokenMedian: 0.35, +}; + +const LATENCY_CEILING_MS = 60000; + +function scoreLatency(valueMs: number): number { + return Math.max(0, 100 * (1 - valueMs / LATENCY_CEILING_MS)); +} + +export function computeAIGatewaySuccessRate(result: AIGatewayBenchmarkResult): number { + if (result.skipped || result.iterations.length === 0) return 0; + const successful = result.iterations.filter(i => !i.error).length; + return successful / result.iterations.length; +} + +function computeAIGatewayScore( + result: AIGatewayBenchmarkResult, + weights: AIGatewayScoringWeights = DEFAULT_AI_GATEWAY_WEIGHTS, +): number { + return ( + weights.totalMedian * scoreLatency(result.summary.totalMs.median) + + weights.totalP95 * scoreLatency(result.summary.totalMs.p95) + + weights.totalP99 * scoreLatency(result.summary.totalMs.p99) + + weights.firstTokenMedian * scoreLatency(result.summary.firstTokenMs.median) + ); +} + +export function computeAIGatewayCompositeScores( + results: AIGatewayBenchmarkResult[], + weights: AIGatewayScoringWeights = DEFAULT_AI_GATEWAY_WEIGHTS, +): void { + for (const result of results) { + const successRate = computeAIGatewaySuccessRate(result); + result.successRate = successRate; + + if (result.skipped || successRate === 0) { + result.compositeScore = 0; + continue; + } + + const gatewayScore = computeAIGatewayScore(result, weights); + result.compositeScore = Math.round(gatewayScore * successRate * 100) / 100; + } +} + +export function sortAIGatewayByCompositeScore(results: AIGatewayBenchmarkResult[]): AIGatewayBenchmarkResult[] { + return [...results].sort((a, b) => { + if (a.skipped && !b.skipped) return 1; + if (!a.skipped && b.skipped) return -1; + if (a.skipped && b.skipped) return 0; + return (b.compositeScore ?? 0) - (a.compositeScore ?? 0); + }); +} diff --git a/src/ai-gateway/types.ts b/src/ai-gateway/types.ts new file mode 100644 index 0000000..937b807 --- /dev/null +++ b/src/ai-gateway/types.ts @@ -0,0 +1,40 @@ +export type AIGatewayScenario = 'short-nonstream' | 'short-stream'; + +export interface AIGatewayProviderConfig { + name: string; + requiredEnvVars: string[]; + baseUrl: string; + apiKey: string; + model: string; + defaultHeaders?: Record; + timeout?: number; + iterations?: number; +} + +export interface AIGatewayTimingResult { + firstTokenMs: number; + totalMs: number; + outputTokens: number; + outputTokensPerSec: number; + statusCode?: number; + error?: string; +} + +export interface AIGatewayStats { + firstTokenMs: { median: number; p95: number; p99: number }; + totalMs: { median: number; p95: number; p99: number }; + outputTokensPerSec: { median: number; p95: number; p99: number }; +} + +export interface AIGatewayBenchmarkResult { + provider: string; + mode: 'ai-gateway'; + scenario: AIGatewayScenario; + model: string; + iterations: AIGatewayTimingResult[]; + summary: AIGatewayStats; + compositeScore?: number; + successRate?: number; + skipped?: boolean; + skipReason?: string; +} diff --git a/src/merge-results.ts b/src/merge-results.ts index a75d651..e75d1c8 100644 --- a/src/merge-results.ts +++ b/src/merge-results.ts @@ -1,7 +1,7 @@ /** * Merge per-provider benchmark results into combined result files. * - * Usage: tsx src/merge-results.ts --input [--mode storage|browser] + * Usage: tsx src/merge-results.ts --input [--mode storage|browser|ai-gateway] * * By default, merges sandbox benchmark results: reads latest.json files from * the input directory, groups by mode (sequential/staggered/burst), computes @@ -14,6 +14,10 @@ * With --mode browser, merges browser benchmark results: deduplicates by * provider, computes browser-specific composite scores, and writes combined * files to results/browser/latest.json. + * + * With --mode ai-gateway, merges AI gateway benchmark results grouped by + * scenario, computes gateway-specific composite scores, and writes combined + * files to results/ai_gateway//latest.json. */ import fs from 'fs'; import path from 'path'; @@ -21,10 +25,12 @@ import { fileURLToPath } from 'url'; import { computeCompositeScores } from './sandbox/scoring.js'; import { computeStorageCompositeScores, sortStorageByCompositeScore } from './storage/scoring.js'; import { computeBrowserCompositeScores, sortBrowserByCompositeScore } from './browser/scoring.js'; +import { computeAIGatewayCompositeScores, sortAIGatewayByCompositeScore } from './ai-gateway/scoring.js'; import { printResultsTable, writeResultsJson } from './sandbox/table.js'; import type { BenchmarkResult } from './sandbox/types.js'; import type { StorageBenchmarkResult } from './storage/types.js'; import type { BrowserBenchmarkResult } from './browser/types.js'; +import type { AIGatewayBenchmarkResult } from './ai-gateway/types.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const ROOT = path.resolve(__dirname, '..'); @@ -38,7 +44,7 @@ function getArgValue(flag: string): string | undefined { const inputDir = getArgValue('--input'); const mergeMode = getArgValue('--mode'); if (!inputDir) { - console.error('Usage: tsx src/merge-results.ts --input [--mode storage|browser]'); + console.error('Usage: tsx src/merge-results.ts --input [--mode storage|browser|ai-gateway]'); process.exit(1); } @@ -307,6 +313,35 @@ function printBrowserResultsTable(results: BrowserBenchmarkResult[]): void { console.log('='.repeat(110)); } +function printAIGatewayResultsTable(results: AIGatewayBenchmarkResult[], scenario: string): void { + const sorted = sortAIGatewayByCompositeScore(results); + + console.log(`\n${'='.repeat(108)}`); + console.log(` AI GATEWAY BENCHMARK RESULTS - ${scenario.toUpperCase()}`); + console.log('='.repeat(108)); + console.log( + ['Provider', 'Score', 'First Token', 'Total', 'Tok/sec', 'Status'] + .map((h, i) => h.padEnd([24, 8, 14, 14, 14, 12][i])) + .join(' | ') + ); + console.log([24, 8, 14, 14, 14, 12].map(w => '-'.repeat(w)).join('-+-')); + + for (const r of sorted) { + if (r.skipped) { + console.log([r.provider.padEnd(24), '--'.padEnd(8), '--'.padEnd(14), '--'.padEnd(14), '--'.padEnd(14), 'SKIPPED'.padEnd(12)].join(' | ')); + continue; + } + const ok = r.iterations.filter(i => !i.error).length; + const total = r.iterations.length; + const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--'; + const first = (r.summary.firstTokenMs.median / 1000).toFixed(2) + 's'; + const tot = (r.summary.totalMs.median / 1000).toFixed(2) + 's'; + const tps = r.summary.outputTokensPerSec.median.toFixed(1); + console.log([r.provider.padEnd(24), score.padEnd(8), first.padEnd(14), tot.padEnd(14), tps.padEnd(14), `${ok}/${total} OK`.padEnd(12)].join(' | ')); + } + console.log('='.repeat(108)); +} + /** * Merge browser benchmark results. */ @@ -366,7 +401,74 @@ async function mainBrowser() { console.log(`Copied latest: ${latestPath}`); } -const runner = mergeMode === 'storage' ? mainStorage : mergeMode === 'browser' ? mainBrowser : main; +async function mainAIGateway() { + const jsonFiles: string[] = []; + function walk(dir: string) { + if (!fs.existsSync(dir)) return; + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const full = path.join(dir, entry.name); + if (entry.isDirectory()) walk(full); + else if (entry.name === 'latest.json') jsonFiles.push(full); + } + } + walk(inputDir!); + + if (jsonFiles.length === 0) { + console.error(`No latest.json files found in ${inputDir}`); + process.exit(1); + } + + console.log(`Found ${jsonFiles.length} result files`); + + const byScenario: Record = {}; + + for (const file of jsonFiles) { + const raw = JSON.parse(fs.readFileSync(file, 'utf-8')) as { results: AIGatewayBenchmarkResult[] }; + const fromSingleProvider = raw.results.length === 1; + const scenario = path.basename(path.dirname(file)); + + if (!byScenario[scenario]) byScenario[scenario] = { results: [] }; + for (const result of raw.results) { + byScenario[scenario].results.push({ result, fromSingleProvider }); + } + } + + for (const [scenario, { results }] of Object.entries(byScenario)) { + const seen = new Map(); + for (const entry of results) { + const existing = seen.get(entry.result.provider); + if (!existing || (entry.fromSingleProvider && !existing.fromSingleProvider)) { + seen.set(entry.result.provider, entry); + } + } + + const deduped = Array.from(seen.values()).map(e => e.result); + console.log(`\nMerging ${deduped.length} provider results for mode: ai-gateway/${scenario}`); + + computeAIGatewayCompositeScores(deduped); + printAIGatewayResultsTable(deduped, scenario); + + const { writeAIGatewayResultsJson } = await import('./ai-gateway/benchmark.js'); + const timestamp = new Date().toISOString().slice(0, 10); + const resultsDir = path.resolve(ROOT, `results/ai_gateway/${scenario}`); + fs.mkdirSync(resultsDir, { recursive: true }); + + const outPath = path.join(resultsDir, `${timestamp}.json`); + await writeAIGatewayResultsJson(deduped, outPath); + + const latestPath = path.join(resultsDir, 'latest.json'); + fs.copyFileSync(outPath, latestPath); + console.log(`Copied latest: ${latestPath}`); + } +} + +const runner = mergeMode === 'storage' + ? mainStorage + : mergeMode === 'browser' + ? mainBrowser + : mergeMode === 'ai-gateway' + ? mainAIGateway + : main; runner().catch(err => { console.error('Merge failed:', err); process.exit(1); diff --git a/src/run.ts b/src/run.ts index fdd5962..ff76320 100644 --- a/src/run.ts +++ b/src/run.ts @@ -9,16 +9,20 @@ import { runConcurrentBenchmark } from './sandbox/concurrent.js'; import { runStaggeredBenchmark } from './sandbox/staggered.js'; import { runStorageBenchmark, writeStorageResultsJson } from './storage/benchmark.js'; import { runBrowserBenchmark, writeBrowserResultsJson } from './browser/benchmark.js'; +import { runAIGatewayBenchmark, writeAIGatewayResultsJson } from './ai-gateway/benchmark.js'; import { printResultsTable, writeResultsJson } from './sandbox/table.js'; import { providers } from './sandbox/providers.js'; import { storageProviders } from './storage/providers.js'; import { browserProviders } from './browser/providers.js'; +import { aiGatewayProviders } from './ai-gateway/providers.js'; import { computeCompositeScores } from './sandbox/scoring.js'; import { computeStorageCompositeScores } from './storage/scoring.js'; import { computeBrowserCompositeScores } from './browser/scoring.js'; +import { computeAIGatewayCompositeScores } from './ai-gateway/scoring.js'; import type { BenchmarkResult, BenchmarkMode } from './sandbox/types.js'; import type { StorageBenchmarkResult } from './storage/types.js'; import type { BrowserBenchmarkResult } from './browser/types.js'; +import type { AIGatewayBenchmarkResult, AIGatewayScenario } from './ai-gateway/types.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -31,6 +35,7 @@ const concurrency = parseInt(getArgValue(args, '--concurrency') || '100', 10); const storageConcurrency = parseInt(getArgValue(args, '--storage-concurrency') || '1', 10); const staggerDelay = parseInt(getArgValue(args, '--stagger-delay') || '200', 10); const fileSizeArg = getArgValue(args, '--file-size') || '10MB'; +const aiGatewayScenario = (getArgValue(args, '--ai-gateway-scenario') || 'short-nonstream') as AIGatewayScenario; function getArgValue(args: string[], flag: string): string | undefined { const idx = args.indexOf(flag); @@ -38,22 +43,24 @@ function getArgValue(args: string[], flag: string): string | undefined { } /** Resolve which modes to run */ -function getModesToRun(): BenchmarkMode[] | ['storage'] | ['browser'] { +function getModesToRun(): BenchmarkMode[] | ['storage'] | ['browser'] | ['ai-gateway'] { if (!rawMode) return ['sequential', 'staggered', 'burst']; if (rawMode === 'storage') return ['storage']; if (rawMode === 'browser') return ['browser']; + if (rawMode === 'ai-gateway') return ['ai-gateway']; const m = rawMode === 'concurrent' ? 'burst' : rawMode as BenchmarkMode; return [m]; } /** Map mode to results subdirectory name */ -function modeToDir(m: BenchmarkMode | 'storage'): string { +function modeToDir(m: BenchmarkMode | 'storage' | 'ai-gateway'): string { switch (m) { case 'sequential': return 'sequential_tti'; case 'staggered': return 'staggered_tti'; case 'burst': case 'concurrent': return 'burst_tti'; case 'storage': return 'storage'; + case 'ai-gateway': return 'ai_gateway'; default: return `${m}_tti`; } } @@ -218,6 +225,51 @@ async function runBrowser(toRun: typeof browserProviders): Promise { console.log(`Copied latest: ${latestPath}`); } +async function runAIGateway( + toRun: typeof aiGatewayProviders, + scenario: AIGatewayScenario, +): Promise { + console.log('\n' + '='.repeat(70)); + console.log(' MODE: AI GATEWAY'); + console.log(` Scenario: ${scenario}`); + console.log(` Iterations per provider: ${iterations}`); + console.log('='.repeat(70)); + + const results: AIGatewayBenchmarkResult[] = []; + + for (const providerConfig of toRun) { + const result = await runAIGatewayBenchmark({ ...providerConfig, iterations }, scenario); + results.push(result); + } + + computeAIGatewayCompositeScores(results); + + console.log('\n--- AI Gateway Benchmark Results ---'); + for (const r of results) { + if (r.skipped) { + console.log(`${r.provider}: SKIPPED (${r.skipReason})`); + continue; + } + const ok = r.iterations.filter(i => !i.error).length; + const total = r.iterations.length; + console.log(`${r.provider}:`); + console.log(` Total: ${(r.summary.totalMs.median / 1000).toFixed(2)}s (median), First token: ${(r.summary.firstTokenMs.median / 1000).toFixed(2)}s`); + console.log(` Score: ${r.compositeScore?.toFixed(1) || '--'} (${ok}/${total} OK)`); + } + + const timestamp = new Date().toISOString().slice(0, 10); + const subDir = modeToDir('ai-gateway'); + const scenarioDir = path.resolve(__dirname, `../results/${subDir}/${scenario.replace(/-/g, '_')}`); + fs.mkdirSync(scenarioDir, { recursive: true }); + + const outPath = path.join(scenarioDir, `${timestamp}.json`); + await writeAIGatewayResultsJson(results, outPath); + + const latestPath = path.join(scenarioDir, 'latest.json'); + fs.copyFileSync(outPath, latestPath); + console.log(`Copied latest: ${latestPath}`); +} + async function main() { const modes = getModesToRun(); @@ -246,6 +298,31 @@ async function main() { return; } + // Handle AI gateway mode separately + if (modes[0] === 'ai-gateway') { + console.log('ComputeSDK AI Gateway Benchmarks'); + console.log(`Scenario: ${aiGatewayScenario}`); + console.log(`Date: ${new Date().toISOString()}\n`); + + const toRun = providerFilter + ? aiGatewayProviders.filter(p => p.name === providerFilter) + : aiGatewayProviders; + + if (toRun.length === 0) { + if (providerFilter) { + console.error(`Unknown AI gateway provider: ${providerFilter}`); + console.error(`Available: ${aiGatewayProviders.map(p => p.name).join(', ')}`); + } else { + console.error('No AI gateway providers configured. Add entries to src/ai-gateway/providers.ts.'); + } + process.exit(1); + } + + await runAIGateway(toRun, aiGatewayScenario); + console.log('\nAll AI gateway tests complete.'); + return; + } + // Handle storage mode separately if (modes[0] === 'storage') { console.log('ComputeSDK Storage Provider Benchmarks'); From a7289b4f3d1df392a72716ce8989dd765e13a99e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 17 Apr 2026 01:25:21 +0000 Subject: [PATCH 2/6] fix: use Cloudflare-compatible max completion token param --- src/ai-gateway/benchmark.ts | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/ai-gateway/benchmark.ts b/src/ai-gateway/benchmark.ts index 9fec610..fc3157d 100644 --- a/src/ai-gateway/benchmark.ts +++ b/src/ai-gateway/benchmark.ts @@ -35,6 +35,26 @@ function normalizeBaseUrl(baseUrl: string): string { return baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl; } +function buildCompletionBody( + provider: AIGatewayProviderConfig, + request: { prompt: string; maxTokens: number; stream: boolean }, +) { + const body: Record = { + model: provider.model, + messages: [{ role: 'user', content: request.prompt }], + temperature: 0, + stream: request.stream, + }; + + if (provider.name === 'cloudflare-ai-gateway') { + body.max_completion_tokens = request.maxTokens; + } else { + body.max_tokens = request.maxTokens; + } + + return body; +} + async function runNonStreamingIteration( provider: AIGatewayProviderConfig, timeout: number, @@ -51,13 +71,7 @@ async function runNonStreamingIteration( authorization: `Bearer ${provider.apiKey}`, ...provider.defaultHeaders, }, - body: JSON.stringify({ - model: provider.model, - messages: [{ role: 'user', content: request.prompt }], - temperature: 0, - max_tokens: request.maxTokens, - stream: false, - }), + body: JSON.stringify(buildCompletionBody(provider, { ...request, stream: false })), }), timeout, 'Gateway request timed out'); const totalMs = performance.now() - start; @@ -113,13 +127,7 @@ async function runStreamingIteration( authorization: `Bearer ${provider.apiKey}`, ...provider.defaultHeaders, }, - body: JSON.stringify({ - model: provider.model, - messages: [{ role: 'user', content: request.prompt }], - temperature: 0, - max_tokens: request.maxTokens, - stream: true, - }), + body: JSON.stringify(buildCompletionBody(provider, { ...request, stream: true })), }), timeout, 'Gateway request timed out'); const statusCode = response.status; From bf4d241daecb151cb73eb78f1f86f0c74d37afe0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 17 Apr 2026 01:28:11 +0000 Subject: [PATCH 3/6] refactor: standardize AI gateway token limit field --- src/ai-gateway/benchmark.ts | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/ai-gateway/benchmark.ts b/src/ai-gateway/benchmark.ts index fc3157d..fe8a71e 100644 --- a/src/ai-gateway/benchmark.ts +++ b/src/ai-gateway/benchmark.ts @@ -44,14 +44,9 @@ function buildCompletionBody( messages: [{ role: 'user', content: request.prompt }], temperature: 0, stream: request.stream, + max_completion_tokens: request.maxTokens, }; - if (provider.name === 'cloudflare-ai-gateway') { - body.max_completion_tokens = request.maxTokens; - } else { - body.max_tokens = request.maxTokens; - } - return body; } From 86e754d4aeeb1976759add1648ebcfd65ab642d1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 17 Apr 2026 01:32:14 +0000 Subject: [PATCH 4/6] fix: scope ai gateway workflow concurrency by event and ref --- .github/workflows/ai-gateway-benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ai-gateway-benchmarks.yml b/.github/workflows/ai-gateway-benchmarks.yml index db4d455..7c86680 100644 --- a/.github/workflows/ai-gateway-benchmarks.yml +++ b/.github/workflows/ai-gateway-benchmarks.yml @@ -16,7 +16,7 @@ on: default: '50' concurrency: - group: ai-gateway-benchmarks + group: ai-gateway-benchmarks-${{ github.event_name }}-${{ github.ref }} cancel-in-progress: true permissions: From 250fce84c612a23145040b48e3f412945bb661fd Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 17 Apr 2026 01:42:54 +0000 Subject: [PATCH 5/6] fix: compute streaming throughput from usage tokens --- .github/workflows/ai-gateway-benchmarks.yml | 2 +- src/ai-gateway/benchmark.ts | 44 +++++++++++++-------- src/ai-gateway/types.ts | 3 +- src/merge-results.ts | 2 +- 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ai-gateway-benchmarks.yml b/.github/workflows/ai-gateway-benchmarks.yml index 7c86680..ccc8dc6 100644 --- a/.github/workflows/ai-gateway-benchmarks.yml +++ b/.github/workflows/ai-gateway-benchmarks.yml @@ -126,7 +126,7 @@ jobs: const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--'; const first = (r.summary.firstTokenMs.median / 1000).toFixed(2) + 's'; const total = (r.summary.totalMs.median / 1000).toFixed(2) + 's'; - const tps = r.summary.outputTokensPerSec.median.toFixed(1); + const tps = r.throughputAvailable ? r.summary.outputTokensPerSec.median.toFixed(1) : '--'; const ok = r.iterations.filter(it => !it.error).length; const count = r.iterations.length; body += `| ${i + 1} | ${r.provider} | ${score} | ${first} | ${total} | ${tps} | ${ok}/${count} |\n`; diff --git a/src/ai-gateway/benchmark.ts b/src/ai-gateway/benchmark.ts index fe8a71e..06a6b41 100644 --- a/src/ai-gateway/benchmark.ts +++ b/src/ai-gateway/benchmark.ts @@ -47,6 +47,10 @@ function buildCompletionBody( max_completion_tokens: request.maxTokens, }; + if (request.stream) { + body.stream_options = { include_usage: true }; + } + return body; } @@ -78,7 +82,7 @@ async function runNonStreamingIteration( firstTokenMs: 0, totalMs, outputTokens: 0, - outputTokensPerSec: 0, + outputTokensPerSec: undefined, statusCode, error: `HTTP ${statusCode}: ${bodyText.slice(0, 200)}`, }; @@ -92,7 +96,7 @@ async function runNonStreamingIteration( firstTokenMs: totalMs, totalMs, outputTokens, - outputTokensPerSec: outputTokens > 0 ? outputTokens / seconds : 0, + outputTokensPerSec: outputTokens > 0 ? outputTokens / seconds : undefined, statusCode, }; } catch (err) { @@ -100,7 +104,7 @@ async function runNonStreamingIteration( firstTokenMs: 0, totalMs: performance.now() - start, outputTokens: 0, - outputTokensPerSec: 0, + outputTokensPerSec: undefined, error: err instanceof Error ? err.message : String(err), }; } @@ -132,7 +136,7 @@ async function runStreamingIteration( firstTokenMs: 0, totalMs: performance.now() - start, outputTokens: 0, - outputTokensPerSec: 0, + outputTokensPerSec: undefined, statusCode, error: `HTTP ${statusCode}: ${errorText.slice(0, 200)}`, }; @@ -141,7 +145,7 @@ async function runStreamingIteration( const reader = response.body.getReader(); const decoder = new TextDecoder(); let firstTokenMs = 0; - let outputTokens = 0; + let outputTokens: number | undefined; let done = false; while (!done) { @@ -150,19 +154,20 @@ async function runStreamingIteration( if (done) break; const chunk = decoder.decode(readResult.value, { stream: true }); - if (chunk.includes('data:') && firstTokenMs === 0) { - firstTokenMs = performance.now() - start; - } - const lines = chunk.split('\n').filter(line => line.startsWith('data: ')); for (const line of lines) { const data = line.slice(6).trim(); if (!data || data === '[DONE]') continue; try { const payload = JSON.parse(data); + const usageTokens = extractCompletionTokens(payload); + if (usageTokens > 0) { + outputTokens = usageTokens; + } + const tokenText = payload?.choices?.[0]?.delta?.content; - if (typeof tokenText === 'string' && tokenText.length > 0) { - outputTokens += 1; + if (firstTokenMs === 0 && typeof tokenText === 'string' && tokenText.length > 0) { + firstTokenMs = performance.now() - start; } } catch { // Ignore malformed partial SSE chunks. @@ -177,8 +182,8 @@ async function runStreamingIteration( return { firstTokenMs: effectiveStart, totalMs, - outputTokens, - outputTokensPerSec: outputTokens > 0 ? outputTokens / generationSeconds : 0, + outputTokens: outputTokens ?? 0, + outputTokensPerSec: outputTokens && outputTokens > 0 ? outputTokens / generationSeconds : undefined, statusCode, }; } catch (err) { @@ -186,7 +191,7 @@ async function runStreamingIteration( firstTokenMs: 0, totalMs: performance.now() - start, outputTokens: 0, - outputTokensPerSec: 0, + outputTokensPerSec: undefined, error: err instanceof Error ? err.message : String(err), }; } @@ -210,6 +215,7 @@ export async function runAIGatewayBenchmark( totalMs: { median: 0, p95: 0, p99: 0 }, outputTokensPerSec: { median: 0, p95: 0, p99: 0 }, }, + throughputAvailable: false, skipped: true, skipReason: `Missing: ${missingVars.join(', ')}`, }; @@ -238,6 +244,10 @@ export async function runAIGatewayBenchmark( } const successful = results.filter(r => !r.error); + const throughputValues = successful + .map(r => r.outputTokensPerSec) + .filter((v): v is number => typeof v === 'number' && Number.isFinite(v)); + return { provider: name, mode: 'ai-gateway', @@ -247,8 +257,9 @@ export async function runAIGatewayBenchmark( summary: { firstTokenMs: computeStats(successful.map(r => r.firstTokenMs).filter(v => v > 0)), totalMs: computeStats(successful.map(r => r.totalMs)), - outputTokensPerSec: computeStats(successful.map(r => r.outputTokensPerSec)), + outputTokensPerSec: computeStats(throughputValues), }, + throughputAvailable: throughputValues.length > 0, }; } @@ -269,7 +280,7 @@ export async function writeAIGatewayResultsJson(results: AIGatewayBenchmarkResul firstTokenMs: round(i.firstTokenMs), totalMs: round(i.totalMs), outputTokens: i.outputTokens, - outputTokensPerSec: round(i.outputTokensPerSec), + ...(i.outputTokensPerSec !== undefined ? { outputTokensPerSec: round(i.outputTokensPerSec) } : {}), ...(i.statusCode !== undefined ? { statusCode: i.statusCode } : {}), ...(i.error ? { error: i.error } : {}), })), @@ -278,6 +289,7 @@ export async function writeAIGatewayResultsJson(results: AIGatewayBenchmarkResul totalMs: roundStats(r.summary.totalMs), outputTokensPerSec: roundStats(r.summary.outputTokensPerSec), }, + ...(r.throughputAvailable !== undefined ? { throughputAvailable: r.throughputAvailable } : {}), ...(r.compositeScore !== undefined ? { compositeScore: round(r.compositeScore) } : {}), ...(r.successRate !== undefined ? { successRate: round(r.successRate) } : {}), ...(r.skipped ? { skipped: r.skipped, skipReason: r.skipReason } : {}), diff --git a/src/ai-gateway/types.ts b/src/ai-gateway/types.ts index 937b807..c65c573 100644 --- a/src/ai-gateway/types.ts +++ b/src/ai-gateway/types.ts @@ -15,7 +15,7 @@ export interface AIGatewayTimingResult { firstTokenMs: number; totalMs: number; outputTokens: number; - outputTokensPerSec: number; + outputTokensPerSec?: number; statusCode?: number; error?: string; } @@ -33,6 +33,7 @@ export interface AIGatewayBenchmarkResult { model: string; iterations: AIGatewayTimingResult[]; summary: AIGatewayStats; + throughputAvailable?: boolean; compositeScore?: number; successRate?: number; skipped?: boolean; diff --git a/src/merge-results.ts b/src/merge-results.ts index e75d1c8..26dbc5f 100644 --- a/src/merge-results.ts +++ b/src/merge-results.ts @@ -336,7 +336,7 @@ function printAIGatewayResultsTable(results: AIGatewayBenchmarkResult[], scenari const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--'; const first = (r.summary.firstTokenMs.median / 1000).toFixed(2) + 's'; const tot = (r.summary.totalMs.median / 1000).toFixed(2) + 's'; - const tps = r.summary.outputTokensPerSec.median.toFixed(1); + const tps = r.throughputAvailable ? r.summary.outputTokensPerSec.median.toFixed(1) : '--'; console.log([r.provider.padEnd(24), score.padEnd(8), first.padEnd(14), tot.padEnd(14), tps.padEnd(14), `${ok}/${total} OK`.padEnd(12)].join(' | ')); } console.log('='.repeat(108)); From 2d92b4b4159d1070cc92399ca3e1978829c49df8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 17 Apr 2026 02:05:12 +0000 Subject: [PATCH 6/6] feat: include model in AI gateway benchmark output --- .github/workflows/ai-gateway-benchmarks.yml | 5 +++++ src/run.ts | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/ai-gateway-benchmarks.yml b/.github/workflows/ai-gateway-benchmarks.yml index ccc8dc6..42a0594 100644 --- a/.github/workflows/ai-gateway-benchmarks.yml +++ b/.github/workflows/ai-gateway-benchmarks.yml @@ -118,7 +118,12 @@ jobs: if (results.length === 0) continue; hasResults = true; + const models = [...new Set(results.map(r => r.model).filter(Boolean))]; + body += `### ${scenario.replace('_', ' ').toUpperCase()}\n\n`; + if (models.length > 0) { + body += `Model${models.length > 1 ? 's' : ''}: ${models.map(m => `\`${m}\``).join(', ')}\n\n`; + } body += '| # | Provider | Score | First Token | Total | Tok/sec | Status |\n'; body += '|---|----------|-------|-------------|-------|---------|--------|\n'; diff --git a/src/run.ts b/src/run.ts index ff76320..416332e 100644 --- a/src/run.ts +++ b/src/run.ts @@ -244,7 +244,12 @@ async function runAIGateway( computeAIGatewayCompositeScores(results); + const models = Array.from(new Set(results.map(r => r.model).filter(Boolean))); + console.log('\n--- AI Gateway Benchmark Results ---'); + if (models.length > 0) { + console.log(`Model${models.length > 1 ? 's' : ''}: ${models.join(', ')}`); + } for (const r of results) { if (r.skipped) { console.log(`${r.provider}: SKIPPED (${r.skipReason})`);