Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,16 @@ jobs:
- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
CHANGED=false
# Detect modified tracked files
if ! git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md 2>/dev/null; then
CHANGED=true
fi
# Detect newly created (untracked) files
if [ -n "$(git ls-files --others --exclude-standard generated/BUILD-BENCHMARKS.md)" ]; then
CHANGED=true
fi
echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"

- name: Commit and push via PR
if: steps.changes.outputs.changed == 'true'
Expand Down Expand Up @@ -212,11 +217,16 @@ jobs:
- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
CHANGED=false
# Detect modified tracked files
if ! git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then
CHANGED=true
fi
# Detect newly created (untracked) files
if [ -n "$(git ls-files --others --exclude-standard generated/EMBEDDING-BENCHMARKS.md)" ]; then
CHANGED=true
fi
echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"

- name: Commit and push via PR
if: steps.changes.outputs.changed == 'true'
Expand Down Expand Up @@ -320,11 +330,16 @@ jobs:
- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
CHANGED=false
# Detect modified tracked files
if ! git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md 2>/dev/null; then
CHANGED=true
fi
# Detect newly created (untracked) files
if [ -n "$(git ls-files --others --exclude-standard generated/QUERY-BENCHMARKS.md)" ]; then
CHANGED=true
fi
echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"

- name: Commit and push via PR
if: steps.changes.outputs.changed == 'true'
Expand Down Expand Up @@ -428,11 +443,16 @@ jobs:
- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
CHANGED=false
# Detect modified tracked files
if ! git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then
CHANGED=true
fi
# Detect newly created (untracked) files
if [ -n "$(git ls-files --others --exclude-standard generated/INCREMENTAL-BENCHMARKS.md)" ]; then
CHANGED=true
fi
echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"

- name: Commit and push via PR
if: steps.changes.outputs.changed == 'true'
Expand Down
17 changes: 14 additions & 3 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ jobs:
npm_tag: ${{ steps.compute.outputs.npm_tag }}
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0

- name: Compute version
id: compute
Expand All @@ -65,12 +67,21 @@ jobs:
NPM_TAG="latest"
echo "Stable release (manual retry): $VERSION"
else
IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
DEV_PATCH=$((PATCH + 1))
# Use git describe to find the nearest release tag — same strategy
# as scripts/bench-version.js so versions are always consistent.
RELEASE_TAG=$(git describe --tags --match "v*" --abbrev=0 2>/dev/null || echo "")
if [ -n "$RELEASE_TAG" ]; then
COMMITS=$(git rev-list "${RELEASE_TAG}..HEAD" --count)
IFS='.' read -r MAJOR MINOR PATCH <<< "${RELEASE_TAG#v}"
else
COMMITS=1
IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
fi
DEV_PATCH=$((PATCH + COMMITS))
SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-7)
VERSION="${MAJOR}.${MINOR}.${DEV_PATCH}-dev.${SHORT_SHA}"
NPM_TAG="dev"
echo "Dev release: $VERSION"
echo "Dev release: $VERSION (${COMMITS} commits since ${RELEASE_TAG:-none})"
fi

echo "version=$VERSION" >> "$GITHUB_OUTPUT"
Expand Down
61 changes: 61 additions & 0 deletions scripts/bench-version.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/**
* Compute the benchmark version string from git state.
*
* Uses the same two-step strategy as publish.yml's compute-version job:
* 1. `git describe --tags --match "v*" --abbrev=0` → find nearest release tag
* 2. `git rev-list <tag>..HEAD --count` → count commits since that tag
*
* - If HEAD is exactly tagged (0 commits): returns the tag's version without the "v" prefix (e.g. "2.5.0")
* - Otherwise: returns "2.5.N-dev.hash" (e.g. "2.5.3-dev.c50f7f5")
* where N = PATCH + commits since tag, hash = short commit SHA
*
* This prevents dev/dogfood benchmark runs from overwriting release data
* in the historical benchmark reports (which deduplicate by version).
*/

import { execFileSync } from 'node:child_process';

// Capture git's stdout as a string and keep its stderr out of the benchmark output.
const GIT_OPTS = { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] };

// Release tags must be exactly vMAJOR.MINOR.PATCH (no prerelease/build suffix).
const RELEASE_TAG_RE = /^v(\d+)\.(\d+)\.(\d+)$/;

// package.json versions that can safely have their patch number bumped.
const PLAIN_VERSION_RE = /^(\d+)\.(\d+)\.(\d+)$/;

/** Run `git <args>` in `cwd` and return its trimmed stdout. Throws if git fails or is missing. */
function runGit(args, cwd) {
  return execFileSync('git', args, { cwd, ...GIT_OPTS }).trim();
}

/**
 * Derive the version from the nearest `v*` tag reachable from HEAD.
 * Returns null when git is unavailable, no tag exists, or the commit count is unusable,
 * so the caller can fall back to the package.json version.
 */
function versionFromNearestTag(pkgVersion, cwd) {
  try {
    // Step 1: find the nearest release tag (mirrors publish.yml --abbrev=0)
    const tag = runGit(['describe', '--tags', '--match', 'v*', '--abbrev=0'], cwd);

    // Step 2: count commits since that tag (mirrors publish.yml git rev-list)
    const commits = Number(runGit(['rev-list', `${tag}..HEAD`, '--count'], cwd));
    // Guard against unexpected output so "NaN" can never leak into a version string.
    if (!Number.isInteger(commits) || commits < 0) return null;

    // A tag such as "v2.5.0-rc.1" matches `v*` but has no numeric patch to bump.
    const m = RELEASE_TAG_RE.exec(tag);
    if (!m) return `${pkgVersion}-dev`;

    const [, major, minor, patch] = m;

    // Exact tag (0 commits since tag): return clean release version
    if (commits === 0) return `${major}.${minor}.${patch}`;

    // Dev build: MAJOR.MINOR.(PATCH+COMMITS)-dev.SHORT_SHA
    const hash = runGit(['rev-parse', '--short', 'HEAD'], cwd);
    return `${major}.${minor}.${Number(patch) + commits}-dev.${hash}`;
  } catch {
    /* git not available or no tags */
    return null;
  }
}

/**
 * Build a dev version from the package.json version alone: PATCH+1 with a "-dev" suffix,
 * plus the short commit SHA when git can provide one. Versions that are not plain
 * MAJOR.MINOR.PATCH (e.g. "1.2.3-rc") are returned unchanged with "-dev" appended
 * instead of attempting arithmetic on a non-numeric patch component.
 */
function fallbackDevVersion(pkgVersion, cwd) {
  const m = PLAIN_VERSION_RE.exec(pkgVersion);
  if (!m) return `${pkgVersion}-dev`;

  const [, major, minor, patch] = m;
  const base = `${major}.${minor}.${Number(patch) + 1}-dev`;
  try {
    return `${base}.${runGit(['rev-parse', '--short', 'HEAD'], cwd)}`;
  } catch {
    // No usable git repository: omit the SHA suffix.
    return base;
  }
}

/**
 * Compute the version label for a benchmark run from git state.
 *
 * @param {string} pkgVersion - Version from package.json, used when no usable tag is found.
 * @param {string} cwd - Directory in which the git commands are executed.
 * @returns {string} One of:
 *   - "MAJOR.MINOR.PATCH" when HEAD is exactly on a vMAJOR.MINOR.PATCH tag;
 *   - "MAJOR.MINOR.(PATCH+N)-dev.SHA" when HEAD is N commits past such a tag;
 *   - "<pkgVersion>-dev" when the nearest `v*` tag is not plain vMAJOR.MINOR.PATCH;
 *   - "MAJOR.MINOR.(PATCH+1)-dev[.SHA]" derived from pkgVersion when git or tags are
 *     unavailable (or "<pkgVersion>-dev" if pkgVersion is not plain MAJOR.MINOR.PATCH).
 */
export function getBenchmarkVersion(pkgVersion, cwd) {
  const fromTag = versionFromNearestTag(pkgVersion, cwd);
  if (fromTag !== null) return fromTag;

  // Fallback: no git or no tags — match publish.yml's no-tags behavior (PATCH+1-dev.SHA)
  return fallbackDevVersion(pkgVersion, cwd);
}
7 changes: 5 additions & 2 deletions scripts/lib/bench-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import os from 'node:os';
import path from 'node:path';
import { pathToFileURL } from 'node:url';

import { getBenchmarkVersion } from '../bench-version.js';

/**
* Parse `--version <v>` and `--npm` from process.argv.
*/
Expand Down Expand Up @@ -44,10 +46,11 @@ export async function resolveBenchmarkSource() {
const { version: cliVersion, npm } = parseArgs();

if (!npm) {
// Local mode — use repo src/, label as "dev" unless overridden
// Local mode — use repo src/, version derived from git state
const root = path.resolve(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), '..', '..');
const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
return {
version: cliVersion || 'dev',
version: cliVersion || getBenchmarkVersion(pkg.version, root),
srcDir: path.join(root, 'src'),
cleanup() {},
};
Expand Down
4 changes: 3 additions & 1 deletion scripts/token-benchmark.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ import { fileURLToPath } from 'node:url';
import { parseArgs } from 'node:util';

import { ISSUES, extractAgentOutput, validateResult } from './token-benchmark-issues.js';
import { getBenchmarkVersion } from './bench-version.js';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const root = path.resolve(__dirname, '..');
const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
const benchVersion = getBenchmarkVersion(pkg.version, root);

// Redirect console.log to stderr so only JSON goes to stdout
const origLog = console.log;
Expand Down Expand Up @@ -590,7 +592,7 @@ async function main() {
console.log = origLog;

const output = {
version: pkg.version,
version: benchVersion,
date: new Date().toISOString().slice(0, 10),
model: MODEL,
runsPerIssue: RUNS,
Expand Down