diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 4e952b56..45ff530b 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -92,11 +92,16 @@ jobs: - name: Check for changes id: changes run: | - if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + # Detect modified tracked files + if ! git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md 2>/dev/null; then + CHANGED=true + fi + # Detect newly created (untracked) files + if [ -n "$(git ls-files --others --exclude-standard generated/BUILD-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR if: steps.changes.outputs.changed == 'true' @@ -212,11 +217,16 @@ jobs: - name: Check for changes id: changes run: | - if git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + # Detect modified tracked files + if ! git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md 2>/dev/null; then + CHANGED=true + fi + # Detect newly created (untracked) files + if [ -n "$(git ls-files --others --exclude-standard generated/EMBEDDING-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR if: steps.changes.outputs.changed == 'true' @@ -320,11 +330,16 @@ jobs: - name: Check for changes id: changes run: | - if git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + # Detect modified tracked files + if ! 
git diff --quiet HEAD -- generated/QUERY-BENCHMARKS.md 2>/dev/null; then + CHANGED=true + fi + # Detect newly created (untracked) files + if [ -n "$(git ls-files --others --exclude-standard generated/QUERY-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR if: steps.changes.outputs.changed == 'true' @@ -428,11 +443,16 @@ jobs: - name: Check for changes id: changes run: | - if git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + CHANGED=false + # Detect modified tracked files + if ! git diff --quiet HEAD -- generated/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then + CHANGED=true + fi + # Detect newly created (untracked) files + if [ -n "$(git ls-files --others --exclude-standard generated/INCREMENTAL-BENCHMARKS.md)" ]; then + CHANGED=true fi + echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" - name: Commit and push via PR if: steps.changes.outputs.changed == 'true' diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index bb025640..2b755614 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -44,6 +44,8 @@ jobs: npm_tag: ${{ steps.compute.outputs.npm_tag }} steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 - name: Compute version id: compute @@ -65,12 +67,21 @@ jobs: NPM_TAG="latest" echo "Stable release (manual retry): $VERSION" else - IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT" - DEV_PATCH=$((PATCH + 1)) + # Use git describe to find the nearest release tag — same strategy + # as scripts/bench-version.js so versions are always consistent. + RELEASE_TAG=$(git describe --tags --match "v*" --abbrev=0 2>/dev/null || echo "") + if [ -n "$RELEASE_TAG" ]; then + COMMITS=$(git rev-list "${RELEASE_TAG}..HEAD" --count) + IFS='.' read -r MAJOR MINOR PATCH <<< "${RELEASE_TAG#v}" + else + COMMITS=1 + IFS='.' 
read -r MAJOR MINOR PATCH <<< "$CURRENT" + fi + DEV_PATCH=$((PATCH + COMMITS)) SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-7) VERSION="${MAJOR}.${MINOR}.${DEV_PATCH}-dev.${SHORT_SHA}" NPM_TAG="dev" - echo "Dev release: $VERSION" + echo "Dev release: $VERSION (${COMMITS} commits since ${RELEASE_TAG:-none})" fi echo "version=$VERSION" >> "$GITHUB_OUTPUT" diff --git a/scripts/bench-version.js b/scripts/bench-version.js new file mode 100644 index 00000000..eaa7a75b --- /dev/null +++ b/scripts/bench-version.js @@ -0,0 +1,61 @@ +/** + * Compute the benchmark version string from git state. + * + * Uses the same two-step strategy as publish.yml's compute-version job: + * 1. `git describe --tags --match "v*" --abbrev=0` → find nearest release tag + * 2. `git rev-list ..HEAD --count` → count commits since that tag + * + * - If HEAD is exactly tagged (0 commits): returns "2.5.0" + * - Otherwise: returns "2.5.N-dev.hash" (e.g. "2.5.3-dev.c50f7f5") + * where N = PATCH + commits since tag, hash = short commit SHA + * + * This prevents dev/dogfood benchmark runs from overwriting release data + * in the historical benchmark reports (which deduplicate by version). 
// Shared options for every git invocation: decode stdout as UTF-8 and pipe
// all three streams so git's stderr never leaks into the benchmark output.
const GIT_OPTS = { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] };

// Strict MAJOR.MINOR.PATCH matchers; prerelease/build suffixes do not match.
const RELEASE_TAG_RE = /^v(\d+)\.(\d+)\.(\d+)$/;
const PLAIN_SEMVER_RE = /^(\d+)\.(\d+)\.(\d+)$/;

/**
 * Run a git subcommand and return its trimmed stdout.
 *
 * @param {string[]} args - Arguments passed to the `git` executable.
 * @param {string} [cwd] - Directory to run git in.
 * @returns {string} Trimmed stdout.
 * @throws {Error} When git cannot be spawned or exits non-zero.
 */
function runGit(args, cwd) {
  return execFileSync('git', args, { cwd, ...GIT_OPTS }).trim();
}

/**
 * Build the ".<short-sha>" suffix for HEAD, or an empty string when the
 * commit hash cannot be determined (git missing, not a repository, ...).
 *
 * @param {string} [cwd] - Directory to run git in.
 * @returns {string} `.<sha>` or `''`.
 */
function devHashSuffix(cwd) {
  try {
    return `.${runGit(['rev-parse', '--short', 'HEAD'], cwd)}`;
  } catch {
    // Best effort: a dev version without a hash is still a usable label.
    return '';
  }
}

/**
 * Compute the benchmark version string from git state.
 *
 * Strategy:
 *   1. `git describe --tags --match "v*" --abbrev=0` finds the nearest release tag.
 *   2. `git rev-list <tag>..HEAD --count` counts the commits since that tag.
 *
 * Results:
 *   - HEAD exactly on a `vX.Y.Z` tag: `"X.Y.Z"`.
 *   - N commits after a `vX.Y.Z` tag: `"X.Y.(Z+N)-dev.<short-sha>"`.
 *   - Nearest tag is not a plain `vX.Y.Z`: `"<pkgVersion>-dev"`.
 *   - Git unavailable or no matching tag: `"X.Y.(Z+1)-dev[.<short-sha>]"`
 *     derived from `pkgVersion` when it is a plain `X.Y.Z`, otherwise
 *     `"<pkgVersion>-dev"`.
 *
 * @param {string} pkgVersion - Version from package.json, used as the fallback base.
 * @param {string} [cwd] - Directory in which the git commands are run.
 * @returns {string} Version label for benchmark reports.
 */
export function getBenchmarkVersion(pkgVersion, cwd) {
  try {
    const tag = runGit(['describe', '--tags', '--match', 'v*', '--abbrev=0'], cwd);
    const commits = Number(runGit(['rev-list', `${tag}..HEAD`, '--count'], cwd));

    const tagParts = RELEASE_TAG_RE.exec(tag);
    if (!tagParts) return `${pkgVersion}-dev`;

    const [, major, minor, patch] = tagParts;

    // Exact tag (0 commits since tag): clean release version.
    if (commits === 0) return `${major}.${minor}.${patch}`;

    // Dev build: MAJOR.MINOR.(PATCH+COMMITS)-dev.SHORT_SHA. A failure to
    // resolve the hash here falls through to the package.json fallback.
    const hash = runGit(['rev-parse', '--short', 'HEAD'], cwd);
    return `${major}.${minor}.${Number(patch) + commits}-dev.${hash}`;
  } catch {
    /* git not available or no tags — fall through to package.json */
  }

  // Fallback: bump PATCH by one. Only do arithmetic on a strictly numeric
  // X.Y.Z; a prerelease such as "2.5.0-beta" would otherwise yield
  // "2.5.NaN-dev".
  const pkgParts = PLAIN_SEMVER_RE.exec(pkgVersion);
  if (!pkgParts) return `${pkgVersion}-dev`;

  const [, major, minor, patch] = pkgParts;
  return `${major}.${minor}.${Number(patch) + 1}-dev${devHashSuffix(cwd)}`;
}
Parse `--version ` and `--npm` from process.argv. */ @@ -44,10 +46,11 @@ export async function resolveBenchmarkSource() { const { version: cliVersion, npm } = parseArgs(); if (!npm) { - // Local mode — use repo src/, label as "dev" unless overridden + // Local mode — use repo src/, version derived from git state const root = path.resolve(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), '..', '..'); + const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8')); return { - version: cliVersion || 'dev', + version: cliVersion || getBenchmarkVersion(pkg.version, root), srcDir: path.join(root, 'src'), cleanup() {}, }; diff --git a/scripts/token-benchmark.js b/scripts/token-benchmark.js index 7c20996e..7c75051c 100644 --- a/scripts/token-benchmark.js +++ b/scripts/token-benchmark.js @@ -26,10 +26,12 @@ import { fileURLToPath } from 'node:url'; import { parseArgs } from 'node:util'; import { ISSUES, extractAgentOutput, validateResult } from './token-benchmark-issues.js'; +import { getBenchmarkVersion } from './bench-version.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const root = path.resolve(__dirname, '..'); const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8')); +const benchVersion = getBenchmarkVersion(pkg.version, root); // Redirect console.log to stderr so only JSON goes to stdout const origLog = console.log; @@ -590,7 +592,7 @@ async function main() { console.log = origLog; const output = { - version: pkg.version, + version: benchVersion, date: new Date().toISOString().slice(0, 10), model: MODEL, runsPerIssue: RUNS,