diff --git a/.changeset/patch-push-pr-incremental-size-check.md b/.changeset/patch-push-pr-incremental-size-check.md new file mode 100644 index 0000000000..d0bd590d62 --- /dev/null +++ b/.changeset/patch-push-pr-incremental-size-check.md @@ -0,0 +1,12 @@ +--- +"gh-aw": patch +--- + +Fixed `push_to_pull_request_branch` `max_patch_size` check incorrectly measuring the patch from the default branch (checkout base) instead of from the existing PR branch head, causing every push to fail on long-running branches that accumulate iterations (e.g. autoloop's accumulating iteration branches). + +The `max_patch_size` check now uses the **incremental net diff** between `origin/` and the new working state, not the size of the format-patch transport file or the cumulative diff from the default branch. This means the limit reflects how much the branch will actually change in the push, not the cumulative divergence of the long-running branch from `main`. + +Changes: +- `generate_git_patch.cjs`: in incremental mode, also computes and returns `diffSize` (size of `git diff ..`), and refuses to fall through to checkout-base strategies (`GITHUB_SHA..HEAD` / merge-base with default branch) if Strategy 1 fails. +- `safe_outputs_handlers.cjs`: passes the incremental `diffSize` to the safe-output entry as `diff_size`. +- `push_to_pull_request_branch.cjs`: prefers `message.diff_size` for the `max_patch_size` check, falling back to the patch file size when the field is absent (backward compatible). diff --git a/actions/setup/js/generate_git_patch.cjs b/actions/setup/js/generate_git_patch.cjs index 449c6894f5..d7fa08dd2b 100644 --- a/actions/setup/js/generate_git_patch.cjs +++ b/actions/setup/js/generate_git_patch.cjs @@ -12,6 +12,11 @@ const path = require("path"); const { getErrorMessage } = require("./error_helpers.cjs"); const { execGitSync, getGitAuthEnv } = require("./git_helpers.cjs"); const { ERR_SYSTEM } = require("./error_codes.cjs"); +const { sanitizeForFilename, sanitizeBranchNameForPatch, sanitizeRepoSlugForPatch, getPatchPath, getPatchPathForRepo, buildExcludePathspecs, computeIncrementalDiffSize } = require("./git_patch_utils.cjs"); + +// sanitizeForFilename is re-exported below for backward compatibility with +// existing callers that imported it from this module. +void sanitizeForFilename; /** * Debug logging helper - logs to stderr when DEBUG env var matches @@ -24,63 +29,6 @@ function debugLog(message) { } } -/** - * Sanitize a string for use as a patch filename component. - * Replaces path separators and special characters with dashes. - * @param {string} value - The value to sanitize - * @param {string} fallback - Fallback value when input is empty or nullish - * @returns {string} The sanitized string safe for use in a filename - */ -function sanitizeForFilename(value, fallback) { - if (!value) return fallback; - return value - .replace(/[/\\:*?"<>|]/g, "-") - .replace(/-{2,}/g, "-") - .replace(/^-|-$/g, "") - .toLowerCase(); -} - -/** - * Sanitize a branch name for use as a patch filename - * @param {string} branchName - The branch name to sanitize - * @returns {string} The sanitized branch name safe for use in a filename - */ -function sanitizeBranchNameForPatch(branchName) { - return sanitizeForFilename(branchName, "unknown"); -} - -/** - * Get the patch file path for a given branch name - * @param {string} branchName - The branch name - * @returns {string} The full patch file path - */ -function getPatchPath(branchName) { - const sanitized = sanitizeBranchNameForPatch(branchName); - return `/tmp/gh-aw/aw-${sanitized}.patch`; -} - -/** - * Sanitize a repo slug for use in a filename - * @param {string} repoSlug - The repo slug (owner/repo) - * @returns {string} The sanitized slug safe for use in a filename - */ -function sanitizeRepoSlugForPatch(repoSlug) { - return sanitizeForFilename(repoSlug, ""); -} - -/** - * Get the patch file path for a given branch name and repo slug - * Used for multi-repo scenarios to prevent patch file collisions - * @param {string} branchName - The branch name - * @param {string} repoSlug - The repository slug (owner/repo) - * @returns {string} The full patch file path including repo disambiguation - */ -function getPatchPathForRepo(branchName, repoSlug) { - const sanitizedBranch = sanitizeBranchNameForPatch(branchName); - const sanitizedRepo = sanitizeRepoSlugForPatch(repoSlug); - return `/tmp/gh-aw/aw-${sanitizedRepo}-${sanitizedBranch}.patch`; -} - /** * Generates a git patch file for the current changes * @param {string} branchName - The branch name to generate patch for @@ -111,15 +59,14 @@ async function generateGitPatch(branchName, baseBranch, options = {}) { // These are appended after "--" so git treats them as pathspecs, not revisions. // Using git's native pathspec magic keeps the exclusions out of the patch entirely // without any post-processing of the generated patch file. - const excludePathspecs = Array.isArray(options.excludedFiles) && options.excludedFiles.length > 0 ? options.excludedFiles.map(p => `:(exclude)${p}`) : []; + const excludeArgsArr = buildExcludePathspecs(options.excludedFiles); /** * Returns the arguments to append to a format-patch call when excludedFiles is set. - * Produces ["--", ":(exclude)pattern1", ":(exclude)pattern2", ...] or []. * @returns {string[]} */ function excludeArgs() { - return excludePathspecs.length > 0 ? ["--", ...excludePathspecs] : []; + return excludeArgsArr; } const patchPath = options.repoSlug ? getPatchPathForRepo(branchName, options.repoSlug) : getPatchPath(branchName); @@ -294,6 +241,25 @@ async function generateGitPatch(branchName, baseBranch, options = {}) { patchLines: 0, }; } + + // In incremental mode, the patch must be measured relative to the existing + // PR branch head (origin/), never relative to the default branch. + // If Strategy 1 did not produce a patch (e.g. format-patch yielded empty + // output for an unusual commit shape — excluded-files filtering away every + // change, or binary-only commits with unusual encoding), do NOT fall + // through to Strategy 2 or Strategy 3 — those use GITHUB_SHA..HEAD or + // merge-base with a remote ref and would produce a checkout-base diff + // (which can be many MB on a long-running branch). Returning an explicit + // error preserves the "incremental" contract that the patch reflects only + // the new commits. + if (!patchGenerated && mode === "incremental") { + debugLog(`Strategy 1 (incremental): format-patch produced no output for ${baseRef}..${branchName} despite ${commitCount} incremental commit(s), refusing to fall through to checkout-base strategies`); + return { + success: false, + error: `Cannot generate incremental patch: git format-patch produced no output for ${baseRef}..${branchName} despite ${commitCount} incremental commit(s).`, + patchPath: patchPath, + }; + } } catch (branchError) { // Branch does not exist locally debugLog(`Strategy 1: Branch '${branchName}' does not exist locally - ${getErrorMessage(branchError)}`); @@ -450,12 +416,36 @@ async function generateGitPatch(branchName, baseBranch, options = {}) { }; } - debugLog(`Final: SUCCESS - patchSize=${patchSize} bytes, patchLines=${patchLines}, baseCommit=${baseCommitSha || "(unknown)"}`); + // In incremental mode, also compute the net diff size between baseRef and the + // branch tip. The format-patch file size (patchSize) is the sum of every + // commit's individual diff plus per-commit metadata headers, which can be + // significantly larger than the actual net change. Consumers (e.g. + // push_to_pull_request_branch) should validate `max_patch_size` against the + // incremental net diff so the limit reflects how much the branch will + // actually change, not the cumulative size of the commit history. + // + // The measurement itself (stream to temp file via `git diff --output`, stat, + // cleanup) is extracted into git_patch_utils.computeIncrementalDiffSize so + // it is O(1) memory and independently unit-testable against a real repo. + let diffSize = null; + if (mode === "incremental" && baseCommitSha && branchName) { + diffSize = computeIncrementalDiffSize({ + baseRef: baseCommitSha, + headRef: branchName, + cwd, + tmpPath: `${patchPath}.diff.tmp`, + excludedFiles: options.excludedFiles, + }); + debugLog(`Final: diffSize=${diffSize ?? "(n/a)"} bytes (baseRef=${baseCommitSha}..${branchName})`); + } + + debugLog(`Final: SUCCESS - patchSize=${patchSize} bytes, patchLines=${patchLines}, diffSize=${diffSize ?? "(n/a)"} bytes, baseCommit=${baseCommitSha || "(unknown)"}`); return { success: true, patchPath: patchPath, patchSize: patchSize, patchLines: patchLines, + diffSize: diffSize, baseCommit: baseCommitSha, }; } diff --git a/actions/setup/js/git_patch_integration.test.cjs b/actions/setup/js/git_patch_integration.test.cjs index c998b3301a..85e5e23fa1 100644 --- a/actions/setup/js/git_patch_integration.test.cjs +++ b/actions/setup/js/git_patch_integration.test.cjs @@ -619,6 +619,52 @@ describe("git patch integration tests", () => { } }); + it("should report diffSize as the net diff between origin/branch and HEAD in incremental mode", async () => { + // Reproduces the long-running branch scenario from the issue: + // - origin/ already has accumulated history (e.g. many KB) + // - the agent makes a small new commit on top + // - the format-patch file size only reflects the *new* commit (because + // baseRef = origin/), but the returned diffSize must also be + // small and must NOT reflect the divergence from main. + + // Create the long-running branch with a "large" accumulated payload. + execGit(["checkout", "-b", "long-running-branch"], { cwd: workingRepo }); + const accumulated = "accumulated content line\n".repeat(2000); // ~50 KB + fs.writeFileSync(path.join(workingRepo, "accumulated.txt"), accumulated); + execGit(["add", "accumulated.txt"], { cwd: workingRepo }); + execGit(["commit", "-m", "Accumulated work from previous iterations"], { cwd: workingRepo }); + execGit(["push", "-u", "origin", "long-running-branch"], { cwd: workingRepo }); + + // Now the agent's "new iteration": a tiny incremental change. + fs.writeFileSync(path.join(workingRepo, "tiny.txt"), "tiny change\n"); + execGit(["add", "tiny.txt"], { cwd: workingRepo }); + execGit(["commit", "-m", "Tiny new iteration"], { cwd: workingRepo }); + + const origWorkspace = process.env.GITHUB_WORKSPACE; + const origDefaultBranch = process.env.DEFAULT_BRANCH; + process.env.GITHUB_WORKSPACE = workingRepo; + process.env.DEFAULT_BRANCH = "main"; + + try { + const result = await generateGitPatch("long-running-branch", "main", { mode: "incremental" }); + + expect(result.success).toBe(true); + expect(typeof result.diffSize).toBe("number"); + + // The incremental net diff is just the tiny.txt addition (well under 1 KB). + expect(result.diffSize).toBeGreaterThan(0); + expect(result.diffSize).toBeLessThan(1024); + + // And the diffSize must NOT include the accumulated 50 KB payload that + // already exists on origin/long-running-branch — that is the entire + // point of the fix. + expect(result.diffSize).toBeLessThan(2000); + } finally { + process.env.GITHUB_WORKSPACE = origWorkspace; + process.env.DEFAULT_BRANCH = origDefaultBranch; + } + }); + /** * Sets GITHUB_WORKSPACE, DEFAULT_BRANCH, GITHUB_TOKEN, and GITHUB_SERVER_URL for * a test, then restores the original values (or deletes them if they were unset). diff --git a/actions/setup/js/git_patch_utils.cjs b/actions/setup/js/git_patch_utils.cjs new file mode 100644 index 0000000000..b007c0fb1f --- /dev/null +++ b/actions/setup/js/git_patch_utils.cjs @@ -0,0 +1,162 @@ +// @ts-check +/// + +/** + * Utilities shared by git patch generation and validation code. + * + * This module intentionally has no side effects and no coupling to the + * patch-generation orchestration in generate_git_patch.cjs. Each helper is + * pure/stateless or performs a well-defined local filesystem operation, which + * keeps the surface small, easy to test against a real git repo, and reusable + * by other safe-output handlers (e.g. bundle transport, create_pull_request + * fallback paths). + */ + +const fs = require("fs"); + +const { getErrorMessage } = require("./error_helpers.cjs"); +const { execGitSync } = require("./git_helpers.cjs"); + +/** + * Debug logging helper - logs to stderr when DEBUG env var matches + * @param {string} message - Debug message to log + */ +function debugLog(message) { + const debug = process.env.DEBUG || ""; + if (debug === "*" || debug.includes("generate_git_patch") || debug.includes("patch")) { + console.error(`[git_patch_utils] ${message}`); + } +} + +/** + * Sanitize a string for use as a patch filename component. + * Replaces path separators and special characters with dashes. + * @param {string} value - The value to sanitize + * @param {string} fallback - Fallback value when input is empty or nullish + * @returns {string} The sanitized string safe for use in a filename + */ +function sanitizeForFilename(value, fallback) { + if (!value) return fallback; + return value + .replace(/[/\\:*?"<>|]/g, "-") + .replace(/-{2,}/g, "-") + .replace(/^-|-$/g, "") + .toLowerCase(); +} + +/** + * Sanitize a branch name for use as a patch filename + * @param {string} branchName - The branch name to sanitize + * @returns {string} The sanitized branch name safe for use in a filename + */ +function sanitizeBranchNameForPatch(branchName) { + return sanitizeForFilename(branchName, "unknown"); +} + +/** + * Sanitize a repo slug for use in a filename + * @param {string} repoSlug - The repo slug (owner/repo) + * @returns {string} The sanitized slug safe for use in a filename + */ +function sanitizeRepoSlugForPatch(repoSlug) { + return sanitizeForFilename(repoSlug, ""); +} + +/** + * Get the patch file path for a given branch name + * @param {string} branchName - The branch name + * @returns {string} The full patch file path + */ +function getPatchPath(branchName) { + const sanitized = sanitizeBranchNameForPatch(branchName); + return `/tmp/gh-aw/aw-${sanitized}.patch`; +} + +/** + * Get the patch file path for a given branch name and repo slug + * Used for multi-repo scenarios to prevent patch file collisions + * @param {string} branchName - The branch name + * @param {string} repoSlug - The repository slug (owner/repo) + * @returns {string} The full patch file path including repo disambiguation + */ +function getPatchPathForRepo(branchName, repoSlug) { + const sanitizedBranch = sanitizeBranchNameForPatch(branchName); + const sanitizedRepo = sanitizeRepoSlugForPatch(repoSlug); + return `/tmp/gh-aw/aw-${sanitizedRepo}-${sanitizedBranch}.patch`; +} + +/** + * Builds the pathspec arguments to exclude specific files from a git command. + * Produces ["--", ":(exclude)pattern1", ":(exclude)pattern2", ...] or [] when + * the input is empty/unset. These are passed after a "--" so git treats them + * as pathspecs, not revisions. + * + * @param {string[] | undefined | null} excludedFiles - Glob patterns to exclude + * @returns {string[]} Arguments to append to a git format-patch or git diff call + */ +function buildExcludePathspecs(excludedFiles) { + if (!Array.isArray(excludedFiles) || excludedFiles.length === 0) { + return []; + } + return ["--", ...excludedFiles.map(p => `:(exclude)${p}`)]; +} + +/** + * Compute the net diff size in bytes between two refs in the given git repo. + * + * This is the value that should be compared against `max_patch_size` in + * push_to_pull_request_branch: it reflects how much the PR branch will + * actually change as a result of the push, independent of how the patch or + * bundle transport encodes the commit history. + * + * Implementation note: we use `git diff --binary --output=` rather + * than buffering the diff through execGitSync's stdout. That keeps memory + * usage O(1) regardless of the diff size (we just stat the file) and avoids + * hitting the execGitSync maxBuffer on large binary diffs. The temp file is + * removed in `finally` on success, failure, and stat failure alike. + * + * @param {Object} args - Arguments + * @param {string} args.baseRef - Base ref (commit SHA, branch, or ref) + * @param {string} args.headRef - Head ref (commit SHA, branch, or ref) + * @param {string} args.cwd - Working directory containing the git repo + * @param {string} args.tmpPath - Absolute path to the temp diff file (will be + * written and removed by this function) + * @param {string[]} [args.excludedFiles] - Glob patterns to exclude + * @returns {number | null} The net diff size in bytes, or null on failure + */ +function computeIncrementalDiffSize({ baseRef, headRef, cwd, tmpPath, excludedFiles }) { + if (!baseRef || !headRef || !cwd || !tmpPath) { + return null; + } + const excludeArgs = buildExcludePathspecs(excludedFiles); + let diffSize = null; + try { + execGitSync(["diff", "--binary", `--output=${tmpPath}`, `${baseRef}..${headRef}`, ...excludeArgs], { cwd }); + if (fs.existsSync(tmpPath)) { + diffSize = fs.statSync(tmpPath).size; + debugLog(`Computed incremental net diffSize=${diffSize} bytes (baseRef=${baseRef}..${headRef})`); + } + } catch (diffErr) { + debugLog(`Failed to compute incremental net diffSize - ${getErrorMessage(diffErr)}`); + } finally { + // Best-effort cleanup of the temp diff file; we only needed its size. + try { + if (fs.existsSync(tmpPath)) { + fs.unlinkSync(tmpPath); + } + } catch { + // Cleanup failure is non-fatal. + } + } + return diffSize; +} + +module.exports = { + sanitizeForFilename, + sanitizeBranchNameForPatch, + sanitizeRepoSlugForPatch, + getPatchPath, + getPatchPathForRepo, + buildExcludePathspecs, + computeIncrementalDiffSize, +}; diff --git a/actions/setup/js/git_patch_utils.test.cjs b/actions/setup/js/git_patch_utils.test.cjs new file mode 100644 index 0000000000..2f85981587 --- /dev/null +++ b/actions/setup/js/git_patch_utils.test.cjs @@ -0,0 +1,222 @@ +/** + * Tests for git_patch_utils.cjs + * + * Pure helpers (sanitize, path, pathspec) are tested as simple unit tests. + * `computeIncrementalDiffSize` is tested against a REAL local git repository + * created in a temp directory, so it exercises `git diff --output=`, + * the O(1) stat-based size measurement, and temp-file cleanup end-to-end. + * + * These tests require `git` to be installed on the runner. + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; +import fs from "fs"; +import os from "os"; +import path from "path"; +import { spawnSync } from "child_process"; + +import { sanitizeForFilename, sanitizeBranchNameForPatch, sanitizeRepoSlugForPatch, getPatchPath, getPatchPathForRepo, buildExcludePathspecs, computeIncrementalDiffSize } from "./git_patch_utils.cjs"; + +// computeIncrementalDiffSize delegates to execGitSync from git_helpers.cjs, +// which calls the GitHub Actions `core.debug` / `core.error` globals. Stub +// them so this test suite can run outside of a workflow runner. +global.core = { + debug: vi.fn(), + error: vi.fn(), + info: vi.fn(), + warning: vi.fn(), +}; + +function execGit(args, options = {}) { + const result = spawnSync("git", args, { encoding: "utf8", ...options }); + if (result.error) throw result.error; + if (result.status !== 0 && !options.allowFailure) { + throw new Error(`git ${args.join(" ")} failed: ${result.stderr}`); + } + return result; +} + +function createTestRepo() { + const repoDir = fs.mkdtempSync(path.join(os.tmpdir(), "git-patch-utils-")); + execGit(["init", "-q"], { cwd: repoDir }); + execGit(["config", "user.name", "Test"], { cwd: repoDir }); + execGit(["config", "user.email", "test@example.com"], { cwd: repoDir }); + execGit(["config", "commit.gpgsign", "false"], { cwd: repoDir }); + fs.writeFileSync(path.join(repoDir, "README.md"), "# Test\n"); + execGit(["add", "."], { cwd: repoDir }); + execGit(["commit", "-q", "-m", "Initial commit"], { cwd: repoDir }); + execGit(["branch", "-M", "main"], { cwd: repoDir }); + return repoDir; +} + +function cleanupRepo(repoDir) { + if (repoDir && fs.existsSync(repoDir)) { + fs.rmSync(repoDir, { recursive: true, force: true }); + } +} + +describe("git_patch_utils - pure helpers", () => { + describe("sanitizeForFilename", () => { + it("returns fallback when value is empty or nullish", () => { + expect(sanitizeForFilename("", "fallback")).toBe("fallback"); + expect(sanitizeForFilename(null, "fallback")).toBe("fallback"); + expect(sanitizeForFilename(undefined, "fallback")).toBe("fallback"); + }); + + it("replaces path separators and special characters with dashes", () => { + expect(sanitizeForFilename("feat/foo", "x")).toBe("feat-foo"); + expect(sanitizeForFilename('a\\b:c*d?e"fh|i', "x")).toBe("a-b-c-d-e-f-g-h-i"); + }); + + it("collapses runs of dashes and trims leading/trailing dashes", () => { + expect(sanitizeForFilename("--foo//bar--", "x")).toBe("foo-bar"); + }); + + it("lowercases the result", () => { + expect(sanitizeForFilename("Feature/Mixed-Case", "x")).toBe("feature-mixed-case"); + }); + }); + + describe("sanitizeBranchNameForPatch / sanitizeRepoSlugForPatch", () => { + it("uses 'unknown' fallback for empty branch names", () => { + expect(sanitizeBranchNameForPatch("")).toBe("unknown"); + }); + it("uses empty fallback for empty repo slugs", () => { + expect(sanitizeRepoSlugForPatch("")).toBe(""); + }); + it("sanitizes owner/repo slugs", () => { + expect(sanitizeRepoSlugForPatch("github/Gh-AW")).toBe("github-gh-aw"); + }); + }); + + describe("getPatchPath / getPatchPathForRepo", () => { + it("returns the /tmp/gh-aw path with the sanitized branch name", () => { + expect(getPatchPath("feat/foo")).toBe("/tmp/gh-aw/aw-feat-foo.patch"); + }); + it("includes a sanitized repo slug for multi-repo scenarios", () => { + expect(getPatchPathForRepo("feat/foo", "github/gh-aw")).toBe("/tmp/gh-aw/aw-github-gh-aw-feat-foo.patch"); + }); + }); + + describe("buildExcludePathspecs", () => { + it("returns [] for undefined/null/empty inputs", () => { + expect(buildExcludePathspecs(undefined)).toEqual([]); + expect(buildExcludePathspecs(null)).toEqual([]); + expect(buildExcludePathspecs([])).toEqual([]); + }); + + it("produces [--, :(exclude), ...] for non-empty inputs", () => { + expect(buildExcludePathspecs(["*.lock", "dist/**"])).toEqual(["--", ":(exclude)*.lock", ":(exclude)dist/**"]); + }); + + it("ignores non-array inputs (returns [])", () => { + // @ts-expect-error - exercising runtime guard + expect(buildExcludePathspecs("not-an-array")).toEqual([]); + }); + }); +}); + +describe("git_patch_utils.computeIncrementalDiffSize - real git repo", () => { + let repoDir; + + beforeEach(() => { + repoDir = createTestRepo(); + }); + + afterEach(() => { + cleanupRepo(repoDir); + }); + + it("returns a positive size that matches the actual diff bytes for a single-file change", () => { + const baseSha = execGit(["rev-parse", "HEAD"], { cwd: repoDir }).stdout.trim(); + const body = "line\n".repeat(200); // deterministic content + fs.writeFileSync(path.join(repoDir, "file.txt"), body); + execGit(["add", "."], { cwd: repoDir }); + execGit(["commit", "-q", "-m", "add file"], { cwd: repoDir }); + const headSha = execGit(["rev-parse", "HEAD"], { cwd: repoDir }).stdout.trim(); + + const tmpPath = path.join(repoDir, ".diffsize.tmp"); + const size = computeIncrementalDiffSize({ + baseRef: baseSha, + headRef: headSha, + cwd: repoDir, + tmpPath, + }); + + // Cross-check against `git diff` run independently. + const expected = Buffer.byteLength(execGit(["diff", "--binary", `${baseSha}..${headSha}`], { cwd: repoDir }).stdout, "utf8"); + + expect(size).toBe(expected); + expect(size).toBeGreaterThan(body.length - 50); // at minimum ~the file contents + }); + + it("always cleans up the temp file, even on success", () => { + const baseSha = execGit(["rev-parse", "HEAD"], { cwd: repoDir }).stdout.trim(); + fs.writeFileSync(path.join(repoDir, "b.txt"), "hello\n"); + execGit(["add", "."], { cwd: repoDir }); + execGit(["commit", "-q", "-m", "add b"], { cwd: repoDir }); + const headSha = execGit(["rev-parse", "HEAD"], { cwd: repoDir }).stdout.trim(); + + const tmpPath = path.join(repoDir, ".diffsize.tmp"); + computeIncrementalDiffSize({ baseRef: baseSha, headRef: headSha, cwd: repoDir, tmpPath }); + + expect(fs.existsSync(tmpPath)).toBe(false); + }); + + it("returns 0 when the two refs are identical (empty net diff)", () => { + const sha = execGit(["rev-parse", "HEAD"], { cwd: repoDir }).stdout.trim(); + const tmpPath = path.join(repoDir, ".diffsize.tmp"); + const size = computeIncrementalDiffSize({ baseRef: sha, headRef: sha, cwd: repoDir, tmpPath }); + expect(size).toBe(0); + expect(fs.existsSync(tmpPath)).toBe(false); + }); + + it("honors excludedFiles pathspecs (excluded content does not contribute to diff size)", () => { + const baseSha = execGit(["rev-parse", "HEAD"], { cwd: repoDir }).stdout.trim(); + // Two files: one kept, one excluded. The excluded file is much larger. + fs.writeFileSync(path.join(repoDir, "keep.txt"), "keep\n"); + fs.writeFileSync(path.join(repoDir, "big.lock"), "x".repeat(20 * 1024)); + execGit(["add", "."], { cwd: repoDir }); + execGit(["commit", "-q", "-m", "add both"], { cwd: repoDir }); + const headSha = execGit(["rev-parse", "HEAD"], { cwd: repoDir }).stdout.trim(); + + const tmpPath = path.join(repoDir, ".diffsize.tmp"); + const sizeWithExclude = computeIncrementalDiffSize({ + baseRef: baseSha, + headRef: headSha, + cwd: repoDir, + tmpPath, + excludedFiles: ["*.lock"], + }); + const sizeNoExclude = computeIncrementalDiffSize({ + baseRef: baseSha, + headRef: headSha, + cwd: repoDir, + tmpPath, + }); + + expect(sizeWithExclude).toBeGreaterThan(0); + expect(sizeNoExclude).toBeGreaterThan(sizeWithExclude); + // The excluded 20 KB lock file should account for the majority of the delta. + expect(sizeNoExclude - sizeWithExclude).toBeGreaterThan(10 * 1024); + }); + + it("returns null for an invalid baseRef (and still cleans up)", () => { + const tmpPath = path.join(repoDir, ".diffsize.tmp"); + const size = computeIncrementalDiffSize({ + baseRef: "does-not-exist-ref", + headRef: "HEAD", + cwd: repoDir, + tmpPath, + }); + expect(size).toBeNull(); + expect(fs.existsSync(tmpPath)).toBe(false); + }); + + it("returns null when required arguments are missing", () => { + expect(computeIncrementalDiffSize({ baseRef: "", headRef: "HEAD", cwd: "/tmp", tmpPath: "/tmp/x" })).toBeNull(); + expect(computeIncrementalDiffSize({ baseRef: "HEAD", headRef: "", cwd: "/tmp", tmpPath: "/tmp/x" })).toBeNull(); + expect(computeIncrementalDiffSize({ baseRef: "HEAD", headRef: "HEAD", cwd: "", tmpPath: "/tmp/x" })).toBeNull(); + expect(computeIncrementalDiffSize({ baseRef: "HEAD", headRef: "HEAD", cwd: "/tmp", tmpPath: "" })).toBeNull(); + }); +}); diff --git a/actions/setup/js/push_to_pull_request_branch.cjs b/actions/setup/js/push_to_pull_request_branch.cjs index ad3c52dcf3..643cb90b95 100644 --- a/actions/setup/js/push_to_pull_request_branch.cjs +++ b/actions/setup/js/push_to_pull_request_branch.cjs @@ -175,14 +175,66 @@ async function main(config = {}) { isEmpty = !patchContent || !patchContent.trim(); } - if (!hasBundleFile && !isEmpty) { - const patchSizeBytes = Buffer.byteLength(patchContent, "utf8"); + // Validate patch/bundle size against `max_patch_size`. + // + // Size-check source of truth, in order of preference: + // 1. `message.diff_size` — the incremental net diff size recorded at + // patch/bundle generation time (this is the correct quantity to cap: + // how much the PR branch will actually change as a result of the push). + // 2. For bundle transport: the on-disk bundle file size. + // 3. For patch transport: the format-patch file size. + // + // Using `diff_size` when present fixes the long-running branch case where + // the transport file accumulates per-commit metadata + per-commit diffs and + // can be many MB even when each iteration only changes a few KB. + if (!isEmpty) { + const patchSizeBytes = hasBundleFile ? 0 : Buffer.byteLength(patchContent, "utf8"); const patchSizeKb = Math.ceil(patchSizeBytes / 1024); - core.info(`Patch size: ${patchSizeKb} KB (maximum allowed: ${maxSizeKb} KB)`); + let bundleSizeBytes = 0; + if (hasBundleFile) { + try { + bundleSizeBytes = fs.statSync(bundleFilePath).size; + } catch (statErr) { + core.warning(`Failed to stat bundle file for size check: ${getErrorMessage(statErr)}`); + } + } + const bundleSizeKb = Math.ceil(bundleSizeBytes / 1024); + + const diffSizeBytesRaw = message.diff_size; + const haveDiffSize = typeof diffSizeBytesRaw === "number" && diffSizeBytesRaw >= 0; + + let sizeForCheckBytes; + let sizeLabel; + if (haveDiffSize) { + sizeForCheckBytes = diffSizeBytesRaw; + sizeLabel = "Incremental diff size"; + } else if (hasBundleFile) { + sizeForCheckBytes = bundleSizeBytes; + sizeLabel = "Bundle size"; + } else { + sizeForCheckBytes = patchSizeBytes; + sizeLabel = "Patch size"; + } + const sizeForCheckKb = Math.ceil(sizeForCheckBytes / 1024); - if (patchSizeKb > maxSizeKb) { - const msg = `Patch size (${patchSizeKb} KB) exceeds maximum allowed size (${maxSizeKb} KB)`; + if (hasBundleFile) { + core.info(`Bundle file size: ${bundleSizeKb} KB`); + } else { + core.info(`Patch file size: ${patchSizeKb} KB`); + } + core.info(`${sizeLabel}: ${sizeForCheckKb} KB (maximum allowed: ${maxSizeKb} KB)`); + + if (sizeForCheckKb > maxSizeKb) { + let msg; + if (haveDiffSize) { + const transportLabel = hasBundleFile ? `Bundle size: ${bundleSizeKb} KB` : `Patch file size: ${patchSizeKb} KB`; + msg = `Incremental diff size (${sizeForCheckKb} KB) exceeds maximum allowed size (${maxSizeKb} KB). ${transportLabel}.`; + } else if (hasBundleFile) { + msg = `Bundle size (${sizeForCheckKb} KB) exceeds maximum allowed size (${maxSizeKb} KB)`; + } else { + msg = `Patch size (${sizeForCheckKb} KB) exceeds maximum allowed size (${maxSizeKb} KB)`; + } return { success: false, error: msg }; } diff --git a/actions/setup/js/push_to_pull_request_branch.test.cjs b/actions/setup/js/push_to_pull_request_branch.test.cjs index 0a2bc25112..4d5ea8fa60 100644 --- a/actions/setup/js/push_to_pull_request_branch.test.cjs +++ b/actions/setup/js/push_to_pull_request_branch.test.cjs @@ -1054,6 +1054,91 @@ index 0000000..abc1234 expect(result.success).toBe(true); expect(mockCore.info).toHaveBeenCalledWith("Patch size validation passed"); }); + + it("should prefer message.diff_size (incremental net diff) over patch file size", async () => { + // Simulate the long-running branch case: a large format-patch file + // (e.g. 2 MB of cumulative commit metadata + per-commit diffs) but a + // tiny incremental net diff (e.g. 5 KB of actual changes since + // origin/). The size check must use diff_size and accept the push. + const largePatch = "x".repeat(2 * 1024 * 1024); // 2 MB format-patch file + const patchPath = createPatchFile(largePatch); + + mockExec.getExecOutput.mockResolvedValue({ exitCode: 0, stdout: "abc123\n", stderr: "" }); + + const module = await loadModule(); + const handler = await module.main({ max_patch_size: 1024 }); // 1 MB max + const result = await handler({ patch_path: patchPath, diff_size: 5 * 1024 }, {}); + + expect(result.success).toBe(true); + expect(mockCore.info).toHaveBeenCalledWith("Patch size validation passed"); + // Verify the size check used the incremental (diff_size) value, not the + // 2 MB file size. + expect(mockCore.info).toHaveBeenCalledWith(expect.stringContaining("Incremental diff size: 5 KB")); + }); + + it("should reject when message.diff_size exceeds max size even if file size is small", async () => { + // Inverse case: small file (defensive — shouldn't happen in practice) + // but a recorded large diff_size should still cause rejection. This + // proves diff_size is the source of truth for the size check. + const patchPath = createPatchFile(); // small valid patch + + const module = await loadModule(); + const handler = await module.main({ max_patch_size: 1024 }); // 1 MB max + const result = await handler({ patch_path: patchPath, diff_size: 2 * 1024 * 1024 }, {}); + + expect(result.success).toBe(false); + expect(result.error).toContain("exceeds maximum"); + }); + + it("should fall back to patch file size when message.diff_size is not provided", async () => { + // Backward-compat: older MCP servers (or non-incremental code paths) + // do not set diff_size. The check must continue to work using the patch + // file size as the measurement. + const largePatch = "x".repeat(2 * 1024 * 1024); // 2 MB + const patchPath = createPatchFile(largePatch); + + const module = await loadModule(); + const handler = await module.main({ max_patch_size: 1024 }); // 1 MB max + const result = await handler({ patch_path: patchPath }, {}); + + expect(result.success).toBe(false); + expect(result.error).toContain("exceeds maximum"); + }); + + it("should enforce max_patch_size against the bundle file size when bundle transport is used", async () => { + // Bundle transport does not have a format-patch file. The max_patch_size + // limit must still apply — using the on-disk bundle file size — so large + // bundles cannot silently bypass the limit. + const bundlePath = path.join(tempDir, "test.bundle"); + // 2 MB dummy bundle file (contents don't matter; only size is checked) + fs.writeFileSync(bundlePath, Buffer.alloc(2 * 1024 * 1024)); + + const module = await loadModule(); + const handler = await module.main({ max_patch_size: 1024 }); // 1 MB max + const result = await handler({ bundle_path: bundlePath }, {}); + + expect(result.success).toBe(false); + expect(result.error).toContain("exceeds maximum"); + expect(result.error).toMatch(/Bundle size|Incremental diff size/); + }); + + it("should prefer diff_size over bundle file size for the limit check", async () => { + // Bundle is 2 MB on disk, but the incremental net diff is only 5 KB: + // the check must accept the push (limit reflects the real change, not the + // compressed transport size). + const bundlePath = path.join(tempDir, "test.bundle"); + fs.writeFileSync(bundlePath, Buffer.alloc(2 * 1024 * 1024)); + + mockExec.getExecOutput.mockResolvedValue({ exitCode: 0, stdout: "abc123\n", stderr: "" }); + + const module = await loadModule(); + const handler = await module.main({ max_patch_size: 1024 }); // 1 MB max + const result = await handler({ bundle_path: bundlePath, diff_size: 5 * 1024 }, {}); + + expect(result.success).toBe(true); + expect(mockCore.info).toHaveBeenCalledWith("Patch size validation passed"); + expect(mockCore.info).toHaveBeenCalledWith(expect.stringContaining("Incremental diff size: 5 KB")); + }); }); // ────────────────────────────────────────────────────── diff --git a/actions/setup/js/safe_outputs_handlers.cjs b/actions/setup/js/safe_outputs_handlers.cjs index ac92aab4d8..02bba1a83d 100644 --- a/actions/setup/js/safe_outputs_handlers.cjs +++ b/actions/setup/js/safe_outputs_handlers.cjs @@ -628,7 +628,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { } // prettier-ignore - server.debug(`Patch generated successfully: ${patchResult.patchPath} (${patchResult.patchSize} bytes, ${patchResult.patchLines} lines)`); + server.debug(`Patch generated successfully: ${patchResult.patchPath} (${patchResult.patchSize} bytes, ${patchResult.patchLines} lines, diffSize=${patchResult.diffSize ?? "(n/a)"} bytes)`); // Store the patch path in the entry so consumers know which file to use entry.patch_path = patchResult.patchPath; @@ -638,6 +638,16 @@ function createHandlers(server, appendSafeOutput, config = {}) { entry.base_commit = patchResult.baseCommit; } + // Store the incremental net diff size so push_to_pull_request_branch can + // validate `max_patch_size` against the actual incremental change relative + // to the existing PR branch head, not the (potentially much larger) size of + // the format-patch transport file. This is critical for the long-running + // branch pattern (e.g. autoloop) where the format-patch can include many + // commits but each iteration only changes a few KB. + if (typeof patchResult.diffSize === "number" && patchResult.diffSize >= 0) { + entry.diff_size = patchResult.diffSize; + } + appendSafeOutput(entry); return { content: [ diff --git a/actions/setup/setup.sh b/actions/setup/setup.sh index ae7899627b..48fd54cbb9 100755 --- a/actions/setup/setup.sh +++ b/actions/setup/setup.sh @@ -304,6 +304,7 @@ SAFE_OUTPUTS_FILES=( "estimate_tokens.cjs" "generate_git_patch.cjs" "generate_git_bundle.cjs" + "git_patch_utils.cjs" "get_base_branch.cjs" "get_current_branch.cjs" "normalize_branch_name.cjs"