Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .changeset/patch-push-pr-incremental-size-check.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

112 changes: 51 additions & 61 deletions actions/setup/js/generate_git_patch.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ const path = require("path");
const { getErrorMessage } = require("./error_helpers.cjs");
const { execGitSync, getGitAuthEnv } = require("./git_helpers.cjs");
const { ERR_SYSTEM } = require("./error_codes.cjs");
const { sanitizeForFilename, sanitizeBranchNameForPatch, sanitizeRepoSlugForPatch, getPatchPath, getPatchPathForRepo, buildExcludePathspecs, computeIncrementalDiffSize } = require("./git_patch_utils.cjs");

// sanitizeForFilename is re-exported below for backward compatibility with
// existing callers that imported it from this module.
void sanitizeForFilename;

/**
* Debug logging helper - logs to stderr when DEBUG env var matches
Expand All @@ -24,63 +29,6 @@ function debugLog(message) {
}
}

/**
* Sanitize a string for use as a patch filename component.
* Replaces path separators and special characters with dashes.
* @param {string} value - The value to sanitize
* @param {string} fallback - Fallback value when input is empty or nullish
* @returns {string} The sanitized string safe for use in a filename
*/
function sanitizeForFilename(value, fallback) {
if (!value) return fallback;
return value
.replace(/[/\\:*?"<>|]/g, "-")
.replace(/-{2,}/g, "-")
.replace(/^-|-$/g, "")
.toLowerCase();
}

/**
* Sanitize a branch name for use as a patch filename
* @param {string} branchName - The branch name to sanitize
* @returns {string} The sanitized branch name safe for use in a filename
*/
function sanitizeBranchNameForPatch(branchName) {
return sanitizeForFilename(branchName, "unknown");
}

/**
* Get the patch file path for a given branch name
* @param {string} branchName - The branch name
* @returns {string} The full patch file path
*/
function getPatchPath(branchName) {
const sanitized = sanitizeBranchNameForPatch(branchName);
return `/tmp/gh-aw/aw-${sanitized}.patch`;
}

/**
* Sanitize a repo slug for use in a filename
* @param {string} repoSlug - The repo slug (owner/repo)
* @returns {string} The sanitized slug safe for use in a filename
*/
function sanitizeRepoSlugForPatch(repoSlug) {
return sanitizeForFilename(repoSlug, "");
}

/**
* Get the patch file path for a given branch name and repo slug
* Used for multi-repo scenarios to prevent patch file collisions
* @param {string} branchName - The branch name
* @param {string} repoSlug - The repository slug (owner/repo)
* @returns {string} The full patch file path including repo disambiguation
*/
function getPatchPathForRepo(branchName, repoSlug) {
const sanitizedBranch = sanitizeBranchNameForPatch(branchName);
const sanitizedRepo = sanitizeRepoSlugForPatch(repoSlug);
return `/tmp/gh-aw/aw-${sanitizedRepo}-${sanitizedBranch}.patch`;
}

/**
* Generates a git patch file for the current changes
* @param {string} branchName - The branch name to generate patch for
Expand Down Expand Up @@ -111,15 +59,14 @@ async function generateGitPatch(branchName, baseBranch, options = {}) {
// These are appended after "--" so git treats them as pathspecs, not revisions.
// Using git's native pathspec magic keeps the exclusions out of the patch entirely
// without any post-processing of the generated patch file.
const excludePathspecs = Array.isArray(options.excludedFiles) && options.excludedFiles.length > 0 ? options.excludedFiles.map(p => `:(exclude)${p}`) : [];
const excludeArgsArr = buildExcludePathspecs(options.excludedFiles);

/**
* Returns the arguments to append to a format-patch call when excludedFiles is set.
* Produces ["--", ":(exclude)pattern1", ":(exclude)pattern2", ...] or [].
* @returns {string[]}
*/
function excludeArgs() {
return excludePathspecs.length > 0 ? ["--", ...excludePathspecs] : [];
return excludeArgsArr;
}
const patchPath = options.repoSlug ? getPatchPathForRepo(branchName, options.repoSlug) : getPatchPath(branchName);

Expand Down Expand Up @@ -294,6 +241,25 @@ async function generateGitPatch(branchName, baseBranch, options = {}) {
patchLines: 0,
};
}

// In incremental mode, the patch must be measured relative to the existing
// PR branch head (origin/<branch>), never relative to the default branch.
// If Strategy 1 did not produce a patch (e.g. format-patch yielded empty
// output for an unusual commit shape — excluded-files filtering away every
// change, or binary-only commits with unusual encoding), do NOT fall
// through to Strategy 2 or Strategy 3 — those use GITHUB_SHA..HEAD or
// merge-base with a remote ref and would produce a checkout-base diff
// (which can be many MB on a long-running branch). Returning an explicit
// error preserves the "incremental" contract that the patch reflects only
// the new commits.
if (!patchGenerated && mode === "incremental") {
debugLog(`Strategy 1 (incremental): format-patch produced no output for ${baseRef}..${branchName} despite ${commitCount} incremental commit(s), refusing to fall through to checkout-base strategies`);
return {
success: false,
error: `Cannot generate incremental patch: git format-patch produced no output for ${baseRef}..${branchName} despite ${commitCount} incremental commit(s).`,
patchPath: patchPath,
};
}
} catch (branchError) {
// Branch does not exist locally
debugLog(`Strategy 1: Branch '${branchName}' does not exist locally - ${getErrorMessage(branchError)}`);
Expand Down Expand Up @@ -450,12 +416,36 @@ async function generateGitPatch(branchName, baseBranch, options = {}) {
};
}

debugLog(`Final: SUCCESS - patchSize=${patchSize} bytes, patchLines=${patchLines}, baseCommit=${baseCommitSha || "(unknown)"}`);
// In incremental mode, also compute the net diff size between baseRef and the
// branch tip. The format-patch file size (patchSize) is the sum of every
// commit's individual diff plus per-commit metadata headers, which can be
// significantly larger than the actual net change. Consumers (e.g.
// push_to_pull_request_branch) should validate `max_patch_size` against the
// incremental net diff so the limit reflects how much the branch will
// actually change, not the cumulative size of the commit history.
//
// The measurement itself (stream to temp file via `git diff --output`, stat,
// cleanup) is extracted into git_patch_utils.computeIncrementalDiffSize so
// it is O(1) memory and independently unit-testable against a real repo.
let diffSize = null;
if (mode === "incremental" && baseCommitSha && branchName) {
diffSize = computeIncrementalDiffSize({
baseRef: baseCommitSha,
headRef: branchName,
cwd,
tmpPath: `${patchPath}.diff.tmp`,
excludedFiles: options.excludedFiles,
});
debugLog(`Final: diffSize=${diffSize ?? "(n/a)"} bytes (baseRef=${baseCommitSha}..${branchName})`);
}

debugLog(`Final: SUCCESS - patchSize=${patchSize} bytes, patchLines=${patchLines}, diffSize=${diffSize ?? "(n/a)"} bytes, baseCommit=${baseCommitSha || "(unknown)"}`);
return {
success: true,
patchPath: patchPath,
patchSize: patchSize,
patchLines: patchLines,
diffSize: diffSize,
baseCommit: baseCommitSha,
};
}
Expand Down
46 changes: 46 additions & 0 deletions actions/setup/js/git_patch_integration.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,52 @@ describe("git patch integration tests", () => {
}
});

it("should report diffSize as the net diff between origin/branch and HEAD in incremental mode", async () => {
// Reproduces the long-running branch scenario from the issue:
// - origin/<branch> already has accumulated history (e.g. many KB)
// - the agent makes a small new commit on top
// - the format-patch file size only reflects the *new* commit (because
// baseRef = origin/<branch>), but the returned diffSize must also be
// small and must NOT reflect the divergence from main.

// Create the long-running branch with a "large" accumulated payload.
execGit(["checkout", "-b", "long-running-branch"], { cwd: workingRepo });
const accumulated = "accumulated content line\n".repeat(2000); // ~50 KB
fs.writeFileSync(path.join(workingRepo, "accumulated.txt"), accumulated);
execGit(["add", "accumulated.txt"], { cwd: workingRepo });
execGit(["commit", "-m", "Accumulated work from previous iterations"], { cwd: workingRepo });
execGit(["push", "-u", "origin", "long-running-branch"], { cwd: workingRepo });

// Now the agent's "new iteration": a tiny incremental change.
fs.writeFileSync(path.join(workingRepo, "tiny.txt"), "tiny change\n");
execGit(["add", "tiny.txt"], { cwd: workingRepo });
execGit(["commit", "-m", "Tiny new iteration"], { cwd: workingRepo });

const origWorkspace = process.env.GITHUB_WORKSPACE;
const origDefaultBranch = process.env.DEFAULT_BRANCH;
process.env.GITHUB_WORKSPACE = workingRepo;
process.env.DEFAULT_BRANCH = "main";

try {
const result = await generateGitPatch("long-running-branch", "main", { mode: "incremental" });

expect(result.success).toBe(true);
expect(typeof result.diffSize).toBe("number");

// The incremental net diff is just the tiny.txt addition (well under 1 KB).
expect(result.diffSize).toBeGreaterThan(0);
expect(result.diffSize).toBeLessThan(1024);

// And the diffSize must NOT include the accumulated 50 KB payload that
// already exists on origin/long-running-branch — that is the entire
// point of the fix.
expect(result.diffSize).toBeLessThan(2000);
} finally {
process.env.GITHUB_WORKSPACE = origWorkspace;
process.env.DEFAULT_BRANCH = origDefaultBranch;
}
});

/**
* Sets GITHUB_WORKSPACE, DEFAULT_BRANCH, GITHUB_TOKEN, and GITHUB_SERVER_URL for
* a test, then restores the original values (or deletes them if they were unset).
Expand Down
162 changes: 162 additions & 0 deletions actions/setup/js/git_patch_utils.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// @ts-check
/// <reference types="@actions/github-script" />

/**
* Utilities shared by git patch generation and validation code.
*
* This module intentionally has no side effects and no coupling to the
* patch-generation orchestration in generate_git_patch.cjs. Each helper is
* pure/stateless or performs a well-defined local filesystem operation, which
* keeps the surface small, easy to test against a real git repo, and reusable
* by other safe-output handlers (e.g. bundle transport, create_pull_request
* fallback paths).
*/

const fs = require("fs");

const { getErrorMessage } = require("./error_helpers.cjs");
const { execGitSync } = require("./git_helpers.cjs");

/**
* Debug logging helper - logs to stderr when DEBUG env var matches
* @param {string} message - Debug message to log
*/
function debugLog(message) {
const debug = process.env.DEBUG || "";
if (debug === "*" || debug.includes("generate_git_patch") || debug.includes("patch")) {
console.error(`[git_patch_utils] ${message}`);
}
}

/**
* Sanitize a string for use as a patch filename component.
* Replaces path separators and special characters with dashes.
* @param {string} value - The value to sanitize
* @param {string} fallback - Fallback value when input is empty or nullish
* @returns {string} The sanitized string safe for use in a filename
*/
function sanitizeForFilename(value, fallback) {
if (!value) return fallback;
return value
.replace(/[/\\:*?"<>|]/g, "-")
.replace(/-{2,}/g, "-")
.replace(/^-|-$/g, "")
.toLowerCase();
}

/**
* Sanitize a branch name for use as a patch filename
* @param {string} branchName - The branch name to sanitize
* @returns {string} The sanitized branch name safe for use in a filename
*/
function sanitizeBranchNameForPatch(branchName) {
return sanitizeForFilename(branchName, "unknown");
}

/**
* Sanitize a repo slug for use in a filename
* @param {string} repoSlug - The repo slug (owner/repo)
* @returns {string} The sanitized slug safe for use in a filename
*/
function sanitizeRepoSlugForPatch(repoSlug) {
return sanitizeForFilename(repoSlug, "");
}

/**
* Get the patch file path for a given branch name
* @param {string} branchName - The branch name
* @returns {string} The full patch file path
*/
function getPatchPath(branchName) {
const sanitized = sanitizeBranchNameForPatch(branchName);
return `/tmp/gh-aw/aw-${sanitized}.patch`;
}

/**
* Get the patch file path for a given branch name and repo slug
* Used for multi-repo scenarios to prevent patch file collisions
* @param {string} branchName - The branch name
* @param {string} repoSlug - The repository slug (owner/repo)
* @returns {string} The full patch file path including repo disambiguation
*/
function getPatchPathForRepo(branchName, repoSlug) {
const sanitizedBranch = sanitizeBranchNameForPatch(branchName);
const sanitizedRepo = sanitizeRepoSlugForPatch(repoSlug);
return `/tmp/gh-aw/aw-${sanitizedRepo}-${sanitizedBranch}.patch`;
}

/**
* Builds the pathspec arguments to exclude specific files from a git command.
* Produces ["--", ":(exclude)pattern1", ":(exclude)pattern2", ...] or [] when
* the input is empty/unset. These are passed after a "--" so git treats them
* as pathspecs, not revisions.
*
* @param {string[] | undefined | null} excludedFiles - Glob patterns to exclude
* @returns {string[]} Arguments to append to a git format-patch or git diff call
*/
function buildExcludePathspecs(excludedFiles) {
if (!Array.isArray(excludedFiles) || excludedFiles.length === 0) {
return [];
}
return ["--", ...excludedFiles.map(p => `:(exclude)${p}`)];
}

/**
* Compute the net diff size in bytes between two refs in the given git repo.
*
* This is the value that should be compared against `max_patch_size` in
* push_to_pull_request_branch: it reflects how much the PR branch will
* actually change as a result of the push, independent of how the patch or
* bundle transport encodes the commit history.
*
* Implementation note: we use `git diff --binary --output=<tmpfile>` rather
* than buffering the diff through execGitSync's stdout. That keeps memory
* usage O(1) regardless of the diff size (we just stat the file) and avoids
* hitting the execGitSync maxBuffer on large binary diffs. The temp file is
* removed in `finally` on success, failure, and stat failure alike.
*
* @param {Object} args - Arguments
* @param {string} args.baseRef - Base ref (commit SHA, branch, or ref)
* @param {string} args.headRef - Head ref (commit SHA, branch, or ref)
* @param {string} args.cwd - Working directory containing the git repo
* @param {string} args.tmpPath - Absolute path to the temp diff file (will be
* written and removed by this function)
* @param {string[]} [args.excludedFiles] - Glob patterns to exclude
* @returns {number | null} The net diff size in bytes, or null on failure
*/
function computeIncrementalDiffSize({ baseRef, headRef, cwd, tmpPath, excludedFiles }) {
if (!baseRef || !headRef || !cwd || !tmpPath) {
return null;
}
const excludeArgs = buildExcludePathspecs(excludedFiles);
let diffSize = null;
try {
execGitSync(["diff", "--binary", `--output=${tmpPath}`, `${baseRef}..${headRef}`, ...excludeArgs], { cwd });
if (fs.existsSync(tmpPath)) {
diffSize = fs.statSync(tmpPath).size;
debugLog(`Computed incremental net diffSize=${diffSize} bytes (baseRef=${baseRef}..${headRef})`);
}
} catch (diffErr) {
debugLog(`Failed to compute incremental net diffSize - ${getErrorMessage(diffErr)}`);
} finally {
// Best-effort cleanup of the temp diff file; we only needed its size.
try {
if (fs.existsSync(tmpPath)) {
fs.unlinkSync(tmpPath);
}
} catch {
// Cleanup failure is non-fatal.
}
}
return diffSize;
}

module.exports = {
sanitizeForFilename,
sanitizeBranchNameForPatch,
sanitizeRepoSlugForPatch,
getPatchPath,
getPatchPathForRepo,
buildExcludePathspecs,
computeIncrementalDiffSize,
};
Loading
Loading