From 3caf5d7226045f7cdf23b275294dd3b1a086c9a6 Mon Sep 17 00:00:00 2001 From: James Devine Date: Sat, 11 Apr 2026 17:14:04 +0100 Subject: [PATCH 01/28] feat(create-pr): align with gh-aw create-pull-request implementation Closes the gap between ado-aw and gh-aw's create-pull-request safe output: Security/Correctness: - Add file protection system blocking manifests (package.json, go.mod, Cargo.toml, etc.), CI configs (.github/, .pipelines/), and agent instruction files (.agents/, .claude/, .copilot/) by default - Add max-files limit per PR (default: 100, configurable) - Add 3-way merge fallback when patch application fails on conflicts - Replace predictable timestamp-based branch suffix with cryptographic random hex (rand crate) Feature Parity: - Add draft PR support (default: true, operator-enforced via isDraft) - Add fallback-as-work-item: record branch info on PR creation failure - Add excluded-files glob config to filter files from patches - Add if-no-changes config (warn/error/ignore) for empty patches - Add allowed-labels allowlist to restrict agent-provided labels - Add title-prefix config for operator branding - Add agent-provided labels parameter validated against allowed-labels Patch Format: - Migrate from raw git diff to git format-patch for proper commit metadata, rename detection, and binary file handling - Stage 2 applies patches via git am --3way with git apply fallback - Add collect_changes_from_diff_tree for committed patch changes - Add git to default bash command allowlist so agents can commit - Update tool description to encourage staging commits before PR creation Other: - Add provenance footer to PR body with timestamp and compiler version - Add ExecutionResult::failure_with_data for structured error responses Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 52 ++- Cargo.toml | 2 + src/compile/common.rs | 4 +- src/mcp.rs | 139 +++---- src/tools/create_pr.rs | 842 +++++++++++++++++++++++++++++++++++++---- 
src/tools/result.rs | 9 + 6 files changed, 898 insertions(+), 150 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7220c88..572b5e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,11 @@ dependencies = [ "clap", "dirs", "env_logger", + "glob-match", "inquire", "log", "percent-encoding", + "rand 0.10.1", "regex-lite", "reqwest", "rmcp", @@ -219,6 +221,17 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures", + "rand_core 0.10.0", +] + [[package]] name = "chrono" version = "0.4.43" @@ -304,6 +317,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crossterm" version = "0.29.0" @@ -661,10 +683,17 @@ dependencies = [ "cfg-if", "libc", "r-efi 6.0.0", + "rand_core 0.10.0", "wasip2", "wasip3", ] +[[package]] +name = "glob-match" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985c9503b412198aa4197559e9a318524ebc4519c229bfa05a535828c950b9d" + [[package]] name = "h2" version = "0.4.13" @@ -1364,7 +1393,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand" +version = "0.10.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.0", ] [[package]] @@ -1374,7 +1414,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", ] [[package]] @@ -1386,6 +1426,12 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_core" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" + [[package]] name = "redox_syscall" version = "0.5.18" @@ -1532,7 +1578,7 @@ dependencies = [ "http-body-util", "paste", "pin-project-lite", - "rand", + "rand 0.9.2", "rmcp-macros", "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index 6d12966..c2ebcd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,8 @@ terminal_size = "0.4.3" url = "2.5.8" axum = { version = "0.8.8", features = ["tokio"] } subtle = "2.6.1" +rand = "0.10.1" +glob-match = "0.2.1" [dev-dependencies] reqwest = { version = "0.12", features = ["blocking"] } diff --git a/src/compile/common.rs b/src/compile/common.rs index a75ae6b..23f0b48 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -261,9 +261,9 @@ pub fn validate_checkout_list(repositories: &[Repository], checkout: &[String]) Ok(()) } -/// Default bash commands allowed for agents (matches gh-aw defaults + yq) +/// Default bash commands allowed for agents (matches gh-aw defaults + yq + git) const DEFAULT_BASH_COMMANDS: &[&str] = &[ - "cat", "date", "echo", "grep", "head", "ls", "pwd", "sort", "tail", "uniq", "wc", "yq", + "cat", "date", "echo", "git", "grep", "head", "ls", "pwd", "sort", "tail", "uniq", "wc", "yq", ]; /// Generate copilot CLI params from front matter configuration diff --git 
a/src/mcp.rs b/src/mcp.rs index 8384301..fd6aed0 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -52,15 +52,11 @@ fn slugify_title(title: &str) -> String { collapsed.chars().take(50).collect() } -/// Generate a short random suffix for branch uniqueness +/// Generate a short cryptographically random suffix for branch uniqueness fn generate_short_id() -> String { - use std::time::{SystemTime, UNIX_EPOCH}; - let timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_millis(); - // Take last 6 hex digits of timestamp for short unique suffix - format!("{:06x}", (timestamp & 0xFFFFFF) as u32) + use rand::RngExt; + let value: u32 = rand::rng().random(); + format!("{:08x}", value) } // ============================================================================ @@ -198,79 +194,80 @@ impl SafeOutputs { } }; - // Run git diff against the target branch to capture all changes - // Try origin/main first (remote tracking), then main, then HEAD as fallback - let diff_targets = ["origin/main", "main", "HEAD"]; - let mut last_error = String::new(); - let mut diff_output = None; - - for target in &diff_targets { - let output = Command::new("git") - .args(["diff", target]) - .current_dir(&git_dir) - .output() - .await - .map_err(|e| { - anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git diff: {}", e)) - })?; + // Generate patch using git format-patch for proper commit metadata, + // rename detection, and binary file handling. + // + // Steps: + // 1. Stage all changes (tracked + untracked) + // 2. Create a temporary commit + // 3. Generate format-patch output + // 4. 
Reset to undo the temporary commit (preserving working tree) + + // Stage all changes including untracked files + let add_output = Command::new("git") + .args(["add", "-A"]) + .current_dir(&git_dir) + .output() + .await + .map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git add -A: {}", e)) + })?; - if output.status.success() { - diff_output = Some(output); - break; - } - last_error = String::from_utf8_lossy(&output.stderr).to_string(); + if !add_output.status.success() { + return Err(anyhow_to_mcp_error(anyhow::anyhow!( + "git add -A failed: {}", + String::from_utf8_lossy(&add_output.stderr) + ))); } - let mut patch = if let Some(output) = diff_output { - String::from_utf8_lossy(&output.stdout).to_string() - } else { + // Create a temporary commit with the staged changes + let commit_output = Command::new("git") + .args(["commit", "-m", "agent changes", "--allow-empty", "--no-verify"]) + .current_dir(&git_dir) + .output() + .await + .map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!("Failed to create temporary commit: {}", e)) + })?; + + if !commit_output.status.success() { + // Reset staging on failure + let _ = Command::new("git") + .args(["reset", "HEAD", "--quiet"]) + .current_dir(&git_dir) + .output() + .await; return Err(anyhow_to_mcp_error(anyhow::anyhow!( - "git diff failed against all targets (origin/main, main, HEAD): {}", - last_error + "Failed to create temporary commit: {}", + String::from_utf8_lossy(&commit_output.stderr) ))); - }; + } - // Also include untracked files that have been added - let status_output = Command::new("git") - .args(["status", "--porcelain"]) + // Generate format-patch for the last commit (the temporary one) + let format_patch_output = Command::new("git") + .args(["format-patch", "-1", "--stdout", "-M"]) .current_dir(&git_dir) .output() .await - .map_err(|e| anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git status: {}", e)))?; + .map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git 
format-patch: {}", e)) + })?; - if !status_output.status.success() { + // Undo the temporary commit but keep changes in working tree + let _ = Command::new("git") + .args(["reset", "HEAD~1", "--mixed", "--quiet"]) + .current_dir(&git_dir) + .output() + .await; + + if !format_patch_output.status.success() { return Err(anyhow_to_mcp_error(anyhow::anyhow!( - "git status failed: {}", - String::from_utf8_lossy(&status_output.stderr) + "git format-patch failed: {}", + String::from_utf8_lossy(&format_patch_output.stderr) ))); } - let status = String::from_utf8_lossy(&status_output.stdout); - for line in status.lines() { - if line.starts_with("?? ") { - // Untracked file - generate a diff for it - let file_path = line[3..].trim(); - let file_full_path = git_dir.join(file_path); - - if file_full_path.is_file() { - if let Ok(content) = tokio::fs::read_to_string(&file_full_path).await { - patch.push_str(&format!("diff --git a/{} b/{}\n", file_path, file_path)); - patch.push_str("new file mode 100644\n"); - patch.push_str("--- /dev/null\n"); - patch.push_str(&format!("+++ b/{}\n", file_path)); - - let line_count = content.lines().count(); - patch.push_str(&format!("@@ -0,0 +1,{} @@\n", line_count)); - - for line in content.lines() { - patch.push('+'); - patch.push_str(line); - patch.push('\n'); - } - } - } - } - } + let patch = String::from_utf8_lossy(&format_patch_output.stdout).to_string(); Ok(patch) } @@ -407,7 +404,10 @@ fields you want to update." #[tool( name = "create-pull-request", - description = "Create a new pull request to propose code changes. Use this after making file edits to submit them for review and merging. The PR will be created from the current branch with your committed changes. Use 'self' for the pipeline's own repository, or a repository alias from the checkout list." + description = "Create a new pull request to propose code changes. 
Before calling this tool, \ +stage and commit your changes using git add and git commit — each logical change should be its \ +own commit with a descriptive message. The PR will be created from your committed changes. \ +Use 'self' for the pipeline's own repository, or a repository alias from the checkout list." )] async fn create_pr( &self, @@ -464,6 +464,7 @@ fields you want to update." source_branch, patch_filename, repository.to_string(), + sanitized.labels, ); // Write to safe outputs @@ -1035,7 +1036,7 @@ mod tests { #[test] fn test_generate_short_id_format() { let id = generate_short_id(); - assert_eq!(id.len(), 6); + assert_eq!(id.len(), 8); assert!(id.chars().all(|c| c.is_ascii_hexdigit())); } diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index e42960c..c92bf73 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -12,6 +12,63 @@ use anyhow::{Context, ensure}; /// Maximum allowed patch file size (5 MB) const MAX_PATCH_SIZE_BYTES: u64 = 5 * 1024 * 1024; +/// Default maximum files allowed in a single PR +const DEFAULT_MAX_FILES: usize = 100; + +/// Runtime manifest files that are protected by default. +/// These are dependency/build files that could be modified to introduce supply chain attacks. +const PROTECTED_MANIFEST_BASENAMES: &[&str] = &[ + // npm / Node.js + "package.json", + "package-lock.json", + "yarn.lock", + "pnpm-lock.yaml", + "npm-shrinkwrap.json", + // Go + "go.mod", + "go.sum", + // Python + "requirements.txt", + "Pipfile", + "Pipfile.lock", + "pyproject.toml", + "setup.py", + "setup.cfg", + "poetry.lock", + // Ruby + "Gemfile", + "Gemfile.lock", + // Java / Kotlin / Gradle + "pom.xml", + "build.gradle", + "build.gradle.kts", + "settings.gradle", + "settings.gradle.kts", + "gradle.properties", + // .NET / C# + "Directory.Build.props", + "Directory.Build.targets", + "global.json", + // Rust + "Cargo.toml", + "Cargo.lock", +]; + +/// Path prefixes that are protected by default. 
+/// Files under these paths are blocked unless protected-files is set to "allowed". +const PROTECTED_PATH_PREFIXES: &[&str] = &[ + ".github/", + ".pipelines/", + ".azure-pipelines/", + ".agents/", + ".claude/", + ".codex/", + ".copilot/", +]; + +/// Exact filenames (at repo root) that are protected by default. +const PROTECTED_EXACT_PATHS: &[&str] = &["CODEOWNERS"]; + /// Resolve a reviewer identifier (email, display name, or ID) to an Azure DevOps identity ID. /// /// If the input is already a GUID, returns it directly. Otherwise, uses the Azure DevOps @@ -149,6 +206,11 @@ pub struct CreatePrParams { /// Required when multiple repositories are checked out. #[serde(default)] pub repository: Option, + + /// Labels to add to the PR for categorization. + /// These may be subject to an operator-configured allowlist. + #[serde(default)] + pub labels: Vec, } impl Validate for CreatePrParams { @@ -184,6 +246,9 @@ pub struct CreatePrResult { pub patch_file: String, /// Repository alias ("self" or alias from checkout list) pub repository: String, + /// Agent-provided labels (validated against allowed-labels at execution time) + #[serde(default)] + pub agent_labels: Vec, } impl crate::tools::ToolResult for CreatePrResult { @@ -205,6 +270,7 @@ impl CreatePrResult { source_branch: String, patch_file: String, repository: String, + agent_labels: Vec, ) -> Self { Self { name: Self::NAME.to_string(), @@ -213,6 +279,7 @@ impl CreatePrResult { source_branch, patch_file, repository, + agent_labels, } } } @@ -222,10 +289,20 @@ impl CreatePrResult { /// Example front matter: /// ```yaml /// safe-outputs: -/// create_pr: -/// auto_complete: true -/// delete_source_branch: true -/// squash_merge: true +/// create-pull-request: +/// target-branch: main +/// draft: true +/// auto-complete: true +/// delete-source-branch: true +/// squash-merge: true +/// title-prefix: "[Bot] " +/// if-no-changes: warn +/// max-files: 100 +/// protected-files: blocked +/// excluded-files: +/// - "*.lock" 
+/// allowed-labels: +/// - "automated" /// reviewers: /// - "user@example.com" /// labels: @@ -238,6 +315,10 @@ pub struct CreatePrConfig { #[serde(default = "default_target_branch", rename = "target-branch")] pub target_branch: String, + /// Whether to create the PR as a draft (default: true) + #[serde(default = "default_true")] + pub draft: bool, + /// Whether to set auto-complete on the PR (default: false) #[serde(default, rename = "auto-complete")] pub auto_complete: bool, @@ -250,6 +331,32 @@ pub struct CreatePrConfig { #[serde(default = "default_true", rename = "squash-merge")] pub squash_merge: bool, + /// Prefix to prepend to all PR titles + #[serde(default, rename = "title-prefix")] + pub title_prefix: Option, + + /// Behavior when the patch is empty: "warn" (default), "error", "ignore" + #[serde(default = "default_if_no_changes", rename = "if-no-changes")] + pub if_no_changes: String, + + /// Maximum number of files allowed in a single PR (default: 100) + #[serde(default = "default_max_files", rename = "max-files")] + pub max_files: usize, + + /// File protection policy: "blocked" (default) or "allowed" + /// Controls whether manifest/CI files can be modified + #[serde(default = "default_protected_files", rename = "protected-files")] + pub protected_files: String, + + /// Glob patterns for files to exclude from the patch + #[serde(default, rename = "excluded-files")] + pub excluded_files: Vec, + + /// Allowlist of labels the agent is permitted to use. + /// If empty, any labels are accepted. 
+ #[serde(default, rename = "allowed-labels")] + pub allowed_labels: Vec, + /// Reviewers to add to the PR (email addresses or user IDs) #[serde(default)] pub reviewers: Vec, @@ -261,6 +368,10 @@ pub struct CreatePrConfig { /// Work item IDs to link to the PR #[serde(default, rename = "work-items")] pub work_items: Vec, + + /// Whether to create a fallback work item on PR failure (default: true) + #[serde(default = "default_true", rename = "fallback-as-work-item")] + pub fallback_as_work_item: bool, } fn default_target_branch() -> String { @@ -271,16 +382,36 @@ fn default_true() -> bool { true } +fn default_if_no_changes() -> String { + "warn".to_string() +} + +fn default_max_files() -> usize { + DEFAULT_MAX_FILES +} + +fn default_protected_files() -> String { + "blocked".to_string() +} + impl Default for CreatePrConfig { fn default() -> Self { Self { target_branch: default_target_branch(), + draft: true, auto_complete: false, delete_source_branch: true, squash_merge: true, + title_prefix: None, + if_no_changes: default_if_no_changes(), + max_files: default_max_files(), + protected_files: default_protected_files(), + excluded_files: Vec::new(), + allowed_labels: Vec::new(), reviewers: Vec::new(), labels: Vec::new(), work_items: Vec::new(), + fallback_as_work_item: true, } } } @@ -319,9 +450,17 @@ impl Executor for CreatePrResult { let config: CreatePrConfig = ctx.get_tool_config("create-pull-request"); debug!("Target branch from config: {}", config.target_branch); + debug!("Draft: {}", config.draft); debug!("Auto-complete: {}", config.auto_complete); debug!("Squash merge: {}", config.squash_merge); + // Apply title prefix if configured + let effective_title = if let Some(ref prefix) = config.title_prefix { + format!("{}{}", prefix, self.title) + } else { + self.title.clone() + }; + // Validate repository against allowed list debug!( "Validating repository '{}' against allowed list", @@ -419,6 +558,40 @@ impl Executor for CreatePrResult { .context("Failed to read 
patch file")?; debug!("Patch content size: {} bytes", patch_content.len()); + // Filter excluded files from patch content if configured + let patch_content = if !config.excluded_files.is_empty() { + debug!("Filtering {} excluded-files patterns from patch", config.excluded_files.len()); + let filtered = filter_excluded_files_from_patch(&patch_content, &config.excluded_files); + debug!("Patch size after exclusion: {} bytes (was {} bytes)", filtered.len(), patch_content.len()); + if filtered.trim().is_empty() { + // All files were excluded + match config.if_no_changes.as_str() { + "error" => { + return Ok(ExecutionResult::failure( + "All files in patch were excluded by excluded-files patterns".to_string(), + )); + } + "ignore" => { + return Ok(ExecutionResult::success( + "All files in patch were excluded — nothing to do".to_string(), + )); + } + _ => { + return Ok(ExecutionResult::failure( + "All files in patch were excluded by excluded-files patterns".to_string(), + )); + } + } + } + // Rewrite the filtered patch to the patch file + tokio::fs::write(&patch_path, &filtered) + .await + .context("Failed to write filtered patch file")?; + filtered + } else { + patch_content + }; + // Security: Validate patch paths before applying debug!("Validating patch paths for security"); if let Err(e) = validate_patch_paths(&patch_content) { @@ -430,6 +603,39 @@ impl Executor for CreatePrResult { } debug!("Patch path validation passed"); + // Extract file paths from patch for validation + let patch_paths = extract_paths_from_patch(&patch_content); + + // Security: File protection check + if config.protected_files != "allowed" { + let protected = find_protected_files(&patch_paths); + if !protected.is_empty() { + warn!( + "Patch modifies {} protected file(s): {:?}", + protected.len(), + protected + ); + return Ok(ExecutionResult::failure(format!( + "Patch modifies protected files (set protected-files: allowed to override): {}", + protected.join(", ") + ))); + } + } + + // Security: 
Max files per PR check + if patch_paths.len() > config.max_files { + warn!( + "Patch contains {} files, exceeding max of {}", + patch_paths.len(), + config.max_files + ); + return Ok(ExecutionResult::failure(format!( + "Patch contains {} files, exceeding maximum of {} files per PR", + patch_paths.len(), + config.max_files + ))); + } + // Use target branch from config let target_branch = &config.target_branch; let source_ref = format!("refs/heads/{}", self.source_branch); @@ -539,79 +745,128 @@ impl Executor for CreatePrResult { } debug!("Source branch created"); - // Security: Validate patch with git apply --check first (dry run) - debug!("Running git apply --check (dry run)"); - let check_output = Command::new("git") - .args(["apply", "--check", &patch_path.to_string_lossy()]) + // Apply the format-patch using git am --3way for proper conflict handling. + // git am handles the email-style patch format from git format-patch and + // --3way enables three-way merge for better conflict resolution. 
+ debug!("Applying patch with git am --3way"); + let am_output = Command::new("git") + .args(["am", "--3way", &patch_path.to_string_lossy()]) .current_dir(&worktree_path) .output() .await - .context("Failed to run git apply --check")?; + .context("Failed to run git am")?; - if !check_output.status.success() { - warn!( - "Patch dry-run failed: {}", - String::from_utf8_lossy(&check_output.stderr) - ); - return Ok(ExecutionResult::failure(format!( - "Patch validation failed (git apply --check): {}", - String::from_utf8_lossy(&check_output.stderr) - ))); - } - debug!("Patch dry-run passed"); + // Track whether git am created a commit (affects how we collect changes) + let patch_committed = am_output.status.success(); - // Apply the patch - debug!("Applying patch"); - let apply_output = Command::new("git") - .args(["apply", &patch_path.to_string_lossy()]) - .current_dir(&worktree_path) - .output() - .await - .context("Failed to run git apply")?; + if !patch_committed { + let stderr = String::from_utf8_lossy(&am_output.stderr); + debug!("git am --3way failed: {}", stderr); - if !apply_output.status.success() { - warn!( - "Failed to apply patch: {}", - String::from_utf8_lossy(&apply_output.stderr) - ); - return Ok(ExecutionResult::failure(format!( - "Failed to apply patch: {}", - String::from_utf8_lossy(&apply_output.stderr) - ))); + // Abort the failed am to leave worktree clean + let _ = Command::new("git") + .args(["am", "--abort"]) + .current_dir(&worktree_path) + .output() + .await; + + // Fallback: try git apply (handles plain diff format for backward compatibility) + debug!("Falling back to git apply --3way"); + let apply_output = Command::new("git") + .args(["apply", "--3way", &patch_path.to_string_lossy()]) + .current_dir(&worktree_path) + .output() + .await + .context("Failed to run git apply --3way")?; + + if !apply_output.status.success() { + let err_msg = format!( + "Patch could not be applied (conflicts): {}", + String::from_utf8_lossy(&apply_output.stderr) 
+ ); + warn!("{}", err_msg); + return Ok(ExecutionResult::failure(err_msg)); + } + debug!("Patch applied with git apply --3way fallback"); + } else { + debug!("Patch applied successfully with git am"); } - debug!("Patch applied successfully"); - // Get list of changed files using git status + // Collect changed files. The method depends on how the patch was applied: + // - git am: changes are committed → use git diff-tree to compare with parent + // - git apply: changes are in working tree → use git status --porcelain debug!("Getting list of changed files"); - let status_output = Command::new("git") - .args(["status", "--porcelain"]) - .current_dir(&worktree_path) - .output() - .await - .context("Failed to run git status")?; + let (status_str, use_diff_tree) = if patch_committed { + let diff_tree_output = Command::new("git") + .args(["diff-tree", "--no-commit-id", "-r", "--name-status", "HEAD"]) + .current_dir(&worktree_path) + .output() + .await + .context("Failed to run git diff-tree")?; - if !status_output.status.success() { - warn!( - "Failed to get git status: {}", - String::from_utf8_lossy(&status_output.stderr) - ); - return Ok(ExecutionResult::failure(format!( - "Failed to get git status: {}", - String::from_utf8_lossy(&status_output.stderr) - ))); - } + if !diff_tree_output.status.success() { + warn!( + "Failed to get diff-tree: {}", + String::from_utf8_lossy(&diff_tree_output.stderr) + ); + return Ok(ExecutionResult::failure(format!( + "Failed to get diff-tree: {}", + String::from_utf8_lossy(&diff_tree_output.stderr) + ))); + } + (String::from_utf8_lossy(&diff_tree_output.stdout).to_string(), true) + } else { + let status_output = Command::new("git") + .args(["status", "--porcelain"]) + .current_dir(&worktree_path) + .output() + .await + .context("Failed to run git status")?; - // Parse changed files and build ADO push payload - let status_str = String::from_utf8_lossy(&status_output.stdout); - debug!("Git status output:\n{}", status_str); - let changes = 
collect_changes_from_worktree(&worktree_path, &status_str).await?; + if !status_output.status.success() { + warn!( + "Failed to get git status: {}", + String::from_utf8_lossy(&status_output.stderr) + ); + return Ok(ExecutionResult::failure(format!( + "Failed to get git status: {}", + String::from_utf8_lossy(&status_output.stderr) + ))); + } + (String::from_utf8_lossy(&status_output.stdout).to_string(), false) + }; + + debug!("Change detection output:\n{}", status_str); + let changes = if use_diff_tree { + collect_changes_from_diff_tree(&worktree_path, &status_str).await? + } else { + collect_changes_from_worktree(&worktree_path, &status_str).await? + }; debug!("Collected {} file changes for push", changes.len()); if changes.is_empty() { - warn!("No changes detected after applying patch"); - return Ok(ExecutionResult::failure( - "No changes detected after applying patch".to_string(), - )); + // Handle no-changes based on config + match config.if_no_changes.as_str() { + "error" => { + warn!("No changes detected after applying patch (if-no-changes: error)"); + return Ok(ExecutionResult::failure( + "No changes detected after applying patch".to_string(), + )); + } + "ignore" => { + info!("No changes detected after applying patch (if-no-changes: ignore)"); + return Ok(ExecutionResult::success( + "No changes detected — nothing to do".to_string(), + )); + } + _ => { + // "warn" (default) + warn!("No changes detected after applying patch (if-no-changes: warn)"); + return Ok(ExecutionResult::failure( + "No changes detected after applying patch".to_string(), + )); + } + } } // Use ADO REST API to create branch and push changes @@ -671,7 +926,7 @@ impl Executor for CreatePrResult { "oldObjectId": "0000000000000000000000000000000000000000" }], "commits": [{ - "comment": self.title, + "comment": effective_title, "changes": changes, "parents": [base_commit] }] @@ -701,6 +956,9 @@ impl Executor for CreatePrResult { } debug!("Changes pushed successfully"); + // Append provenance 
footer to description + let description_with_footer = format!("{}{}", self.description, generate_pr_footer()); + // Create the pull request via REST API info!("Creating pull request"); let pr_url = format!( @@ -712,8 +970,9 @@ impl Executor for CreatePrResult { let mut pr_body = serde_json::json!({ "sourceRefName": source_ref, "targetRefName": target_ref, - "title": self.title, - "description": self.description, + "title": effective_title, + "description": description_with_footer, + "isDraft": config.draft, }); // Add work item links if configured @@ -728,12 +987,40 @@ impl Executor for CreatePrResult { ); } - // Add labels if configured - if !config.labels.is_empty() { - debug!("Adding {} labels", config.labels.len()); + // Validate and add labels (merge operator labels + validated agent labels) + let mut all_labels = config.labels.clone(); + + // Validate agent-provided labels against allowed-labels + if !self.agent_labels.is_empty() { + if !config.allowed_labels.is_empty() { + let disallowed: Vec<_> = self + .agent_labels + .iter() + .filter(|l| !config.allowed_labels.contains(l)) + .collect(); + if !disallowed.is_empty() { + warn!( + "Agent labels not in allowed-labels: {:?}", + disallowed + ); + return Ok(ExecutionResult::failure(format!( + "Agent-provided labels not in allowed-labels: {}", + disallowed.iter().map(|l| l.as_str()).collect::>().join(", ") + ))); + } + } + // Merge agent labels (dedup) + for label in &self.agent_labels { + if !all_labels.contains(label) { + all_labels.push(label.clone()); + } + } + } + + if !all_labels.is_empty() { + debug!("Adding {} labels", all_labels.len()); pr_body["labels"] = serde_json::json!( - config - .labels + all_labels .iter() .map(|l| serde_json::json!({"name": l})) .collect::>() @@ -752,6 +1039,41 @@ impl Executor for CreatePrResult { let status = pr_response.status(); let body = pr_response.text().await.unwrap_or_default(); warn!("Failed to create pull request: {} - {}", status, body); + + // Fallback: create a 
work item with branch reference if enabled + if config.fallback_as_work_item { + info!("PR creation failed, recording fallback info"); + let fallback_description = format!( + "## Pull Request Creation Failed\n\n\ + A pull request could not be created automatically.\n\n\ + **Branch:** `{}`\n\ + **Target:** `{}`\n\ + **Repository:** `{}`\n\n\ + **Error:** {} - {}\n\n\ + ### Original PR Description\n\n\ + {}\n\n\ + ---\n\ + *To create the PR manually, merge branch `{}` into `{}`.*", + self.source_branch, target_branch, self.repository, + status, body, + self.description, + self.source_branch, target_branch + ); + return Ok(ExecutionResult::failure_with_data( + format!( + "Failed to create pull request: {} - {}. Branch '{}' was pushed. Consider creating a work item to track the manual PR creation.", + status, body, self.source_branch, + ), + serde_json::json!({ + "fallback": "work-item", + "branch": self.source_branch, + "target_branch": target_branch, + "repository": self.repository, + "description": fallback_description + }), + )); + } + return Ok(ExecutionResult::failure(format!( "Failed to create pull request: {} - {}", status, body @@ -866,17 +1188,19 @@ impl Executor for CreatePrResult { } info!( - "PR #{} created successfully: {} -> {}", - pr_id, self.source_branch, target_branch + "PR #{} created successfully: {} -> {}{}", + pr_id, self.source_branch, target_branch, + if config.draft { " (draft)" } else { "" } ); Ok(ExecutionResult::success_with_data( - format!("Created pull request #{}: {}", pr_id, self.title), + format!("Created pull request #{}: {}", pr_id, effective_title), serde_json::json!({ "pull_request_id": pr_id, "url": pr_web_url, "source_branch": self.source_branch, - "target_branch": target_branch + "target_branch": target_branch, + "draft": config.draft }), )) } @@ -998,7 +1322,98 @@ async fn collect_changes_from_worktree( Ok(changes) } -/// Validate that patch file paths don't contain dangerous patterns +/// Collect file changes from a git 
diff-tree --name-status output. +/// +/// Used when git am has already committed the changes. Parses the output format: +/// `M\tpath`, `A\tpath`, `D\tpath`, `R100\told_path\tnew_path` +async fn collect_changes_from_diff_tree( + worktree_path: &std::path::Path, + diff_tree_output: &str, +) -> anyhow::Result> { + let mut changes = Vec::new(); + + for line in diff_tree_output.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + + let parts: Vec<&str> = line.split('\t').collect(); + if parts.len() < 2 { + continue; + } + + let status_code = parts[0]; + let file_path = parts[1]; + + // Validate path for security + validate_single_path(file_path)?; + + let full_path = worktree_path.join(file_path); + + if status_code == "D" { + // Deleted file + changes.push(serde_json::json!({ + "changeType": "delete", + "item": { + "path": format!("/{}", file_path) + } + })); + } else if status_code == "A" { + // Added file + if full_path.is_file() { + let content = tokio::fs::read_to_string(&full_path) + .await + .with_context(|| format!("Failed to read new file: {}", file_path))?; + + changes.push(serde_json::json!({ + "changeType": "add", + "item": { + "path": format!("/{}", file_path) + }, + "newContent": { + "content": content, + "contentType": "rawtext" + } + })); + } + } else if status_code.starts_with('R') && parts.len() >= 3 { + // Renamed file: R100\told_path\tnew_path + let old_path = file_path; + let new_path = parts[2]; + validate_single_path(old_path)?; + validate_single_path(new_path)?; + + changes.push(serde_json::json!({ + "changeType": "rename", + "sourceServerItem": format!("/{}", old_path), + "item": { + "path": format!("/{}", new_path) + } + })); + } else { + // Modified or other — read current content + if full_path.is_file() { + let content = tokio::fs::read_to_string(&full_path) + .await + .with_context(|| format!("Failed to read modified file: {}", file_path))?; + + changes.push(serde_json::json!({ + "changeType": "edit", + "item": { + 
"path": format!("/{}", file_path) + }, + "newContent": { + "content": content, + "contentType": "rawtext" + } + })); + } + } + } + + Ok(changes) +} /// /// Security checks: /// - No path traversal (..) @@ -1078,6 +1493,165 @@ fn validate_single_path(path: &str) -> anyhow::Result<()> { Ok(()) } +/// Extract all file paths from a patch/diff content. +/// Returns deduplicated list of file paths referenced in diff headers. +fn extract_paths_from_patch(patch_content: &str) -> Vec { + let mut paths = Vec::new(); + for line in patch_content.lines() { + if line.starts_with("diff --git") { + let parts: Vec<&str> = line.split_whitespace().collect(); + for part in parts.iter().skip(2) { + let path = part.trim_start_matches("a/").trim_start_matches("b/"); + if !path.is_empty() { + paths.push(path.to_string()); + } + } + } else if line.starts_with("rename from ") || line.starts_with("rename to ") || + line.starts_with("copy from ") || line.starts_with("copy to ") { + if let Some(path) = line.split_whitespace().last() { + if !path.is_empty() { + paths.push(path.to_string()); + } + } + } + } + paths.sort(); + paths.dedup(); + paths +} + +/// Check if any file paths in the patch are protected. +/// +/// Protected files include: +/// - Runtime manifests (package.json, go.mod, Cargo.toml, etc.) +/// - CI/pipeline configurations (.github/, .pipelines/, etc.) +/// - Agent instruction files (.agents/, .claude/, .codex/, .copilot/) +/// - Access control files (CODEOWNERS) +/// +/// Returns a list of protected file paths found, or empty vec if none. 
+fn find_protected_files(paths: &[String]) -> Vec { + let mut protected = Vec::new(); + for path in paths { + let lower_path = path.to_lowercase(); + + // Check basename against manifest list + let basename = path.rsplit('/').next().unwrap_or(path); + let lower_basename = basename.to_lowercase(); + for manifest in PROTECTED_MANIFEST_BASENAMES { + if lower_basename == *manifest { + protected.push(path.clone()); + break; + } + } + + // Check path prefixes + for prefix in PROTECTED_PATH_PREFIXES { + if lower_path.starts_with(prefix) || lower_path.starts_with(&prefix.replace('/', "\\")) { + if !protected.contains(path) { + protected.push(path.clone()); + } + break; + } + } + + // Check exact paths + for exact in PROTECTED_EXACT_PATHS { + if lower_path == exact.to_lowercase() { + if !protected.contains(path) { + protected.push(path.clone()); + } + break; + } + } + } + protected +} + +/// Check if file paths match an allowed-files glob allowlist. +/// Returns list of paths that do NOT match any allowed pattern. +#[cfg(test)] +fn find_disallowed_files(paths: &[String], allowed_patterns: &[String]) -> Vec { + if allowed_patterns.is_empty() { + return Vec::new(); // No allowlist = all files allowed + } + paths + .iter() + .filter(|path| { + !allowed_patterns + .iter() + .any(|pattern| glob_match::glob_match(pattern, path)) + }) + .cloned() + .collect() +} + +/// Generate a provenance footer for the PR body +fn generate_pr_footer() -> String { + let timestamp = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ"); + format!( + "\n\n---\n\ + > 🤖 *This pull request was created by an automated agent.*\n\ + > Generated at: {}\n\ + > Compiler: ado-aw v{}", + timestamp, + env!("CARGO_PKG_VERSION") + ) +} + +/// Filter out diff entries for files matching excluded-files glob patterns. +/// Splits the patch into per-file diff blocks and removes those matching any pattern. 
+fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[String]) -> String { + if excluded_patterns.is_empty() { + return patch_content.to_string(); + } + + let mut result = String::with_capacity(patch_content.len()); + let mut current_block = String::new(); + let mut current_path: Option = None; + + for line in patch_content.lines() { + if line.starts_with("diff --git") { + // Flush previous block if it's not excluded + if let Some(ref path) = current_path { + if !excluded_patterns.iter().any(|p| glob_match::glob_match(p, path)) { + result.push_str(¤t_block); + } else { + debug!("Excluding file from patch: {}", path); + } + } else if !current_block.is_empty() { + result.push_str(¤t_block); + } + + // Start new block + current_block = String::new(); + current_block.push_str(line); + current_block.push('\n'); + + // Extract path from "diff --git a/path b/path" + let parts: Vec<&str> = line.split_whitespace().collect(); + current_path = parts.get(3).map(|p| { + p.trim_start_matches("b/").to_string() + }); + } else { + current_block.push_str(line); + current_block.push('\n'); + } + } + + // Flush last block + if let Some(ref path) = current_path { + if !excluded_patterns.iter().any(|p| glob_match::glob_match(p, path)) { + result.push_str(¤t_block); + } else { + debug!("Excluding file from patch: {}", path); + } + } else if !current_block.is_empty() { + result.push_str(¤t_block); + } + + result +} + #[cfg(test)] mod tests { use super::*; @@ -1088,6 +1662,7 @@ mod tests { title: "Fix bug in parser".to_string(), description: "This PR fixes a critical bug in the parser module.".to_string(), repository: None, + labels: vec![], }; assert!(params.validate().is_ok()); } @@ -1098,6 +1673,7 @@ mod tests { title: "Fix".to_string(), description: "This PR fixes a critical bug in the parser module.".to_string(), repository: None, + labels: vec![], }; assert!(params.validate().is_err()); } @@ -1108,6 +1684,7 @@ mod tests { title: "Fix bug in 
parser".to_string(), description: "Fix bug".to_string(), repository: None, + labels: vec![], }; assert!(params.validate().is_err()); } @@ -1116,9 +1693,16 @@ mod tests { fn test_config_default_target_branch() { let config = CreatePrConfig::default(); assert_eq!(config.target_branch, "main"); + assert!(config.draft); assert!(!config.auto_complete); assert!(config.delete_source_branch); assert!(config.squash_merge); + assert_eq!(config.if_no_changes, "warn"); + assert_eq!(config.max_files, 100); + assert_eq!(config.protected_files, "blocked"); + assert!(config.excluded_files.is_empty()); + assert!(config.allowed_labels.is_empty()); + assert!(config.fallback_as_work_item); } #[test] @@ -1213,4 +1797,110 @@ new file mode 100755 fn test_validate_single_path_null_byte() { assert!(validate_single_path("file\0.txt").is_err()); } + + // ─── Protected files detection ────────────────────────────────────────── + + #[test] + fn test_find_protected_files_manifests() { + let paths = vec![ + "src/main.rs".to_string(), + "package.json".to_string(), + "go.mod".to_string(), + ]; + let protected = find_protected_files(&paths); + assert_eq!(protected, vec!["package.json", "go.mod"]); + } + + #[test] + fn test_find_protected_files_ci_configs() { + let paths = vec![ + "src/lib.rs".to_string(), + ".github/workflows/ci.yml".to_string(), + ".pipelines/build.yml".to_string(), + ]; + let protected = find_protected_files(&paths); + assert!(protected.contains(&".github/workflows/ci.yml".to_string())); + assert!(protected.contains(&".pipelines/build.yml".to_string())); + } + + #[test] + fn test_find_protected_files_agent_configs() { + let paths = vec![ + ".agents/config.md".to_string(), + ".claude/settings.json".to_string(), + ".copilot/instructions.md".to_string(), + ]; + let protected = find_protected_files(&paths); + assert_eq!(protected.len(), 3); + } + + #[test] + fn test_find_protected_files_codeowners() { + let paths = vec!["CODEOWNERS".to_string(), "README.md".to_string()]; + let 
protected = find_protected_files(&paths); + assert_eq!(protected, vec!["CODEOWNERS"]); + } + + #[test] + fn test_find_protected_files_none() { + let paths = vec![ + "src/main.rs".to_string(), + "docs/README.md".to_string(), + ]; + let protected = find_protected_files(&paths); + assert!(protected.is_empty()); + } + + #[test] + fn test_find_protected_files_nested_manifest() { + let paths = vec!["services/api/package.json".to_string()]; + let protected = find_protected_files(&paths); + assert_eq!(protected, vec!["services/api/package.json"]); + } + + // ─── Excluded files filtering ─────────────────────────────────────────── + + #[test] + fn test_filter_excluded_files_basic() { + let patch = "diff --git a/src/main.rs b/src/main.rs\n--- a/src/main.rs\n+++ b/src/main.rs\n@@ -1 +1 @@\n-old\n+new\ndiff --git a/Cargo.lock b/Cargo.lock\n--- a/Cargo.lock\n+++ b/Cargo.lock\n@@ -1 +1 @@\n-old\n+new\n"; + let result = filter_excluded_files_from_patch(patch, &["*.lock".to_string()]); + assert!(result.contains("src/main.rs")); + assert!(!result.contains("Cargo.lock")); + } + + #[test] + fn test_filter_excluded_files_empty_patterns() { + let patch = "diff --git a/file.txt b/file.txt\n+content\n"; + let result = filter_excluded_files_from_patch(patch, &[]); + assert_eq!(result, patch); + } + + // ─── Extract paths from patch ─────────────────────────────────────────── + + #[test] + fn test_extract_paths_from_patch() { + let patch = "diff --git a/src/main.rs b/src/main.rs\nindex abc..def\n--- a/src/main.rs\n+++ b/src/main.rs\ndiff --git a/README.md b/README.md\n"; + let paths = extract_paths_from_patch(patch); + assert!(paths.contains(&"src/main.rs".to_string())); + assert!(paths.contains(&"README.md".to_string())); + } + + // ─── Allowed-labels validation ────────────────────────────────────────── + + #[test] + fn test_find_disallowed_files_with_allowlist() { + let paths = vec!["src/main.rs".to_string(), "tests/test.rs".to_string(), "docs/api.md".to_string()]; + let allowed = 
vec!["src/**".to_string()]; + let disallowed = find_disallowed_files(&paths, &allowed); + assert!(disallowed.contains(&"tests/test.rs".to_string())); + assert!(disallowed.contains(&"docs/api.md".to_string())); + assert!(!disallowed.contains(&"src/main.rs".to_string())); + } + + #[test] + fn test_find_disallowed_files_empty_allowlist() { + let paths = vec!["anything.rs".to_string()]; + let disallowed = find_disallowed_files(&paths, &[]); + assert!(disallowed.is_empty()); + } } diff --git a/src/tools/result.rs b/src/tools/result.rs index 242e8f2..450be98 100644 --- a/src/tools/result.rs +++ b/src/tools/result.rs @@ -140,6 +140,15 @@ impl ExecutionResult { data: None, } } + + /// Create a failed execution result with additional data + pub fn failure_with_data(message: impl Into, data: serde_json::Value) -> Self { + Self { + success: false, + message: message.into(), + data: Some(data), + } + } } /// Trait for executing tool results in Stage 2 of the pipeline. From 65e5099bcb26e06509fef930a62dac05e0a5f87f Mon Sep 17 00:00:00 2001 From: James Devine Date: Sat, 11 Apr 2026 17:44:45 +0100 Subject: [PATCH 02/28] fix(create-pr): address review feedback - Fix if-no-changes 'warn' mode: add ExecutionResult::warning() with dedicated exit code 2. Pipeline step maps exit code 2 to ##vso[task.complete result=SucceededWithIssues;] for yellow badge. Previously 'warn' was identical to 'error' (both returned failure). - Propagate git reset HEAD~1 failure in generate_patch instead of silently swallowing it. A failed reset would leave a synthetic commit in the repo, breaking subsequent operations. - Fix path extraction for filenames with spaces: switch extract_paths_from_patch and filter_excluded_files_from_patch from parsing 'diff --git' headers (breaks on quoted paths) to using '--- a/' and '+++ b/' lines which handle spaces correctly. - Remove dead find_disallowed_files function (was cfg(test) only, never wired into production executor). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/execute.rs | 22 +++++++--- src/main.rs | 12 ++++-- src/mcp.rs | 14 ++++++- src/tools/create_pr.rs | 94 ++++++++++++++++++------------------------ src/tools/result.rs | 20 +++++++++ templates/base.yml | 9 +++- 6 files changed, 105 insertions(+), 66 deletions(-) diff --git a/src/execute.rs b/src/execute.rs index 9a7bea8..85e15ef 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -156,7 +156,15 @@ pub async fn execute_safe_outputs( match execute_safe_output(entry, ctx).await { Ok((tool_name, result)) => { - if result.success { + if result.warning { + warn!( + "[{}/{}] {} warning: {}", + i + 1, + entries.len(), + tool_name, + result.message + ); + } else if result.success { info!( "[{}/{}] {} succeeded: {}", i + 1, @@ -173,12 +181,13 @@ pub async fn execute_safe_outputs( result.message ); } + let symbol = if result.warning { "⚠" } else if result.success { "✓" } else { "✗" }; println!( "[{}/{}] {} - {} - {}", i + 1, entries.len(), tool_name, - if result.success { "✓" } else { "✗" }, + symbol, result.message ); results.push(result); @@ -193,11 +202,12 @@ pub async fn execute_safe_outputs( } // Log final summary - let success_count = results.iter().filter(|r| r.success).count(); - let failure_count = results.len() - success_count; + let success_count = results.iter().filter(|r| r.success && !r.warning).count(); + let warning_count = results.iter().filter(|r| r.warning).count(); + let failure_count = results.iter().filter(|r| !r.success).count(); info!( - "Stage 2 execution complete: {} succeeded, {} failed", - success_count, failure_count + "Stage 2 execution complete: {} succeeded, {} warnings, {} failed", + success_count, warning_count, failure_count ); Ok(results) diff --git a/src/main.rs b/src/main.rs index 6ff98ca..0bf9c0e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -237,18 +237,24 @@ async fn main() -> Result<()> { // Print summary let success_count = results.iter().filter(|r| 
r.success).count(); - let failure_count = results.len() - success_count; + let warning_count = results.iter().filter(|r| r.warning).count(); + let failure_count = results.iter().filter(|r| !r.success).count(); println!("\n--- Execution Summary ---"); println!( - "Total: {} | Success: {} | Failed: {}", + "Total: {} | Success: {} | Warnings: {} | Failed: {}", results.len(), - success_count, + success_count - warning_count, + warning_count, failure_count ); if failure_count > 0 { std::process::exit(1); + } else if warning_count > 0 { + // Exit code 2 signals "succeeded with issues" — the pipeline + // step wraps this to emit ##vso[task.complete result=SucceededWithIssues;] + std::process::exit(2); } } Commands::Proxy { allowed_hosts } => { diff --git a/src/mcp.rs b/src/mcp.rs index fd6aed0..9982f05 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -254,11 +254,21 @@ impl SafeOutputs { })?; // Undo the temporary commit but keep changes in working tree - let _ = Command::new("git") + let reset_output = Command::new("git") .args(["reset", "HEAD~1", "--mixed", "--quiet"]) .current_dir(&git_dir) .output() - .await; + .await + .map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git reset: {}", e)) + })?; + + if !reset_output.status.success() { + return Err(anyhow_to_mcp_error(anyhow::anyhow!( + "git reset HEAD~1 failed (repository may retain synthetic commit): {}", + String::from_utf8_lossy(&reset_output.stderr) + ))); + } if !format_patch_output.status.success() { return Err(anyhow_to_mcp_error(anyhow::anyhow!( diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index c92bf73..d3ffd95 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -577,7 +577,9 @@ impl Executor for CreatePrResult { )); } _ => { - return Ok(ExecutionResult::failure( + // "warn" (default) — succeed with warning + warn!("All files in patch were excluded by excluded-files patterns (if-no-changes: warn)"); + return Ok(ExecutionResult::warning( "All files in patch were 
excluded by excluded-files patterns".to_string(), )); } @@ -860,9 +862,9 @@ impl Executor for CreatePrResult { )); } _ => { - // "warn" (default) + // "warn" (default) — succeed with warning warn!("No changes detected after applying patch (if-no-changes: warn)"); - return Ok(ExecutionResult::failure( + return Ok(ExecutionResult::warning( "No changes detected after applying patch".to_string(), )); } @@ -1494,24 +1496,28 @@ fn validate_single_path(path: &str) -> anyhow::Result<()> { } /// Extract all file paths from a patch/diff content. -/// Returns deduplicated list of file paths referenced in diff headers. +/// Returns deduplicated list of file paths referenced in the patch. +/// Uses `--- a/` and `+++ b/` lines for robust parsing (handles quoted paths +/// with spaces that break `diff --git` header parsing via split_whitespace). fn extract_paths_from_patch(patch_content: &str) -> Vec { let mut paths = Vec::new(); for line in patch_content.lines() { - if line.starts_with("diff --git") { - let parts: Vec<&str> = line.split_whitespace().collect(); - for part in parts.iter().skip(2) { - let path = part.trim_start_matches("a/").trim_start_matches("b/"); - if !path.is_empty() { - paths.push(path.to_string()); - } + if let Some(rest) = line.strip_prefix("--- a/") { + let path = rest.trim().trim_matches('"'); + if !path.is_empty() { + paths.push(path.to_string()); + } + } else if let Some(rest) = line.strip_prefix("+++ b/") { + let path = rest.trim().trim_matches('"'); + if !path.is_empty() { + paths.push(path.to_string()); } } else if line.starts_with("rename from ") || line.starts_with("rename to ") || line.starts_with("copy from ") || line.starts_with("copy to ") { - if let Some(path) = line.split_whitespace().last() { - if !path.is_empty() { - paths.push(path.to_string()); - } + // "rename from " / "rename to " + let path = line.splitn(3, ' ').nth(2).unwrap_or("").trim_matches('"'); + if !path.is_empty() { + paths.push(path.to_string()); } } } @@ -1567,24 +1573,6 @@ 
fn find_protected_files(paths: &[String]) -> Vec { protected } -/// Check if file paths match an allowed-files glob allowlist. -/// Returns list of paths that do NOT match any allowed pattern. -#[cfg(test)] -fn find_disallowed_files(paths: &[String], allowed_patterns: &[String]) -> Vec { - if allowed_patterns.is_empty() { - return Vec::new(); // No allowlist = all files allowed - } - paths - .iter() - .filter(|path| { - !allowed_patterns - .iter() - .any(|pattern| glob_match::glob_match(pattern, path)) - }) - .cloned() - .collect() -} - /// Generate a provenance footer for the PR body fn generate_pr_footer() -> String { let timestamp = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ"); @@ -1600,6 +1588,7 @@ fn generate_pr_footer() -> String { /// Filter out diff entries for files matching excluded-files glob patterns. /// Splits the patch into per-file diff blocks and removes those matching any pattern. +/// Uses `+++ b/` lines for path extraction (robust with quoted/space-containing paths). 
fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[String]) -> String { if excluded_patterns.is_empty() { return patch_content.to_string(); @@ -1622,17 +1611,16 @@ fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[St result.push_str(&current_block); } - // Start new block + // Start new block, path will be set when we see +++ b/ current_block = String::new(); current_block.push_str(line); current_block.push('\n'); - - // Extract path from "diff --git a/path b/path" - let parts: Vec<&str> = line.split_whitespace().collect(); - current_path = parts.get(3).map(|p| { - p.trim_start_matches("b/").to_string() - }); + current_path = None; } else { + // Extract path from "+++ b/path" line (handles quoted paths) + if let Some(rest) = line.strip_prefix("+++ b/") { + current_path = Some(rest.trim().trim_matches('"').to_string()); + } current_block.push_str(line); current_block.push('\n'); } } @@ -1879,28 +1867,26 @@ new file mode 100755 #[test] fn test_extract_paths_from_patch() { - let patch = "diff --git a/src/main.rs b/src/main.rs\nindex abc..def\n--- a/src/main.rs\n+++ b/src/main.rs\ndiff --git a/README.md b/README.md\n"; + let patch = "diff --git a/src/main.rs b/src/main.rs\nindex abc..def\n--- a/src/main.rs\n+++ b/src/main.rs\ndiff --git a/README.md b/README.md\n--- a/README.md\n+++ b/README.md\n"; let paths = extract_paths_from_patch(patch); assert!(paths.contains(&"src/main.rs".to_string())); assert!(paths.contains(&"README.md".to_string())); } - // ─── Allowed-labels validation ────────────────────────────────────────── - #[test] - fn test_find_disallowed_files_with_allowlist() { - let paths = vec!["src/main.rs".to_string(), "tests/test.rs".to_string(), "docs/api.md".to_string()]; - let allowed = vec!["src/**".to_string()]; - let disallowed = find_disallowed_files(&paths, &allowed); - assert!(disallowed.contains(&"tests/test.rs".to_string())); - assert!(disallowed.contains(&"docs/api.md".to_string())); - 
assert!(!disallowed.contains(&"src/main.rs".to_string())); + fn test_extract_paths_from_patch_with_spaces() { + let patch = "diff --git \"a/path with spaces/file.txt\" \"b/path with spaces/file.txt\"\n--- a/path with spaces/file.txt\n+++ b/path with spaces/file.txt\n"; + let paths = extract_paths_from_patch(patch); + assert!(paths.contains(&"path with spaces/file.txt".to_string())); } #[test] - fn test_find_disallowed_files_empty_allowlist() { - let paths = vec!["anything.rs".to_string()]; - let disallowed = find_disallowed_files(&paths, &[]); - assert!(disallowed.is_empty()); + fn test_extract_paths_new_file() { + let patch = "diff --git a/new.txt b/new.txt\nnew file mode 100644\n--- /dev/null\n+++ b/new.txt\n"; + let paths = extract_paths_from_patch(patch); + assert!(paths.contains(&"new.txt".to_string())); + // /dev/null from --- should not be included (no a/ prefix) + assert!(!paths.contains(&"/dev/null".to_string())); } + } diff --git a/src/tools/result.rs b/src/tools/result.rs index 450be98..19e8086 100644 --- a/src/tools/result.rs +++ b/src/tools/result.rs @@ -106,6 +106,10 @@ impl Default for ExecutionContext { pub struct ExecutionResult { /// Whether the execution succeeded pub success: bool, + /// Whether this is a warning (succeeded with issues). + /// When true, success is also true — the action completed but with caveats. + #[serde(skip_serializing_if = "std::ops::Not::not")] + pub warning: bool, /// Human-readable message describing the outcome pub message: String, /// Optional additional data (e.g., work item ID) @@ -118,6 +122,7 @@ impl ExecutionResult { pub fn success(message: impl Into) -> Self { Self { success: true, + warning: false, message: message.into(), data: None, } @@ -127,15 +132,29 @@ impl ExecutionResult { pub fn success_with_data(message: impl Into, data: serde_json::Value) -> Self { Self { success: true, + warning: false, message: message.into(), data: Some(data), } } + /// Create a warning result (succeeded with issues). 
+ /// The action completed but something noteworthy occurred. + /// Exit code 2 signals the pipeline to set SucceededWithIssues. + pub fn warning(message: impl Into) -> Self { + Self { + success: true, + warning: true, + message: message.into(), + data: None, + } + } + /// Create a failed execution result pub fn failure(message: impl Into) -> Self { Self { success: false, + warning: false, message: message.into(), data: None, } @@ -145,6 +164,7 @@ impl ExecutionResult { pub fn failure_with_data(message: impl Into, data: serde_json::Value) -> Self { Self { success: false, + warning: false, message: message.into(), data: Some(data), } diff --git a/templates/base.yml b/templates/base.yml index cb85e60..daaf298 100644 --- a/templates/base.yml +++ b/templates/base.yml @@ -634,7 +634,14 @@ jobs: mkdir -p "$(Agent.TempDirectory)/staging" displayName: "Prepare output directory" - - bash: ado-aw execute --source "{{ source_path }}" --safe-output-dir "$(Pipeline.Workspace)/analyzed_outputs_$(Build.BuildId)" --output-dir "$(Agent.TempDirectory)/staging" + - bash: | + ado-aw execute --source "{{ source_path }}" --safe-output-dir "$(Pipeline.Workspace)/analyzed_outputs_$(Build.BuildId)" --output-dir "$(Agent.TempDirectory)/staging" + EXIT_CODE=$? 
+ if [ $EXIT_CODE -eq 2 ]; then + echo "##vso[task.complete result=SucceededWithIssues;]Executor completed with warnings" + exit 0 + fi + exit $EXIT_CODE displayName: Process safe outputs workingDirectory: {{ working_directory }} env: From 68b7f157a3ab253769d6c082c23ccab7f76723ca Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 12 Apr 2026 10:06:00 +0100 Subject: [PATCH 03/28] =?UTF-8?q?fix(create-pr):=20address=20review=20feed?= =?UTF-8?q?back=20=E2=80=94=20format-patch,=20binary=20files,=20globs,=20v?= =?UTF-8?q?alidation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix format-patch to capture both committed and uncommitted changes by diffing against merge-base instead of only the last synthetic commit. Previously, if the agent committed changes per the tool description, the working tree was clean and format-patch produced an empty patch. - Add git identity flags (-c user.email/user.name) to synthetic commit to avoid "Please tell me who you are" errors in fresh environments. - Handle binary files in Stage 2 by falling back to base64 encoding when read_to_string fails (non-UTF-8 content). Uses ADO API contentType: "base64encoded" for binary files. - Fix renamed+modified files losing content changes. When diff-tree reports R with score < 100, emit both a rename and an edit change entry with the new file content. - Fix fragile success_count - warning_count usize subtraction in execution summary. Count success as success && !warning directly instead of subtracting, preventing potential underflow. - Normalize excluded-files glob patterns by auto-prepending **/ to patterns without /, so *.lock matches subdir/Cargo.lock. - Validate if-no-changes and protected-files string values at execution time, rejecting typos with a clear error message. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 1 + Cargo.toml | 1 + src/main.rs | 4 +- src/mcp.rs | 172 +++++++++++++++++++++++++----------- src/tools/create_pr.rs | 193 ++++++++++++++++++++++++++--------------- 5 files changed, 247 insertions(+), 124 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 572b5e0..80fbeed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,6 +9,7 @@ dependencies = [ "anyhow", "async-trait", "axum", + "base64", "chrono", "clap", "dirs", diff --git a/Cargo.toml b/Cargo.toml index c2ebcd1..7cff4dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ axum = { version = "0.8.8", features = ["tokio"] } subtle = "2.6.1" rand = "0.10.1" glob-match = "0.2.1" +base64 = "0.22.1" [dev-dependencies] reqwest = { version = "0.12", features = ["blocking"] } diff --git a/src/main.rs b/src/main.rs index 0bf9c0e..e8af318 100644 --- a/src/main.rs +++ b/src/main.rs @@ -236,7 +236,7 @@ async fn main() -> Result<()> { } // Print summary - let success_count = results.iter().filter(|r| r.success).count(); + let success_count = results.iter().filter(|r| r.success && !r.warning).count(); let warning_count = results.iter().filter(|r| r.warning).count(); let failure_count = results.iter().filter(|r| !r.success).count(); @@ -244,7 +244,7 @@ async fn main() -> Result<()> { println!( "Total: {} | Success: {} | Warnings: {} | Failed: {}", results.len(), - success_count - warning_count, + success_count, warning_count, failure_count ); diff --git a/src/mcp.rs b/src/mcp.rs index 9982f05..6b290e6 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -197,55 +197,93 @@ impl SafeOutputs { // Generate patch using git format-patch for proper commit metadata, // rename detection, and binary file handling. // - // Steps: - // 1. Stage all changes (tracked + untracked) - // 2. Create a temporary commit - // 3. Generate format-patch output - // 4. 
Reset to undo the temporary commit (preserving working tree) - - // Stage all changes including untracked files - let add_output = Command::new("git") - .args(["add", "-A"]) + // Handles both committed and uncommitted changes: + // 1. Find the merge-base with the upstream branch (origin/HEAD or origin/main) + // 2. If there are uncommitted changes, stage and create a temporary commit + // 3. Generate format-patch from merge-base..HEAD to capture ALL changes + // 4. If a temporary commit was created, reset it (preserving working tree) + + // Find the merge-base to diff against + let merge_base = Self::find_merge_base(&git_dir).await?; + debug!("Using merge base: {}", merge_base); + + // Check if there are uncommitted changes (staged or unstaged) + let status_output = Command::new("git") + .args(["status", "--porcelain"]) .current_dir(&git_dir) .output() .await .map_err(|e| { - anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git add -A: {}", e)) + anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git status: {}", e)) })?; - if !add_output.status.success() { - return Err(anyhow_to_mcp_error(anyhow::anyhow!( - "git add -A failed: {}", - String::from_utf8_lossy(&add_output.stderr) - ))); - } + let has_uncommitted = !String::from_utf8_lossy(&status_output.stdout) + .trim() + .is_empty(); + let mut made_synthetic_commit = false; - // Create a temporary commit with the staged changes - let commit_output = Command::new("git") - .args(["commit", "-m", "agent changes", "--allow-empty", "--no-verify"]) - .current_dir(&git_dir) - .output() - .await - .map_err(|e| { - anyhow_to_mcp_error(anyhow::anyhow!("Failed to create temporary commit: {}", e)) - })?; + if has_uncommitted { + debug!("Uncommitted changes detected, creating synthetic commit"); - if !commit_output.status.success() { - // Reset staging on failure - let _ = Command::new("git") - .args(["reset", "HEAD", "--quiet"]) + // Stage all changes including untracked files + let add_output = Command::new("git") + 
.args(["add", "-A"]) .current_dir(&git_dir) .output() - .await; - return Err(anyhow_to_mcp_error(anyhow::anyhow!( - "Failed to create temporary commit: {}", - String::from_utf8_lossy(&commit_output.stderr) - ))); + .await + .map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git add -A: {}", e)) + })?; + + if !add_output.status.success() { + return Err(anyhow_to_mcp_error(anyhow::anyhow!( + "git add -A failed: {}", + String::from_utf8_lossy(&add_output.stderr) + ))); + } + + // Create a temporary commit with git identity flags to avoid config dependency + let commit_output = Command::new("git") + .args([ + "-c", "user.email=agent@ado-aw", + "-c", "user.name=ADO Agent", + "commit", "-m", "agent changes", "--allow-empty", "--no-verify", + ]) + .current_dir(&git_dir) + .output() + .await + .map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!( + "Failed to create temporary commit: {}", + e + )) + })?; + + if !commit_output.status.success() { + // Reset staging on failure + let _ = Command::new("git") + .args(["reset", "HEAD", "--quiet"]) + .current_dir(&git_dir) + .output() + .await; + return Err(anyhow_to_mcp_error(anyhow::anyhow!( + "Failed to create temporary commit: {}", + String::from_utf8_lossy(&commit_output.stderr) + ))); + } + made_synthetic_commit = true; + } else { + debug!("No uncommitted changes — capturing committed changes only"); } - // Generate format-patch for the last commit (the temporary one) + // Generate format-patch from merge-base..HEAD to capture all changes let format_patch_output = Command::new("git") - .args(["format-patch", "-1", "--stdout", "-M"]) + .args([ + "format-patch", + &format!("{}..HEAD", merge_base), + "--stdout", + "-M", + ]) .current_dir(&git_dir) .output() .await @@ -253,21 +291,23 @@ impl SafeOutputs { anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git format-patch: {}", e)) })?; - // Undo the temporary commit but keep changes in working tree - let reset_output = Command::new("git") - .args(["reset", 
"HEAD~1", "--mixed", "--quiet"]) - .current_dir(&git_dir) - .output() - .await - .map_err(|e| { - anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git reset: {}", e)) - })?; + // Undo the temporary commit (if we made one) but keep changes in working tree + if made_synthetic_commit { + let reset_output = Command::new("git") + .args(["reset", "HEAD~1", "--mixed", "--quiet"]) + .current_dir(&git_dir) + .output() + .await + .map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git reset: {}", e)) + })?; - if !reset_output.status.success() { - return Err(anyhow_to_mcp_error(anyhow::anyhow!( - "git reset HEAD~1 failed (repository may retain synthetic commit): {}", - String::from_utf8_lossy(&reset_output.stderr) - ))); + if !reset_output.status.success() { + return Err(anyhow_to_mcp_error(anyhow::anyhow!( + "git reset HEAD~1 failed (repository may retain synthetic commit): {}", + String::from_utf8_lossy(&reset_output.stderr) + ))); + } } if !format_patch_output.status.success() { @@ -294,6 +334,38 @@ impl SafeOutputs { format!("pr-{}-{}.patch", safe_repo, timestamp) } + /// Find the merge-base commit to diff against. + /// + /// Tries (in order): + /// 1. `git merge-base HEAD origin/HEAD` + /// 2. `git merge-base HEAD origin/main` + /// 3. 
Falls back to `HEAD~1` if no remote tracking branch is available + async fn find_merge_base(git_dir: &std::path::Path) -> Result { + use tokio::process::Command; + + for remote_ref in &["origin/HEAD", "origin/main"] { + let output = Command::new("git") + .args(["merge-base", "HEAD", remote_ref]) + .current_dir(git_dir) + .output() + .await + .ok(); + + if let Some(out) = output { + if out.status.success() { + let base = String::from_utf8_lossy(&out.stdout).trim().to_string(); + if !base.is_empty() { + return Ok(base); + } + } + } + } + + // Fallback: use HEAD~1 (single parent) + warn!("Could not find merge-base with origin, falling back to HEAD~1"); + Ok("HEAD~1".to_string()) + } + #[tool( description = "Log a transparency message when no significant actions are needed. Use this to confirm workflow completion and provide visibility when analysis is complete but no changes or outputs are required (e.g., 'No issues found', 'All checks passed'). This ensures the workflow produces human-visible output even when no other actions are taken." )] diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index d3ffd95..07998b8 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -454,6 +454,25 @@ impl Executor for CreatePrResult { debug!("Auto-complete: {}", config.auto_complete); debug!("Squash merge: {}", config.squash_merge); + // Validate string-enum config values + const VALID_IF_NO_CHANGES: &[&str] = &["warn", "error", "ignore"]; + if !VALID_IF_NO_CHANGES.contains(&config.if_no_changes.as_str()) { + return Ok(ExecutionResult::failure(format!( + "Invalid if-no-changes value '{}'. Must be one of: {}", + config.if_no_changes, + VALID_IF_NO_CHANGES.join(", ") + ))); + } + + const VALID_PROTECTED_FILES: &[&str] = &["blocked", "allowed"]; + if !VALID_PROTECTED_FILES.contains(&config.protected_files.as_str()) { + return Ok(ExecutionResult::failure(format!( + "Invalid protected-files value '{}'. 
Must be one of: {}", + config.protected_files, + VALID_PROTECTED_FILES.join(", ") + ))); + } + // Apply title prefix if configured let effective_title = if let Some(ref prefix) = config.title_prefix { format!("{}{}", prefix, self.title) @@ -1249,39 +1268,13 @@ async fn collect_changes_from_worktree( // New/untracked files "??" | "A " | " A" | "AM" => { if full_path.is_file() { - let content = tokio::fs::read_to_string(&full_path) - .await - .with_context(|| format!("Failed to read new file: {}", file_path))?; - - changes.push(serde_json::json!({ - "changeType": "add", - "item": { - "path": format!("/{}", file_path) - }, - "newContent": { - "content": content, - "contentType": "rawtext" - } - })); + changes.push(read_file_change("add", file_path, &full_path).await?); } } // Modified files " M" | "M " | "MM" => { if full_path.is_file() { - let content = tokio::fs::read_to_string(&full_path) - .await - .with_context(|| format!("Failed to read modified file: {}", file_path))?; - - changes.push(serde_json::json!({ - "changeType": "edit", - "item": { - "path": format!("/{}", file_path) - }, - "newContent": { - "content": content, - "contentType": "rawtext" - } - })); + changes.push(read_file_change("edit", file_path, &full_path).await?); } } // Renamed files - format is "R old_path -> new_path" @@ -1297,25 +1290,20 @@ async fn collect_changes_from_worktree( "path": format!("/{}", new_path.trim()) } })); + + // If status is "RM" (renamed + modified), also emit content + if status_code == "RM" { + let new_full_path = worktree_path.join(new_path.trim()); + if new_full_path.is_file() { + changes.push(read_file_change("edit", new_path.trim(), &new_full_path).await?); + } + } } } // Other statuses - try to handle as edit if file exists _ => { if full_path.is_file() { - let content = tokio::fs::read_to_string(&full_path) - .await - .with_context(|| format!("Failed to read file: {}", file_path))?; - - changes.push(serde_json::json!({ - "changeType": "edit", - "item": { - "path": 
format!("/{}", file_path) - }, - "newContent": { - "content": content, - "contentType": "rawtext" - } - })); + changes.push(read_file_change("edit", file_path, &full_path).await?); } } } @@ -1364,20 +1352,7 @@ async fn collect_changes_from_diff_tree( } else if status_code == "A" { // Added file if full_path.is_file() { - let content = tokio::fs::read_to_string(&full_path) - .await - .with_context(|| format!("Failed to read new file: {}", file_path))?; - - changes.push(serde_json::json!({ - "changeType": "add", - "item": { - "path": format!("/{}", file_path) - }, - "newContent": { - "content": content, - "contentType": "rawtext" - } - })); + changes.push(read_file_change("add", file_path, &full_path).await?); } } else if status_code.starts_with('R') && parts.len() >= 3 { // Renamed file: R100\told_path\tnew_path @@ -1386,6 +1361,7 @@ async fn collect_changes_from_diff_tree( validate_single_path(old_path)?; validate_single_path(new_path)?; + // Emit the rename changes.push(serde_json::json!({ "changeType": "rename", "sourceServerItem": format!("/{}", old_path), @@ -1393,29 +1369,62 @@ async fn collect_changes_from_diff_tree( "path": format!("/{}", new_path) } })); + + // If the file was also modified (similarity < 100), emit an edit with content + let new_full_path = worktree_path.join(new_path); + if status_code != "R100" && new_full_path.is_file() { + changes.push(read_file_change("edit", new_path, &new_full_path).await?); + } } else { // Modified or other — read current content if full_path.is_file() { - let content = tokio::fs::read_to_string(&full_path) - .await - .with_context(|| format!("Failed to read modified file: {}", file_path))?; - - changes.push(serde_json::json!({ - "changeType": "edit", - "item": { - "path": format!("/{}", file_path) - }, - "newContent": { - "content": content, - "contentType": "rawtext" - } - })); + changes.push(read_file_change("edit", file_path, &full_path).await?); } } } Ok(changes) } + +/// Read a file and produce an ADO push 
change entry. +/// Handles both text (rawtext) and binary (base64encoded) content. +async fn read_file_change( + change_type: &str, + file_path: &str, + full_path: &std::path::Path, +) -> anyhow::Result<serde_json::Value> { + let bytes = tokio::fs::read(full_path) + .await + .with_context(|| format!("Failed to read file: {}", file_path))?; + + // Try UTF-8 first; fall back to base64 for binary files + match String::from_utf8(bytes.clone()) { + Ok(content) => Ok(serde_json::json!({ + "changeType": change_type, + "item": { + "path": format!("/{}", file_path) + }, + "newContent": { + "content": content, + "contentType": "rawtext" + } + })), + Err(_) => { + use base64::Engine; + let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes); + Ok(serde_json::json!({ + "changeType": change_type, + "item": { + "path": format!("/{}", file_path) + }, + "newContent": { + "content": encoded, + "contentType": "base64encoded" + } + })) + } + } +} /// /// Security checks: /// - No path traversal (..) @@ -1589,11 +1598,19 @@ fn generate_pr_footer() -> String { /// Filter out diff entries for files matching excluded-files glob patterns. /// Splits the patch into per-file diff blocks and removes those matching any pattern. /// Uses `+++ b/` lines for path extraction (robust with quoted/space-containing paths). +/// +/// Patterns without a `/` are automatically prefixed with `**/` so that e.g. `*.lock` +/// matches `subdir/Cargo.lock` (not just root-level lockfiles). 
fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[String]) -> String { if excluded_patterns.is_empty() { return patch_content.to_string(); } + let normalized: Vec<String> = excluded_patterns + .iter() + .map(|p| normalize_glob_pattern(p)) + .collect(); + let mut result = String::with_capacity(patch_content.len()); let mut current_block = String::new(); let mut current_path: Option<String> = None; @@ -1602,7 +1619,7 @@ if line.starts_with("diff --git") { // Flush previous block if it's not excluded if let Some(ref path) = current_path { - if !excluded_patterns.iter().any(|p| glob_match::glob_match(p, path)) { + if !normalized.iter().any(|p| glob_match::glob_match(p, path)) { result.push_str(&current_block); } else { debug!("Excluding file from patch: {}", path); @@ -1628,7 +1645,7 @@ // Flush last block if let Some(ref path) = current_path { - if !excluded_patterns.iter().any(|p| glob_match::glob_match(p, path)) { + if !normalized.iter().any(|p| glob_match::glob_match(p, path)) { result.push_str(&current_block); } else { debug!("Excluding file from patch: {}", path); @@ -1640,6 +1657,17 @@ result } +/// Normalize a glob pattern for cross-directory matching. +/// Patterns without a `/` are prefixed with `**/` so that e.g. `*.lock` matches +/// `subdir/Cargo.lock`, not just root-level files. 
+fn normalize_glob_pattern(pattern: &str) -> String { + if pattern.contains('/') || pattern.starts_with("**/") { + pattern.to_string() + } else { + format!("**/{}", pattern) + } +} + #[cfg(test)] mod tests { use super::*; @@ -1856,6 +1884,27 @@ new file mode 100755 assert!(!result.contains("Cargo.lock")); } + #[test] + fn test_filter_excluded_files_nested_glob() { + // *.lock should match subdir/Cargo.lock thanks to auto-prepended **/ + let patch = "diff --git a/src/main.rs b/src/main.rs\n--- a/src/main.rs\n+++ b/src/main.rs\n@@ -1 +1 @@\n-old\n+new\ndiff --git a/services/api/package-lock.json b/services/api/package-lock.json\n--- a/services/api/package-lock.json\n+++ b/services/api/package-lock.json\n@@ -1 +1 @@\n-old\n+new\n"; + let result = + filter_excluded_files_from_patch(patch, &["package-lock.json".to_string()]); + assert!(result.contains("src/main.rs")); + assert!(!result.contains("package-lock.json")); + } + + #[test] + fn test_normalize_glob_pattern() { + // Patterns without / get **/ prefix + assert_eq!(normalize_glob_pattern("*.lock"), "**/*.lock"); + assert_eq!(normalize_glob_pattern("Cargo.toml"), "**/Cargo.toml"); + // Patterns with / stay as-is + assert_eq!(normalize_glob_pattern("src/*.rs"), "src/*.rs"); + // Already-prefixed patterns stay as-is + assert_eq!(normalize_glob_pattern("**/*.lock"), "**/*.lock"); + } + #[test] fn test_filter_excluded_files_empty_patterns() { let patch = "diff --git a/file.txt b/file.txt\n+content\n"; From d3656ddbd3993c17350dc4ac320a97e9c65e81ba Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 12 Apr 2026 22:36:13 +0100 Subject: [PATCH 04/28] fix(create-pr): address review round 3 feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Convert if_no_changes and protected_files from String to proper Rust enums (IfNoChanges, ProtectedFiles) with serde rename_all for type-safe matching and deserialization-time validation - Rename fallback_as_work_item to 
record_branch_on_failure since no work item is actually created — it only records branch info - Fix find_merge_base single-commit repo fallback: use git rev-list --max-parents=0 HEAD instead of HEAD~1 which doesn't exist for repos with a single commit - Fix max_files double-counting renames: count diff --git blocks instead of deduplicated paths from --- and +++ lines - Add comment explaining empty-commit envelopes in filter_excluded_files_from_patch when format-patch is used - Add comment explaining ADO Pushes API sequential change processing for RM (rename + modify) status in collect_changes_from_worktree Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 25 ++++++-- src/tools/create_pr.rs | 131 ++++++++++++++++++++++++----------------- 2 files changed, 99 insertions(+), 57 deletions(-) diff --git a/src/mcp.rs b/src/mcp.rs index 6b290e6..93e474c 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -339,7 +339,7 @@ impl SafeOutputs { /// Tries (in order): /// 1. `git merge-base HEAD origin/HEAD` /// 2. `git merge-base HEAD origin/main` - /// 3. Falls back to `HEAD~1` if no remote tracking branch is available + /// 3. 
Root commit via `git rev-list --max-parents=0 HEAD` (handles single-commit repos) async fn find_merge_base(git_dir: &std::path::Path) -> Result { use tokio::process::Command; @@ -361,9 +361,26 @@ impl SafeOutputs { } } - // Fallback: use HEAD~1 (single parent) - warn!("Could not find merge-base with origin, falling back to HEAD~1"); - Ok("HEAD~1".to_string()) + // Fallback: find the root commit (works for single-commit repos where HEAD~1 + // doesn't exist) + warn!("Could not find merge-base with origin, falling back to root commit"); + let root_output = Command::new("git") + .args(["rev-list", "--max-parents=0", "HEAD"]) + .current_dir(git_dir) + .output() + .await + .ok(); + + if let Some(out) = root_output.filter(|o| o.status.success()) { + let sha = String::from_utf8_lossy(&out.stdout).trim().to_string(); + if !sha.is_empty() { + return Ok(sha); + } + } + + Err(anyhow_to_mcp_error(anyhow::anyhow!( + "Cannot determine diff base: no remote tracking branch and no commits found" + ))) } #[tool( diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index 07998b8..b6c7dc0 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -284,6 +284,28 @@ impl CreatePrResult { } } +/// Behavior when the patch is empty or all files were excluded +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum IfNoChanges { + /// Succeed with a warning (default) + Warn, + /// Fail the pipeline step + Error, + /// Succeed silently + Ignore, +} + +/// File protection policy controlling whether manifest/CI files can be modified +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum ProtectedFiles { + /// Block modifications to protected files (default) + Blocked, + /// Allow modifications to all files + Allowed, +} + /// Configuration for the create_pr tool (specified in front matter) /// /// Example front matter: @@ -337,7 +359,7 @@ pub struct 
CreatePrConfig { /// Behavior when the patch is empty: "warn" (default), "error", "ignore" #[serde(default = "default_if_no_changes", rename = "if-no-changes")] - pub if_no_changes: String, + pub if_no_changes: IfNoChanges, /// Maximum number of files allowed in a single PR (default: 100) #[serde(default = "default_max_files", rename = "max-files")] @@ -346,7 +368,7 @@ pub struct CreatePrConfig { /// File protection policy: "blocked" (default) or "allowed" /// Controls whether manifest/CI files can be modified #[serde(default = "default_protected_files", rename = "protected-files")] - pub protected_files: String, + pub protected_files: ProtectedFiles, /// Glob patterns for files to exclude from the patch #[serde(default, rename = "excluded-files")] @@ -369,9 +391,11 @@ pub struct CreatePrConfig { #[serde(default, rename = "work-items")] pub work_items: Vec, - /// Whether to create a fallback work item on PR failure (default: true) - #[serde(default = "default_true", rename = "fallback-as-work-item")] - pub fallback_as_work_item: bool, + /// Whether to record branch info in failure data when PR creation fails (default: true). + /// When enabled, the failure response includes the pushed branch name and target branch + /// so operators can manually create the PR. No work item is created automatically. 
+ #[serde(default = "default_true", rename = "record-branch-on-failure")] + pub record_branch_on_failure: bool, } fn default_target_branch() -> String { @@ -382,16 +406,16 @@ fn default_true() -> bool { true } -fn default_if_no_changes() -> String { - "warn".to_string() +fn default_if_no_changes() -> IfNoChanges { + IfNoChanges::Warn } fn default_max_files() -> usize { DEFAULT_MAX_FILES } -fn default_protected_files() -> String { - "blocked".to_string() +fn default_protected_files() -> ProtectedFiles { + ProtectedFiles::Blocked } impl Default for CreatePrConfig { @@ -411,7 +435,7 @@ impl Default for CreatePrConfig { reviewers: Vec::new(), labels: Vec::new(), work_items: Vec::new(), - fallback_as_work_item: true, + record_branch_on_failure: true, } } } @@ -454,25 +478,6 @@ impl Executor for CreatePrResult { debug!("Auto-complete: {}", config.auto_complete); debug!("Squash merge: {}", config.squash_merge); - // Validate string-enum config values - const VALID_IF_NO_CHANGES: &[&str] = &["warn", "error", "ignore"]; - if !VALID_IF_NO_CHANGES.contains(&config.if_no_changes.as_str()) { - return Ok(ExecutionResult::failure(format!( - "Invalid if-no-changes value '{}'. Must be one of: {}", - config.if_no_changes, - VALID_IF_NO_CHANGES.join(", ") - ))); - } - - const VALID_PROTECTED_FILES: &[&str] = &["blocked", "allowed"]; - if !VALID_PROTECTED_FILES.contains(&config.protected_files.as_str()) { - return Ok(ExecutionResult::failure(format!( - "Invalid protected-files value '{}'. 
Must be one of: {}", - config.protected_files, - VALID_PROTECTED_FILES.join(", ") - ))); - } - // Apply title prefix if configured let effective_title = if let Some(ref prefix) = config.title_prefix { format!("{}{}", prefix, self.title) @@ -584,19 +589,18 @@ impl Executor for CreatePrResult { debug!("Patch size after exclusion: {} bytes (was {} bytes)", filtered.len(), patch_content.len()); if filtered.trim().is_empty() { // All files were excluded - match config.if_no_changes.as_str() { - "error" => { + match config.if_no_changes { + IfNoChanges::Error => { return Ok(ExecutionResult::failure( "All files in patch were excluded by excluded-files patterns".to_string(), )); } - "ignore" => { + IfNoChanges::Ignore => { return Ok(ExecutionResult::success( "All files in patch were excluded — nothing to do".to_string(), )); } - _ => { - // "warn" (default) — succeed with warning + IfNoChanges::Warn => { warn!("All files in patch were excluded by excluded-files patterns (if-no-changes: warn)"); return Ok(ExecutionResult::warning( "All files in patch were excluded by excluded-files patterns".to_string(), @@ -628,7 +632,7 @@ impl Executor for CreatePrResult { let patch_paths = extract_paths_from_patch(&patch_content); // Security: File protection check - if config.protected_files != "allowed" { + if config.protected_files != ProtectedFiles::Allowed { let protected = find_protected_files(&patch_paths); if !protected.is_empty() { warn!( @@ -643,16 +647,18 @@ impl Executor for CreatePrResult { } } - // Security: Max files per PR check - if patch_paths.len() > config.max_files { + // Security: Max files per PR check (count diff blocks, not paths, to avoid + // double-counting renames which appear in both --- and +++ lines) + let file_count = count_patch_files(&patch_content); + if file_count > config.max_files { warn!( "Patch contains {} files, exceeding max of {}", - patch_paths.len(), + file_count, config.max_files ); return Ok(ExecutionResult::failure(format!( "Patch contains 
{} files, exceeding maximum of {} files per PR", - patch_paths.len(), + file_count, config.max_files ))); } @@ -867,21 +873,20 @@ impl Executor for CreatePrResult { if changes.is_empty() { // Handle no-changes based on config - match config.if_no_changes.as_str() { - "error" => { + match config.if_no_changes { + IfNoChanges::Error => { warn!("No changes detected after applying patch (if-no-changes: error)"); return Ok(ExecutionResult::failure( "No changes detected after applying patch".to_string(), )); } - "ignore" => { + IfNoChanges::Ignore => { info!("No changes detected after applying patch (if-no-changes: ignore)"); return Ok(ExecutionResult::success( "No changes detected — nothing to do".to_string(), )); } - _ => { - // "warn" (default) — succeed with warning + IfNoChanges::Warn => { warn!("No changes detected after applying patch (if-no-changes: warn)"); return Ok(ExecutionResult::warning( "No changes detected after applying patch".to_string(), @@ -1061,9 +1066,9 @@ impl Executor for CreatePrResult { let body = pr_response.text().await.unwrap_or_default(); warn!("Failed to create pull request: {} - {}", status, body); - // Fallback: create a work item with branch reference if enabled - if config.fallback_as_work_item { - info!("PR creation failed, recording fallback info"); + // Record branch info for manual recovery if enabled + if config.record_branch_on_failure { + info!("PR creation failed, recording branch info for manual recovery"); let fallback_description = format!( "## Pull Request Creation Failed\n\n\ A pull request could not be created automatically.\n\n\ @@ -1082,11 +1087,11 @@ impl Executor for CreatePrResult { ); return Ok(ExecutionResult::failure_with_data( format!( - "Failed to create pull request: {} - {}. Branch '{}' was pushed. Consider creating a work item to track the manual PR creation.", + "Failed to create pull request: {} - {}. 
Branch '{}' was pushed — create the PR manually.", status, body, self.source_branch, ), serde_json::json!({ - "fallback": "work-item", + "fallback": "branch-recorded", "branch": self.source_branch, "target_branch": target_branch, "repository": self.repository, @@ -1278,6 +1283,10 @@ async fn collect_changes_from_worktree( } } // Renamed files - format is "R old_path -> new_path" + // For "RM" (renamed + modified), we emit both a rename and an edit change. + // The ADO Pushes API processes changes sequentially within a single push, + // so the rename establishes the file at the new path, then the edit updates + // its content — this is the correct way to express rename-with-modification. "R " | " R" | "RM" => { if let Some((old_path, new_path)) = file_path.split_once(" -> ") { validate_single_path(old_path.trim())?; @@ -1505,7 +1514,7 @@ fn validate_single_path(path: &str) -> anyhow::Result<()> { } /// Extract all file paths from a patch/diff content. -/// Returns deduplicated list of file paths referenced in the patch. +/// Returns deduplicated list of file paths referenced in the patch (both source and destination). /// Uses `--- a/` and `+++ b/` lines for robust parsing (handles quoted paths /// with spaces that break `diff --git` header parsing via split_whitespace). fn extract_paths_from_patch(patch_content: &str) -> Vec<String> { @@ -1535,6 +1544,17 @@ paths } +/// Count the number of distinct file changes in a patch. +/// Uses `diff --git` headers as the canonical count — each header represents exactly +/// one file change (add, edit, delete, or rename). This avoids double-counting renames, +/// which produce both `--- a/old` and `+++ b/new` lines. +fn count_patch_files(patch_content: &str) -> usize { + patch_content + .lines() + .filter(|line| line.starts_with("diff --git")) + .count() +} + /// Check if any file paths in the patch are protected. 
/// /// Protected files include: @@ -1601,6 +1621,11 @@ fn generate_pr_footer() -> String { /// /// Patterns without a `/` are automatically prefixed with `**/` so that e.g. `*.lock` /// matches `subdir/Cargo.lock` (not just root-level lockfiles). +/// +/// Note: When using format-patch output with multiple commits, excluding all diffs within +/// a commit leaves the `From ...` / `Subject:` envelope intact with no diff hunks. +/// `git am` treats these as empty commits, which is harmless but may leave no-op commits +/// on the branch. fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[String]) -> String { if excluded_patterns.is_empty() { return patch_content.to_string(); @@ -1713,12 +1738,12 @@ mod tests { assert!(!config.auto_complete); assert!(config.delete_source_branch); assert!(config.squash_merge); - assert_eq!(config.if_no_changes, "warn"); + assert_eq!(config.if_no_changes, IfNoChanges::Warn); assert_eq!(config.max_files, 100); - assert_eq!(config.protected_files, "blocked"); + assert_eq!(config.protected_files, ProtectedFiles::Blocked); assert!(config.excluded_files.is_empty()); assert!(config.allowed_labels.is_empty()); - assert!(config.fallback_as_work_item); + assert!(config.record_branch_on_failure); } #[test] From 3013ac7cca1ae698841557f70537875ab7e35b50 Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 12 Apr 2026 23:11:03 +0100 Subject: [PATCH 05/28] fix(create-pr): address review round 4 feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix case comparison bug in find_protected_files: normalize PROTECTED_MANIFEST_BASENAMES to lowercase so case-insensitive comparison works for Cargo.toml, Pipfile, Gemfile, Directory.Build.* - Fix empty-envelope false negative in excluded-files: use count_patch_files() instead of trim().is_empty() to detect when all diffs were excluded from format-patch output (commit envelopes remain) - Make ExecutionResult.warning private with 
is_warning() accessor to enforce the invariant that warning implies success - Add comment explaining excluded-files vs protected-files ordering: exclusion runs first, so excluded files are dropped before the protection check (intentional — no modification to block) - Add test for mixed-case protected manifest detection Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/execute.rs | 8 ++++---- src/main.rs | 4 ++-- src/tools/create_pr.rs | 45 +++++++++++++++++++++++++++++++----------- src/tools/result.rs | 8 ++++++-- 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/execute.rs b/src/execute.rs index 85e15ef..592442a 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -156,7 +156,7 @@ pub async fn execute_safe_outputs( match execute_safe_output(entry, ctx).await { Ok((tool_name, result)) => { - if result.warning { + if result.is_warning() { warn!( "[{}/{}] {} warning: {}", i + 1, @@ -181,7 +181,7 @@ pub async fn execute_safe_outputs( result.message ); } - let symbol = if result.warning { "⚠" } else if result.success { "✓" } else { "✗" }; + let symbol = if result.is_warning() { "⚠" } else if result.success { "✓" } else { "✗" }; println!( "[{}/{}] {} - {} - {}", i + 1, @@ -202,8 +202,8 @@ pub async fn execute_safe_outputs( } // Log final summary - let success_count = results.iter().filter(|r| r.success && !r.warning).count(); - let warning_count = results.iter().filter(|r| r.warning).count(); + let success_count = results.iter().filter(|r| r.success && !r.is_warning()).count(); + let warning_count = results.iter().filter(|r| r.is_warning()).count(); let failure_count = results.iter().filter(|r| !r.success).count(); info!( "Stage 2 execution complete: {} succeeded, {} warnings, {} failed", diff --git a/src/main.rs b/src/main.rs index e8af318..445a1d5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -236,8 +236,8 @@ async fn main() -> Result<()> { } // Print summary - let success_count = results.iter().filter(|r| r.success && 
!r.warning).count(); - let warning_count = results.iter().filter(|r| r.warning).count(); + let success_count = results.iter().filter(|r| r.success && !r.is_warning()).count(); + let warning_count = results.iter().filter(|r| r.is_warning()).count(); let failure_count = results.iter().filter(|r| !r.success).count(); println!("\n--- Execution Summary ---"); diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index b6c7dc0..71b0c1f 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -15,7 +15,8 @@ const MAX_PATCH_SIZE_BYTES: u64 = 5 * 1024 * 1024; /// Default maximum files allowed in a single PR const DEFAULT_MAX_FILES: usize = 100; -/// Runtime manifest files that are protected by default. +/// Runtime manifest files that are protected by default (all lowercase for +/// case-insensitive comparison). /// These are dependency/build files that could be modified to introduce supply chain attacks. const PROTECTED_MANIFEST_BASENAMES: &[&str] = &[ // npm / Node.js @@ -29,15 +30,15 @@ const PROTECTED_MANIFEST_BASENAMES: &[&str] = &[ "go.sum", // Python "requirements.txt", - "Pipfile", - "Pipfile.lock", + "pipfile", + "pipfile.lock", "pyproject.toml", "setup.py", "setup.cfg", "poetry.lock", // Ruby - "Gemfile", - "Gemfile.lock", + "gemfile", + "gemfile.lock", // Java / Kotlin / Gradle "pom.xml", "build.gradle", @@ -46,12 +47,12 @@ const PROTECTED_MANIFEST_BASENAMES: &[&str] = &[ "settings.gradle.kts", "gradle.properties", // .NET / C# - "Directory.Build.props", - "Directory.Build.targets", + "directory.build.props", + "directory.build.targets", "global.json", // Rust - "Cargo.toml", - "Cargo.lock", + "cargo.toml", + "cargo.lock", ]; /// Path prefixes that are protected by default. 
@@ -582,12 +583,19 @@ impl Executor for CreatePrResult { .context("Failed to read patch file")?; debug!("Patch content size: {} bytes", patch_content.len()); - // Filter excluded files from patch content if configured + // Filter excluded files from patch content if configured. + // Note: Exclusion runs before the protection check. If a protected file matches + // an excluded-files pattern, it is silently dropped from the patch rather than + // triggering a protection error. This is intentional — exclusion removes the file + // from the PR entirely, so there is no security-relevant modification to block. let patch_content = if !config.excluded_files.is_empty() { debug!("Filtering {} excluded-files patterns from patch", config.excluded_files.len()); let filtered = filter_excluded_files_from_patch(&patch_content, &config.excluded_files); debug!("Patch size after exclusion: {} bytes (was {} bytes)", filtered.len(), patch_content.len()); - if filtered.trim().is_empty() { + // Check if any actual diff blocks remain. For format-patch output the commit + // envelope (From , Subject:, etc.) persists even when all diffs are + // excluded, so we count diff --git headers rather than checking for empty string. 
+ if count_patch_files(&filtered) == 0 { // All files were excluded match config.if_no_changes { IfNoChanges::Error => { @@ -1899,6 +1907,21 @@ new file mode 100755 assert_eq!(protected, vec!["services/api/package.json"]); } + #[test] + fn test_find_protected_files_mixed_case_manifests() { + let paths = vec![ + "Cargo.toml".to_string(), + "Cargo.lock".to_string(), + "Pipfile".to_string(), + "Gemfile".to_string(), + "Gemfile.lock".to_string(), + "Directory.Build.props".to_string(), + "sub/Directory.Build.targets".to_string(), + ]; + let protected = find_protected_files(&paths); + assert_eq!(protected.len(), 7); + } + // ─── Excluded files filtering ─────────────────────────────────────────── #[test] diff --git a/src/tools/result.rs b/src/tools/result.rs index 19e8086..71bb449 100644 --- a/src/tools/result.rs +++ b/src/tools/result.rs @@ -107,9 +107,9 @@ pub struct ExecutionResult { /// Whether the execution succeeded pub success: bool, /// Whether this is a warning (succeeded with issues). - /// When true, success is also true — the action completed but with caveats. + /// Invariant: warning == true implies success == true. 
#[serde(skip_serializing_if = "std::ops::Not::not")] - pub warning: bool, + warning: bool, /// Human-readable message describing the outcome pub message: String, /// Optional additional data (e.g., work item ID) @@ -118,6 +118,10 @@ pub struct ExecutionResult { } impl ExecutionResult { + /// Whether this is a warning (succeeded with issues) + pub fn is_warning(&self) -> bool { + self.warning + } /// Create a successful execution result pub fn success(message: impl Into) -> Self { Self { From cd4eb070ce9802bdcf53a944c68dd86cfb466a0d Mon Sep 17 00:00:00 2001 From: James Devine Date: Sun, 12 Apr 2026 23:30:05 +0100 Subject: [PATCH 06/28] fix(create-pr): address review round 5 feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix multi-commit diff-tree: record base SHA before git am and diff base_sha..HEAD to capture all commits, not just the last one - Fix filter_excluded_files_from_patch for multi-commit format-patch: track diff blocks vs commit envelopes separately so excluding a file doesn't corrupt adjacent commit headers - Remove dead Windows backslash path check in find_protected_files — git diffs always use forward slashes - Make label allowlist case-insensitive with eq_ignore_ascii_case - Add test for multi-commit envelope preservation during exclusion Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/tools/create_pr.rs | 146 ++++++++++++++++++++++++++++++----------- 1 file changed, 108 insertions(+), 38 deletions(-) diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index 71b0c1f..c102ea0 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -780,6 +780,18 @@ impl Executor for CreatePrResult { } debug!("Source branch created"); + // Record the worktree HEAD before applying the patch so we can diff against + // it later. 
For multi-commit patches, git am creates N commits and diff-tree HEAD + // alone only shows the last commit's changes — we need base_sha..HEAD. + let base_sha_output = Command::new("git") + .args(["rev-parse", "HEAD"]) + .current_dir(&worktree_path) + .output() + .await + .context("Failed to get worktree HEAD SHA")?; + let base_sha = String::from_utf8_lossy(&base_sha_output.stdout).trim().to_string(); + debug!("Worktree base SHA before patch: {}", base_sha); + // Apply the format-patch using git am --3way for proper conflict handling. // git am handles the email-style patch format from git format-patch and // --3way enables three-way merge for better conflict resolution. @@ -828,12 +840,13 @@ impl Executor for CreatePrResult { } // Collect changed files. The method depends on how the patch was applied: - // - git am: changes are committed → use git diff-tree to compare with parent + // - git am: changes are committed → use git diff-tree to compare base_sha..HEAD + // (covers all commits in multi-commit patches, not just the last one) // - git apply: changes are in working tree → use git status --porcelain debug!("Getting list of changed files"); let (status_str, use_diff_tree) = if patch_committed { let diff_tree_output = Command::new("git") - .args(["diff-tree", "--no-commit-id", "-r", "--name-status", "HEAD"]) + .args(["diff-tree", "-r", "--name-status", &base_sha, "HEAD"]) .current_dir(&worktree_path) .output() .await @@ -1030,7 +1043,12 @@ impl Executor for CreatePrResult { let disallowed: Vec<_> = self .agent_labels .iter() - .filter(|l| !config.allowed_labels.contains(l)) + .filter(|l| { + !config + .allowed_labels + .iter() + .any(|al| al.eq_ignore_ascii_case(l)) + }) .collect(); if !disallowed.is_empty() { warn!( @@ -1587,9 +1605,9 @@ fn find_protected_files(paths: &[String]) -> Vec { } } - // Check path prefixes + // Check path prefixes (git diffs always use forward slashes) for prefix in PROTECTED_PATH_PREFIXES { - if lower_path.starts_with(prefix) || 
lower_path.starts_with(&prefix.replace('/', "\\")) { + if lower_path.starts_with(prefix) { if !protected.contains(path) { protected.push(path.clone()); } @@ -1630,10 +1648,10 @@ fn generate_pr_footer() -> String { /// Patterns without a `/` are automatically prefixed with `**/` so that e.g. `*.lock` /// matches `subdir/Cargo.lock` (not just root-level lockfiles). /// -/// Note: When using format-patch output with multiple commits, excluding all diffs within -/// a commit leaves the `From ...` / `Subject:` envelope intact with no diff hunks. -/// `git am` treats these as empty commits, which is harmless but may leave no-op commits -/// on the branch. +/// For multi-commit format-patch output, commit envelopes (`From ...`, `Subject:`, +/// etc.) are always preserved — only `diff --git` blocks are subject to exclusion. When +/// all diffs within a commit are excluded, the envelope remains and `git am` treats it +/// as an empty commit. fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[String]) -> String { if excluded_patterns.is_empty() { return patch_content.to_string(); @@ -1645,51 +1663,71 @@ fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[St .collect(); let mut result = String::with_capacity(patch_content.len()); - let mut current_block = String::new(); + let mut current_diff_block = String::new(); let mut current_path: Option = None; + let mut in_diff_block = false; for line in patch_content.lines() { if line.starts_with("diff --git") { - // Flush previous block if it's not excluded - if let Some(ref path) = current_path { - if !normalized.iter().any(|p| glob_match::glob_match(p, path)) { - result.push_str(¤t_block); - } else { - debug!("Excluding file from patch: {}", path); - } - } else if !current_block.is_empty() { - result.push_str(¤t_block); - } + // Flush previous diff block if not excluded + flush_diff_block(&mut result, ¤t_diff_block, ¤t_path, &normalized); - // Start new block, path will be set 
when we see +++ b/ - current_block = String::new(); - current_block.push_str(line); - current_block.push('\n'); + // Start a new diff block + current_diff_block = String::new(); + current_diff_block.push_str(line); + current_diff_block.push('\n'); current_path = None; - } else { - // Extract path from "+++ b/path" line (handles quoted paths) + in_diff_block = true; + } else if in_diff_block && line.starts_with("From ") { + // A new commit envelope starts — flush the current diff block and + // switch back to envelope mode + flush_diff_block(&mut result, ¤t_diff_block, ¤t_path, &normalized); + current_diff_block = String::new(); + current_path = None; + in_diff_block = false; + + result.push_str(line); + result.push('\n'); + } else if in_diff_block { + // Extract path from "+++ b/path" line if let Some(rest) = line.strip_prefix("+++ b/") { current_path = Some(rest.trim().trim_matches('"').to_string()); } - current_block.push_str(line); - current_block.push('\n'); - } - } - - // Flush last block - if let Some(ref path) = current_path { - if !normalized.iter().any(|p| glob_match::glob_match(p, path)) { - result.push_str(¤t_block); + current_diff_block.push_str(line); + current_diff_block.push('\n'); } else { - debug!("Excluding file from patch: {}", path); + // Not inside a diff block — this is a commit envelope line + // (From , Subject:, etc.) or preamble. Always preserve. + result.push_str(line); + result.push('\n'); } - } else if !current_block.is_empty() { - result.push_str(¤t_block); } + // Flush the last diff block + flush_diff_block(&mut result, ¤t_diff_block, ¤t_path, &normalized); + result } +/// Helper to flush a diff block, excluding it if the path matches any pattern. 
+fn flush_diff_block( + result: &mut String, + block: &str, + path: &Option, + patterns: &[String], +) { + if block.is_empty() { + return; + } + if let Some(p) = path { + if patterns.iter().any(|pat| glob_match::glob_match(pat, p)) { + debug!("Excluding file from patch: {}", p); + return; + } + } + result.push_str(block); +} + /// Normalize a glob pattern for cross-directory matching. /// Patterns without a `/` are prefixed with `**/` so that e.g. `*.lock` matches /// `subdir/Cargo.lock`, not just root-level files. @@ -1942,6 +1980,38 @@ new file mode 100755 assert!(!result.contains("package-lock.json")); } + #[test] + fn test_filter_excluded_files_multi_commit_preserves_envelopes() { + // Multi-commit format-patch: commit envelopes must be preserved even + // when a diff block between them is excluded. + let patch = "\ +From abc123 Mon Sep 17 00:00:00 2001\n\ +Subject: [PATCH 1/2] First commit\n\ +---\n\ +diff --git a/file1.rs b/file1.rs\n\ +--- a/file1.rs\n\ ++++ b/file1.rs\n\ +@@ -1 +1 @@\n\ +-old\n\ ++new\n\ +\n\ +From def456 Mon Sep 17 00:00:00 2001\n\ +Subject: [PATCH 2/2] Second commit\n\ +---\n\ +diff --git a/file2.rs b/file2.rs\n\ +--- a/file2.rs\n\ ++++ b/file2.rs\n\ +@@ -1 +1 @@\n\ +-old\n\ ++new\n"; + // Exclude file1.rs — commit 2 envelope and file2 diff must survive + let result = filter_excluded_files_from_patch(patch, &["file1.rs".to_string()]); + assert!(!result.contains("file1.rs"), "file1.rs should be excluded"); + assert!(result.contains("From def456"), "commit 2 envelope must survive"); + assert!(result.contains("file2.rs"), "file2.rs should remain"); + assert!(result.contains("From abc123"), "commit 1 envelope must survive"); + } + #[test] fn test_normalize_glob_pattern() { // Patterns without / get **/ prefix From 2ef57236710ef1750fb5e87bbf854d26496dc437 Mon Sep 17 00:00:00 2001 From: James Devine Date: Mon, 13 Apr 2026 12:04:10 +0100 Subject: [PATCH 07/28] fix(create-pr): address review round 6 feedback - Broaden find_merge_base: detect 
actual default branch via git symbolic-ref refs/remotes/origin/HEAD before trying origin/main and origin/master, avoiding full-repo patches on non-main defaults - Fix case-insensitive label dedup: use eq_ignore_ascii_case in merge dedup to match the allowlist comparison, preventing duplicate labels - Strip empty commit envelopes from filtered patches: when all diffs within a commit are excluded, remove the envelope entirely to avoid git am errors on empty patch emails - Change draft default to false to match ADO default behavior and avoid breaking existing pipelines that don't set draft explicitly - Log synthetic commit SHA on reset failure for operator diagnostics - Rename executor step displayName to 'Execute safe outputs (Stage 2)' for clarity in pipeline logs Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 44 +++++++++- src/tools/create_pr.rs | 194 ++++++++++++++++++++++++++++------------- templates/base.yml | 2 +- 3 files changed, 173 insertions(+), 67 deletions(-) diff --git a/src/mcp.rs b/src/mcp.rs index 93e474c..5102042 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -293,6 +293,16 @@ impl SafeOutputs { // Undo the temporary commit (if we made one) but keep changes in working tree if made_synthetic_commit { + // Capture the synthetic commit SHA for diagnostics + let head_sha = Command::new("git") + .args(["rev-parse", "HEAD"]) + .current_dir(&git_dir) + .output() + .await + .ok() + .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string()) + .unwrap_or_else(|| "".to_string()); + let reset_output = Command::new("git") .args(["reset", "HEAD~1", "--mixed", "--quiet"]) .current_dir(&git_dir) @@ -304,7 +314,8 @@ impl SafeOutputs { if !reset_output.status.success() { return Err(anyhow_to_mcp_error(anyhow::anyhow!( - "git reset HEAD~1 failed (repository may retain synthetic commit): {}", + "git reset HEAD~1 failed (synthetic commit {} may remain): {}", + head_sha, String::from_utf8_lossy(&reset_output.stderr) ))); } @@ 
-337,13 +348,38 @@ impl SafeOutputs { /// Find the merge-base commit to diff against. /// /// Tries (in order): - /// 1. `git merge-base HEAD origin/HEAD` - /// 2. `git merge-base HEAD origin/main` + /// 1. Detect actual default branch via `git symbolic-ref refs/remotes/origin/HEAD` + /// 2. Common default branch names: `origin/main`, `origin/master` /// 3. Root commit via `git rev-list --max-parents=0 HEAD` (handles single-commit repos) async fn find_merge_base(git_dir: &std::path::Path) -> Result { use tokio::process::Command; - for remote_ref in &["origin/HEAD", "origin/main"] { + // First, try to discover the actual default branch from origin/HEAD + let symbolic_output = Command::new("git") + .args(["symbolic-ref", "refs/remotes/origin/HEAD"]) + .current_dir(git_dir) + .output() + .await + .ok(); + + let mut candidates: Vec = Vec::new(); + + if let Some(out) = symbolic_output.filter(|o| o.status.success()) { + let refname = String::from_utf8_lossy(&out.stdout).trim().to_string(); + // e.g. 
"refs/remotes/origin/main" → "origin/main" + if let Some(branch) = refname.strip_prefix("refs/remotes/") { + candidates.push(branch.to_string()); + } + } + + // Always try common defaults as fallbacks + for name in &["origin/main", "origin/master"] { + if !candidates.iter().any(|c| c == *name) { + candidates.push(name.to_string()); + } + } + + for remote_ref in &candidates { let output = Command::new("git") .args(["merge-base", "HEAD", remote_ref]) .current_dir(git_dir) diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index c102ea0..7569011 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -338,8 +338,8 @@ pub struct CreatePrConfig { #[serde(default = "default_target_branch", rename = "target-branch")] pub target_branch: String, - /// Whether to create the PR as a draft (default: true) - #[serde(default = "default_true")] + /// Whether to create the PR as a draft (default: false) + #[serde(default)] pub draft: bool, /// Whether to set auto-complete on the PR (default: false) @@ -423,7 +423,7 @@ impl Default for CreatePrConfig { fn default() -> Self { Self { target_branch: default_target_branch(), - draft: true, + draft: false, auto_complete: false, delete_source_branch: true, squash_merge: true, @@ -1061,9 +1061,9 @@ impl Executor for CreatePrResult { ))); } } - // Merge agent labels (dedup) + // Merge agent labels (case-insensitive dedup to match allowlist comparison) for label in &self.agent_labels { - if !all_labels.contains(label) { + if !all_labels.iter().any(|l| l.eq_ignore_ascii_case(label)) { all_labels.push(label.clone()); } } @@ -1648,10 +1648,9 @@ fn generate_pr_footer() -> String { /// Patterns without a `/` are automatically prefixed with `**/` so that e.g. `*.lock` /// matches `subdir/Cargo.lock` (not just root-level lockfiles). /// -/// For multi-commit format-patch output, commit envelopes (`From ...`, `Subject:`, -/// etc.) are always preserved — only `diff --git` blocks are subject to exclusion. 
When -/// all diffs within a commit are excluded, the envelope remains and `git am` treats it -/// as an empty commit. +/// For multi-commit format-patch output, commit envelopes and diff blocks are tracked +/// separately. Envelopes whose diffs are all excluded are stripped entirely to avoid +/// confusing `git am` with empty patch emails on some git versions. fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[String]) -> String { if excluded_patterns.is_empty() { return patch_content.to_string(); @@ -1662,70 +1661,110 @@ fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[St .map(|p| normalize_glob_pattern(p)) .collect(); - let mut result = String::with_capacity(patch_content.len()); - let mut current_diff_block = String::new(); + // Parse the patch into a sequence of segments: either commit envelopes or diff blocks. + // A commit envelope starts with "From " and includes everything up to the first + // "diff --git" line. A diff block starts with "diff --git" and continues until the + // next "diff --git" or "From " line. 
+ struct Segment { + content: String, + kind: SegmentKind, + path: Option, + } + enum SegmentKind { + Envelope, + Diff, + } + + let mut segments: Vec = Vec::new(); + let mut current = String::new(); + let mut current_kind = SegmentKind::Envelope; let mut current_path: Option = None; - let mut in_diff_block = false; for line in patch_content.lines() { if line.starts_with("diff --git") { - // Flush previous diff block if not excluded - flush_diff_block(&mut result, ¤t_diff_block, ¤t_path, &normalized); - - // Start a new diff block - current_diff_block = String::new(); - current_diff_block.push_str(line); - current_diff_block.push('\n'); - current_path = None; - in_diff_block = true; - } else if in_diff_block && line.starts_with("From ") { - // A new commit envelope starts — flush the current diff block and - // switch back to envelope mode - flush_diff_block(&mut result, ¤t_diff_block, ¤t_path, &normalized); - current_diff_block = String::new(); + // Flush the previous segment + if !current.is_empty() { + segments.push(Segment { + content: std::mem::take(&mut current), + kind: current_kind, + path: current_path.take(), + }); + } + current_kind = SegmentKind::Diff; current_path = None; - in_diff_block = false; - - result.push_str(line); - result.push('\n'); - } else if in_diff_block { - // Extract path from "+++ b/path" line - if let Some(rest) = line.strip_prefix("+++ b/") { - current_path = Some(rest.trim().trim_matches('"').to_string()); + current.push_str(line); + current.push('\n'); + } else if line.starts_with("From ") && matches!(current_kind, SegmentKind::Diff) { + // A new commit envelope after a diff block + if !current.is_empty() { + segments.push(Segment { + content: std::mem::take(&mut current), + kind: current_kind, + path: current_path.take(), + }); } - current_diff_block.push_str(line); - current_diff_block.push('\n'); + current_kind = SegmentKind::Envelope; + current_path = None; + current.push_str(line); + current.push('\n'); } else { - // Not 
inside a diff block — this is a commit envelope line - // (From , Subject:, etc.) or preamble. Always preserve. - result.push_str(line); - result.push('\n'); + if matches!(current_kind, SegmentKind::Diff) { + if let Some(rest) = line.strip_prefix("+++ b/") { + current_path = Some(rest.trim().trim_matches('"').to_string()); + } + } + current.push_str(line); + current.push('\n'); } } + if !current.is_empty() { + segments.push(Segment { + content: current, + kind: current_kind, + path: current_path, + }); + } - // Flush the last diff block - flush_diff_block(&mut result, ¤t_diff_block, ¤t_path, &normalized); - - result -} + // Now filter: mark excluded diff blocks, then strip envelopes with no surviving diffs. + let mut keep: Vec = segments + .iter() + .map(|seg| match seg.kind { + SegmentKind::Envelope => true, // tentatively keep + SegmentKind::Diff => { + if let Some(ref p) = seg.path { + if normalized.iter().any(|pat| glob_match::glob_match(pat, p)) { + debug!("Excluding file from patch: {}", p); + return false; + } + } + true + } + }) + .collect(); -/// Helper to flush a diff block, excluding it if the path matches any pattern. -fn flush_diff_block( - result: &mut String, - block: &str, - path: &Option, - patterns: &[String], -) { - if block.is_empty() { - return; + // Strip envelopes whose subsequent diffs are all excluded. + // Walk backwards: an envelope is empty if no diff block before the next envelope is kept. 
+ let mut i = segments.len(); + while i > 0 { + i -= 1; + if matches!(segments[i].kind, SegmentKind::Envelope) { + // Check if any diff block between this envelope and the next envelope is kept + let has_surviving_diff = (i + 1..segments.len()) + .take_while(|&j| matches!(segments[j].kind, SegmentKind::Diff)) + .any(|j| keep[j]); + if !has_surviving_diff { + keep[i] = false; + } + } } - if let Some(p) = path { - if patterns.iter().any(|pat| glob_match::glob_match(pat, p)) { - debug!("Excluding file from patch: {}", p); - return; + + let mut result = String::with_capacity(patch_content.len()); + for (seg, &kept) in segments.iter().zip(keep.iter()) { + if kept { + result.push_str(&seg.content); } } - result.push_str(block); + result } /// Normalize a glob pattern for cross-directory matching. @@ -1780,7 +1819,7 @@ mod tests { fn test_config_default_target_branch() { let config = CreatePrConfig::default(); assert_eq!(config.target_branch, "main"); - assert!(config.draft); + assert!(!config.draft); assert!(!config.auto_complete); assert!(config.delete_source_branch); assert!(config.squash_merge); @@ -2004,12 +2043,43 @@ diff --git a/file2.rs b/file2.rs\n\ @@ -1 +1 @@\n\ -old\n\ +new\n"; - // Exclude file1.rs — commit 2 envelope and file2 diff must survive + // Exclude file1.rs — commit 2 envelope and file2 diff must survive. + // Commit 1 envelope is stripped since all its diffs were excluded. 
let result = filter_excluded_files_from_patch(patch, &["file1.rs".to_string()]); assert!(!result.contains("file1.rs"), "file1.rs should be excluded"); assert!(result.contains("From def456"), "commit 2 envelope must survive"); assert!(result.contains("file2.rs"), "file2.rs should remain"); - assert!(result.contains("From abc123"), "commit 1 envelope must survive"); + assert!(!result.contains("From abc123"), "commit 1 envelope should be stripped (no surviving diffs)"); + } + + #[test] + fn test_filter_excluded_files_multi_commit_all_excluded() { + // When all diffs across all commits are excluded, the result should be empty + let patch = "\ +From abc123 Mon Sep 17 00:00:00 2001\n\ +Subject: [PATCH 1/2] First commit\n\ +---\n\ +diff --git a/file1.lock b/file1.lock\n\ +--- a/file1.lock\n\ ++++ b/file1.lock\n\ +@@ -1 +1 @@\n\ +-old\n\ ++new\n\ +\n\ +From def456 Mon Sep 17 00:00:00 2001\n\ +Subject: [PATCH 2/2] Second commit\n\ +---\n\ +diff --git a/file2.lock b/file2.lock\n\ +--- a/file2.lock\n\ ++++ b/file2.lock\n\ +@@ -1 +1 @@\n\ +-old\n\ ++new\n"; + let result = filter_excluded_files_from_patch(patch, &["*.lock".to_string()]); + // Both envelopes and their diffs should be stripped + assert!(!result.contains("From abc123")); + assert!(!result.contains("From def456")); + assert_eq!(count_patch_files(&result), 0); } #[test] diff --git a/templates/base.yml b/templates/base.yml index daaf298..6f0606d 100644 --- a/templates/base.yml +++ b/templates/base.yml @@ -642,7 +642,7 @@ jobs: exit 0 fi exit $EXIT_CODE - displayName: Process safe outputs + displayName: Execute safe outputs (Stage 2) workingDirectory: {{ working_directory }} env: {{ executor_ado_env }} From 92def8522085ea63da6d1f36543c3aa5c15a9d05 Mon Sep 17 00:00:00 2001 From: James Devine Date: Mon, 13 Apr 2026 12:23:20 +0100 Subject: [PATCH 08/28] fix(create-pr): address review round 7 feedback - Default draft to true (safety posture: agent PRs are drafts unless operator opts out) - Fix synthetic commit leak: reset 
before propagating format-patch errors - Add Dockerfile/docker-compose to protected manifest basenames - Remove redundant path validation for rename old_path - Truncate raw ADO API response body in fallback description (max 500 chars) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 14 ++++++++------ src/tools/create_pr.rs | 30 ++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/mcp.rs b/src/mcp.rs index 5102042..56d5bc2 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -277,7 +277,7 @@ impl SafeOutputs { } // Generate format-patch from merge-base..HEAD to capture all changes - let format_patch_output = Command::new("git") + let format_patch_result = Command::new("git") .args([ "format-patch", &format!("{}..HEAD", merge_base), @@ -286,12 +286,9 @@ impl SafeOutputs { ]) .current_dir(&git_dir) .output() - .await - .map_err(|e| { - anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git format-patch: {}", e)) - })?; + .await; - // Undo the temporary commit (if we made one) but keep changes in working tree + // Always undo the temporary commit before propagating errors if made_synthetic_commit { // Capture the synthetic commit SHA for diagnostics let head_sha = Command::new("git") @@ -321,6 +318,11 @@ impl SafeOutputs { } } + // Now check the format-patch result after cleanup + let format_patch_output = format_patch_result.map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git format-patch: {}", e)) + })?; + if !format_patch_output.status.success() { return Err(anyhow_to_mcp_error(anyhow::anyhow!( "git format-patch failed: {}", diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index 7569011..f146cb9 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -53,6 +53,12 @@ const PROTECTED_MANIFEST_BASENAMES: &[&str] = &[ // Rust "cargo.toml", "cargo.lock", + // Docker / container + "dockerfile", + "docker-compose.yml", + "docker-compose.yaml", + 
"compose.yml", + "compose.yaml", ]; /// Path prefixes that are protected by default. @@ -338,8 +344,8 @@ pub struct CreatePrConfig { #[serde(default = "default_target_branch", rename = "target-branch")] pub target_branch: String, - /// Whether to create the PR as a draft (default: false) - #[serde(default)] + /// Whether to create the PR as a draft (default: true) + #[serde(default = "default_draft")] pub draft: bool, /// Whether to set auto-complete on the PR (default: false) @@ -403,6 +409,10 @@ fn default_target_branch() -> String { "main".to_string() } +fn default_draft() -> bool { + true +} + fn default_true() -> bool { true } @@ -423,7 +433,7 @@ impl Default for CreatePrConfig { fn default() -> Self { Self { target_branch: default_target_branch(), - draft: false, + draft: true, auto_complete: false, delete_source_branch: true, squash_merge: true, @@ -1107,7 +1117,7 @@ impl Executor for CreatePrResult { ---\n\ *To create the PR manually, merge branch `{}` into `{}`.*", self.source_branch, target_branch, self.repository, - status, body, + status, truncate_error_body(&body, 500), self.description, self.source_branch, target_branch ); @@ -1393,7 +1403,7 @@ async fn collect_changes_from_diff_tree( // Renamed file: R100\told_path\tnew_path let old_path = file_path; let new_path = parts[2]; - validate_single_path(old_path)?; + // old_path (= file_path) is already validated above validate_single_path(new_path)?; // Emit the rename @@ -1495,6 +1505,14 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { Ok(()) } +/// Truncate an error response body to avoid embedding large or sensitive content. 
+fn truncate_error_body(body: &str, max_len: usize) -> &str { + match body.char_indices().nth(max_len) { + Some((idx, _)) => &body[..idx], + None => body, + } +} + /// Validate a single file path for security issues fn validate_single_path(path: &str) -> anyhow::Result<()> { // Check for null bytes @@ -1819,7 +1837,7 @@ mod tests { fn test_config_default_target_branch() { let config = CreatePrConfig::default(); assert_eq!(config.target_branch, "main"); - assert!(!config.draft); + assert!(config.draft); assert!(!config.auto_complete); assert!(config.delete_source_branch); assert!(config.squash_merge); From f71b75492d670cb208c2b3d8ddf7d6efd1afde0c Mon Sep 17 00:00:00 2001 From: James Devine Date: Mon, 13 Apr 2026 12:57:54 +0100 Subject: [PATCH 09/28] fix(create-pr): address review round 8 feedback - Fix synthetic commit index mutation: add git reset HEAD after mixed reset to restore previously-untracked files to untracked state - Fix deleted files exclusion: fallback to --- a/ when +++ is /dev/null so excluded-files patterns match deletions - Sanitize ADO API response body via sanitize_text() to neutralize ##vso[ commands in fallback description - Rename record-branch-on-failure to fallback-as-work-item to match PR description (old name kept as serde alias for compat) - Add warning about potentially large patches at root-commit fallback Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 16 ++++++++++++++-- src/tools/create_pr.rs | 21 +++++++++++++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/mcp.rs b/src/mcp.rs index 4f41550..380e8b7 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -316,7 +316,9 @@ impl SafeOutputs { .output() .await; - // Always undo the temporary commit before propagating errors + // Always undo the temporary commit before propagating errors. 
+ // A follow-up plain `git reset HEAD` restores the index to its original state, + // since `git reset --mixed` alone would leave previously-untracked files staged. if made_synthetic_commit { // Capture the synthetic commit SHA for diagnostics let head_sha = Command::new("git") @@ -328,6 +330,7 @@ .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string()) .unwrap_or_else(|| "".to_string()); + // Reset the synthetic commit, restoring changes to working tree let reset_output = Command::new("git") .args(["reset", "HEAD~1", "--mixed", "--quiet"]) .current_dir(&git_dir) @@ -344,6 +347,15 @@ String::from_utf8_lossy(&reset_output.stderr) ))); } + + // Unstage everything so the index matches the pre-generate_patch state. + // `git reset --mixed` leaves previously-untracked files as staged new files; + // this reset restores them to untracked. + let _ = Command::new("git") + .args(["reset", "HEAD", "--quiet"]) + .current_dir(&git_dir) + .output() + .await; } // Now check the format-patch result after cleanup @@ -429,7 +441,7 @@ // Fallback: find the root commit (works for single-commit repos where HEAD~1 // doesn't exist) - warn!("Could not find merge-base with origin, falling back to root commit"); + warn!("Could not find merge-base with origin, falling back to root commit. This may produce a very large patch if the repository has a long history."); let root_output = Command::new("git") .args(["rev-list", "--max-parents=0", "HEAD"]) .current_dir(git_dir) diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index 5093aca..edb5ef9 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -402,8 +402,12 @@ pub struct CreatePrConfig { /// Whether to record branch info in failure data when PR creation fails (default: true). /// When enabled, the failure response includes the pushed branch name and target branch /// so operators can manually create the PR. No work item is created automatically.
- #[serde(default = "default_true", rename = "record-branch-on-failure")] - pub record_branch_on_failure: bool, + #[serde( + default = "default_true", + rename = "fallback-as-work-item", + alias = "record-branch-on-failure" + )] + pub fallback_as_work_item: bool, } fn default_target_branch() -> String { @@ -447,7 +451,7 @@ impl Default for CreatePrConfig { reviewers: Vec::new(), labels: Vec::new(), work_items: Vec::new(), - record_branch_on_failure: true, + fallback_as_work_item: true, } } } @@ -1104,7 +1108,7 @@ impl Executor for CreatePrResult { warn!("Failed to create pull request: {} - {}", status, body); // Record branch info for manual recovery if enabled - if config.record_branch_on_failure { + if config.fallback_as_work_item { info!("PR creation failed, recording branch info for manual recovery"); let fallback_description = format!( "## Pull Request Creation Failed\n\n\ @@ -1118,7 +1122,7 @@ impl Executor for CreatePrResult { ---\n\ *To create the PR manually, merge branch `{}` into `{}`.*", self.source_branch, target_branch, self.repository, - status, truncate_error_body(&body, 500), + status, sanitize_text(truncate_error_body(&body, 500)), self.description, self.source_branch, target_branch ); @@ -1730,6 +1734,11 @@ fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[St if matches!(current_kind, SegmentKind::Diff) { if let Some(rest) = line.strip_prefix("+++ b/") { current_path = Some(rest.trim().trim_matches('"').to_string()); + } else if current_path.is_none() { + // For deleted files, +++ is /dev/null; use --- a/ as fallback + if let Some(rest) = line.strip_prefix("--- a/") { + current_path = Some(rest.trim().trim_matches('"').to_string()); + } } } current.push_str(line); @@ -1847,7 +1856,7 @@ mod tests { assert_eq!(config.protected_files, ProtectedFiles::Blocked); assert!(config.excluded_files.is_empty()); assert!(config.allowed_labels.is_empty()); - assert!(config.record_branch_on_failure); + 
assert!(config.fallback_as_work_item); } #[test] From 2d12eab4500769e534d545c4b3c121911ca48fb8 Mon Sep 17 00:00:00 2001 From: James Devine Date: Mon, 13 Apr 2026 12:59:23 +0100 Subject: [PATCH 10/28] fix(create-pr): remove record-branch-on-failure serde alias Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/tools/create_pr.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index edb5ef9..ccebeaf 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -402,11 +402,7 @@ pub struct CreatePrConfig { /// Whether to record branch info in failure data when PR creation fails (default: true). /// When enabled, the failure response includes the pushed branch name and target branch /// so operators can manually create the PR. No work item is created automatically. - #[serde( - default = "default_true", - rename = "fallback-as-work-item", - alias = "record-branch-on-failure" - )] + #[serde(default = "default_true", rename = "fallback-as-work-item")] pub fallback_as_work_item: bool, } From b6d1a334aef64f8e1fa9cc052374943e3ceaac7a Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 01:16:25 +0100 Subject: [PATCH 11/28] fix(create-pr): address review round 9 feedback - Sanitize agent_labels in sanitize_fields() for defense-in-depth - Add warn! 
log when synthetic commit reset fails before returning error - Add conflict marker check after git apply --3way fallback to prevent pushing unresolved conflicts to ADO - Sanitize self.description in fallback failure data construction - Rename fallback-as-work-item to fallback-record-branch (clearer name) - Root-commit fallback now only applies to single-commit repos; fails with clear error for multi-commit repos with no remote tracking branch Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 26 ++++++++++++++++++++------ src/tools/create_pr.rs | 31 +++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/src/mcp.rs b/src/mcp.rs index 380e8b7..9b5c84c 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -341,6 +341,11 @@ impl SafeOutputs { })?; if !reset_output.status.success() { + warn!( + "WARNING: synthetic commit {} was not cleaned up; \ + run `git reset HEAD~1` to restore state", + head_sha + ); return Err(anyhow_to_mcp_error(anyhow::anyhow!( "git reset HEAD~1 failed (synthetic commit {} may remain): {}", head_sha, @@ -439,9 +444,8 @@ impl SafeOutputs { } } - // Fallback: find the root commit (works for single-commit repos where HEAD~1 - // doesn't exist) - warn!("Could not find merge-base with origin, falling back to root commit. This may produce a very large patch if the repository has a long history."); + // Fallback: find the root commit. Only valid for single-commit repos where HEAD~1 + // doesn't exist. For repos with longer history this would produce enormous patches. 
let root_output = Command::new("git") .args(["rev-list", "--max-parents=0", "HEAD"]) .current_dir(git_dir) @@ -450,14 +454,24 @@ impl SafeOutputs { .ok(); if let Some(out) = root_output.filter(|o| o.status.success()) { - let sha = String::from_utf8_lossy(&out.stdout).trim().to_string(); - if !sha.is_empty() { + let output_str = String::from_utf8_lossy(&out.stdout).to_string(); + let roots: Vec<&str> = output_str.trim().lines().collect(); + + // Only use root commit fallback for repos with a single commit + if roots.len() == 1 { + let sha = roots[0].to_string(); + warn!( + "Could not find merge-base with origin; using root commit {} \ + (single-commit repository)", + sha + ); return Ok(sha); } } Err(anyhow_to_mcp_error(anyhow::anyhow!( - "Cannot determine diff base: no remote tracking branch and no commits found" + "Cannot determine diff base: no remote tracking branch found. \ + Push a tracking branch or ensure origin/HEAD is configured." ))) } diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index ccebeaf..dcc1391 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -267,6 +267,9 @@ impl Sanitize for CreatePrResult { fn sanitize_fields(&mut self) { self.title = sanitize_text(&self.title); self.description = sanitize_text(&self.description); + for label in &mut self.agent_labels { + *label = sanitize_text(label); + } } } @@ -402,8 +405,8 @@ pub struct CreatePrConfig { /// Whether to record branch info in failure data when PR creation fails (default: true). /// When enabled, the failure response includes the pushed branch name and target branch /// so operators can manually create the PR. No work item is created automatically. 
- #[serde(default = "default_true", rename = "fallback-as-work-item")] - pub fallback_as_work_item: bool, + #[serde(default = "default_true", rename = "fallback-record-branch")] + pub fallback_record_branch: bool, } fn default_target_branch() -> String { @@ -447,7 +450,7 @@ impl Default for CreatePrConfig { reviewers: Vec::new(), labels: Vec::new(), work_items: Vec::new(), - fallback_as_work_item: true, + fallback_record_branch: true, } } } @@ -846,6 +849,22 @@ impl Executor for CreatePrResult { return Ok(ExecutionResult::failure(err_msg)); } debug!("Patch applied with git apply --3way fallback"); + + // Check for unresolved conflict markers — git apply --3way can succeed + // while leaving conflict markers in files + let conflict_check = Command::new("git") + .args(["diff", "--check"]) + .current_dir(&worktree_path) + .output() + .await + .context("Failed to run git diff --check")?; + + if !conflict_check.status.success() { + let err_msg = + "Patch applied with unresolved conflicts — resolve manually".to_string(); + warn!("{}", err_msg); + return Ok(ExecutionResult::failure(err_msg)); + } } else { debug!("Patch applied successfully with git am"); } @@ -1104,7 +1123,7 @@ impl Executor for CreatePrResult { warn!("Failed to create pull request: {} - {}", status, body); // Record branch info for manual recovery if enabled - if config.fallback_as_work_item { + if config.fallback_record_branch { info!("PR creation failed, recording branch info for manual recovery"); let fallback_description = format!( "## Pull Request Creation Failed\n\n\ @@ -1119,7 +1138,7 @@ impl Executor for CreatePrResult { *To create the PR manually, merge branch `{}` into `{}`.*", self.source_branch, target_branch, self.repository, status, sanitize_text(truncate_error_body(&body, 500)), - self.description, + sanitize_text(&self.description), self.source_branch, target_branch ); return Ok(ExecutionResult::failure_with_data( @@ -1852,7 +1871,7 @@ mod tests { assert_eq!(config.protected_files, 
 ProtectedFiles::Blocked); assert!(config.excluded_files.is_empty()); assert!(config.allowed_labels.is_empty()); - assert!(config.fallback_as_work_item); + assert!(config.fallback_record_branch); } #[test] From 629f3010903fbb3f0d88c4f14fab95d625615d3e Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 01:26:01 +0100 Subject: [PATCH 12/28] fix(create-pr): address review round 10 feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Check git status exit code before inspecting stdout for uncommitted changes — prevents silent wrong behavior on broken repos - Remove redundant git reset HEAD after mixed reset (--mixed already restores untracked files to untracked state) - Remove dead condition in normalize_glob_pattern: starts_with("**/") implies contains('/'), so the starts_with disjunct is redundant Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 16 +++++++--------- src/tools/create_pr.rs | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/mcp.rs b/src/mcp.rs index 9b5c84c..0a02e9e 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -245,6 +245,13 @@ impl SafeOutputs { anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git status: {}", e)) })?; + if !status_output.status.success() { + return Err(anyhow_to_mcp_error(anyhow::anyhow!( + "git status failed: {}", + String::from_utf8_lossy(&status_output.stderr) + ))); + } + let has_uncommitted = !String::from_utf8_lossy(&status_output.stdout) .trim() .is_empty(); @@ -352,15 +359,6 @@ impl SafeOutputs { String::from_utf8_lossy(&reset_output.stderr) ))); } - - // Unstage everything so the index matches the pre-generate_patch state. - // `git reset --mixed` leaves previously-untracked files as staged new files; - // this reset restores them to untracked.
- let _ = Command::new("git") - .args(["reset", "HEAD", "--quiet"]) - .current_dir(&git_dir) - .output() - .await; } // Now check the format-patch result after cleanup diff --git a/src/tools/create_pr.rs b/src/tools/create_pr.rs index dcc1391..7267cdd 100644 --- a/src/tools/create_pr.rs +++ b/src/tools/create_pr.rs @@ -1814,7 +1814,7 @@ fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[St /// Patterns without a `/` are prefixed with `**/` so that e.g. `*.lock` matches /// `subdir/Cargo.lock`, not just root-level files. fn normalize_glob_pattern(pattern: &str) -> String { - if pattern.contains('/') || pattern.starts_with("**/") { + if pattern.contains('/') { pattern.to_string() } else { format!("**/{}", pattern) From 26dbb0aac67b881dc8c3b6dc73715d7c95263b3f Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 12:55:50 +0100 Subject: [PATCH 13/28] =?UTF-8?q?fix(create-pr):=20gh-aw=20parity=20?= =?UTF-8?q?=E2=80=94=20bugs,=20security,=20and=20missing=20features?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugs: - Fix validate_patch_paths rename/copy parsing to use splitn(3) instead of split_whitespace().last(), preventing security bypass for paths with spaces. Add test for this scenario. - Add branch collision handling: query ADO refs API before push, retry with new random suffix if branch already exists. - Fix misleading comment in mcp.rs that referenced git stash when code uses git reset --mixed. Features: - Record base_commit SHA at patch generation time in NDJSON. Stage 2 uses recorded SHA when available, falling back to ADO API resolution for backward compatibility. - Add protected manifest basenames for Bun, Deno, Elixir, Haskell, Python (uv), and additional .NET packages. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 2 +- src/mcp.rs | 13 +-- src/safeoutputs/create_pr.rs | 165 ++++++++++++++++++++++++++++------- 3 files changed, 141 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 213955e..1480372 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1579,7 +1579,7 @@ dependencies = [ "http-body-util", "paste", "pin-project-lite", - "rand 0.9.2", + "rand 0.9.4", "rmcp-macros", "schemars", "serde", diff --git a/src/mcp.rs b/src/mcp.rs index f06c503..31d0b2f 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -176,7 +176,7 @@ impl SafeOutputs { /// Generate a git diff patch from a specific directory /// If `repository` is Some, it's treated as a subdirectory of bounding_directory /// If `repository` is None or "self", use bounding_directory directly - async fn generate_patch(&self, repository: Option<&str>) -> Result { + async fn generate_patch(&self, repository: Option<&str>) -> Result<(String, String), McpError> { use tokio::process::Command; // Determine the git directory based on repository @@ -324,8 +324,10 @@ impl SafeOutputs { .await; // Always undo the temporary commit before propagating errors. - // We capture the original index state via `git stash` to restore it exactly, - // since `git reset --mixed` would leave previously-untracked files staged. + // Reset the synthetic commit, restoring changes to the working tree. + // `git reset --mixed HEAD~1` undoes the commit and resets the index + // to the parent tree, which leaves modified files as unstaged changes + // and previously-untracked files as untracked again. 
if made_synthetic_commit { // Capture the synthetic commit SHA for diagnostics let head_sha = Command::new("git") @@ -375,7 +377,7 @@ impl SafeOutputs { let patch = String::from_utf8_lossy(&format_patch_output.stdout).to_string(); - Ok(patch) + Ok((patch, merge_base)) } /// Generate a unique patch filename @@ -614,7 +616,7 @@ Use 'self' for the pipeline's own repository, or a repository alias from the che // Generate the patch from current git changes in the specified repository debug!("Generating patch for repository: {}", repository); - let patch_content = self.generate_patch(Some(repository)).await?; + let (patch_content, merge_base) = self.generate_patch(Some(repository)).await?; if patch_content.trim().is_empty() { warn!("No changes detected in repository '{}'", repository); @@ -654,6 +656,7 @@ Use 'self' for the pipeline's own repository, or a repository alias from the che patch_filename, repository.to_string(), sanitized.labels, + Some(merge_base), ); // Write to safe outputs diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 2d42df2..caae0ef 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -53,6 +53,24 @@ const PROTECTED_MANIFEST_BASENAMES: &[&str] = &[ // Rust "cargo.toml", "cargo.lock", + // Bun + "bun.lockb", + "bunfig.toml", + // Deno + "deno.json", + "deno.jsonc", + "deno.lock", + // Elixir + "mix.exs", + "mix.lock", + // Haskell + "stack.yaml", + "stack.yaml.lock", + // Python (uv) + "uv.lock", + // .NET (additional) + "nuget.config", + "directory.packages.props", // Docker / container "dockerfile", "docker-compose.yml", @@ -256,6 +274,9 @@ pub struct CreatePrResult { /// Agent-provided labels (validated against allowed-labels at execution time) #[serde(default)] pub agent_labels: Vec, + /// Base commit SHA from patch generation (for reliable Stage 2 application) + #[serde(skip_serializing_if = "Option::is_none")] + pub base_commit: Option, } impl crate::safeoutputs::ToolResult for 
CreatePrResult { @@ -282,6 +303,7 @@ impl CreatePrResult { patch_file: String, repository: String, agent_labels: Vec, + base_commit: Option, ) -> Self { Self { name: Self::NAME.to_string(), @@ -291,6 +313,7 @@ impl CreatePrResult { patch_file, repository, agent_labels, + base_commit, } } } @@ -687,7 +710,8 @@ impl Executor for CreatePrResult { // Use target branch from config let target_branch = &config.target_branch; - let source_ref = format!("refs/heads/{}", self.source_branch); + let mut source_branch = self.source_branch.clone(); + let mut source_ref = format!("refs/heads/{}", source_branch); let target_ref = format!("refs/heads/{}", target_branch); debug!("Source ref: {}, Target ref: {}", source_ref, target_ref); @@ -774,9 +798,9 @@ impl Executor for CreatePrResult { }; // Create and checkout the source branch in the worktree - debug!("Creating source branch: {}", self.source_branch); + debug!("Creating source branch: {}", source_branch); let checkout_output = Command::new("git") - .args(["checkout", "-b", &self.source_branch]) + .args(["checkout", "-b", &source_branch]) .current_dir(&worktree_path) .output() .await @@ -957,27 +981,42 @@ impl Executor for CreatePrResult { ); debug!("Refs URL: {}", refs_url); - let refs_response = client - .get(&refs_url) - .basic_auth("", Some(token)) - .send() - .await - .context("Failed to get target branch ref")?; + // Resolve the base commit for the push. + // Prefer the merge-base SHA recorded at patch generation time (Stage 1) so the + // patch is applied against the exact commit it was created from. Fall back to + // querying the ADO refs API when the field is absent (backward compat with old + // NDJSON entries). 
+ let base_commit: String = if let Some(ref recorded) = self.base_commit { + info!( + "Using recorded base_commit from Stage 1: {}", + recorded + ); + recorded.clone() + } else { + debug!("No recorded base_commit — resolving from ADO refs API"); + let refs_response = client + .get(&refs_url) + .basic_auth("", Some(token)) + .send() + .await + .context("Failed to get target branch ref")?; - if !refs_response.status().is_success() { - let status = refs_response.status(); - let body = refs_response.text().await.unwrap_or_default(); - warn!("Failed to get target branch ref: {} - {}", status, body); - return Ok(ExecutionResult::failure(format!( - "Failed to get target branch ref: {} - {}", - status, body - ))); - } + if !refs_response.status().is_success() { + let status = refs_response.status(); + let body = refs_response.text().await.unwrap_or_default(); + warn!("Failed to get target branch ref: {} - {}", status, body); + return Ok(ExecutionResult::failure(format!( + "Failed to get target branch ref: {} - {}", + status, body + ))); + } - let refs_data: serde_json::Value = refs_response.json().await?; - let base_commit = refs_data["value"][0]["objectId"] - .as_str() - .context("Could not find target branch commit")?; + let refs_data: serde_json::Value = refs_response.json().await?; + let resolved = refs_data["value"][0]["objectId"] + .as_str() + .context("Could not find target branch commit")?; + resolved.to_string() + }; debug!("Base commit: {}", base_commit); info!( @@ -985,6 +1024,45 @@ impl Executor for CreatePrResult { target_branch, base_commit ); + // Check if the source branch already exists (e.g. 
from a retry or previous run) + let check_ref_url = format!( + "{}{}/_apis/git/repositories/{}/refs?filter=heads/{}&api-version=7.1", + org_url, project, repo_id, source_branch + ); + debug!("Checking if source branch exists: {}", check_ref_url); + + let check_ref_response = client + .get(&check_ref_url) + .basic_auth("", Some(token)) + .send() + .await + .context("Failed to check source branch existence")?; + + if check_ref_response.status().is_success() { + let check_data: serde_json::Value = check_ref_response.json().await?; + let refs = check_data["value"].as_array(); + if refs.is_some_and(|r| !r.is_empty()) { + warn!( + "Branch '{}' already exists, generating new suffix", + source_branch + ); + use rand::RngExt; + let new_suffix: u32 = rand::rng().random(); + let new_hex = format!("{:08x}", new_suffix); + // Branch format is "agent/-"; replace the trailing hex + source_branch = if let Some(pos) = source_branch.rfind('-') { + format!("{}-{}", &source_branch[..pos], new_hex) + } else { + format!("{}-{}", source_branch, new_hex) + }; + source_ref = format!("refs/heads/{}", source_branch); + info!( + "Renamed source branch to '{}'", + source_branch + ); + } + } + // Push changes via ADO API (this creates the branch and commits in one call) info!("Pushing changes to ADO"); let push_url = format!( @@ -1136,19 +1214,19 @@ impl Executor for CreatePrResult { {}\n\n\ ---\n\ *To create the PR manually, merge branch `{}` into `{}`.*", - self.source_branch, target_branch, self.repository, + source_branch, target_branch, self.repository, status, sanitize_text(truncate_error_body(&body, 500)), sanitize_text(&self.description), - self.source_branch, target_branch + source_branch, target_branch ); return Ok(ExecutionResult::failure_with_data( format!( "Failed to create pull request: {} - {}. 
Branch '{}' was pushed — create the PR manually.", - status, body, self.source_branch, + status, body, source_branch, ), serde_json::json!({ "fallback": "branch-recorded", - "branch": self.source_branch, + "branch": source_branch, "target_branch": target_branch, "repository": self.repository, "description": fallback_description @@ -1271,7 +1349,7 @@ impl Executor for CreatePrResult { info!( "PR #{} created successfully: {} -> {}{}", - pr_id, self.source_branch, target_branch, + pr_id, source_branch, target_branch, if config.draft { " (draft)" } else { "" } ); @@ -1280,7 +1358,7 @@ impl Executor for CreatePrResult { serde_json::json!({ "pull_request_id": pr_id, "url": pr_web_url, - "source_branch": self.source_branch, + "source_branch": source_branch, "target_branch": target_branch, "draft": config.draft }), @@ -1514,11 +1592,10 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { validate_single_path(clean_path)?; } } - } else if line.starts_with("rename from ") || line.starts_with("rename to ") { - let path = line.split_whitespace().last().unwrap_or(""); - validate_single_path(path)?; - } else if line.starts_with("copy from ") || line.starts_with("copy to ") { - let path = line.split_whitespace().last().unwrap_or(""); + } else if line.starts_with("rename from ") || line.starts_with("rename to ") + || line.starts_with("copy from ") || line.starts_with("copy to ") + { + let path = line.splitn(3, ' ').nth(2).unwrap_or("").trim_matches('"'); validate_single_path(path)?; } } @@ -1933,6 +2010,28 @@ new file mode 100755 assert!(validate_patch_paths(patch).is_err()); } + #[test] + fn test_validate_patch_paths_rename_with_spaces() { + // "rename from some dir/../.git/config" must be rejected. + // Previously split_whitespace().last() would only see "dir/../.git/config" + // (or just "config"), missing the traversal in the full path. 
+ let patch = "diff --git a/old b/new\n\ + rename from some dir/../.git/config\n\ + rename to new name\n"; + let result = validate_patch_paths(patch); + assert!(result.is_err(), "rename with spaces and traversal should be rejected"); + + // Also verify copy paths with spaces are validated correctly + let patch_copy = "diff --git a/old b/new\n\ + copy from some dir/../.git/config\n\ + copy to new name\n"; + let result_copy = validate_patch_paths(patch_copy); + assert!( + result_copy.is_err(), + "copy with spaces and traversal should be rejected" + ); + } + #[test] fn test_validate_single_path_valid() { assert!(validate_single_path("src/main.rs").is_ok()); From e9d6d2ed64825fadb0c96d98520f8e29151bda91 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 13:17:29 +0100 Subject: [PATCH 14/28] fix(create-pr): truncate error body in failure message, document base_commit tradeoff - Truncate raw ADO response body in failure_with_data message field (was only truncated in the data.description field) - Add detailed doc comment on base_commit explaining merge-base vs target branch HEAD tradeoff Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pr.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index caae0ef..6cd1448 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -274,7 +274,16 @@ pub struct CreatePrResult { /// Agent-provided labels (validated against allowed-labels at execution time) #[serde(default)] pub agent_labels: Vec, - /// Base commit SHA from patch generation (for reliable Stage 2 application) + /// Base commit SHA recorded at patch generation time (merge-base of HEAD and + /// the upstream branch). When present, Stage 2 uses this as the parent commit + /// for the ADO Push API, ensuring the patch applies cleanly even if the target + /// branch has advanced since the agent ran. 
Falls back to resolving the live + /// target branch HEAD via the ADO refs API when absent (backward compatibility). + /// + /// Note: this is the merge-base, not the target branch HEAD. The PR diff in ADO + /// compares file states and displays correctly regardless; however, the branch + /// history shows a parent older than current main. This is normal for topic + /// branches and is resolved when the PR is merged. #[serde(skip_serializing_if = "Option::is_none")] pub base_commit: Option, } @@ -1222,7 +1231,7 @@ impl Executor for CreatePrResult { return Ok(ExecutionResult::failure_with_data( format!( "Failed to create pull request: {} - {}. Branch '{}' was pushed — create the PR manually.", - status, body, source_branch, + status, truncate_error_body(&body, 500), source_branch, ), serde_json::json!({ "fallback": "branch-recorded", From 175a85a82b00e30b19a1908f78318b05aa1496d3 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 13:30:04 +0100 Subject: [PATCH 15/28] fix(create-pr): root-commit fallback only for truly single-commit repos The previous check (roots.len() == 1) matched virtually any non-grafted repo since most repos have exactly one root commit regardless of total commits. Now verifies the repo truly has only one commit via git rev-list --count HEAD before using the root as merge-base. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/src/mcp.rs b/src/mcp.rs index 31d0b2f..f7c41d7 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -445,7 +445,9 @@ impl SafeOutputs { } // Fallback: find the root commit. Only valid for single-commit repos where HEAD~1 - // doesn't exist. For repos with longer history this would produce enormous patches. + // doesn't exist. 
Verify the repo truly has only one commit total before + // using the root commit — most repos have exactly one root commit regardless + // of total commits (roots.len() == 1 would match virtually any repo). let root_output = Command::new("git") .args(["rev-list", "--max-parents=0", "HEAD"]) .current_dir(git_dir) @@ -457,15 +459,35 @@ impl SafeOutputs { let output_str = String::from_utf8_lossy(&out.stdout).to_string(); let roots: Vec<&str> = output_str.trim().lines().collect(); - // Only use root commit fallback for repos with a single commit if roots.len() == 1 { - let sha = roots[0].to_string(); - warn!( - "Could not find merge-base with origin; using root commit {} \ - (single-commit repository)", - sha - ); - return Ok(sha); + // Check total commit count to distinguish single-commit repos + // from normal repos that happen to have one root + let count_output = Command::new("git") + .args(["rev-list", "--count", "HEAD"]) + .current_dir(git_dir) + .output() + .await + .ok(); + + let commit_count = count_output + .filter(|o| o.status.success()) + .and_then(|o| { + String::from_utf8_lossy(&o.stdout) + .trim() + .parse::() + .ok() + }) + .unwrap_or(0); + + if commit_count <= 1 { + let sha = roots[0].to_string(); + warn!( + "Could not find merge-base with origin; using root commit {} \ + (single-commit repository)", + sha + ); + return Ok(sha); + } } } From 6a64f8071a5fb5261769e16176d8f1297797eab3 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 13:46:37 +0100 Subject: [PATCH 16/28] fix(create-pr): replace git diff --check with targeted conflict marker scan git diff --check exits non-zero for trailing whitespace in addition to conflict markers, causing false-positive rejections of valid patches. Replace with git grep for conflict marker patterns (<<<<<<< / ======= / >>>>>>>) which only detects actual merge conflicts. Also clarify that the local worktree branch name is independent of the remote branch name (ADO push is REST-only). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pr.rs | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 6cd1448..7c68743 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -806,7 +806,10 @@ impl Executor for CreatePrResult { worktree_path: worktree_path.clone(), }; - // Create and checkout the source branch in the worktree + // Create and checkout a local branch in the worktree for patch application. + // Note: this local branch name may differ from the final remote branch name + // if a collision is detected later — the ADO push is REST-only, so the local + // branch name is not used for the remote ref. debug!("Creating source branch: {}", source_branch); let checkout_output = Command::new("git") .args(["checkout", "-b", &source_branch]) @@ -883,18 +886,24 @@ impl Executor for CreatePrResult { } debug!("Patch applied with git apply --3way fallback"); - // Check for unresolved conflict markers — git apply --3way can succeed - // while leaving conflict markers in files + // Scan for unresolved conflict markers in working tree files. + // We use git grep instead of git diff --check because diff --check + // also flags trailing whitespace, producing false positives. 
let conflict_check = Command::new("git") - .args(["diff", "--check"]) + .args(["grep", "-l", "-P", r"^[<=>]{7}( |$)"]) .current_dir(&worktree_path) .output() .await - .context("Failed to run git diff --check")?; + .context("Failed to run git grep for conflict markers")?; - if !conflict_check.status.success() { - let err_msg = - "Patch applied with unresolved conflicts — resolve manually".to_string(); + // git grep exits 0 when matches are found, 1 when no matches + if conflict_check.status.success() { + let conflicting_files = + String::from_utf8_lossy(&conflict_check.stdout).trim().to_string(); + let err_msg = format!( + "Patch applied with unresolved conflict markers in: {}", + conflicting_files + ); warn!("{}", err_msg); return Ok(ExecutionResult::failure(err_msg)); } From 9ea04511d76bf70c78705467fda795c8128bcac4 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 14:01:30 +0100 Subject: [PATCH 17/28] fix(create-pr): use ERE instead of PCRE for conflict marker detection Replace git grep -P (PCRE, not universally available) with -E (ERE, always available) for conflict marker scanning. On Alpine/minimal images without PCRE, -P would silently fail and miss conflict markers. Also sanitize truncated ADO error body in failure message for defense-in-depth consistency. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 7c68743..159032e 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -890,7 +890,7 @@ impl Executor for CreatePrResult { // We use git grep instead of git diff --check because diff --check // also flags trailing whitespace, producing false positives. 
let conflict_check = Command::new("git") - .args(["grep", "-l", "-P", r"^[<=>]{7}( |$)"]) + .args(["grep", "-l", "-E", r"^(<<<<<<<|=======|>>>>>>>)"]) .current_dir(&worktree_path) .output() .await @@ -1240,7 +1240,7 @@ impl Executor for CreatePrResult { return Ok(ExecutionResult::failure_with_data( format!( "Failed to create pull request: {} - {}. Branch '{}' was pushed — create the PR manually.", - status, truncate_error_body(&body, 500), source_branch, + status, sanitize_text(truncate_error_body(&body, 500)), source_branch, ), serde_json::json!({ "fallback": "branch-recorded", From 689024f72d69d676ba3108882296cf4ab137f46a Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 14:13:31 +0100 Subject: [PATCH 18/28] fix(create-pr): remove git from default bash allowlist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git exposes subcommands like push, remote add, fetch, credential, and bundle which could be used to exfiltrate data or interact with unintended remotes inside the AWF container. gh-aw does not include git in its default bash allowlist — agents that need git should explicitly opt in via tools.bash: ["git", ...] in front matter. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/common.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index a1e098f..ff20b52 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -382,9 +382,9 @@ pub fn validate_checkout_list(repositories: &[Repository], checkout: &[String]) Ok(()) } -/// Default bash commands allowed for agents (matches gh-aw defaults + yq + git) +/// Default bash commands allowed for agents (matches gh-aw defaults + yq) const DEFAULT_BASH_COMMANDS: &[&str] = &[ - "cat", "date", "echo", "git", "grep", "head", "ls", "pwd", "sort", "tail", "uniq", "wc", "yq", + "cat", "date", "echo", "grep", "head", "ls", "pwd", "sort", "tail", "uniq", "wc", "yq", ]; /// Generate copilot CLI params from front matter configuration From a9875ba3fbaa6138f70558ad6e0ad932d0e13ed4 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 14:16:21 +0100 Subject: [PATCH 19/28] fix(create-pr): update tool description to match synthetic commit behavior The tool description told agents to 'git add and git commit' before calling create-pull-request, but git is not in the default bash allowlist. The synthetic commit in generate_patch() handles uncommitted changes automatically, so agents don't need git access. Updated the description to reflect this. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mcp.rs b/src/mcp.rs index f7c41d7..cec7df0 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -617,9 +617,8 @@ fields you want to update." #[tool( name = "create-pull-request", - description = "Create a new pull request to propose code changes. Before calling this tool, \ -stage and commit your changes using git add and git commit — each logical change should be its \ -own commit with a descriptive message. 
The PR will be created from your committed changes. \ + description = "Create a new pull request to propose code changes. This tool captures all \ +changes in the repository (both committed and uncommitted) and creates a PR from them. \ Use 'self' for the pipeline's own repository, or a repository alias from the checkout list." )] async fn create_pr( From 79dda3f2b584313f095951c1b674903a70cf4479 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 14:19:35 +0100 Subject: [PATCH 20/28] =?UTF-8?q?fix(create-pr):=20address=20latest=20revi?= =?UTF-8?q?ew=20=E2=80=94=20draft+auto-complete=20warning,=20index=20clean?= =?UTF-8?q?up,=20SHA=20validation,=20CODEOWNERS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Warn when draft: true and auto-complete: true are both set (mutually exclusive in ADO — auto-complete silently fails on draft PRs) - Reset index on git add -A failure to leave working tree clean - Validate base_commit SHA format (40 hex chars) before trusting Stage 1 NDJSON data - Protect docs/CODEOWNERS in addition to root CODEOWNERS Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 6 ++++++ src/safeoutputs/create_pr.rs | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/mcp.rs b/src/mcp.rs index cec7df0..3fdb8de 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -271,6 +271,12 @@ impl SafeOutputs { })?; if !add_output.status.success() { + // Reset index to clean state on failure + let _ = Command::new("git") + .args(["reset", "HEAD", "--quiet"]) + .current_dir(&git_dir) + .output() + .await; return Err(anyhow_to_mcp_error(anyhow::anyhow!( "git add -A failed: {}", String::from_utf8_lossy(&add_output.stderr) diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 159032e..b60fbcc 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -92,7 +92,7 @@ const PROTECTED_PATH_PREFIXES: &[&str] = &[ ]; 
/// Exact filenames (at repo root) that are protected by default. -const PROTECTED_EXACT_PATHS: &[&str] = &["CODEOWNERS"]; +const PROTECTED_EXACT_PATHS: &[&str] = &["CODEOWNERS", "docs/CODEOWNERS"]; /// Resolve a reviewer identifier (email, display name, or ID) to an Azure DevOps identity ID. /// @@ -525,6 +525,12 @@ impl Executor for CreatePrResult { debug!("Auto-complete: {}", config.auto_complete); debug!("Squash merge: {}", config.squash_merge); + if config.draft && config.auto_complete { + warn!( + "auto-complete cannot be set on a draft PR; set draft: false to enable auto-complete" + ); + } + // Apply title prefix if configured let effective_title = if let Some(ref prefix) = config.title_prefix { format!("{}{}", prefix, self.title) @@ -1005,6 +1011,13 @@ impl Executor for CreatePrResult { // querying the ADO refs API when the field is absent (backward compat with old // NDJSON entries). let base_commit: String = if let Some(ref recorded) = self.base_commit { + // Validate SHA format before trusting Stage 1 data + if recorded.len() != 40 || !recorded.chars().all(|c| c.is_ascii_hexdigit()) { + anyhow::bail!( + "Invalid base_commit SHA from Stage 1 NDJSON: {:?}", + recorded + ); + } info!( "Using recorded base_commit from Stage 1: {}", recorded From d95f1414bf543d6edebcd6005c94cbda20e415c4 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 14:55:55 +0100 Subject: [PATCH 21/28] refactor(create-pr): replace patch content filtering with git --exclude flags Replace the 125-line filter_excluded_files_from_patch() function (which parsed multi-commit format-patch content, tracked envelope boundaries, and did glob matching) with --exclude flags on git am / git apply. 
This matches gh-aw's approach of filtering at the git level rather than post-processing patch content, eliminating: - Complex patch segment parser (envelopes, diff blocks, path extraction) - Multi-commit envelope stripping logic - glob-match crate dependency - normalize_glob_pattern helper - 6 filter-related tests Net: -266 lines, simpler and more robust. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 7 - Cargo.toml | 1 - src/safeoutputs/create_pr.rs | 312 +++-------------------------------- 3 files changed, 27 insertions(+), 293 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1480372..c6fc080 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,7 +14,6 @@ dependencies = [ "clap", "dirs", "env_logger", - "glob-match", "inquire", "log", "percent-encoding", @@ -689,12 +688,6 @@ dependencies = [ "wasip3", ] -[[package]] -name = "glob-match" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985c9503b412198aa4197559e9a318524ebc4519c229bfa05a535828c950b9d" - [[package]] name = "h2" version = "0.4.13" diff --git a/Cargo.toml b/Cargo.toml index 7cff4dc..48a0015 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,6 @@ url = "2.5.8" axum = { version = "0.8.8", features = ["tokio"] } subtle = "2.6.1" rand = "0.10.1" -glob-match = "0.2.1" base64 = "0.22.1" [dev-dependencies] diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index b60fbcc..1d1db2a 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -635,47 +635,24 @@ impl Executor for CreatePrResult { .context("Failed to read patch file")?; debug!("Patch content size: {} bytes", patch_content.len()); - // Filter excluded files from patch content if configured. - // Note: Exclusion runs before the protection check. If a protected file matches - // an excluded-files pattern, it is silently dropped from the patch rather than - // triggering a protection error. 
This is intentional — exclusion removes the file - // from the PR entirely, so there is no security-relevant modification to block. - let patch_content = if !config.excluded_files.is_empty() { - debug!("Filtering {} excluded-files patterns from patch", config.excluded_files.len()); - let filtered = filter_excluded_files_from_patch(&patch_content, &config.excluded_files); - debug!("Patch size after exclusion: {} bytes (was {} bytes)", filtered.len(), patch_content.len()); - // Check if any actual diff blocks remain. For format-patch output the commit - // envelope (From , Subject:, etc.) persists even when all diffs are - // excluded, so we count diff --git headers rather than checking for empty string. - if count_patch_files(&filtered) == 0 { - // All files were excluded - match config.if_no_changes { - IfNoChanges::Error => { - return Ok(ExecutionResult::failure( - "All files in patch were excluded by excluded-files patterns".to_string(), - )); - } - IfNoChanges::Ignore => { - return Ok(ExecutionResult::success( - "All files in patch were excluded — nothing to do".to_string(), - )); - } - IfNoChanges::Warn => { - warn!("All files in patch were excluded by excluded-files patterns (if-no-changes: warn)"); - return Ok(ExecutionResult::warning( - "All files in patch were excluded by excluded-files patterns".to_string(), - )); - } - } - } - // Rewrite the filtered patch to the patch file - tokio::fs::write(&patch_path, &filtered) - .await - .context("Failed to write filtered patch file")?; - filtered - } else { - patch_content - }; + // Excluded files are handled via --exclude flags on git am / git apply, + // which filters them at the git level rather than post-processing patch content. + // This is the same approach used by gh-aw (via :(exclude) pathspecs). + // Note: Exclusion happens during patch application (before the protection check). 
+ // If a protected file matches an excluded-files pattern, it is silently dropped + // from the patch rather than triggering a protection error. + let exclude_args: Vec = config + .excluded_files + .iter() + .map(|p| format!("--exclude={}", p)) + .collect(); + if !exclude_args.is_empty() { + debug!( + "Will apply {} excluded-files patterns via --exclude flags", + exclude_args.len() + ); + } + let patch_content = patch_content; // Security: Validate patch paths before applying debug!("Validating patch paths for security"); @@ -851,9 +828,13 @@ impl Executor for CreatePrResult { // Apply the format-patch using git am --3way for proper conflict handling. // git am handles the email-style patch format from git format-patch and // --3way enables three-way merge for better conflict resolution. + // Excluded files are filtered via --exclude flags at the git level. debug!("Applying patch with git am --3way"); + let mut am_args: Vec = + vec!["am".into(), "--3way".into(), patch_path.to_string_lossy().into_owned()]; + am_args.extend(exclude_args.iter().cloned()); let am_output = Command::new("git") - .args(["am", "--3way", &patch_path.to_string_lossy()]) + .args(&am_args) .current_dir(&worktree_path) .output() .await @@ -875,8 +856,11 @@ impl Executor for CreatePrResult { // Fallback: try git apply (handles plain diff format for backward compatibility) debug!("Falling back to git apply --3way"); + let mut apply_args: Vec = + vec!["apply".into(), "--3way".into(), patch_path.to_string_lossy().into_owned()]; + apply_args.extend(exclude_args.iter().cloned()); let apply_output = Command::new("git") - .args(["apply", "--3way", &patch_path.to_string_lossy()]) + .args(&apply_args) .current_dir(&worktree_path) .output() .await @@ -1787,147 +1771,6 @@ fn generate_pr_footer() -> String { ) } -/// Filter out diff entries for files matching excluded-files glob patterns. -/// Splits the patch into per-file diff blocks and removes those matching any pattern. 
-/// Uses `+++ b/` lines for path extraction (robust with quoted/space-containing paths). -/// -/// Patterns without a `/` are automatically prefixed with `**/` so that e.g. `*.lock` -/// matches `subdir/Cargo.lock` (not just root-level lockfiles). -/// -/// For multi-commit format-patch output, commit envelopes and diff blocks are tracked -/// separately. Envelopes whose diffs are all excluded are stripped entirely to avoid -/// confusing `git am` with empty patch emails on some git versions. -fn filter_excluded_files_from_patch(patch_content: &str, excluded_patterns: &[String]) -> String { - if excluded_patterns.is_empty() { - return patch_content.to_string(); - } - - let normalized: Vec = excluded_patterns - .iter() - .map(|p| normalize_glob_pattern(p)) - .collect(); - - // Parse the patch into a sequence of segments: either commit envelopes or diff blocks. - // A commit envelope starts with "From " and includes everything up to the first - // "diff --git" line. A diff block starts with "diff --git" and continues until the - // next "diff --git" or "From " line. 
- struct Segment { - content: String, - kind: SegmentKind, - path: Option, - } - enum SegmentKind { - Envelope, - Diff, - } - - let mut segments: Vec = Vec::new(); - let mut current = String::new(); - let mut current_kind = SegmentKind::Envelope; - let mut current_path: Option = None; - - for line in patch_content.lines() { - if line.starts_with("diff --git") { - // Flush the previous segment - if !current.is_empty() { - segments.push(Segment { - content: std::mem::take(&mut current), - kind: current_kind, - path: current_path.take(), - }); - } - current_kind = SegmentKind::Diff; - current_path = None; - current.push_str(line); - current.push('\n'); - } else if line.starts_with("From ") && matches!(current_kind, SegmentKind::Diff) { - // A new commit envelope after a diff block - if !current.is_empty() { - segments.push(Segment { - content: std::mem::take(&mut current), - kind: current_kind, - path: current_path.take(), - }); - } - current_kind = SegmentKind::Envelope; - current_path = None; - current.push_str(line); - current.push('\n'); - } else { - if matches!(current_kind, SegmentKind::Diff) { - if let Some(rest) = line.strip_prefix("+++ b/") { - current_path = Some(rest.trim().trim_matches('"').to_string()); - } else if current_path.is_none() { - // For deleted files, +++ is /dev/null; use --- a/ as fallback - if let Some(rest) = line.strip_prefix("--- a/") { - current_path = Some(rest.trim().trim_matches('"').to_string()); - } - } - } - current.push_str(line); - current.push('\n'); - } - } - if !current.is_empty() { - segments.push(Segment { - content: current, - kind: current_kind, - path: current_path, - }); - } - - // Now filter: mark excluded diff blocks, then strip envelopes with no surviving diffs. 
- let mut keep: Vec = segments - .iter() - .map(|seg| match seg.kind { - SegmentKind::Envelope => true, // tentatively keep - SegmentKind::Diff => { - if let Some(ref p) = seg.path { - if normalized.iter().any(|pat| glob_match::glob_match(pat, p)) { - debug!("Excluding file from patch: {}", p); - return false; - } - } - true - } - }) - .collect(); - - // Strip envelopes whose subsequent diffs are all excluded. - // Walk backwards: an envelope is empty if no diff block before the next envelope is kept. - let mut i = segments.len(); - while i > 0 { - i -= 1; - if matches!(segments[i].kind, SegmentKind::Envelope) { - // Check if any diff block between this envelope and the next envelope is kept - let has_surviving_diff = (i + 1..segments.len()) - .take_while(|&j| matches!(segments[j].kind, SegmentKind::Diff)) - .any(|j| keep[j]); - if !has_surviving_diff { - keep[i] = false; - } - } - } - - let mut result = String::with_capacity(patch_content.len()); - for (seg, &kept) in segments.iter().zip(keep.iter()) { - if kept { - result.push_str(&seg.content); - } - } - result -} - -/// Normalize a glob pattern for cross-directory matching. -/// Patterns without a `/` are prefixed with `**/` so that e.g. `*.lock` matches -/// `subdir/Cargo.lock`, not just root-level files. 
-fn normalize_glob_pattern(pattern: &str) -> String { - if pattern.contains('/') { - pattern.to_string() - } else { - format!("**/{}", pattern) - } -} #[cfg(test)] mod tests { @@ -2172,107 +2015,6 @@ new file mode 100755 assert_eq!(protected.len(), 7); } - // ─── Excluded files filtering ─────────────────────────────────────────── - - #[test] - fn test_filter_excluded_files_basic() { - let patch = "diff --git a/src/main.rs b/src/main.rs\n--- a/src/main.rs\n+++ b/src/main.rs\n@@ -1 +1 @@\n-old\n+new\ndiff --git a/Cargo.lock b/Cargo.lock\n--- a/Cargo.lock\n+++ b/Cargo.lock\n@@ -1 +1 @@\n-old\n+new\n"; - let result = filter_excluded_files_from_patch(patch, &["*.lock".to_string()]); - assert!(result.contains("src/main.rs")); - assert!(!result.contains("Cargo.lock")); - } - - #[test] - fn test_filter_excluded_files_nested_glob() { - // *.lock should match subdir/Cargo.lock thanks to auto-prepended **/ - let patch = "diff --git a/src/main.rs b/src/main.rs\n--- a/src/main.rs\n+++ b/src/main.rs\n@@ -1 +1 @@\n-old\n+new\ndiff --git a/services/api/package-lock.json b/services/api/package-lock.json\n--- a/services/api/package-lock.json\n+++ b/services/api/package-lock.json\n@@ -1 +1 @@\n-old\n+new\n"; - let result = - filter_excluded_files_from_patch(patch, &["package-lock.json".to_string()]); - assert!(result.contains("src/main.rs")); - assert!(!result.contains("package-lock.json")); - } - - #[test] - fn test_filter_excluded_files_multi_commit_preserves_envelopes() { - // Multi-commit format-patch: commit envelopes must be preserved even - // when a diff block between them is excluded. 
- let patch = "\ -From abc123 Mon Sep 17 00:00:00 2001\n\ -Subject: [PATCH 1/2] First commit\n\ ----\n\ -diff --git a/file1.rs b/file1.rs\n\ ---- a/file1.rs\n\ -+++ b/file1.rs\n\ -@@ -1 +1 @@\n\ --old\n\ -+new\n\ -\n\ -From def456 Mon Sep 17 00:00:00 2001\n\ -Subject: [PATCH 2/2] Second commit\n\ ----\n\ -diff --git a/file2.rs b/file2.rs\n\ ---- a/file2.rs\n\ -+++ b/file2.rs\n\ -@@ -1 +1 @@\n\ --old\n\ -+new\n"; - // Exclude file1.rs — commit 2 envelope and file2 diff must survive. - // Commit 1 envelope is stripped since all its diffs were excluded. - let result = filter_excluded_files_from_patch(patch, &["file1.rs".to_string()]); - assert!(!result.contains("file1.rs"), "file1.rs should be excluded"); - assert!(result.contains("From def456"), "commit 2 envelope must survive"); - assert!(result.contains("file2.rs"), "file2.rs should remain"); - assert!(!result.contains("From abc123"), "commit 1 envelope should be stripped (no surviving diffs)"); - } - - #[test] - fn test_filter_excluded_files_multi_commit_all_excluded() { - // When all diffs across all commits are excluded, the result should be empty - let patch = "\ -From abc123 Mon Sep 17 00:00:00 2001\n\ -Subject: [PATCH 1/2] First commit\n\ ----\n\ -diff --git a/file1.lock b/file1.lock\n\ ---- a/file1.lock\n\ -+++ b/file1.lock\n\ -@@ -1 +1 @@\n\ --old\n\ -+new\n\ -\n\ -From def456 Mon Sep 17 00:00:00 2001\n\ -Subject: [PATCH 2/2] Second commit\n\ ----\n\ -diff --git a/file2.lock b/file2.lock\n\ ---- a/file2.lock\n\ -+++ b/file2.lock\n\ -@@ -1 +1 @@\n\ --old\n\ -+new\n"; - let result = filter_excluded_files_from_patch(patch, &["*.lock".to_string()]); - // Both envelopes and their diffs should be stripped - assert!(!result.contains("From abc123")); - assert!(!result.contains("From def456")); - assert_eq!(count_patch_files(&result), 0); - } - - #[test] - fn test_normalize_glob_pattern() { - // Patterns without / get **/ prefix - assert_eq!(normalize_glob_pattern("*.lock"), "**/*.lock"); - 
assert_eq!(normalize_glob_pattern("Cargo.toml"), "**/Cargo.toml"); - // Patterns with / stay as-is - assert_eq!(normalize_glob_pattern("src/*.rs"), "src/*.rs"); - // Already-prefixed patterns stay as-is - assert_eq!(normalize_glob_pattern("**/*.lock"), "**/*.lock"); - } - - #[test] - fn test_filter_excluded_files_empty_patterns() { - let patch = "diff --git a/file.txt b/file.txt\n+content\n"; - let result = filter_excluded_files_from_patch(patch, &[]); - assert_eq!(result, patch); - } - // ─── Extract paths from patch ─────────────────────────────────────────── #[test] From 15ff03f0fdae68f1e61203f8d14ba5c3dc2014d7 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 15:09:41 +0100 Subject: [PATCH 22/28] fix(create-pr): fix --exclude flag ordering and protected-files exclusion bypass - Place --exclude flags before the patch path in git am / git apply (POSIX: options after positional args are treated as file arguments) - Filter excluded-files patterns from patch paths before the protected-files check, so excluded files don't trigger protection errors (matching the documented behavior) - Add glob_match_simple() helper for excluded-files path matching Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pr.rs | 65 ++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 1d1db2a..0a9b08b 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -665,8 +665,19 @@ impl Executor for CreatePrResult { } debug!("Patch path validation passed"); - // Extract file paths from patch for validation - let patch_paths = extract_paths_from_patch(&patch_content); + // Extract file paths from patch for validation. 
+ // Filter out excluded files before the protection check — if a protected file + // matches an excluded-files pattern, it will be excluded from the patch by + // git am/apply --exclude and should not trigger a protection error. + let patch_paths: Vec = extract_paths_from_patch(&patch_content) + .into_iter() + .filter(|p| { + !config.excluded_files.iter().any(|pat| { + // Match the same way git --exclude does: pattern matches path suffix + p.ends_with(pat) || glob_match_simple(pat, p) + }) + }) + .collect(); // Security: File protection check if config.protected_files != ProtectedFiles::Allowed { @@ -830,9 +841,9 @@ impl Executor for CreatePrResult { // --3way enables three-way merge for better conflict resolution. // Excluded files are filtered via --exclude flags at the git level. debug!("Applying patch with git am --3way"); - let mut am_args: Vec = - vec!["am".into(), "--3way".into(), patch_path.to_string_lossy().into_owned()]; + let mut am_args: Vec = vec!["am".into(), "--3way".into()]; am_args.extend(exclude_args.iter().cloned()); + am_args.push(patch_path.to_string_lossy().into_owned()); let am_output = Command::new("git") .args(&am_args) .current_dir(&worktree_path) @@ -856,9 +867,9 @@ impl Executor for CreatePrResult { // Fallback: try git apply (handles plain diff format for backward compatibility) debug!("Falling back to git apply --3way"); - let mut apply_args: Vec = - vec!["apply".into(), "--3way".into(), patch_path.to_string_lossy().into_owned()]; + let mut apply_args: Vec = vec!["apply".into(), "--3way".into()]; apply_args.extend(exclude_args.iter().cloned()); + apply_args.push(patch_path.to_string_lossy().into_owned()); let apply_output = Command::new("git") .args(&apply_args) .current_dir(&worktree_path) @@ -1625,6 +1636,48 @@ fn truncate_error_body(body: &str, max_len: usize) -> &str { } } +/// Simple glob matching for excluded-files patterns. 
+/// Supports `*` (match any sequence within a path segment) and leading `**/` +/// (match any directory prefix). Patterns without `/` are treated as basename +/// matches (e.g., `*.lock` matches `subdir/Cargo.lock`). +fn glob_match_simple(pattern: &str, path: &str) -> bool { + let (pattern, path) = if !pattern.contains('/') { + // Basename-only pattern: match against the last path component + let basename = path.rsplit('/').next().unwrap_or(path); + (pattern.to_string(), basename.to_string()) + } else if let Some(rest) = pattern.strip_prefix("**/") { + // **/foo matches foo at any depth + (rest.to_string(), path.to_string()) + } else { + (pattern.to_string(), path.to_string()) + }; + + // Simple wildcard matching: * matches any sequence of non-/ characters + let parts: Vec<&str> = pattern.split('*').collect(); + if parts.len() == 1 { + return path == pattern; + } + let mut remaining = path.as_str(); + for (i, part) in parts.iter().enumerate() { + if part.is_empty() { + continue; + } + if i == 0 { + if !remaining.starts_with(part) { + return false; + } + remaining = &remaining[part.len()..]; + } else if i == parts.len() - 1 { + return remaining.ends_with(part); + } else if let Some(pos) = remaining.find(part) { + remaining = &remaining[pos + part.len()..]; + } else { + return false; + } + } + true +} + /// Validate a single file path for security issues fn validate_single_path(path: &str) -> anyhow::Result<()> { // Check for null bytes From cf8b2669a10280f6434e5b07a0b5d441cf025e79 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 16:01:59 +0100 Subject: [PATCH 23/28] fix(create-pr): fix glob_match_simple for **/ patterns, remove dead rebinding The **/ prefix strip was followed by exact match against the full path, so **/Cargo.lock would not match subdir/Cargo.lock. Now tries matching against every directory suffix of the path. Also removed dead let patch_content = patch_content; rebinding. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pr.rs | 41 ++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 0a9b08b..fcbf32c 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -652,7 +652,6 @@ impl Executor for CreatePrResult { exclude_args.len() ); } - let patch_content = patch_content; // Security: Validate patch paths before applying debug!("Validating patch paths for security"); @@ -1641,23 +1640,39 @@ fn truncate_error_body(body: &str, max_len: usize) -> &str { /// (match any directory prefix). Patterns without `/` are treated as basename /// matches (e.g., `*.lock` matches `subdir/Cargo.lock`). fn glob_match_simple(pattern: &str, path: &str) -> bool { - let (pattern, path) = if !pattern.contains('/') { + if !pattern.contains('/') { // Basename-only pattern: match against the last path component let basename = path.rsplit('/').next().unwrap_or(path); - (pattern.to_string(), basename.to_string()) - } else if let Some(rest) = pattern.strip_prefix("**/") { - // **/foo matches foo at any depth - (rest.to_string(), path.to_string()) - } else { - (pattern.to_string(), path.to_string()) - }; - - // Simple wildcard matching: * matches any sequence of non-/ characters + return wildcard_match(pattern, basename); + } + + if let Some(rest) = pattern.strip_prefix("**/") { + // **/foo matches foo at any depth: try matching against the full path + // and every directory suffix (e.g., "a/b/c" → "a/b/c", "b/c", "c") + if wildcard_match(rest, path) { + return true; + } + let mut search = path; + while let Some(pos) = search.find('/') { + search = &search[pos + 1..]; + if wildcard_match(rest, search) { + return true; + } + } + return false; + } + + wildcard_match(pattern, path) +} + +/// Match a pattern containing `*` wildcards against a string. 
+/// `*` matches any sequence of characters (including empty). +fn wildcard_match(pattern: &str, text: &str) -> bool { let parts: Vec<&str> = pattern.split('*').collect(); if parts.len() == 1 { - return path == pattern; + return text == pattern; } - let mut remaining = path.as_str(); + let mut remaining = text; for (i, part) in parts.iter().enumerate() { if part.is_empty() { continue; From 1c67bee3ad5d3d7cc3e16dc8fa26021646a0d392 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 16:21:01 +0100 Subject: [PATCH 24/28] =?UTF-8?q?fix(create-pr):=20address=20review=20roun?= =?UTF-8?q?d=20=E2=80=94=20dedup,=20copies,=20title=20length,=20shallow=20?= =?UTF-8?q?clones?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - count_patch_files: deduplicate by b/ path to avoid over-counting in multi-commit format-patch output - ExecutionResult::warning: add #[serde(default)] for forward compat - collect_changes_from_diff_tree: handle C (copy) status codes analogously to renames, instead of falling through to edit - Validate effective_title length (max 400 chars) after prepending title-prefix, before ADO push - Improve find_merge_base error message to mention shallow clones as a possible cause - extract_paths_from_patch: skip lines before first diff --git header to avoid false positives from commit messages quoting patches - Branch collision: retry up to 3 times instead of one-shot - Include commit SHA in git reset IO error for operator recovery Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/mcp.rs | 9 ++- src/safeoutputs/create_pr.rs | 119 +++++++++++++++++++++++------------ src/safeoutputs/result.rs | 2 +- 3 files changed, 87 insertions(+), 43 deletions(-) diff --git a/src/mcp.rs b/src/mcp.rs index 3fdb8de..8375afb 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -352,7 +352,11 @@ impl SafeOutputs { .output() .await .map_err(|e| { - anyhow_to_mcp_error(anyhow::anyhow!("Failed to run git reset: 
{}", e)) + anyhow_to_mcp_error(anyhow::anyhow!( + "Failed to run git reset (synthetic commit {} may remain): {}", + head_sha, + e + )) })?; if !reset_output.status.success() { @@ -499,7 +503,8 @@ impl SafeOutputs { Err(anyhow_to_mcp_error(anyhow::anyhow!( "Cannot determine diff base: no remote tracking branch found. \ - Push a tracking branch or ensure origin/HEAD is configured." + This can happen with shallow clones (fetchDepth: 1). \ + Ensure the pipeline fetches full history or push a tracking branch." ))) } diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index fcbf32c..554d487 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -538,6 +538,14 @@ impl Executor for CreatePrResult { self.title.clone() }; + // ADO PR titles have a 400-character limit + if effective_title.len() > 400 { + return Ok(ExecutionResult::failure(format!( + "PR title too long after applying title-prefix ({} chars, max 400)", + effective_title.len() + ))); + } + // Validate repository against allowed list debug!( "Validating repository '{}' against allowed list", @@ -1049,43 +1057,44 @@ impl Executor for CreatePrResult { target_branch, base_commit ); - // Check if the source branch already exists (e.g. from a retry or previous run) - let check_ref_url = format!( - "{}{}/_apis/git/repositories/{}/refs?filter=heads/{}&api-version=7.1", - org_url, project, repo_id, source_branch - ); - debug!("Checking if source branch exists: {}", check_ref_url); + // Check if the source branch already exists (e.g. from a retry or previous run). + // Retry with new random suffixes up to 3 times. 
+ for attempt in 0..3 { + let check_ref_url = format!( + "{}{}/_apis/git/repositories/{}/refs?filter=heads/{}&api-version=7.1", + org_url, project, repo_id, source_branch + ); + debug!("Checking if source branch exists (attempt {}): {}", attempt + 1, check_ref_url); - let check_ref_response = client - .get(&check_ref_url) - .basic_auth("", Some(token)) - .send() - .await - .context("Failed to check source branch existence")?; + let check_ref_response = client + .get(&check_ref_url) + .basic_auth("", Some(token)) + .send() + .await + .context("Failed to check source branch existence")?; - if check_ref_response.status().is_success() { - let check_data: serde_json::Value = check_ref_response.json().await?; - let refs = check_data["value"].as_array(); - if refs.is_some_and(|r| !r.is_empty()) { - warn!( - "Branch '{}' already exists, generating new suffix", - source_branch - ); - use rand::RngExt; - let new_suffix: u32 = rand::rng().random(); - let new_hex = format!("{:08x}", new_suffix); - // Branch format is "agent/-"; replace the trailing hex - source_branch = if let Some(pos) = source_branch.rfind('-') { - format!("{}-{}", &source_branch[..pos], new_hex) - } else { - format!("{}-{}", source_branch, new_hex) - }; - source_ref = format!("refs/heads/{}", source_branch); - info!( - "Renamed source branch to '{}'", - source_branch - ); + if check_ref_response.status().is_success() { + let check_data: serde_json::Value = check_ref_response.json().await?; + let refs = check_data["value"].as_array(); + if refs.is_some_and(|r| !r.is_empty()) { + warn!( + "Branch '{}' already exists, generating new suffix (attempt {})", + source_branch, attempt + 1 + ); + use rand::RngExt; + let new_suffix: u32 = rand::rng().random(); + let new_hex = format!("{:08x}", new_suffix); + source_branch = if let Some(pos) = source_branch.rfind('-') { + format!("{}-{}", &source_branch[..pos], new_hex) + } else { + format!("{}-{}", source_branch, new_hex) + }; + source_ref = format!("refs/heads/{}", 
source_branch); + info!("Renamed source branch to '{}'", source_branch); + continue; + } } + break; } // Push changes via ADO API (this creates the branch and commits in one call) @@ -1543,6 +1552,15 @@ async fn collect_changes_from_diff_tree( if status_code != "R100" && new_full_path.is_file() { changes.push(read_file_change("edit", new_path, &new_full_path).await?); } + } else if status_code.starts_with('C') && parts.len() >= 3 { + // Copied file: C100\tsrc_path\tdest_path + let dest_path = parts[2]; + validate_single_path(dest_path)?; + + let dest_full_path = worktree_path.join(dest_path); + if dest_full_path.is_file() { + changes.push(read_file_change("add", dest_path, &dest_full_path).await?); + } } else { // Modified or other — read current content if full_path.is_file() { @@ -1743,7 +1761,22 @@ fn validate_single_path(path: &str) -> anyhow::Result<()> { /// with spaces that break `diff --git` header parsing via split_whitespace). fn extract_paths_from_patch(patch_content: &str) -> Vec { let mut paths = Vec::new(); + let mut in_diff = false; for line in patch_content.lines() { + // Only extract paths after the first diff --git header to avoid + // false positives from commit messages that quote patch fragments + if line.starts_with("diff --git") { + in_diff = true; + continue; + } + if !in_diff { + continue; + } + // A new commit envelope resets — skip until next diff --git + if line.starts_with("From ") { + in_diff = false; + continue; + } if let Some(rest) = line.strip_prefix("--- a/") { let path = rest.trim().trim_matches('"'); if !path.is_empty() { @@ -1768,15 +1801,21 @@ fn extract_paths_from_patch(patch_content: &str) -> Vec { paths } -/// Count the number of distinct file changes in a patch. -/// Uses `diff --git` headers as the canonical count — each header represents exactly -/// one file change (add, edit, delete, or rename). This avoids double-counting renames, -/// which produce both `--- a/old` and `+++ b/new` lines. 
+/// Count the number of distinct files changed in a patch. +/// In multi-commit format-patch output, the same file may appear in multiple commits. +/// Deduplicates by extracting the `b/` path from each `diff --git a/... b/...` header. fn count_patch_files(patch_content: &str) -> usize { + use std::collections::HashSet; patch_content .lines() - .filter(|line| line.starts_with("diff --git")) - .count() + .filter_map(|line| { + // Extract "b/" from "diff --git a/ b/" + line.strip_prefix("diff --git a/") + .and_then(|rest| rest.rsplit_once(" b/")) + .map(|(_, b_path)| b_path) + }) + .collect::>() + .len() } /// Check if any file paths in the patch are protected. diff --git a/src/safeoutputs/result.rs b/src/safeoutputs/result.rs index f73e788..a7dcfb8 100644 --- a/src/safeoutputs/result.rs +++ b/src/safeoutputs/result.rs @@ -113,7 +113,7 @@ pub struct ExecutionResult { pub success: bool, /// Whether this is a warning (succeeded with issues). /// Invariant: warning == true implies success == true. 
- #[serde(skip_serializing_if = "std::ops::Not::not")] + #[serde(default, skip_serializing_if = "std::ops::Not::not")] warning: bool, /// Human-readable message describing the outcome pub message: String, From a159cfb48b624b72a3560f12b62386b8ab77bb0e Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 16:43:00 +0100 Subject: [PATCH 25/28] fix(create-pr): conflict markers, Unicode title, file count, TOCTOU retry - Conflict marker regex: require trailing \s on <<<<<<< and >>>>>>> to avoid false positives from rst/markdown heading underlines - Title length: use chars().count() instead of len() for Unicode-safe character counting (CJK chars are multi-byte) - count_patch_files: reuse extract_paths_from_patch for robust dedup instead of fragile rsplit_once(" b/") header parsing - Remove dead ends_with(pat) check (literal string match never matches glob patterns containing *) - TOCTOU branch collision: retry push on ADO 409/400 "already exists" in addition to pre-check loop Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pr.rs | 96 ++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 27 deletions(-) diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 554d487..316394d 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -539,10 +539,11 @@ impl Executor for CreatePrResult { }; // ADO PR titles have a 400-character limit - if effective_title.len() > 400 { + let title_char_count = effective_title.chars().count(); + if title_char_count > 400 { return Ok(ExecutionResult::failure(format!( "PR title too long after applying title-prefix ({} chars, max 400)", - effective_title.len() + title_char_count ))); } @@ -679,10 +680,7 @@ impl Executor for CreatePrResult { let patch_paths: Vec = extract_paths_from_patch(&patch_content) .into_iter() .filter(|p| { - !config.excluded_files.iter().any(|pat| { - // Match the same way git --exclude does: pattern 
matches path suffix - p.ends_with(pat) || glob_match_simple(pat, p) - }) + !config.excluded_files.iter().any(|pat| glob_match_simple(pat, p)) }) .collect(); @@ -895,10 +893,12 @@ impl Executor for CreatePrResult { debug!("Patch applied with git apply --3way fallback"); // Scan for unresolved conflict markers in working tree files. - // We use git grep instead of git diff --check because diff --check - // also flags trailing whitespace, producing false positives. + // Use specific patterns: <<<<<<< and >>>>>>> always have a trailing space + // and marker (e.g., "<<<<<<< HEAD"), while ======= is on its own line. + // We require the trailing space on <<<<<<< and >>>>>>> to avoid false + // positives from reStructuredText/markdown heading underlines. let conflict_check = Command::new("git") - .args(["grep", "-l", "-E", r"^(<<<<<<<|=======|>>>>>>>)"]) + .args(["grep", "-l", "-E", r"^(<<<<<<<\s|>>>>>>>\s)"]) .current_dir(&worktree_path) .output() .await @@ -1137,11 +1137,63 @@ impl Executor for CreatePrResult { if !push_response.status().is_success() { let status = push_response.status(); let body = push_response.text().await.unwrap_or_default(); - warn!("Failed to push changes: {} - {}", status, body); - return Ok(ExecutionResult::failure(format!( - "Failed to push changes: {} - {}", - status, body - ))); + + // Handle TOCTOU branch collision: if the push fails because the branch + // was created between our check and the push, retry with a new suffix + if status.as_u16() == 409 + || (status.as_u16() == 400 && body.contains("already exists")) + { + warn!( + "Push failed due to branch collision, retrying with new suffix: {}", + body + ); + use rand::RngExt; + let new_suffix: u32 = rand::rng().random(); + let new_hex = format!("{:08x}", new_suffix); + source_branch = if let Some(pos) = source_branch.rfind('-') { + format!("{}-{}", &source_branch[..pos], new_hex) + } else { + format!("{}-{}", source_branch, new_hex) + }; + source_ref = format!("refs/heads/{}", 
source_branch); + info!("Retrying push with branch '{}'", source_branch); + + let retry_body = serde_json::json!({ + "refUpdates": [{ + "name": source_ref, + "oldObjectId": "0000000000000000000000000000000000000000" + }], + "commits": [{ + "comment": effective_title, + "changes": changes, + "parents": [base_commit] + }] + }); + + let retry_response = client + .post(&push_url) + .basic_auth("", Some(token)) + .json(&retry_body) + .send() + .await + .context("Failed to push changes (retry)")?; + + if !retry_response.status().is_success() { + let retry_status = retry_response.status(); + let retry_body_text = retry_response.text().await.unwrap_or_default(); + warn!("Retry push also failed: {} - {}", retry_status, retry_body_text); + return Ok(ExecutionResult::failure(format!( + "Failed to push changes after retry: {} - {}", + retry_status, retry_body_text + ))); + } + } else { + warn!("Failed to push changes: {} - {}", status, body); + return Ok(ExecutionResult::failure(format!( + "Failed to push changes: {} - {}", + status, body + ))); + } } debug!("Changes pushed successfully"); @@ -1802,20 +1854,10 @@ fn extract_paths_from_patch(patch_content: &str) -> Vec { } /// Count the number of distinct files changed in a patch. -/// In multi-commit format-patch output, the same file may appear in multiple commits. -/// Deduplicates by extracting the `b/` path from each `diff --git a/... b/...` header. +/// Reuses `extract_paths_from_patch` which correctly handles quoted paths, +/// renames, copies, and multi-commit deduplication. fn count_patch_files(patch_content: &str) -> usize { - use std::collections::HashSet; - patch_content - .lines() - .filter_map(|line| { - // Extract "b/" from "diff --git a/ b/" - line.strip_prefix("diff --git a/") - .and_then(|rest| rest.rsplit_once(" b/")) - .map(|(_, b_path)| b_path) - }) - .collect::>() - .len() + extract_paths_from_patch(patch_content).len() } /// Check if any file paths in the patch are protected. 
From 491747196ce807d93759f1036371c9fee60db890 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 17:27:11 +0100 Subject: [PATCH 26/28] fix(create-pr): git am does not support --exclude, use git apply directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git am has no --exclude flag — passing it causes git am to always fail, falling through to git apply. Fix: when excluded-files is configured, skip git am and use git apply --3way --exclude directly. When no exclusions, use git am as before with git apply as fallback. Also adds 7 unit tests for glob_match_simple and wildcard_match which gate the excluded-files security filter. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pr.rs | 164 +++++++++++++++++++++++++++-------- 1 file changed, 130 insertions(+), 34 deletions(-) diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 316394d..d4e57e8 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -841,37 +841,16 @@ impl Executor for CreatePrResult { let base_sha = String::from_utf8_lossy(&base_sha_output.stdout).trim().to_string(); debug!("Worktree base SHA before patch: {}", base_sha); - // Apply the format-patch using git am --3way for proper conflict handling. - // git am handles the email-style patch format from git format-patch and - // --3way enables three-way merge for better conflict resolution. - // Excluded files are filtered via --exclude flags at the git level. 
- debug!("Applying patch with git am --3way"); - let mut am_args: Vec = vec!["am".into(), "--3way".into()]; - am_args.extend(exclude_args.iter().cloned()); - am_args.push(patch_path.to_string_lossy().into_owned()); - let am_output = Command::new("git") - .args(&am_args) - .current_dir(&worktree_path) - .output() - .await - .context("Failed to run git am")?; - - // Track whether git am created a commit (affects how we collect changes) - let patch_committed = am_output.status.success(); - - if !patch_committed { - let stderr = String::from_utf8_lossy(&am_output.stderr); - debug!("git am --3way failed: {}", stderr); - - // Abort the failed am to leave worktree clean - let _ = Command::new("git") - .args(["am", "--abort"]) - .current_dir(&worktree_path) - .output() - .await; - - // Fallback: try git apply (handles plain diff format for backward compatibility) - debug!("Falling back to git apply --3way"); + // Apply the patch. Strategy depends on whether excluded-files are configured: + // - Without exclusions: prefer git am --3way (preserves commit metadata) + // with git apply --3way as fallback + // - With exclusions: use git apply --3way directly (git am does not support + // --exclude flags; git apply does) + let use_git_apply = !exclude_args.is_empty(); + let patch_committed; + + if use_git_apply { + debug!("Using git apply --3way (excluded-files configured)"); let mut apply_args: Vec = vec!["apply".into(), "--3way".into()]; apply_args.extend(exclude_args.iter().cloned()); apply_args.push(patch_path.to_string_lossy().into_owned()); @@ -890,7 +869,7 @@ impl Executor for CreatePrResult { warn!("{}", err_msg); return Ok(ExecutionResult::failure(err_msg)); } - debug!("Patch applied with git apply --3way fallback"); + debug!("Patch applied with git apply --3way"); // Scan for unresolved conflict markers in working tree files. 
// Use specific patterns: <<<<<<< and >>>>>>> always have a trailing space @@ -904,7 +883,6 @@ impl Executor for CreatePrResult { .await .context("Failed to run git grep for conflict markers")?; - // git grep exits 0 when matches are found, 1 when no matches if conflict_check.status.success() { let conflicting_files = String::from_utf8_lossy(&conflict_check.stdout).trim().to_string(); @@ -915,8 +893,71 @@ impl Executor for CreatePrResult { warn!("{}", err_msg); return Ok(ExecutionResult::failure(err_msg)); } + + patch_committed = false; } else { - debug!("Patch applied successfully with git am"); + // No exclusions — use git am --3way for proper commit metadata preservation + debug!("Applying patch with git am --3way"); + let am_output = Command::new("git") + .args(["am", "--3way", &patch_path.to_string_lossy()]) + .current_dir(&worktree_path) + .output() + .await + .context("Failed to run git am")?; + + patch_committed = am_output.status.success(); + + if !patch_committed { + let stderr = String::from_utf8_lossy(&am_output.stderr); + debug!("git am --3way failed: {}", stderr); + + // Abort the failed am to leave worktree clean + let _ = Command::new("git") + .args(["am", "--abort"]) + .current_dir(&worktree_path) + .output() + .await; + + // Fallback: try git apply --3way + debug!("Falling back to git apply --3way"); + let apply_output = Command::new("git") + .args(["apply", "--3way", &patch_path.to_string_lossy()]) + .current_dir(&worktree_path) + .output() + .await + .context("Failed to run git apply --3way")?; + + if !apply_output.status.success() { + let err_msg = format!( + "Patch could not be applied (conflicts): {}", + String::from_utf8_lossy(&apply_output.stderr) + ); + warn!("{}", err_msg); + return Ok(ExecutionResult::failure(err_msg)); + } + debug!("Patch applied with git apply --3way fallback"); + + // Scan for unresolved conflict markers + let conflict_check = Command::new("git") + .args(["grep", "-l", "-E", r"^(<<<<<<<\s|>>>>>>>\s)"]) + 
.current_dir(&worktree_path) + .output() + .await + .context("Failed to run git grep for conflict markers")?; + + if conflict_check.status.success() { + let conflicting_files = + String::from_utf8_lossy(&conflict_check.stdout).trim().to_string(); + let err_msg = format!( + "Patch applied with unresolved conflict markers in: {}", + conflicting_files + ); + warn!("{}", err_msg); + return Ok(ExecutionResult::failure(err_msg)); + } + } else { + debug!("Patch applied successfully with git am"); + } } // Collect changed files. The method depends on how the patch was applied: @@ -2164,6 +2205,61 @@ new file mode 100755 assert_eq!(protected.len(), 7); } + // ─── Glob matching ──────────────────────────────────────────────────── + + #[test] + fn test_glob_match_simple_basename_wildcard() { + assert!(glob_match_simple("*.lock", "Cargo.lock")); + assert!(glob_match_simple("*.lock", "subdir/Cargo.lock")); + assert!(glob_match_simple("*.lock", "a/b/c/yarn.lock")); + assert!(!glob_match_simple("*.lock", "Cargo.toml")); + } + + #[test] + fn test_glob_match_simple_basename_exact() { + assert!(glob_match_simple("package.json", "package.json")); + assert!(glob_match_simple("package.json", "subdir/package.json")); + assert!(!glob_match_simple("package.json", "other.json")); + } + + #[test] + fn test_glob_match_simple_double_star_prefix() { + assert!(glob_match_simple("**/Cargo.lock", "Cargo.lock")); + assert!(glob_match_simple("**/Cargo.lock", "subdir/Cargo.lock")); + assert!(glob_match_simple("**/Cargo.lock", "a/b/c/Cargo.lock")); + assert!(!glob_match_simple("**/Cargo.lock", "Cargo.toml")); + } + + #[test] + fn test_glob_match_simple_double_star_wildcard() { + assert!(glob_match_simple("**/*.json", "package.json")); + assert!(glob_match_simple("**/*.json", "subdir/package.json")); + assert!(!glob_match_simple("**/*.json", "package.lock")); + } + + #[test] + fn test_glob_match_simple_path_with_slash() { + assert!(glob_match_simple("src/*.rs", "src/main.rs")); + 
assert!(!glob_match_simple("src/*.rs", "tests/main.rs")); + } + + #[test] + fn test_glob_match_does_not_match_adjacent_protected() { + // *.lock should not match package.json + assert!(!glob_match_simple("*.lock", "package.json")); + assert!(!glob_match_simple("*.lock", "go.mod")); + } + + #[test] + fn test_wildcard_match() { + assert!(wildcard_match("*.lock", "Cargo.lock")); + assert!(wildcard_match("foo", "foo")); + assert!(!wildcard_match("foo", "bar")); + assert!(wildcard_match("*", "anything")); + assert!(wildcard_match("pre*suf", "pre-middle-suf")); + assert!(!wildcard_match("pre*suf", "pre-middle-other")); + } + // ─── Extract paths from patch ─────────────────────────────────────────── #[test] From f77529200d007aef9238acac274dcb58887d7253 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 17:52:23 +0100 Subject: [PATCH 27/28] fix(create-pr): align validate_patch_paths with extract_paths_from_patch - Use strip_prefix for ---/+++ lines instead of split_whitespace to correctly handle paths with spaces - Add in_diff guard to skip commit message bodies in format-patch output, preventing false-positive rejections from commit messages that reference .git paths or contain patch-like content - Both functions now use consistent parsing approach Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/safeoutputs/create_pr.rs | 42 +++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index d4e57e8..bb5813a 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -1711,23 +1711,39 @@ async fn read_file_change( /// - No absolute paths /// - No null bytes fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> { + let mut in_diff = false; for line in patch_content.lines() { - // Check diff headers for file paths + // Only validate paths within diff blocks, not commit message bodies. 
+ // format-patch output includes commit messages before each diff section. if line.starts_with("diff --git") { - // Extract paths from "diff --git a/path b/path" - let parts: Vec<&str> = line.split_whitespace().collect(); - for part in parts.iter().skip(2) { - let path = part.trim_start_matches("a/").trim_start_matches("b/"); - validate_single_path(path)?; - } - } else if line.starts_with("---") || line.starts_with("+++") { - // Extract path from "--- a/path" or "+++ b/path" - if let Some(path) = line.split_whitespace().nth(1) { - if path != "/dev/null" { - let clean_path = path.trim_start_matches("a/").trim_start_matches("b/"); - validate_single_path(clean_path)?; + in_diff = true; + // Extract paths using strip_prefix for correct handling of spaces + if let Some(rest) = line.strip_prefix("diff --git a/") { + // The b/ path starts after the last " b/" — but for simple validation, + // validate the a/ path (everything before " b/") and the b/ path + if let Some((a_path, b_path)) = rest.rsplit_once(" b/") { + validate_single_path(a_path.trim_matches('"'))?; + validate_single_path(b_path.trim_matches('"'))?; } } + continue; + } + // Reset on commit envelope boundaries + if line.starts_with("From ") && in_diff { + in_diff = false; + continue; + } + if !in_diff { + continue; + } + if let Some(rest) = line.strip_prefix("--- a/") { + let path = rest.trim().trim_matches('"'); + validate_single_path(path)?; + } else if let Some(rest) = line.strip_prefix("+++ b/") { + let path = rest.trim().trim_matches('"'); + validate_single_path(path)?; + } else if line.starts_with("--- /dev/null") || line.starts_with("+++ /dev/null") { + // New or deleted files — no path to validate } else if line.starts_with("rename from ") || line.starts_with("rename to ") || line.starts_with("copy from ") || line.starts_with("copy to ") { From b8bf0842ba45e8addf547d2810e51ee3fe671791 Mon Sep 17 00:00:00 2001 From: James Devine Date: Tue, 14 Apr 2026 18:07:12 +0100 Subject: [PATCH 28/28] 
refactor(create-pr): use glob-match crate, improve merge-base error messages - Replace hand-rolled wildcard_match with glob-match crate for correct glob semantics (* does not cross directory separators) - Add test asserting src/*.rs does not match src/nested/file.rs - Distinguish 'no tracking branch' from 'diverged branches (no common ancestor)' in find_merge_base error messages Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 7 ++++ Cargo.toml | 1 + src/mcp.rs | 29 ++++++++++++--- src/safeoutputs/create_pr.rs | 69 ++++++------------------------------ 4 files changed, 42 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6fc080..1480372 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,6 +14,7 @@ dependencies = [ "clap", "dirs", "env_logger", + "glob-match", "inquire", "log", "percent-encoding", @@ -688,6 +689,12 @@ dependencies = [ "wasip3", ] +[[package]] +name = "glob-match" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985c9503b412198aa4197559e9a318524ebc4519c229bfa05a535828c950b9d" + [[package]] name = "h2" version = "0.4.13" diff --git a/Cargo.toml b/Cargo.toml index 48a0015..4d16944 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ axum = { version = "0.8.8", features = ["tokio"] } subtle = "2.6.1" rand = "0.10.1" base64 = "0.22.1" +glob-match = "0.2.1" [dev-dependencies] reqwest = { version = "0.12", features = ["blocking"] } diff --git a/src/mcp.rs b/src/mcp.rs index 8375afb..25e1fe5 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -436,6 +436,7 @@ impl SafeOutputs { } } + let mut found_remote_ref = false; for remote_ref in &candidates { let output = Command::new("git") .args(["merge-base", "HEAD", remote_ref]) @@ -451,6 +452,16 @@ impl SafeOutputs { return Ok(base); } } + // Check if the ref exists even though merge-base failed (diverged branches) + let ref_check = Command::new("git") + .args(["rev-parse", "--verify", remote_ref]) + 
.current_dir(git_dir) + .output() + .await + .ok(); + if ref_check.is_some_and(|o| o.status.success()) { + found_remote_ref = true; + } } } @@ -501,11 +512,19 @@ impl SafeOutputs { } } - Err(anyhow_to_mcp_error(anyhow::anyhow!( - "Cannot determine diff base: no remote tracking branch found. \ - This can happen with shallow clones (fetchDepth: 1). \ - Ensure the pipeline fetches full history or push a tracking branch." - ))) + if found_remote_ref { + Err(anyhow_to_mcp_error(anyhow::anyhow!( + "Cannot determine diff base: remote tracking branch exists but shares no \ + common ancestry with HEAD (orphan or force-pushed branch). \ + Ensure HEAD is based on the target branch." + ))) + } else { + Err(anyhow_to_mcp_error(anyhow::anyhow!( + "Cannot determine diff base: no remote tracking branch found. \ + This can happen with shallow clones (fetchDepth: 1). \ + Ensure the pipeline fetches full history or push a tracking branch." + ))) + } } #[tool( diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index bb5813a..54d7ef9 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -1766,58 +1766,17 @@ fn truncate_error_body(body: &str, max_len: usize) -> &str { /// Supports `*` (match any sequence within a path segment) and leading `**/` /// (match any directory prefix). Patterns without `/` are treated as basename /// matches (e.g., `*.lock` matches `subdir/Cargo.lock`). +/// Match a file path against a glob pattern for excluded-files filtering. +/// Patterns without `/` are treated as basename matches (e.g., `*.lock` matches +/// `subdir/Cargo.lock`). Patterns with `**/` prefix match at any depth. +/// Uses the `glob-match` crate for correct glob semantics (`*` does not cross `/`). 
fn glob_match_simple(pattern: &str, path: &str) -> bool { if !pattern.contains('/') { - // Basename-only pattern: match against the last path component - let basename = path.rsplit('/').next().unwrap_or(path); - return wildcard_match(pattern, basename); - } - - if let Some(rest) = pattern.strip_prefix("**/") { - // **/foo matches foo at any depth: try matching against the full path - // and every directory suffix (e.g., "a/b/c" → "a/b/c", "b/c", "c") - if wildcard_match(rest, path) { - return true; - } - let mut search = path; - while let Some(pos) = search.find('/') { - search = &search[pos + 1..]; - if wildcard_match(rest, search) { - return true; - } - } - return false; - } - - wildcard_match(pattern, path) -} - -/// Match a pattern containing `*` wildcards against a string. -/// `*` matches any sequence of characters (including empty). -fn wildcard_match(pattern: &str, text: &str) -> bool { - let parts: Vec<&str> = pattern.split('*').collect(); - if parts.len() == 1 { - return text == pattern; + // Basename-only pattern: auto-prefix with **/ for any-depth matching + let full_pattern = format!("**/{}", pattern); + return glob_match::glob_match(&full_pattern, path); } - let mut remaining = text; - for (i, part) in parts.iter().enumerate() { - if part.is_empty() { - continue; - } - if i == 0 { - if !remaining.starts_with(part) { - return false; - } - remaining = &remaining[part.len()..]; - } else if i == parts.len() - 1 { - return remaining.ends_with(part); - } else if let Some(pos) = remaining.find(part) { - remaining = &remaining[pos + part.len()..]; - } else { - return false; - } - } - true + glob_match::glob_match(pattern, path) } /// Validate a single file path for security issues @@ -2257,6 +2216,8 @@ new file mode 100755 fn test_glob_match_simple_path_with_slash() { assert!(glob_match_simple("src/*.rs", "src/main.rs")); assert!(!glob_match_simple("src/*.rs", "tests/main.rs")); + // * should NOT cross directory boundaries + 
assert!(!glob_match_simple("src/*.rs", "src/nested/file.rs")); } #[test] @@ -2266,16 +2227,6 @@ new file mode 100755 assert!(!glob_match_simple("*.lock", "go.mod")); } - #[test] - fn test_wildcard_match() { - assert!(wildcard_match("*.lock", "Cargo.lock")); - assert!(wildcard_match("foo", "foo")); - assert!(!wildcard_match("foo", "bar")); - assert!(wildcard_match("*", "anything")); - assert!(wildcard_match("pre*suf", "pre-middle-suf")); - assert!(!wildcard_match("pre*suf", "pre-middle-other")); - } - // ─── Extract paths from patch ─────────────────────────────────────────── #[test]