From becb32be3554abebfcf6b6283fbf393fd9768f03 Mon Sep 17 00:00:00 2001 From: Lucas Earl Date: Mon, 21 Jul 2025 16:27:44 -0600 Subject: [PATCH 1/4] Add automated breaking changes detection for PRsImplements git-based analysis to detect API breaking changes in DataFusion PRs:- LogicalPlan enum modifications and variant removals - DataFrame API public method changes- Generates detailed reports and integrates with GitHub ActionsAddresses #16532 --- .github/workflows/dev.yml | 50 ++++- ci/scripts/README.md | 49 +++++ ci/scripts/breaking-changes-report.md | 33 ++++ ci/scripts/detect-breaking-changes.sh | 251 ++++++++++++++++++++++++++ 4 files changed, 382 insertions(+), 1 deletion(-) create mode 100644 ci/scripts/README.md create mode 100644 ci/scripts/breaking-changes-report.md create mode 100755 ci/scripts/detect-breaking-changes.sh diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index aa4bd862e09e4..6465cce7a22d4 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -16,13 +16,58 @@ # under the License. name: Dev -on: [push, pull_request] +on: [push, pull_request, workflow_dispatch] concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true jobs: + breaking-changes: + name: Breaking Changes Detection + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + permissions: + contents: read + pull-requests: write + issues: write + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Install GitHub CLI + run: | + if ! command -v gh &> /dev/null; then + curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null + sudo apt update + sudo apt install gh + fi + + - name: Run breaking changes detection + shell: bash + run: | + chmod +x ./ci/scripts/detect-breaking-changes.sh + ./ci/scripts/detect-breaking-changes.sh + timeout-minutes: 5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Upload breaking changes report + if: failure() + uses: actions/upload-artifact@v4 + with: + name: breaking-changes-report + path: breaking-changes-report.md + retention-days: 30 + license-header-check: runs-on: ubuntu-latest name: Check License Header @@ -35,6 +80,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch full history + token: ${{ secrets.GITHUB_TOKEN }} - uses: actions/setup-node@v4 with: node-version: "20" diff --git a/ci/scripts/README.md b/ci/scripts/README.md new file mode 100644 index 0000000000000..147ed266072af --- /dev/null +++ b/ci/scripts/README.md @@ -0,0 +1,49 @@ +# CI Scripts + +This directory contains scripts used in the DataFusion CI/CD pipeline. + +## Breaking Changes Detection + +### `detect-breaking-changes.sh` + +Automatically detects potential breaking changes in DataFusion pull requests using git-based analysis. + +**Features:** +- Detects changes to LogicalPlan enums and variants +- Monitors DataFrame API modifications +- Checks SQL parser keyword changes +- Generates detailed reports with diff snippets +- Integrates with GitHub to post PR comments +- Lightweight git-based analysis (no compilation required) + +**Usage:** +```bash +# Run from repository root +./ci/scripts/detect-breaking-changes.sh + +# With custom base ref +GITHUB_BASE_REF=v42.0.0 ./ci/scripts/detect-breaking-changes.sh +``` + +**Environment Variables:** +- `GITHUB_BASE_REF`: Base branch/tag to compare against (default: `main`) +- `GITHUB_HEAD_REF`: Head ref being tested (default: `HEAD`) +- `GITHUB_TOKEN`: Required for posting PR comments +- `GITHUB_ENV`: Set by GitHub Actions for CI integration + +**Output:** +- Exit code 0: No breaking changes detected +- Exit code 1: Breaking changes detected +- Generates `breaking-changes-report.md` when issues found + +### `breaking-changes-report.md` + +Template/example of the report generated when breaking changes are detected. Contains: +- Summary of detected changes +- Links to DataFusion API stability guidelines +- Required actions for PR authors +- Approval requirements + +## Integration + +The breaking changes detection is integrated into the GitHub Actions workflow (`.github/workflows/dev.yml`) and runs automatically on all pull requests. \ No newline at end of file diff --git a/ci/scripts/breaking-changes-report.md b/ci/scripts/breaking-changes-report.md new file mode 100644 index 0000000000000..86e1a3a322812 --- /dev/null +++ b/ci/scripts/breaking-changes-report.md @@ -0,0 +1,33 @@ +# 🚨 Breaking Changes Report + +## Summary +Breaking changes detected in this PR that require the `api-change` label. + +## DataFusion API Stability Guidelines +Per the [API Health Policy](https://datafusion.apache.org/contributor-guide/specification/api-health-policy.html): + +### Git-Based Analysis: +- ⚠️ Breaking changes detected but specific patterns not identified +- Review the modified files below for potential API changes + +### Source Files Modified: +- datafusion/core/src/dataframe/mod.rs + +### All Files Modified: +- .github/workflows/dev.yml +- ci/scripts/detect-breaking-changes.sh +- datafusion/core/src/dataframe/mod.rs + +## Required Actions: +1. Add the `api-change` label to this PR +2. Update CHANGELOG.md with breaking change details +3. Consider adding deprecation warnings before removal +4. Update migration guide if needed + +## Approval Requirements: +- Breaking changes require approval from a DataFusion maintainer +- Consider if this change is necessary or if a deprecation path exists + +## Note: +This analysis uses git-based pattern detection to identify common breaking change patterns. +Review the specific changes carefully to determine if they truly constitute breaking changes. \ No newline at end of file diff --git a/ci/scripts/detect-breaking-changes.sh b/ci/scripts/detect-breaking-changes.sh new file mode 100755 index 0000000000000..5c37e1e4a9373 --- /dev/null +++ b/ci/scripts/detect-breaking-changes.sh @@ -0,0 +1,251 @@ +#!/bin/bash + +set -e + +# Ensure we're in the repository root +if [ ! -f "Cargo.toml" ] || [ ! -d ".git" ]; then + echo "❌ Error: This script must be run from the repository root directory" + echo "Current directory: $(pwd)" + echo "Expected: A directory containing Cargo.toml and .git" + exit 1 +fi + +BASE_REF="${GITHUB_BASE_REF:-main}" +HEAD_REF="${GITHUB_HEAD_REF:-HEAD}" + +echo "🔍 DataFusion Breaking Changes Detection" +echo "Comparing against: $BASE_REF" +echo "Current ref: $HEAD_REF" + +# Skip installation - we're only using git-based analysis +echo "📋 Using git-based analysis for breaking change detection..." + +detect_datafusion_breaking_changes() { + local breaking_changes_found=1 # 1 = false in bash + + echo "📋 Checking DataFusion-specific API rules..." + + # Validate that we can access git and the base ref exists + if ! git rev-parse --verify "$BASE_REF" >/dev/null 2>&1; then + echo "⚠️ Warning: Base ref '$BASE_REF' not found, skipping git-based analysis" + return 1 # No breaking changes (can't detect them) + fi + + # Only run git-based checks if we have a valid base ref + if [ -n "$BASE_REF" ]; then + echo "Checking for Rust source file changes..." + if git diff "$BASE_REF" --name-only 2>/dev/null | grep -qE "\.rs$"; then + echo "Rust files modified - running detailed checks..." + + echo "Checking LogicalPlan stability..." + if check_logical_plan_changes; then + echo "❌ Breaking changes detected in LogicalPlan" + breaking_changes_found=0 + else + echo "✅ No LogicalPlan breaking changes detected" + fi + + echo "Checking DataFrame API..." + if check_dataframe_api_changes; then + echo "❌ Breaking changes detected in DataFrame API" + breaking_changes_found=0 + else + echo "✅ No DataFrame API breaking changes detected" + fi + + echo "Checking SQL parser compatibility..." + if check_sql_parser_changes; then + echo "❌ Breaking changes detected in SQL parser" + breaking_changes_found=0 + else + echo "✅ No SQL parser breaking changes detected" + fi + else + echo "✅ No Rust source files modified - no breaking changes possible" + fi + else + echo "⚠️ Skipping git-based checks (no base ref available)" + fi + + return $breaking_changes_found +} + +check_logical_plan_changes() { + # Check for changes in LogicalPlan files using git diff + if git diff "$BASE_REF"..HEAD --name-only 2>/dev/null | grep -qE "(logical_plan|logical.*expr)"; then + echo " LogicalPlan-related files modified" + + # Check for enum variant removals (potential breaking change) + if git diff "$BASE_REF"..HEAD 2>/dev/null | grep -qE "^-.*enum.*LogicalPlan|^-.*pub.*enum"; then + echo " Enum definitions modified" + return 0 # Breaking change detected + fi + + # Check for removed enum variants + if git diff "$BASE_REF"..HEAD 2>/dev/null | grep -qE "^-\s*[A-Z][a-zA-Z]*\s*[\(\{,]"; then + echo " Enum variants potentially removed" + return 0 # Breaking change detected + fi + fi + + return 1 # No breaking changes +} + +check_dataframe_api_changes() { + # Check if DataFrame public methods were changed + if git diff "$BASE_REF" --name-only 2>/dev/null | grep -qE "dataframe"; then + echo " DataFrame files modified, checking for API changes..." + + # Check for ANY changes to public methods (additions, removals, modifications) + if git diff "$BASE_REF" 2>/dev/null | grep -qE "^[-+].*pub fn"; then + echo " Public DataFrame API methods modified" + return 0 # Breaking change detected + fi + fi + + return 1 # No breaking changes +} + +check_sql_parser_changes() { + # Check for SQL keyword removals + if git diff "$BASE_REF"..HEAD --name-only 2>/dev/null | grep -qE "keywords"; then + if git diff "$BASE_REF"..HEAD 2>/dev/null | grep -qE "^-.*,"; then + echo " SQL keywords potentially removed" + return 0 # Breaking change detected + fi + fi + + return 1 # No breaking changes +} + +generate_breaking_changes_report() { + local output_file="breaking-changes-report.md" + local timestamp=$(date -u +"%Y-%m-%d %H:%M:%S UTC") + + cat > "$output_file" << EOF +# 🚨 Breaking Changes Report + +**Generated:** $timestamp +**Base Ref:** $BASE_REF +**Head Ref:** $HEAD_REF + +## Summary +Breaking changes detected in this PR that require the \`api-change\` label. + +## DataFusion API Stability Guidelines +Per the [API Health Policy](https://datafusion.apache.org/contributor-guide/specification/api-health-policy.html): + +EOF + + echo "### Git-Based Analysis:" >> "$output_file" + + # Only add git-based analysis if we have a base ref + if [ -n "$BASE_REF" ]; then + local changes_found=false + + # Check for DataFrame API changes with details - EXCLUDE script files + if git diff "$BASE_REF" --name-only 2>/dev/null | grep -qE "dataframe.*\.rs$"; then + # Only look at .rs files, exclude ci/scripts + local dataframe_diff=$(git diff "$BASE_REF" 2>/dev/null -- "datafusion/*/src/**/*.rs" | grep -E "^[-+].*pub fn") + if [ -n "$dataframe_diff" ]; then + echo "- ⚠️ **DataFrame API changes detected:**" >> "$output_file" + echo "\`\`\`diff" >> "$output_file" + echo "$dataframe_diff" | head -10 >> "$output_file" + echo "\`\`\`" >> "$output_file" + echo "" >> "$output_file" + changes_found=true + fi + fi + + # Check for LogicalPlan changes with details - EXCLUDE script files + local logical_plan_files=$(git diff "$BASE_REF" --name-only 2>/dev/null | grep -E "(logical_plan|logical.*expr).*\.rs$") + if [ -n "$logical_plan_files" ]; then + echo "- ⚠️ **LogicalPlan changes detected in:**" >> "$output_file" + echo "$logical_plan_files" | sed 's/^/ - /' >> "$output_file" + echo "" >> "$output_file" + changes_found=true + fi + + # Show general public API changes - ONLY in .rs files, EXCLUDE ci/scripts + local api_changes=$(git diff "$BASE_REF" 2>/dev/null -- "datafusion/" "ballista/" | grep -E "^[-+].*pub (fn|struct|enum)") + if [ -n "$api_changes" ]; then + echo "- ⚠️ **Public API signature changes:**" >> "$output_file" + echo "\`\`\`diff" >> "$output_file" + echo "$api_changes" | head -10 >> "$output_file" + echo "\`\`\`" >> "$output_file" + echo "" >> "$output_file" + changes_found=true + fi + + if [ "$changes_found" = false ]; then + echo "- ⚠️ Breaking changes detected but specific patterns not identified" >> "$output_file" + echo "- Review the modified files below for potential API changes" >> "$output_file" + fi + + # Add list of changed SOURCE files only + echo "" >> "$output_file" + echo "### Source Files Modified:" >> "$output_file" + git diff "$BASE_REF" --name-only 2>/dev/null | grep -E "\.rs$" | head -20 | sed 's/^/- /' >> "$output_file" || { + echo "- No Rust source files modified" >> "$output_file" + } + + echo "" >> "$output_file" + echo "### All Files Modified:" >> "$output_file" + git diff "$BASE_REF" --name-only 2>/dev/null | head -20 | sed 's/^/- /' >> "$output_file" + else + echo "- ⚠️ Git-based analysis skipped (no base ref available)" >> "$output_file" + fi + + cat >> "$output_file" << EOF + +## Required Actions: +1. Add the \`api-change\` label to this PR +2. Update CHANGELOG.md with breaking change details +3. Consider adding deprecation warnings before removal +4. Update migration guide if needed + +## Approval Requirements: +- Breaking changes require approval from a DataFusion maintainer +- Consider if this change is necessary or if a deprecation path exists + +## Note: +This analysis uses git-based pattern detection to identify common breaking change patterns. +Review the specific changes carefully to determine if they truly constitute breaking changes. +EOF + + echo "📋 Report generated: $output_file" +} + +main() { + if detect_datafusion_breaking_changes; then + echo "" + echo "🚨 FINAL RESULT: Breaking changes detected!" + # Only set GITHUB_ENV if it exists (in CI) + if [ -n "$GITHUB_ENV" ]; then + echo "BREAKING_CHANGES_DETECTED=true" >> "$GITHUB_ENV" + fi + + generate_breaking_changes_report + + # Only try to comment if we have GitHub CLI and are in a PR context + if command -v gh >/dev/null 2>&1 && [ -n "$GITHUB_TOKEN" ] && [ -n "$GITHUB_REPOSITORY" ] && [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then + echo "💬 Adding PR comment..." + gh pr comment --body-file breaking-changes-report.md || { + echo "⚠️ Could not add PR comment, but report was generated" + } + else + echo "⚠️ Skipping PR comment (not in PR context or missing GitHub CLI)" + fi + + exit 1 + else + echo "" + echo "🎉 FINAL RESULT: No breaking changes detected" + # Only set GITHUB_ENV if it exists (in CI) + if [ -n "$GITHUB_ENV" ]; then + echo "BREAKING_CHANGES_DETECTED=false" >> "$GITHUB_ENV" + fi + fi +} + +main "$@" \ No newline at end of file From 71b1621ca4f55f080bd4a6e8ad08e18906526093 Mon Sep 17 00:00:00 2001 From: Lucas Earl Date: Mon, 21 Jul 2025 19:03:45 -0600 Subject: [PATCH 2/4] Delete ci/scripts/breaking-changes-report.md --- ci/scripts/breaking-changes-report.md | 33 --------------------------- 1 file changed, 33 deletions(-) delete mode 100644 ci/scripts/breaking-changes-report.md diff --git a/ci/scripts/breaking-changes-report.md b/ci/scripts/breaking-changes-report.md deleted file mode 100644 index 86e1a3a322812..0000000000000 --- a/ci/scripts/breaking-changes-report.md +++ /dev/null @@ -1,33 +0,0 @@ -# 🚨 Breaking Changes Report - -## Summary -Breaking changes detected in this PR that require the `api-change` label. - -## DataFusion API Stability Guidelines -Per the [API Health Policy](https://datafusion.apache.org/contributor-guide/specification/api-health-policy.html): - -### Git-Based Analysis: -- ⚠️ Breaking changes detected but specific patterns not identified -- Review the modified files below for potential API changes - -### Source Files Modified: -- datafusion/core/src/dataframe/mod.rs - -### All Files Modified: -- .github/workflows/dev.yml -- ci/scripts/detect-breaking-changes.sh -- datafusion/core/src/dataframe/mod.rs - -## Required Actions: -1. Add the `api-change` label to this PR -2. Update CHANGELOG.md with breaking change details -3. Consider adding deprecation warnings before removal -4. Update migration guide if needed - -## Approval Requirements: -- Breaking changes require approval from a DataFusion maintainer -- Consider if this change is necessary or if a deprecation path exists - -## Note: -This analysis uses git-based pattern detection to identify common breaking change patterns. -Review the specific changes carefully to determine if they truly constitute breaking changes. \ No newline at end of file From 0136a3fa0164ab43f23efba76bf5352e136460e7 Mon Sep 17 00:00:00 2001 From: Lucas Earl Date: Mon, 21 Jul 2025 22:54:29 -0600 Subject: [PATCH 3/4] Address reviewer feedback: Remove unnecessary dependencies- Remove Rust toolchain installation (not needed for git-based analysis)- Remove GitHub CLI installation (pre-installed on GitHub runners) - Improve script robustness with consistent git diff range syntax- Make DataFrame API detection more specific to avoid false positivesAddresses reviewer comments on PR #16541 --- .github/workflows/dev.yml | 13 ------------- ci/scripts/detect-breaking-changes.sh | 23 ++++++++++++----------- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 6465cce7a22d4..d1702440fd57c 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -38,18 +38,6 @@ jobs: fetch-depth: 0 token: ${{ secrets.GITHUB_TOKEN }} - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@stable - - - name: Install GitHub CLI - run: | - if ! command -v gh &> /dev/null; then - curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg - echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null - sudo apt update - sudo apt install gh - fi - - name: Run breaking changes detection shell: bash run: | @@ -58,7 +46,6 @@ jobs: timeout-minutes: 5 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload breaking changes report if: failure() diff --git a/ci/scripts/detect-breaking-changes.sh b/ci/scripts/detect-breaking-changes.sh index 5c37e1e4a9373..5f75cdce11e8e 100755 --- a/ci/scripts/detect-breaking-changes.sh +++ b/ci/scripts/detect-breaking-changes.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -e +set -euo pipefail # Add -u for undefined variables and -o pipefail for pipe failures # Ensure we're in the repository root if [ ! -f "Cargo.toml" ] || [ ! -d ".git" ]; then @@ -34,7 +34,7 @@ detect_datafusion_breaking_changes() { # Only run git-based checks if we have a valid base ref if [ -n "$BASE_REF" ]; then echo "Checking for Rust source file changes..." - if git diff "$BASE_REF" --name-only 2>/dev/null | grep -qE "\.rs$"; then + if git diff "$BASE_REF"..HEAD --name-only 2>/dev/null | grep -qE "\.rs$"; then echo "Rust files modified - running detailed checks..." echo "Checking LogicalPlan stability..." @@ -93,11 +93,12 @@ check_logical_plan_changes() { check_dataframe_api_changes() { # Check if DataFrame public methods were changed - if git diff "$BASE_REF" --name-only 2>/dev/null | grep -qE "dataframe"; then + if git diff "$BASE_REF"..HEAD --name-only 2>/dev/null | grep -qE "dataframe.*\.rs$"; then echo " DataFrame files modified, checking for API changes..." # Check for ANY changes to public methods (additions, removals, modifications) - if git diff "$BASE_REF" 2>/dev/null | grep -qE "^[-+].*pub fn"; then + # Only look at actual DataFrame source files, not test files + if git diff "$BASE_REF"..HEAD 2>/dev/null -- "datafusion/core/src/dataframe/" | grep -qE "^[-+].*pub fn"; then echo " Public DataFrame API methods modified" return 0 # Breaking change detected fi @@ -144,9 +145,9 @@ EOF local changes_found=false # Check for DataFrame API changes with details - EXCLUDE script files - if git diff "$BASE_REF" --name-only 2>/dev/null | grep -qE "dataframe.*\.rs$"; then - # Only look at .rs files, exclude ci/scripts - local dataframe_diff=$(git diff "$BASE_REF" 2>/dev/null -- "datafusion/*/src/**/*.rs" | grep -E "^[-+].*pub fn") + if git diff "$BASE_REF"..HEAD --name-only 2>/dev/null | grep -qE "dataframe.*\.rs$"; then + # Only look at .rs files in DataFrame source directories + local dataframe_diff=$(git diff "$BASE_REF"..HEAD 2>/dev/null -- "datafusion/core/src/dataframe/" | grep -E "^[-+].*pub fn") if [ -n "$dataframe_diff" ]; then echo "- ⚠️ **DataFrame API changes detected:**" >> "$output_file" echo "\`\`\`diff" >> "$output_file" @@ -166,8 +167,8 @@ EOF changes_found=true fi - # Show general public API changes - ONLY in .rs files, EXCLUDE ci/scripts - local api_changes=$(git diff "$BASE_REF" 2>/dev/null -- "datafusion/" "ballista/" | grep -E "^[-+].*pub (fn|struct|enum)") + # Show general public API changes - ONLY in .rs files, EXCLUDE ci/scripts and tests + local api_changes=$(git diff "$BASE_REF"..HEAD 2>/dev/null -- "datafusion/" "ballista/" | grep -E "^[-+].*pub (fn|struct|enum)" | grep -v "/tests/") if [ -n "$api_changes" ]; then echo "- ⚠️ **Public API signature changes:**" >> "$output_file" echo "\`\`\`diff" >> "$output_file" @@ -185,13 +186,13 @@ EOF # Add list of changed SOURCE files only echo "" >> "$output_file" echo "### Source Files Modified:" >> "$output_file" - git diff "$BASE_REF" --name-only 2>/dev/null | grep -E "\.rs$" | head -20 | sed 's/^/- /' >> "$output_file" || { + git diff "$BASE_REF"..HEAD --name-only 2>/dev/null | grep -E "\.rs$" | head -20 | sed 's/^/- /' >> "$output_file" || { echo "- No Rust source files modified" >> "$output_file" } echo "" >> "$output_file" echo "### All Files Modified:" >> "$output_file" - git diff "$BASE_REF" --name-only 2>/dev/null | head -20 | sed 's/^/- /' >> "$output_file" + git diff "$BASE_REF"..HEAD --name-only 2>/dev/null | head -20 | sed 's/^/- /' >> "$output_file" else echo "- ⚠️ Git-based analysis skipped (no base ref available)" >> "$output_file" fi From 5f77dc5ea203ac86fcf9da893f39c7825115035a Mon Sep 17 00:00:00 2001 From: Lucas Earl Date: Mon, 21 Jul 2025 22:57:44 -0600 Subject: [PATCH 4/4] Optimize checkout configuration and explain fetch-depth requirement- Remove unnecessary token parameter (not needed for public repo)- Add clear comments explaining why fetch-depth: 0 is required- Improve base ref handling for GitHub Actions PR context- Use origin/$GITHUB_BASE_REF for proper remote branch referenceAddresses reviewer question about checkout configuration necessity. --- .github/workflows/dev.yml | 4 +++- ci/scripts/detect-breaking-changes.sh | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index d1702440fd57c..2c050fddc413f 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -35,8 +35,10 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: + # Need full history to access base branch for git diff operations + # GitHub Actions only fetches single commit by default (fetch-depth: 1) + # Our script uses: git diff "$BASE_REF"..HEAD which requires base branch fetch-depth: 0 - token: ${{ secrets.GITHUB_TOKEN }} - name: Run breaking changes detection shell: bash diff --git a/ci/scripts/detect-breaking-changes.sh b/ci/scripts/detect-breaking-changes.sh index 5f75cdce11e8e..288a78b39ff60 100755 --- a/ci/scripts/detect-breaking-changes.sh +++ b/ci/scripts/detect-breaking-changes.sh @@ -13,6 +13,13 @@ fi BASE_REF="${GITHUB_BASE_REF:-main}" HEAD_REF="${GITHUB_HEAD_REF:-HEAD}" +# In GitHub Actions PR context, use the proper base reference +if [ -n "$GITHUB_BASE_REF" ]; then + # For PRs, GitHub provides GITHUB_BASE_REF (e.g., "main") + # We need to reference it as origin/$GITHUB_BASE_REF + BASE_REF="origin/$GITHUB_BASE_REF" +fi + echo "🔍 DataFusion Breaking Changes Detection" echo "Comparing against: $BASE_REF" echo "Current ref: $HEAD_REF"