From d11df6fa63a18af8c35b79aae9ecdae1d12e2217 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 7 Feb 2026 07:16:28 +0000
Subject: [PATCH 1/3] Initial plan


From d79621f76d477d7ae7d7dde7b191da6169bd1632 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 7 Feb 2026 07:24:22 +0000
Subject: [PATCH 2/3] Add test failure reporting script and CI integration

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 .github/workflows/ci.yml        |  41 +++++++++++
 scripts/report-test-failures.sh | 125 ++++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+)
 create mode 100755 scripts/report-test-failures.sh

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index be39432808c..97845de87be 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -110,6 +110,7 @@ jobs:
           fi
 
       - name: Run unit tests with coverage
+        id: run-unit-tests
         run: |
           set -o pipefail
           # Run tests with JSON output for artifacts, but also show failures
@@ -124,6 +125,24 @@ jobs:
           # Generate coverage HTML report
           go tool cover -html=coverage.out -o coverage.html
 
+      - name: Report test failures
+        if: failure() && steps.run-unit-tests.outcome == 'failure'
+        run: |
+          # pipefail makes the `if` below see the report script's exit status instead of tee's (always 0)
+          set -o pipefail
+          echo "## 🔍 Unit Test Failure Analysis" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "Analyzing unit test results" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          if ./scripts/report-test-failures.sh test-result-unit.json | tee /tmp/failure-report.txt; then
+            echo "No failures detected in JSON output (unexpected - tests failed but no failure records found)" >> "$GITHUB_STEP_SUMMARY"
+          else
+            # Script exited non-zero (failures found, or result file missing) - add its output to the summary
+            echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
+            cat /tmp/failure-report.txt >> "$GITHUB_STEP_SUMMARY"
+            echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
+          fi
+
       # Coverage reports for recent builds only - 7 days is sufficient for debugging recent changes
       - name: Upload coverage report
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
@@ -343,6 +362,7 @@ jobs:
         run: make build
         
       - name: Run integration tests - ${{ matrix.test-group.name }}
+        id: run-tests
         run: |
           set -o pipefail
           # Sanitize the test group name for use in filename
@@ -359,6 +379,27 @@ jobs:
             go test -v -parallel=8 -timeout=10m -tags 'integration' -run '${{ matrix.test-group.pattern }}' -json ${{ matrix.test-group.packages }} | tee "test-result-integration-${SAFE_NAME}.json"
           fi
 
+      - name: Report test failures
+        if: failure() && steps.run-tests.outcome == 'failure'
+        run: |
+          # pipefail makes the `if` below see the report script's exit status instead of tee's (always 0)
+          set -o pipefail
+          # Sanitize the test group name to match the file created in the previous step
+          SAFE_NAME=$(echo "${{ matrix.test-group.name }}" | sed 's/[^a-zA-Z0-9]/-/g' | sed 's/--*/-/g')
+
+          echo "## 🔍 Test Failure Analysis" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "Analyzing test results for: **${{ matrix.test-group.name }}**" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          if ./scripts/report-test-failures.sh "test-result-integration-${SAFE_NAME}.json" | tee /tmp/failure-report.txt; then
+            echo "No failures detected in JSON output (unexpected - tests failed but no failure records found)" >> "$GITHUB_STEP_SUMMARY"
+          else
+            # Script exited non-zero (failures found, or result file missing) - add its output to the summary
+            echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
+            cat /tmp/failure-report.txt >> "$GITHUB_STEP_SUMMARY"
+            echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
+          fi
+
       - name: Upload integration test results
         if: always()  # Upload even if tests fail so canary_go can track coverage
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
diff --git a/scripts/report-test-failures.sh b/scripts/report-test-failures.sh
new file mode 100755
index 00000000000..7c19c6e29a0
--- /dev/null
+++ b/scripts/report-test-failures.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# Report test failures from JSON test result files ('go test -json' format).
+# Prints details for every "fail" event; exits 1 if any were found (or no input file exists), else 0.
+
+set -euo pipefail
+
+if [ $# -eq 0 ]; then
+  echo "Usage: $0 <test-result-file> [test-result-file...]"
+  echo "Reports test failures from JSON test result files"
+  echo ""
+  echo "This script extracts and displays all test failures including:"
+  echo "  - Individual test failures (Action:\"fail\" with Test field)"
+  echo "  - Package-level failures (Action:\"fail\" without Test field)"
+  echo "  - Test output leading up to failures"
+  exit 1
+fi
+
+# Track if any failures found, and how many of the given files actually existed
+FAILURES_FOUND=0
+TOTAL_FILES=0
+
+for file in "$@"; do
+  if [ ! -f "$file" ]; then
+    echo "⚠️  Warning: File $file does not exist, skipping..."
+    continue
+  fi
+
+  TOTAL_FILES=$((TOTAL_FILES + 1))
+
+  # Extract all failure entries from the JSON log
+  # Look for lines with "Action":"fail" (|| true: grep exits 1 when nothing matches)
+  FAIL_ENTRIES=$(grep '"Action":"fail"' "$file" 2>/dev/null || true)
+
+  if [ -z "$FAIL_ENTRIES" ]; then
+    continue
+  fi
+
+  FAILURES_FOUND=1
+
+  echo ""
+  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+  echo "❌ FAILURES FOUND in: $file"
+  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+  echo ""
+
+  # Process each failure entry (one JSON event per line)
+  echo "$FAIL_ENTRIES" | while IFS= read -r fail_line; do
+    # Extract package name
+    PACKAGE=$(echo "$fail_line" | grep -o '"Package":"[^"]*"' | sed 's/"Package":"\([^"]*\)"/\1/' || echo "unknown")
+
+    # Extract test name (if present)
+    TEST_NAME=$(echo "$fail_line" | grep -o '"Test":"[^"]*"' | sed 's/"Test":"\([^"]*\)"/\1/' || echo "")
+
+    # Extract elapsed time (if present)
+    ELAPSED=$(echo "$fail_line" | grep -o '"Elapsed":[0-9.]*' | sed 's/"Elapsed"://' || echo "")
+
+    if [ -n "$TEST_NAME" ]; then
+      echo "📍 Test Failure:"
+      echo "   Package: $PACKAGE"
+      echo "   Test:    $TEST_NAME"
+      if [ -n "$ELAPSED" ]; then
+        echo "   Elapsed: ${ELAPSED}s"
+      fi
+      echo ""
+      # Show the last output lines recorded for this test; they usually hold the error message.
+      # grep -F matches the test name literally (names may contain regex metacharacters).
+      # "|| true": with set -e + pipefail, a test with no output records must not abort the report.
+      echo "   Recent test output:"
+      grep -F -- "\"Test\":\"$TEST_NAME\"" "$file" | grep '"Action":"output"' | tail -10 | while IFS= read -r output_line; do
+        OUTPUT=$(echo "$output_line" | sed 's/.*"Output":"\(.*\)".*/\1/' | sed 's/\\n/\n/g' | sed 's/\\t/\t/g')
+        echo "   $OUTPUT"
+      done || true
+      echo ""
+    else
+      echo "📦 Package-level Failure:"
+      echo "   Package: $PACKAGE"
+      if [ -n "$ELAPSED" ]; then
+        echo "   Elapsed: ${ELAPSED}s"
+      fi
+      echo ""
+      echo "   ⚠️  No individual test marked as failed!"
+      echo "   This could indicate:"
+      echo "   - A test panicked during initialization"
+      echo "   - A race condition detected by -race flag"
+      echo "   - A build/compilation issue in test code"
+      echo "   - A test timeout"
+      echo ""
+      echo "   Recent package output (last 20 lines):"
+      grep -F -- "\"Package\":\"$PACKAGE\"" "$file" | grep '"Action":"output"' | tail -20 | while IFS= read -r output_line; do
+        OUTPUT=$(echo "$output_line" | sed 's/.*"Output":"\(.*\)".*/\1/' | sed 's/\\n/\n/g' | sed 's/\\t/\t/g')
+        echo "   $OUTPUT"
+      done || true  # a package with no output records is not an error
+      echo ""
+    fi
+
+    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+    echo ""
+  done
+done
+
+if [ "$FAILURES_FOUND" -eq 0 ]; then
+  if [ "$TOTAL_FILES" -eq 0 ]; then
+    echo "❌ ERROR: No valid test result files found"
+    exit 1
+  else
+    echo "✅ No test failures found in $TOTAL_FILES file(s)"
+    exit 0
+  fi
+else
+  echo ""
+  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+  echo "Summary: Test failures detected"
+  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+  echo ""
+  echo "💡 Debugging tips:"
+  echo "   1. Review the test output above for error messages"
+  echo "   2. If no individual test failed, check for:"
+  echo "      - Race conditions (run locally with -race flag)"
+  echo "      - Test initialization panics"
+  echo "      - Build errors in test files"
+  echo "   3. Run the test locally with: go test -v -tags integration <package>"
+  echo "   4. Add -run <TestName> to run a specific failing test"
+  echo ""
+  exit 1
+fi

From 2c358c4cc69ec53230d066331d55878ae7d7114a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 7 Feb 2026 07:27:07 +0000
Subject: [PATCH 3/3] Add comprehensive tests for report-test-failures.sh
 script

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 scripts/report-test-failures_test.sh | 123 +++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100755 scripts/report-test-failures_test.sh

diff --git a/scripts/report-test-failures_test.sh b/scripts/report-test-failures_test.sh
new file mode 100755
index 00000000000..e172b789b42
--- /dev/null
+++ b/scripts/report-test-failures_test.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# Test script for report-test-failures.sh: feeds it small 'go test -json' fixtures and checks exit status/output
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPORT_SCRIPT="$SCRIPT_DIR/report-test-failures.sh"
+TEST_DIR=$(mktemp -d)
+
+cleanup() {
+  rm -rf "$TEST_DIR"
+}
+trap cleanup EXIT
+
+echo "Testing report-test-failures.sh"
+echo "================================"
+echo ""
+
+# Test 1: No failures (should exit 0)
+echo "Test 1: No failures"
+cat > "$TEST_DIR/no-failures.json" << 'EOF'
+{"Time":"2026-02-07T04:43:04.632424046Z","Action":"pass","Package":"github.com/github/gh-aw/pkg/workflow","Test":"TestSomething","Elapsed":0.01}
+{"Time":"2026-02-07T04:43:04.632424046Z","Action":"pass","Package":"github.com/github/gh-aw/pkg/workflow","Elapsed":0.5}
+EOF
+
+if "$REPORT_SCRIPT" "$TEST_DIR/no-failures.json" > /dev/null 2>&1; then
+  echo "✅ PASS: Correctly reported no failures"
+else
+  echo "❌ FAIL: Should exit 0 when no failures"
+  exit 1
+fi
+echo ""
+
+# Test 2: Individual test failure (should exit 1 and show the test name and its output)
+echo "Test 2: Individual test failure"
+cat > "$TEST_DIR/individual-failure.json" << 'EOF'
+{"Time":"2026-02-07T04:43:04.632424046Z","Action":"run","Package":"github.com/github/gh-aw/pkg/workflow","Test":"TestSomething"}
+{"Time":"2026-02-07T04:43:04.632424046Z","Action":"output","Package":"github.com/github/gh-aw/pkg/workflow","Test":"TestSomething","Output":"=== RUN   TestSomething\n"}
+{"Time":"2026-02-07T04:43:04.632424046Z","Action":"output","Package":"github.com/github/gh-aw/pkg/workflow","Test":"TestSomething","Output":"    test.go:123: expected 5, got 3\n"}
+{"Time":"2026-02-07T04:43:04.632424046Z","Action":"fail","Package":"github.com/github/gh-aw/pkg/workflow","Test":"TestSomething","Elapsed":0.01}
+{"Time":"2026-02-07T04:43:04.672198249Z","Action":"fail","Package":"github.com/github/gh-aw/pkg/workflow","Elapsed":10.837}
+EOF
+
+if "$REPORT_SCRIPT" "$TEST_DIR/individual-failure.json" > "$TEST_DIR/test2-output.txt" 2>&1; then
+  echo "❌ FAIL: Should exit 1 when failures found"
+  exit 1
+else
+  if grep -q "TestSomething" "$TEST_DIR/test2-output.txt" && grep -q "test.go:123" "$TEST_DIR/test2-output.txt"; then
+    echo "✅ PASS: Correctly detected and reported individual test failure"
+  else
+    echo "❌ FAIL: Missing expected failure details"
+    cat "$TEST_DIR/test2-output.txt"
+    exit 1
+  fi
+fi
+echo ""
+
+# Test 3: Package-level failure (no individual test)
+echo "Test 3: Package-level failure only"
+cat > "$TEST_DIR/package-failure.json" << 'EOF'
+{"Time":"2026-02-07T04:43:04.632424046Z","Action":"pass","Package":"github.com/github/gh-aw/pkg/workflow","Test":"TestA","Elapsed":0.01}
+{"Time":"2026-02-07T04:43:04.669870648Z","Action":"output","Package":"github.com/github/gh-aw/pkg/workflow","Output":"FAIL\n"}
+{"Time":"2026-02-07T04:43:04.672171709Z","Action":"output","Package":"github.com/github/gh-aw/pkg/workflow","Output":"FAIL\tgithub.com/github/gh-aw/pkg/workflow\t10.837s\n"}
+{"Time":"2026-02-07T04:43:04.672198249Z","Action":"fail","Package":"github.com/github/gh-aw/pkg/workflow","Elapsed":10.837}
+EOF
+
+if "$REPORT_SCRIPT" "$TEST_DIR/package-failure.json" > "$TEST_DIR/test3-output.txt" 2>&1; then
+  echo "❌ FAIL: Should exit 1 when failures found"
+  exit 1
+else
+  if grep -q "Package-level Failure" "$TEST_DIR/test3-output.txt" && grep -q "No individual test marked as failed" "$TEST_DIR/test3-output.txt"; then
+    echo "✅ PASS: Correctly detected and reported package-level failure"
+  else
+    echo "❌ FAIL: Missing expected package-level failure details"
+    cat "$TEST_DIR/test3-output.txt"
+    exit 1
+  fi
+fi
+echo ""
+
+# Test 4: Multiple files (a failure in the second file must still be reported)
+echo "Test 4: Multiple test result files"
+cat > "$TEST_DIR/file1.json" << 'EOF'
+{"Time":"2026-02-07T04:43:04.632424046Z","Action":"pass","Package":"github.com/github/gh-aw/pkg/workflow","Test":"TestA","Elapsed":0.01}
+EOF
+
+cat > "$TEST_DIR/file2.json" << 'EOF'
+{"Time":"2026-02-07T04:43:04.632424046Z","Action":"fail","Package":"github.com/github/gh-aw/pkg/cli","Test":"TestB","Elapsed":0.02}
+EOF
+
+if "$REPORT_SCRIPT" "$TEST_DIR/file1.json" "$TEST_DIR/file2.json" > "$TEST_DIR/test4-output.txt" 2>&1; then
+  echo "❌ FAIL: Should exit 1 when failures found"
+  exit 1
+else
+  if grep -q "TestB" "$TEST_DIR/test4-output.txt"; then
+    echo "✅ PASS: Correctly processed multiple files"
+  else
+    echo "❌ FAIL: Missing failure from second file"
+    cat "$TEST_DIR/test4-output.txt"
+    exit 1
+  fi
+fi
+echo ""
+
+# Test 5: Non-existent file (should exit 1 with the "no valid files" error)
+echo "Test 5: Non-existent file handling"
+if "$REPORT_SCRIPT" "$TEST_DIR/nonexistent.json" > "$TEST_DIR/test5-output.txt" 2>&1; then
+  echo "❌ FAIL: Should exit 1 when no valid files"
+  exit 1
+else
+  if grep -q "ERROR: No valid test result files found" "$TEST_DIR/test5-output.txt"; then
+    echo "✅ PASS: Correctly handled non-existent file"
+  else
+    echo "❌ FAIL: Wrong error message for non-existent file"
+    cat "$TEST_DIR/test5-output.txt"
+    exit 1
+  fi
+fi
+echo ""
+
+echo "================================"
+echo "All tests passed! ✅"
+echo ""