diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py index a7f30f72aedf..8c074abd76c9 100755 --- a/.github/create_workflow_report.py +++ b/.github/create_workflow_report.py @@ -253,11 +253,17 @@ def get_commit_statuses(sha: str) -> pd.DataFrame: for item in all_data ] - return ( - pd.DataFrame(parsed) - .sort_values(by=["job_status", "job_name"], ascending=[True, True]) - .reset_index(drop=True) - ) + # Create DataFrame + df = pd.DataFrame(parsed) + + # Drop duplicates keeping the first occurrence (newest status for each context) + # GitHub returns statuses in reverse chronological order + df = df.drop_duplicates(subset=["job_name"], keep="first") + + # Sort by status and job name + return df.sort_values( + by=["job_status", "job_name"], ascending=[True, True] + ).reset_index(drop=True) def get_pr_info_from_number(pr_number: str) -> dict: @@ -291,13 +297,23 @@ def get_checks_fails(client: Client, job_url: str): Get tests that did not succeed for the given job URL. Exclude checks that have status 'error' as they are counted in get_checks_errors. """ - columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url LIKE '{job_url}%' - AND test_status IN ('FAIL', 'ERROR') - AND check_status!='error' - ORDER BY check_name, test_name - """ + query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link + FROM ( + SELECT + argMax(check_status, check_start_time) as job_status, + check_name as job_name, + argMax(test_status, check_start_time) as status, + test_name, + report_url as results_link, + task_url + FROM `gh-data`.checks + GROUP BY check_name, test_name, report_url, task_url + ) + WHERE task_url LIKE '{job_url}%' + AND test_status IN ('FAIL', 'ERROR') + AND job_status!='error' + ORDER BY job_name, test_name + """ return client.query_dataframe(query) @@ -305,14 +321,26 @@ def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): """ Get tests that are known to fail for the given job URL. """ - assert len(known_fails) > 0, "cannot query the database with empty known fails" - columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url LIKE '{job_url}%' - AND test_status='BROKEN' - AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) - ORDER BY test_name, check_name - """ + if len(known_fails) == 0: + return pd.DataFrame() + + query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link + FROM ( + SELECT + argMax(check_status, check_start_time) as job_status, + check_name as job_name, + argMax(test_status, check_start_time) as status, + test_name, + report_url as results_link, + task_url + FROM `gh-data`.checks + GROUP BY check_name, test_name, report_url, task_url + ) + WHERE task_url LIKE '{job_url}%' + AND test_status='BROKEN' + AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) + ORDER BY job_name, test_name + """ df = client.query_dataframe(query) @@ -333,12 +361,22 @@ def get_checks_errors(client: Client, job_url: str): """ Get checks that have status 'error' for the given job URL. """ - columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url LIKE '{job_url}%' - AND check_status=='error' - ORDER BY check_name, test_name - """ + query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link + FROM ( + SELECT + argMax(check_status, check_start_time) as job_status, + check_name as job_name, + argMax(test_status, check_start_time) as status, + test_name, + report_url as results_link, + task_url + FROM `gh-data`.checks + GROUP BY check_name, test_name, report_url, task_url + ) + WHERE task_url LIKE '{job_url}%' + AND job_status=='error' + ORDER BY job_name, test_name + """ return client.query_dataframe(query) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index c5ca0080ec4b..75dc2a6bbf82 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -649,12 +649,13 @@ jobs: ${{ toJson(needs) }} EOF python3 ./tests/ci/ci_buddy.py --check-wf-status - - name: Create and upload combined report + - name: Create and upload report if: ${{ !cancelled() }} env: CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} CHECKS_DATABASE_USER: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }} CHECKS_DATABASE_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} PR_NUMBER: ${{ github.event.pull_request.number || 0 }} ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} @@ -664,6 +665,8 @@ jobs: REPORT_LINK=$(python3 .github/create_workflow_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves) + echo $REPORT_LINK + IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') if [[ -n $IS_VALID_URL ]]; then echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY