From efedec39ceafbbc7ab992e630309040a115da78b Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 6 Jun 2025 09:11:25 -0400 Subject: [PATCH 1/8] smarter pr number fetching for grype and report --- .github/create_workflow_report.py | 10 ++++++++++ .github/workflows/grype_scan.yml | 4 ++-- .github/workflows/release_branches.yml | 4 +--- tests/ci/version_helper.py | 8 ++++++++ 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py index a7f30f72aedf..c34457f36488 100755 --- a/.github/create_workflow_report.py +++ b/.github/create_workflow_report.py @@ -506,6 +506,16 @@ def parse_args() -> argparse.Namespace: def main(): args = parse_args() + if args.pr_number is None or args.commit_sha is None: + run_details = get_run_details(args.actions_run_url) + if args.pr_number is None: + if len(run_details["pull_requests"]) > 0: + args.pr_number = run_details["pull_requests"][0]["number"] + else: + args.pr_number = 0 + if args.commit_sha is None: + args.commit_sha = run_details["head_commit"]["id"] + db_client = Client( host=os.getenv(DATABASE_HOST_VAR), user=os.getenv(DATABASE_USER_VAR), diff --git a/.github/workflows/grype_scan.yml b/.github/workflows/grype_scan.yml index e749448b81ba..d4ce9b57977e 100644 --- a/.github/workflows/grype_scan.yml +++ b/.github/workflows/grype_scan.yml @@ -49,7 +49,7 @@ jobs: run: | python3 ./tests/ci/version_helper.py | tee /tmp/version_info source /tmp/version_info - echo "docker_image=${{ inputs.docker_image }}:${{ github.event.pull_request.number || 0 }}-$CLICKHOUSE_VERSION_STRING" >> $GITHUB_OUTPUT + echo "docker_image=${{ inputs.docker_image }}:$PR_NUMBER-$CLICKHOUSE_VERSION_STRING" >> $GITHUB_OUTPUT echo "commit_sha=$CLICKHOUSE_VERSION_GITHASH" >> $GITHUB_OUTPUT - name: Run Grype Scan @@ -67,9 +67,9 @@ jobs: env: S3_BUCKET: "altinity-build-artifacts" COMMIT_SHA: ${{ steps.set_version.outputs.commit_sha || github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - PR_NUMBER: ${{ github.event.pull_request.number || 0 }} DOCKER_IMAGE: ${{ steps.set_version.outputs.docker_image || inputs.docker_image }} run: | + echo "PR_NUMBER=$PR_NUMBER" ./.github/grype/transform_and_upload_results_s3.sh - name: Create step summary diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index c5ca0080ec4b..2f61a658785d 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -655,14 +655,12 @@ jobs: CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} CHECKS_DATABASE_USER: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }} CHECKS_DATABASE_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }} - COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - PR_NUMBER: ${{ github.event.pull_request.number || 0 }} ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} shell: bash run: | pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 - REPORT_LINK=$(python3 .github/create_workflow_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves) + REPORT_LINK=$(python3 .github/create_workflow_report.py --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves) IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') if [[ -n $IS_VALID_URL ]]; then diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index afee29c9c193..f097d4a57b9f 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -4,6 +4,8 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Literal, Optional, Set, Tuple, Union +from pr_info import PRInfo # grype scan needs to know the PR number + from git_helper import TWEAK, Git, get_tags, git_runner, removeprefix, VersionType FILE_WITH_VERSION_PATH = "cmake/autogenerated_versions.txt" @@ -531,6 +533,12 @@ def main(): if args.update_part or args.update_cmake: update_cmake_version(version) + # grype scan needs to know the PR number + pr_info = PRInfo() + print(f"PR_NUMBER={pr_info.number}") + if args.export: + print(f"export PR_NUMBER") + for k, v in version.as_dict().items(): name = f"CLICKHOUSE_VERSION_{k.upper()}" print(f"{name}='{v}'") From 0166c918c3c8ddf9eb65e434d7795b3c19a5317d Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 6 Jun 2025 13:05:06 -0400 Subject: [PATCH 2/8] let the grype workflow compute the tag --- .github/grype/run_grype_scan.sh | 2 +- .github/workflows/grype_scan.yml | 11 ++++++++++- .github/workflows/release_branches.yml | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/grype/run_grype_scan.sh b/.github/grype/run_grype_scan.sh index c5ce0b1b10d3..af428e37d669 100755 --- a/.github/grype/run_grype_scan.sh +++ b/.github/grype/run_grype_scan.sh @@ -3,7 +3,7 @@ set -e IMAGE=$1 -GRYPE_VERSION="v0.80.1" +GRYPE_VERSION=${GRYPE_VERSION:-"v0.92.2"} docker pull $IMAGE docker pull anchore/grype:${GRYPE_VERSION} diff --git a/.github/workflows/grype_scan.yml b/.github/workflows/grype_scan.yml index d4ce9b57977e..fd6368a0953b 100644 --- a/.github/workflows/grype_scan.yml +++ b/.github/workflows/grype_scan.yml @@ -15,11 +15,17 @@ on: description: 'Docker image. If no tag, it will be determined by version_helper.py' required: true type: string + tag-suffix: + description: 'Tag suffix. To be appended the version from version_helper.py' + required: false + type: string + default: "" env: PYTHONUNBUFFERED: 1 AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + GRYPE_VERSION: "v0.92.2-arm64v8" jobs: grype_scan: @@ -46,10 +52,12 @@ jobs: - name: Set image tag if not given if: ${{ !contains(inputs.docker_image, ':') }} id: set_version + env: + TAG_SUFFIX: ${{ inputs.tag-suffix }} run: | python3 ./tests/ci/version_helper.py | tee /tmp/version_info source /tmp/version_info - echo "docker_image=${{ inputs.docker_image }}:$PR_NUMBER-$CLICKHOUSE_VERSION_STRING" >> $GITHUB_OUTPUT + echo "docker_image=${{ inputs.docker_image }}:$PR_NUMBER-$CLICKHOUSE_VERSION_STRING$TAG_SUFFIX" >> $GITHUB_OUTPUT echo "commit_sha=$CLICKHOUSE_VERSION_GITHASH" >> $GITHUB_OUTPUT - name: Run Grype Scan @@ -67,6 +75,7 @@ jobs: env: S3_BUCKET: "altinity-build-artifacts" COMMIT_SHA: ${{ steps.set_version.outputs.commit_sha || github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + PR_NUMBER: ${{ env.PR_NUMBER || github.event.pull_request.number || 0 }} DOCKER_IMAGE: ${{ steps.set_version.outputs.docker_image || inputs.docker_image }} run: | echo "PR_NUMBER=$PR_NUMBER" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 8270b3de1e14..c16e821aced4 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -593,7 +593,8 @@ jobs: uses: ./.github/workflows/grype_scan.yml secrets: inherit with: - docker_image: altinityinfra/clickhouse-${{ matrix.image }}:${{ github.event.pull_request.number || 0 }}-${{ fromJson(needs.RunConfig.outputs.data).version }}${{ matrix.suffix }} + docker_image: altinityinfra/clickhouse-${{ matrix.image }} + tag-suffix: ${{ matrix.suffix }} FinishCheck: if: ${{ !cancelled() }} needs: From 28ade05fc218d2dcb569328ecefaed8bb762f872 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 6 Jun 2025 15:05:54 -0400 Subject: [PATCH 3/8] fix python deps --- .github/workflows/grype_scan.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/grype_scan.yml b/.github/workflows/grype_scan.yml index fd6368a0953b..860b77d60b3f 100644 --- a/.github/workflows/grype_scan.yml +++ b/.github/workflows/grype_scan.yml @@ -45,7 +45,7 @@ jobs: sudo apt-get install -y python3-pip python3-venv python3 -m venv venv source venv/bin/activate - pip install --upgrade requests chardet urllib3 + pip install --upgrade requests chardet urllib3 unidiff boto3 PyGithub pip install testflows==$TESTFLOWS_VERSION awscli==1.33.28 echo PATH=$PATH >>$GITHUB_ENV From eabac0f8afa5ff00a41938364d7c1014b2ec6650 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Mon, 9 Jun 2025 11:22:35 -0400 Subject: [PATCH 4/8] add branch name to release report --- .github/create_workflow_report.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py index 403b0c60f7e4..cbb616cd4807 100755 --- a/.github/create_workflow_report.py +++ b/.github/create_workflow_report.py @@ -5,6 +5,7 @@ from itertools import combinations import json from datetime import datetime +from functools import lru_cache import requests import pandas as pd @@ -196,6 +197,29 @@ """ +@lru_cache +def get_run_details(run_url: str) -> dict: + """ + Fetch run details for a given run URL. + """ + run_id = run_url.split("/")[-1] + + headers = { + "Authorization": f"token {os.getenv('GITHUB_TOKEN')}", + "Accept": "application/vnd.github.v3+json", + } + + url = f"https://api.github.com/repos/{GITHUB_REPO}/actions/runs/{run_id}" + response = requests.get(url, headers=headers) + + if response.status_code != 200: + raise Exception( + f"Failed to fetch run details: {response.status_code} {response.text}" + ) + + return response.json() + + def get_commit_statuses(sha: str) -> pd.DataFrame: """ Fetch commit statuses for a given SHA and return as a pandas DataFrame. @@ -589,7 +613,9 @@ def main(): ) if args.pr_number == "0": - pr_info_html = "Release" + run_details = get_run_details(args.actions_run_url) + branch_name = run_details.get("head_branch", "unknown branch") + pr_info_html = f"Release ({branch_name})" else: try: pr_info = get_pr_info_from_number(args.pr_number) From 3d3000f9e8d700b1dccebb61c580cf1819d4fc83 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Mon, 9 Jun 2025 12:40:31 -0400 Subject: [PATCH 5/8] use version from RunConfig --- .github/workflows/grype_scan.yml | 13 ++++++++++++- .github/workflows/release_branches.yml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/grype_scan.yml b/.github/workflows/grype_scan.yml index 860b77d60b3f..e68c3e63e283 100644 --- a/.github/workflows/grype_scan.yml +++ b/.github/workflows/grype_scan.yml @@ -15,6 +15,11 @@ on: description: 'Docker image. If no tag, it will be determined by version_helper.py' required: true type: string + version: + description: 'Version tag. If no version, it will be determined by version_helper.py' + required: false + type: string + default: "" tag-suffix: description: 'Tag suffix. To be appended the version from version_helper.py' required: false @@ -54,10 +59,16 @@ jobs: id: set_version env: TAG_SUFFIX: ${{ inputs.tag-suffix }} + SPECIFIED_VERSION: ${{ inputs.version }} run: | python3 ./tests/ci/version_helper.py | tee /tmp/version_info source /tmp/version_info - echo "docker_image=${{ inputs.docker_image }}:$PR_NUMBER-$CLICKHOUSE_VERSION_STRING$TAG_SUFFIX" >> $GITHUB_OUTPUT + if [ -z "$SPECIFIED_VERSION" ]; then + VERSION=$CLICKHOUSE_VERSION_STRING + else + VERSION=$SPECIFIED_VERSION + fi + echo "docker_image=${{ inputs.docker_image }}:$PR_NUMBER-$VERSION$TAG_SUFFIX" >> $GITHUB_OUTPUT echo "commit_sha=$CLICKHOUSE_VERSION_GITHASH" >> $GITHUB_OUTPUT - name: Run Grype Scan diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index c16e821aced4..75abdf0346f9 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -594,6 +594,7 @@ jobs: secrets: inherit with: docker_image: altinityinfra/clickhouse-${{ matrix.image }} + version: ${{ fromJson(needs.RunConfig.outputs.data).version }} tag-suffix: ${{ matrix.suffix }} FinishCheck: if: ${{ !cancelled() }} From eee7504f536fe29a47531c9997a03d016419025e Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Mon, 9 Jun 2025 13:00:26 -0400 Subject: [PATCH 6/8] use report action from main branch --- .../actions/create_workflow_report/action.yml | 41 ++ .../ci_run_report.html.jinja | 269 ++++++++++ .../create_workflow_report.py | 463 ++++++++---------- .github/workflows/release_branches.yml | 21 +- 4 files changed, 525 insertions(+), 269 deletions(-) create mode 100644 .github/actions/create_workflow_report/action.yml create mode 100644 .github/actions/create_workflow_report/ci_run_report.html.jinja rename .github/{ => actions/create_workflow_report}/create_workflow_report.py (56%) diff --git a/.github/actions/create_workflow_report/action.yml b/.github/actions/create_workflow_report/action.yml new file mode 100644 index 000000000000..fde62d01e29d --- /dev/null +++ b/.github/actions/create_workflow_report/action.yml @@ -0,0 +1,41 @@ +name: Create and Upload Combined Report +description: Create and upload a combined CI report +inputs: + final: + description: "Control whether the report is final or a preview" + required: false + default: "false" +runs: + using: "composite" + steps: + - name: Create and upload workflow report + env: + PR_NUMBER: ${{ github.event.pull_request.number || 0 }} + COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} + FINAL: ${{ inputs.final }} + shell: bash + run: | + pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 jinja2==3.1.5 + + CMD="python3 .github/actions/create_workflow_report/create_workflow_report.py" + ARGS="--pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves" + + set +e + if [[ "$FINAL" == "false" ]]; then + REPORT_LINK=$($CMD $ARGS --mark-preview) + else + REPORT_LINK=$($CMD $ARGS) + fi + + echo $REPORT_LINK + + if [[ "$FINAL" == "true" ]]; then + IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') + if [[ -n $IS_VALID_URL ]]; then + echo "Workflow Run Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY + else + echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY + exit 1 + fi + fi diff --git a/.github/actions/create_workflow_report/ci_run_report.html.jinja b/.github/actions/create_workflow_report/ci_run_report.html.jinja new file mode 100644 index 000000000000..a92c1aa34e3a --- /dev/null +++ b/.github/actions/create_workflow_report/ci_run_report.html.jinja @@ -0,0 +1,269 @@ + + + + + + + + + {{ title }} + + + + +

+ +

{{ title }}

+ + + + + + + + + + + + + + + + + + + + + + + +
Pull Request{{ pr_info_html }}
Workflow Run{{ workflow_id }}
Commit{{ commit_sha }}
Build ReportBuild Report
Date {{ date }}
+ {% if is_preview %} +

This is a preview. The workflow is not yet finished.

+ {% endif %} +

Table of Contents

+ + + {%- if pr_number != 0 -%} +

New Fails in PR

+

Compared with base sha {{ base_sha }}

+ {{ new_fails_html }} + {%- endif %} + +

CI Jobs Status

+ {{ ci_jobs_status_html }} + +

Checks Errors

+ {{ checks_errors_html }} + +

Checks New Fails

+ {{ checks_fails_html }} + +

Regression New Fails

+ {{ regression_fails_html }} + +

Docker Images CVEs

+ {{ docker_images_cves_html }} + +

Checks Known Fails

+

+ Fail reason conventions:
+ KNOWN - Accepted fail and fix is not planned
+ INVESTIGATE - We don't know why it fails
+ NEEDSFIX - Investigation done and a fix is needed to make it pass
+

+ {{ checks_known_fails_html }} + + + + \ No newline at end of file diff --git a/.github/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py similarity index 56% rename from .github/create_workflow_report.py rename to .github/actions/create_workflow_report/create_workflow_report.py index cbb616cd4807..8918b1db7728 100755 --- a/.github/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -7,12 +7,12 @@ from datetime import datetime from functools import lru_cache -import requests import pandas as pd +from jinja2 import Environment, FileSystemLoader +import requests from clickhouse_driver import Client import boto3 from botocore.exceptions import NoCredentialsError -import pandas as pd DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST" DATABASE_USER_VAR = "CHECKS_DATABASE_USER" @@ -20,181 +20,13 @@ S3_BUCKET = "altinity-build-artifacts" GITHUB_REPO = "Altinity/ClickHouse" +# Set up the Jinja2 environment +template_dir = os.path.dirname(__file__) -css = """ - /* Base colors for Altinity */ - :root { - --altinity-background: #000D45; - --altinity-accent: #189DCF; - --altinity-highlight: #FFC600; - --altinity-gray: #6c757d; - --altinity-light-gray: #f8f9fa; - --altinity-white: #ffffff; - } - - /* Body and heading fonts */ - body { - font-family: Arimo, "Proxima Nova", "Helvetica Neue", Helvetica, Arial, sans-serif; - font-size: 1rem; - background-color: var(--altinity-background); - color: var(--altinity-light-gray); - padding: 2rem; - } - - h1, h2, h3, h4, h5, h6 { - font-family: Figtree, "Proxima Nova", "Helvetica Neue", Helvetica, Arial, sans-serif; - color: var(--altinity-white); - } - - .logo { - width: auto; - height: 5em; - } - - /* General table styling */ - table { - min-width: min(900px, 98vw); - margin: 1rem 0; - border-collapse: collapse; - background-color: var(--altinity-white); - border: 1px solid var(--altinity-accent); - box-shadow: 0 0 8px rgba(0, 0, 0, 0.05); - color: var(--altinity-background); - } - - /* Table header styling */ - th { - background-color: var(--altinity-accent); - color: var(--altinity-white); - padding: 10px 16px; - text-align: left; - border: none; - border-bottom: 2px solid var(--altinity-background); - white-space: nowrap; - } - th.hth { - border-bottom: 1px solid var(--altinity-accent); - border-right: 2px solid var(--altinity-background); - } - - /* Table header sorting styling */ - th { - cursor: pointer; - } - th.no-sort { - pointer-events: none; - } - th::after, - th::before { - transition: color 0.2s ease-in-out; - font-size: 1.2em; - color: transparent; - } - th::after { - margin-left: 3px; - content: '\\025B8'; - } - th:hover::after { - color: inherit; - } - th.dir-d::after { - color: inherit; - content: '\\025BE'; - } - th.dir-u::after { - color: inherit; - content: '\\025B4'; - } - - /* Table body row styling */ - tr:hover { - background-color: var(--altinity-light-gray); - } - - /* Table cell styling */ - td { - padding: 8px 8px; - border: 1px solid var(--altinity-accent); - } - - /* Link styling */ - a { - color: var(--altinity-accent); - text-decoration: none; - } - a:hover { - color: var(--altinity-highlight); - text-decoration: underline; - } -""" - -script = """ - -""" - -logo = """ -

-""" +# Load the template +template = Environment(loader=FileSystemLoader(template_dir)).get_template( + "ci_run_report.html.jinja" +) @lru_cache @@ -316,6 +148,29 @@ def get_pr_info_from_number(pr_number: str) -> dict: return response.json() +@lru_cache +def get_run_details(run_url: str) -> dict: + """ + Fetch run details for a given run URL. + """ + run_id = run_url.split("/")[-1] + + headers = { + "Authorization": f"token {os.getenv('GITHUB_TOKEN')}", + "Accept": "application/vnd.github.v3+json", + } + + url = f"https://api.github.com/repos/{GITHUB_REPO}/actions/runs/{run_id}" + response = requests.get(url, headers=headers) + + if response.status_code != 200: + raise Exception( + f"Failed to fetch run details: {response.status_code} {response.text}" + ) + + return response.json() + + def get_checks_fails(client: Client, job_url: str): """ Get tests that did not succeed for the given job URL. @@ -433,14 +288,14 @@ def get_regression_fails(client: Client, job_url: str): architecture as arch, test_name, argMax(result, start_time) AS status, - job_url, job_name, - report_url as results_link + report_url as results_link, + job_url FROM `gh-data`.clickhouse_regression_results GROUP BY architecture, test_name, job_url, job_name, report_url ORDER BY length(test_name) DESC ) - WHERE job_url='{job_url}' + WHERE job_url LIKE '{job_url}%' AND status IN ('Fail', 'Error') """ df = client.query_dataframe(query) @@ -449,7 +304,105 @@ def get_regression_fails(client: Client, job_url: str): return df +def get_new_fails_this_pr( + client: Client, + pr_info: dict, + checks_fails: pd.DataFrame, + regression_fails: pd.DataFrame, +): + """ + Get tests that failed in the PR but passed in the base branch. + Compares both checks and regression test results. + """ + base_sha = pr_info.get("base", {}).get("sha") + if not base_sha: + raise Exception("No base SHA found for PR") + + # Modify tables to have the same columns + if len(checks_fails) > 0: + checks_fails = checks_fails.copy().drop(columns=["job_status"]) + if len(regression_fails) > 0: + regression_fails = regression_fails.copy() + regression_fails["job_name"] = regression_fails.apply( + lambda row: f"{row['arch']} {row['job_name']}".strip(), axis=1 + ) + regression_fails["test_status"] = regression_fails["status"] + + # Combine both types of fails and select only desired columns + desired_columns = ["job_name", "test_name", "test_status", "results_link"] + all_pr_fails = pd.concat([checks_fails, regression_fails], ignore_index=True)[ + desired_columns + ] + if len(all_pr_fails) == 0: + return pd.DataFrame() + + # Get all checks from the base branch that didn't fail + base_checks_query = f"""SELECT job_name, status as test_status, test_name, results_link + FROM ( + SELECT + check_name as job_name, + argMax(test_status, check_start_time) as status, + test_name, + report_url as results_link, + task_url + FROM `gh-data`.checks + WHERE commit_sha='{base_sha}' + GROUP BY check_name, test_name, report_url, task_url + ) + WHERE test_status NOT IN ('FAIL', 'ERROR') + ORDER BY job_name, test_name + """ + base_checks = client.query_dataframe(base_checks_query) + + # Get regression results from base branch that didn't fail + base_regression_query = f"""SELECT arch, job_name, status, test_name, results_link + FROM ( + SELECT + architecture as arch, + test_name, + argMax(result, start_time) AS status, + job_url, + job_name, + report_url as results_link + FROM `gh-data`.clickhouse_regression_results + WHERE results_link LIKE'%/{base_sha}/%' + GROUP BY architecture, test_name, job_url, job_name, report_url + ORDER BY length(test_name) DESC + ) + WHERE status NOT IN ('Fail', 'Error') + """ + base_regression = client.query_dataframe(base_regression_query) + if len(base_regression) > 0: + base_regression["job_name"] = base_regression.apply( + lambda row: f"{row['arch']} {row['job_name']}".strip(), axis=1 + ) + base_regression["test_status"] = base_regression["status"] + base_regression = base_regression.drop(columns=["arch", "status"]) + + # Combine base results + base_results = pd.concat([base_checks, base_regression], ignore_index=True) + + # Find tests that failed in PR but passed in base + pr_failed_tests = set(zip(all_pr_fails["job_name"], all_pr_fails["test_name"])) + base_passed_tests = set(zip(base_results["job_name"], base_results["test_name"])) + + new_fails = pr_failed_tests.intersection(base_passed_tests) + + # Filter PR results to only include new fails + mask = all_pr_fails.apply( + lambda row: (row["job_name"], row["test_name"]) in new_fails, axis=1 + ) + new_fails_df = all_pr_fails[mask] + + return new_fails_df + + def get_cves(pr_number, commit_sha): + """ + Fetch Grype results from S3. + + If no results are available for download, returns ... (Ellipsis). + """ s3_client = boto3.client("s3", endpoint_url=os.getenv("S3_URL")) s3_prefix = f"{pr_number}/{commit_sha}/grype/" @@ -462,6 +415,11 @@ def get_cves(pr_number, commit_sha): content["Prefix"] for content in response.get("CommonPrefixes", []) ] + if len(grype_result_dirs) == 0: + # We were asked to check the CVE data, but none was found, + # maybe this is a preview report and grype results are not available yet + return ... + for path in grype_result_dirs: file_key = f"{path}result.json" file_response = s3_client.get_object(Bucket=S3_BUCKET, Key=file_key) @@ -535,21 +493,21 @@ def format_results_as_html_table(results) -> str: ), }, escape=False, - ).replace(' border="1"', "") + border=0, + classes=["test-results-table"], + ) return html def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Create a combined CI report.") - parser.add_argument( + parser.add_argument( # Need the full URL rather than just the ID to query the databases "--actions-run-url", required=True, help="URL of the actions run" ) parser.add_argument( - "--pr-number", required=True, help="Pull request number for the S3 path" - ) - parser.add_argument( - "--commit-sha", required=True, help="Commit SHA for the S3 path" + "--pr-number", help="Pull request number for the S3 path", type=int ) + parser.add_argument("--commit-sha", help="Commit SHA for the S3 path") parser.add_argument( "--no-upload", action="store_true", help="Do not upload the report" ) @@ -592,6 +550,7 @@ def main(): "job_statuses": get_commit_statuses(args.commit_sha), "checks_fails": get_checks_fails(db_client, args.actions_run_url), "checks_known_fails": [], + "pr_new_fails": [], "checks_errors": get_checks_errors(db_client, args.actions_run_url), "regression_fails": get_regression_fails(db_client, args.actions_run_url), "docker_images_cves": ( @@ -599,6 +558,12 @@ def main(): ), } + # get_cves returns ... in the case where no Grype result files were found. + # This might occur when run in preview mode. + cves_not_checked = not args.cves or ( + args.mark_preview and fail_results["docker_images_cves"] is ... + ) + if args.known_fails: if not os.path.exists(args.known_fails): print(f"Known fails file {args.known_fails} not found.") @@ -612,7 +577,7 @@ def main(): db_client, args.actions_run_url, known_fails ) - if args.pr_number == "0": + if args.pr_number == 0: run_details = get_run_details(args.actions_run_url) branch_name = run_details.get("head_branch", "unknown branch") pr_info_html = f"Release ({branch_name})" @@ -622,11 +587,17 @@ def main(): pr_info_html = f""" #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')}) {pr_info.get("title")} """ + fail_results["pr_new_fails"] = get_new_fails_this_pr( + db_client, + pr_info, + fail_results["checks_fails"], + fail_results["regression_fails"], + ) except Exception as e: pr_info_html = e high_cve_count = 0 - if len(fail_results["docker_images_cves"]) > 0: + if not cves_not_checked and len(fail_results["docker_images_cves"]) > 0: high_cve_count = ( fail_results["docker_images_cves"]["severity"] .str.lower() @@ -634,72 +605,60 @@ def main(): .sum() ) - title = "ClickHouse® CI Workflow Run Report" - - html_report = f""" - - - - - - - {title} - - - {logo} -

{title}

- - - - - - - - - - - - - -
Pull Request{pr_info_html}
Workflow Run{args.actions_run_url.split('/')[-1]}
Commit{args.commit_sha}
Date{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC
- -

Table of Contents

-{'

This is a preview. FinishCheck has not completed.

' if args.mark_preview else ""} - - -

CI Jobs Status

-{format_results_as_html_table(fail_results['job_statuses'])} - -

Checks Errors

-{format_results_as_html_table(fail_results['checks_errors'])} - -

Checks New Fails

-{format_results_as_html_table(fail_results['checks_fails'])} - -

Regression New Fails

-{format_results_as_html_table(fail_results['regression_fails'])} - -

Docker Images CVEs

-{"

Not Checked

" if not args.cves else format_results_as_html_table(fail_results['docker_images_cves'])} - -

Checks Known Fails

-{"

Not Checked

" if not args.known_fails else format_results_as_html_table(fail_results['checks_known_fails'])} - -{script} - - -""" + # Define the context for rendering + context = { + "title": "ClickHouse® CI Workflow Run Report", + "github_repo": GITHUB_REPO, + "s3_bucket": S3_BUCKET, + "pr_info_html": pr_info_html, + "pr_number": args.pr_number, + "workflow_id": args.actions_run_url.split("/")[-1], + "commit_sha": args.commit_sha, + "base_sha": "" if args.pr_number == 0 else pr_info.get("base", {}).get("sha"), + "date": f"{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC", + "is_preview": args.mark_preview, + "counts": { + "jobs_status": f"{sum(fail_results['job_statuses']['job_status'] != 'success')} fail/error", + "checks_errors": len(fail_results["checks_errors"]), + "checks_new_fails": len(fail_results["checks_fails"]), + "regression_new_fails": len(fail_results["regression_fails"]), + "cves": "N/A" if cves_not_checked else f"{high_cve_count} high/critical", + "checks_known_fails": ( + "N/A" + if not args.known_fails + else len(fail_results["checks_known_fails"]) + ), + "pr_new_fails": len(fail_results["pr_new_fails"]), + }, + "ci_jobs_status_html": format_results_as_html_table( + fail_results["job_statuses"] + ), + "checks_errors_html": format_results_as_html_table( + fail_results["checks_errors"] + ), + "checks_fails_html": format_results_as_html_table(fail_results["checks_fails"]), + "regression_fails_html": format_results_as_html_table( + fail_results["regression_fails"] + ), + "docker_images_cves_html": ( + "

Not Checked

" + if cves_not_checked + else format_results_as_html_table(fail_results["docker_images_cves"]) + ), + "checks_known_fails_html": ( + "

Not Checked

" + if not args.known_fails + else format_results_as_html_table(fail_results["checks_known_fails"]) + ), + "new_fails_html": format_results_as_html_table(fail_results["pr_new_fails"]), + } + + # Render the template with the context + rendered_html = template.render(context) + report_name = "ci_run_report.html" report_path = Path(report_name) - report_path.write_text(html_report, encoding="utf-8") + report_path.write_text(rendered_html, encoding="utf-8") if args.no_upload: print(f"Report saved to {report_path}") @@ -714,7 +673,7 @@ def main(): s3_client.put_object( Bucket=S3_BUCKET, Key=report_destination_key, - Body=html_report, + Body=rendered_html, ContentType="text/html; charset=utf-8", ) except NoCredentialsError: diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 75abdf0346f9..091f5fa41406 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -653,24 +653,11 @@ jobs: python3 ./tests/ci/ci_buddy.py --check-wf-status - name: Create and upload report if: ${{ !cancelled() }} + uses: ./.github/actions/create_workflow_report@b229c9e6f9d1e100ba32f271b6bba112d9894afb env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} CHECKS_DATABASE_USER: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }} CHECKS_DATABASE_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} - shell: bash - run: | - pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 - - REPORT_LINK=$(python3 .github/create_workflow_report.py --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves) - - echo $REPORT_LINK - - IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') - if [[ -n $IS_VALID_URL ]]; then - echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY - else - echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY - exit 1 - fi + with: + final: true From 27ce9e6c154265da72f1b7d4786e7f2ec96e1e81 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 11 Jun 2025 09:06:12 -0400 Subject: [PATCH 7/8] support live updating report --- .github/workflows/release_branches.yml | 10 +++++++++- .github/workflows/reusable_test.yml | 10 ++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 091f5fa41406..b6ba377143ba 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -69,6 +69,14 @@ jobs: - name: Re-create GH statuses for skipped jobs if any run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses + - name: Note report location to summary + env: + PR_NUMBER: ${{ github.event.pull_request.number || 0 }} + COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + run: | + REPORT_LINK=https://s3.amazonaws.com/altinity-build-artifacts/$PR_NUMBER/$COMMIT_SHA/ci_run_report.html + echo "Workflow Run Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY + BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() }} @@ -653,7 +661,7 @@ jobs: python3 ./tests/ci/ci_buddy.py --check-wf-status - name: Create and upload report if: ${{ !cancelled() }} - uses: ./.github/actions/create_workflow_report@b229c9e6f9d1e100ba32f271b6bba112d9894afb + uses: ./.github/actions/create_workflow_report env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} diff --git a/.github/workflows/reusable_test.yml b/.github/workflows/reusable_test.yml index 4cb00cc9e83f..64f41c3c28a4 100644 --- a/.github/workflows/reusable_test.yml +++ b/.github/workflows/reusable_test.yml @@ -163,6 +163,16 @@ jobs: if: ${{ !cancelled() }} run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.test_name}}' + - name: Update workflow report + if: ${{ !cancelled() }} + uses: ./.github/actions/create_workflow_report + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} + CHECKS_DATABASE_USER: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }} + CHECKS_DATABASE_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }} + with: + final: false - name: Mark as done if: ${{ !cancelled() }} run: | From 3a36e8865a14786b4d96c2f0b2115ac3185c6e03 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 11 Jun 2025 13:49:19 -0400 Subject: [PATCH 8/8] small fixes --- .../create_workflow_report/create_workflow_report.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index 8918b1db7728..4f360b9f092d 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -470,7 +470,7 @@ def format_test_status(text: str) -> str: color = ( "red" if text.lower().startswith("fail") - else "orange" if text.lower() in ("error", "broken") else "green" + else "orange" if text.lower() in ("error", "broken", "pending") else "green" ) return f'{text}' @@ -560,9 +560,7 @@ def main(): # get_cves returns ... in the case where no Grype result files were found. # This might occur when run in preview mode. - cves_not_checked = not args.cves or ( - args.mark_preview and fail_results["docker_images_cves"] is ... - ) + cves_not_checked = not args.cves or fail_results["docker_images_cves"] is ... if args.known_fails: if not os.path.exists(args.known_fails): @@ -618,7 +616,7 @@ def main(): "date": f"{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC", "is_preview": args.mark_preview, "counts": { - "jobs_status": f"{sum(fail_results['job_statuses']['job_status'] != 'success')} fail/error", + "jobs_status": f"{sum(fail_results['job_statuses']['job_status'] != 'success')} fail/error/pending", "checks_errors": len(fail_results["checks_errors"]), "checks_new_fails": len(fail_results["checks_fails"]), "regression_new_fails": len(fail_results["regression_fails"]),