From 717ec5dcf59f25e0c7430177c0a96e151ce5181b Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 21 May 2025 20:29:14 -0400 Subject: [PATCH 1/3] add grype scanning --- .github/grype/parse_vulnerabilities_grype.py | 32 +++++ .github/grype/run_grype_scan.sh | 18 +++ .../grype/transform_and_upload_results_s3.sh | 13 ++ .github/workflows/grype_scan.yml | 132 ++++++++++++++++++ .github/workflows/release_branches.yml | 18 +++ 5 files changed, 213 insertions(+) create mode 100644 .github/grype/parse_vulnerabilities_grype.py create mode 100755 .github/grype/run_grype_scan.sh create mode 100755 .github/grype/transform_and_upload_results_s3.sh create mode 100644 .github/workflows/grype_scan.yml diff --git a/.github/grype/parse_vulnerabilities_grype.py b/.github/grype/parse_vulnerabilities_grype.py new file mode 100644 index 000000000000..fec2ef3bfac7 --- /dev/null +++ b/.github/grype/parse_vulnerabilities_grype.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +import json + +from testflows.core import * + +xfails = {} + + +@Name("docker vulnerabilities") +@XFails(xfails) +@TestModule +def docker_vulnerabilities(self): + with Given("I gather grype scan results"): + with open("./result.json", "r") as f: + results = json.load(f) + + for vulnerability in results["matches"]: + with Test( + f"{vulnerability['vulnerability']['id']}@{vulnerability['vulnerability']['namespace']},{vulnerability['vulnerability']['severity']}", + flags=TE, + ): + note(vulnerability) + critical_levels = set(["HIGH", "CRITICAL"]) + if vulnerability['vulnerability']["severity"].upper() in critical_levels: + with Then( + f"Found vulnerability of {vulnerability['vulnerability']['severity']} severity" + ): + result(Fail) + + +if main(): + docker_vulnerabilities() diff --git a/.github/grype/run_grype_scan.sh b/.github/grype/run_grype_scan.sh new file mode 100755 index 000000000000..c5ce0b1b10d3 --- /dev/null +++ b/.github/grype/run_grype_scan.sh @@ -0,0 +1,18 @@ +set -x +set -e + +IMAGE=$1 + +GRYPE_VERSION="v0.80.1" + +docker pull $IMAGE +docker pull anchore/grype:${GRYPE_VERSION} + +docker run \ + --rm --volume /var/run/docker.sock:/var/run/docker.sock \ + --name Grype anchore/grype:${GRYPE_VERSION} \ + --scope all-layers \ + -o json \ + $IMAGE > result.json + +ls -sh diff --git a/.github/grype/transform_and_upload_results_s3.sh b/.github/grype/transform_and_upload_results_s3.sh new file mode 100755 index 000000000000..7a10b02887ef --- /dev/null +++ b/.github/grype/transform_and_upload_results_s3.sh @@ -0,0 +1,13 @@ +DOCKER_IMAGE=$(echo "$DOCKER_IMAGE" | sed 's/[\/:]/_/g') + +S3_PATH="s3://$S3_BUCKET/$PR_NUMBER/$COMMIT_SHA/grype/$DOCKER_IMAGE" +HTTPS_S3_PATH="https://s3.amazonaws.com/$S3_BUCKET/$PR_NUMBER/$COMMIT_SHA/grype/$DOCKER_IMAGE" +echo "https_s3_path=$HTTPS_S3_PATH" >> $GITHUB_OUTPUT + +tfs --no-colors transform nice raw.log nice.log.txt +tfs --no-colors report results -a $HTTPS_S3_PATH raw.log - --copyright "Altinity LTD" | tfs --no-colors document convert > results.html + +aws s3 cp --no-progress nice.log.txt $S3_PATH/nice.log.txt --content-type "text/plain; charset=utf-8" || echo "nice log file not found". +aws s3 cp --no-progress results.html $S3_PATH/results.html || echo "results file not found". +aws s3 cp --no-progress raw.log $S3_PATH/raw.log || echo "raw.log file not found". +aws s3 cp --no-progress result.json $S3_PATH/result.json --content-type "text/plain; charset=utf-8" || echo "result.json not found". 
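Note (reviewer sketch, not part of the patch): run_grype_scan.sh leaves the scan output in result.json, which parse_vulnerabilities_grype.py walks match by match, failing on HIGH/CRITICAL findings. For a quick local look at the same result.json without TestFlows, a minimal Python sketch along these lines produces the same severity summary that the workflow later builds with jq (it assumes a result.json produced by run_grype_scan.sh sits in the current directory):

#!/usr/bin/env python3
# Local helper sketch: summarize Grype severities from result.json,
# mirroring the jq-based summary step in grype_scan.yml.
import json
from collections import Counter

with open("result.json", "r") as f:
    results = json.load(f)

# Count findings per severity (e.g. Low, Medium, High, Critical).
severities = Counter(
    match["vulnerability"]["severity"] for match in results.get("matches", [])
)

for severity, count in sorted(severities.items()):
    print(f"{severity}: {count}")

# Same HIGH/CRITICAL gate that parse_vulnerabilities_grype.py applies.
high_critical = sum(
    count
    for severity, count in severities.items()
    if severity.upper() in {"HIGH", "CRITICAL"}
)
print(f"High/Critical total: {high_critical}")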
\ No newline at end of file diff --git a/.github/workflows/grype_scan.yml b/.github/workflows/grype_scan.yml new file mode 100644 index 000000000000..e749448b81ba --- /dev/null +++ b/.github/workflows/grype_scan.yml @@ -0,0 +1,132 @@ +name: Grype Scan +run-name: Grype Scan ${{ inputs.docker_image }} + +on: + workflow_dispatch: + # Inputs for manual run + inputs: + docker_image: + description: 'Docker image. If no tag, it will be determined by version_helper.py' + required: true + workflow_call: + # Inputs for workflow call + inputs: + docker_image: + description: 'Docker image. If no tag, it will be determined by version_helper.py' + required: true + type: string +env: + PYTHONUNBUFFERED: 1 + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + +jobs: + grype_scan: + name: Grype Scan + runs-on: [self-hosted, altinity-on-demand, altinity-func-tester-aarch64] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker + uses: docker/setup-buildx-action@v3 + + - name: Set up Python + run: | + export TESTFLOWS_VERSION="2.4.19" + sudo apt-get update + sudo apt-get install -y python3-pip python3-venv + python3 -m venv venv + source venv/bin/activate + pip install --upgrade requests chardet urllib3 + pip install testflows==$TESTFLOWS_VERSION awscli==1.33.28 + echo PATH=$PATH >>$GITHUB_ENV + + - name: Set image tag if not given + if: ${{ !contains(inputs.docker_image, ':') }} + id: set_version + run: | + python3 ./tests/ci/version_helper.py | tee /tmp/version_info + source /tmp/version_info + echo "docker_image=${{ inputs.docker_image }}:${{ github.event.pull_request.number || 0 }}-$CLICKHOUSE_VERSION_STRING" >> $GITHUB_OUTPUT + echo "commit_sha=$CLICKHOUSE_VERSION_GITHASH" >> $GITHUB_OUTPUT + + - name: Run Grype Scan + run: | + DOCKER_IMAGE=${{ steps.set_version.outputs.docker_image || inputs.docker_image }} + ./.github/grype/run_grype_scan.sh $DOCKER_IMAGE + + - name: Parse grype results + run: | + python3 -u ./.github/grype/parse_vulnerabilities_grype.py -o nice --no-colors --log raw.log --test-to-end + + - name: Transform and Upload Grype Results + if: always() + id: upload_results + env: + S3_BUCKET: "altinity-build-artifacts" + COMMIT_SHA: ${{ steps.set_version.outputs.commit_sha || github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + PR_NUMBER: ${{ github.event.pull_request.number || 0 }} + DOCKER_IMAGE: ${{ steps.set_version.outputs.docker_image || inputs.docker_image }} + run: | + ./.github/grype/transform_and_upload_results_s3.sh + + - name: Create step summary + if: always() + id: create_summary + run: | + jq -r '"**Image**: \(.source.target.userInput)"' result.json >> $GITHUB_STEP_SUMMARY + jq -r '.distro | "**Distro**: \(.name):\(.version)"' result.json >> $GITHUB_STEP_SUMMARY + if jq -e '.matches | length == 0' result.json > /dev/null; then + echo "No CVEs" >> $GITHUB_STEP_SUMMARY + else + echo "| Severity | Count |" >> $GITHUB_STEP_SUMMARY + echo "|------------|-------|" >> $GITHUB_STEP_SUMMARY + jq -r ' + .matches | + map(.vulnerability.severity) | + group_by(.) | + map({severity: .[0], count: length}) | + sort_by(.severity) | + map("| \(.severity) | \(.count) |") | + .[] + ' result.json >> $GITHUB_STEP_SUMMARY + fi + + HIGH_COUNT=$(jq -r '.matches | map(.vulnerability.severity) | map(select(. 
== "High")) | length' result.json) + CRITICAL_COUNT=$(jq -r '.matches | map(.vulnerability.severity) | map(select(. == "Critical")) | length' result.json) + TOTAL_HIGH_CRITICAL=$((HIGH_COUNT + CRITICAL_COUNT)) + echo "total_high_critical=$TOTAL_HIGH_CRITICAL" >> $GITHUB_OUTPUT + + if [ $TOTAL_HIGH_CRITICAL -gt 0 ]; then + echo '## High and Critical vulnerabilities found' >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat raw.log | tfs --no-colors show tests | grep -Pi 'High|Critical' >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + fi + + - name: Set commit status + if: always() + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + github.rest.repos.createCommitStatus({ + owner: context.repo.owner, + repo: context.repo.repo, + sha: '${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}', + state: '${{ steps.create_summary.outputs.total_high_critical > 0 && 'failure' || 'success' }}', + target_url: '${{ steps.upload_results.outputs.https_s3_path }}/results.html', + description: 'Grype Scan Completed with ${{ steps.create_summary.outputs.total_high_critical }} high/critical vulnerabilities', + context: 'Grype Scan ${{ steps.set_version.outputs.docker_image || inputs.docker_image }}' + }) + + - name: Upload artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: grype-results-${{ hashFiles('raw.log') }} + path: | + result.json + nice.log.txt diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 8c60559a4399..10dc41627c07 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -577,6 +577,23 @@ jobs: test_name: Sign aarch64 runner_type: altinity-on-demand, altinity-func-tester data: ${{ needs.RunConfig.outputs.data }} + GrypeScan: + needs: [RunConfig, DockerServerImage, DockerKeeperImage] + if: ${{ !failure() && !cancelled() }} + strategy: + fail-fast: false + matrix: + include: + - image: server + suffix: '' + - image: server + suffix: '-alpine' + - image: keeper + suffix: '' + uses: ./.github/workflows/grype_scan.yml + secrets: inherit + with: + docker_image: altinityinfra/clickhouse-${{ matrix.image }}:${{ github.event.pull_request.number || 0 }}-${{ fromJson(needs.RunConfig.outputs.data).version }}${{ matrix.suffix }} FinishCheck: if: ${{ !cancelled() }} needs: @@ -611,6 +628,7 @@ jobs: - CompatibilityCheckAarch64 - RegressionTestsRelease - RegressionTestsAarch64 + - GrypeScan - SignRelease runs-on: [self-hosted, altinity-on-demand, altinity-style-checker-aarch64] steps: From 4b2d33f2f43ad7c09737cdd9d53062f4c3cca022 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 21 May 2025 20:38:28 -0400 Subject: [PATCH 2/3] update report to new format --- .github/create_combined_ci_report.py | 295 ----------- .github/create_workflow_report.py | 653 +++++++++++++++++++++++++ .github/workflows/release_branches.yml | 4 +- 3 files changed, 655 insertions(+), 297 deletions(-) delete mode 100755 .github/create_combined_ci_report.py create mode 100755 .github/create_workflow_report.py diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py deleted file mode 100755 index 07b6a80763bb..000000000000 --- a/.github/create_combined_ci_report.py +++ /dev/null @@ -1,295 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import os -from pathlib import Path -from itertools import combinations -import json - -import 
requests -from clickhouse_driver import Client -import boto3 -from botocore.exceptions import NoCredentialsError - -DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST" -DATABASE_USER_VAR = "CHECKS_DATABASE_USER" -DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD" -S3_BUCKET = "altinity-build-artifacts" - - -def get_checks_fails(client: Client, job_url: str): - """ - Get tests that did not succeed for the given job URL. - Exclude checks that have status 'error' as they are counted in get_checks_errors. - """ - columns = ( - "check_status, check_name, test_status, test_name, report_url as results_link" - ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND test_status IN ('FAIL', 'ERROR') - AND check_status!='error' - ORDER BY check_name, test_name - """ - return client.query_dataframe(query) - - -def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): - """ - Get tests that are known to fail for the given job URL. - """ - assert len(known_fails) > 0, "cannot query the database with empty known fails" - columns = ( - "check_status, check_name, test_status, test_name, report_url as results_link" - ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND test_status='BROKEN' - AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) - ORDER BY test_name, check_name - """ - - df = client.query_dataframe(query) - - df.insert( - len(df.columns) - 1, - "reason", - df["test_name"] - .astype(str) - .apply( - lambda test_name: known_fails[test_name].get("reason", "No reason given") - ), - ) - - return df - - -def get_checks_errors(client: Client, job_url: str): - """ - Get checks that have status 'error' for the given job URL. - """ - columns = ( - "check_status, check_name, test_status, test_name, report_url as results_link" - ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND check_status=='error' - ORDER BY check_name, test_name - """ - return client.query_dataframe(query) - - -def drop_prefix_rows(df, column_to_clean): - """ - Drop rows from the dataframe if: - - the row matches another row completely except for the specified column - - the specified column of that row is a prefix of the same column in another row - """ - to_drop = set() - reference_columns = [col for col in df.columns if col != column_to_clean] - for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2): - if all(row_1[col] == row_2[col] for col in reference_columns): - if row_2[column_to_clean].startswith(row_1[column_to_clean]): - to_drop.add(i) - elif row_1[column_to_clean].startswith(row_2[column_to_clean]): - to_drop.add(j) - return df.drop(to_drop) - - -def get_regression_fails(client: Client, job_url: str): - """ - Get regression tests that did not succeed for the given job URL. 
- """ - # If you rename the alias for report_url, also update the formatters in format_results_as_html_table - # Nested SELECT handles test reruns - query = f"""SELECT arch, job_name, status, test_name, results_link - FROM ( - SELECT - architecture as arch, - test_name, - argMax(result, start_time) AS status, - job_url, - job_name, - report_url as results_link - FROM `gh-data`.clickhouse_regression_results - GROUP BY architecture, test_name, job_url, job_name, report_url - ORDER BY length(test_name) DESC - ) - WHERE job_url='{job_url}' - AND status IN ('Fail', 'Error') - """ - - df = client.query_dataframe(query) - df = drop_prefix_rows(df, "test_name") - df["job_name"] = df["job_name"].str.title() - return df - - -def url_to_html_link(url: str) -> str: - if not url: - return "" - text = url.split("/")[-1] - if not text: - text = "results" - return f'{text}' - - -def format_test_name_for_linewrap(text: str) -> str: - """Tweak the test name to improve line wrapping.""" - return text.replace(".py::", "/") - - -def format_results_as_html_table(results) -> str: - if len(results) == 0: - return "
<p>Nothing to report</p>
" - results.columns = [col.replace("_", " ").title() for col in results.columns] - html = ( - results.to_html( - index=False, - formatters={ - "Results Link": url_to_html_link, - "Test Name": format_test_name_for_linewrap, - }, - escape=False, - ) # tbody/thead tags interfere with the table sorting script - .replace("\n", "") - .replace("\n", "") - .replace("\n", "") - .replace("\n", "") - .replace('Nothing to report
" + results.columns = [col.replace("_", " ").title() for col in results.columns] + html = results.to_html( + index=False, + formatters={ + "Results Link": url_to_html_link, + "Test Name": format_test_name_for_linewrap, + "Test Status": format_test_status, + "Job Status": format_test_status, + "Status": format_test_status, + "Message": lambda m: m.replace("\n", " "), + "Identifier": lambda i: url_to_html_link( + "https://nvd.nist.gov/vuln/detail/" + i + ), + }, + escape=False, + ).replace(' border="1"', "") + return html + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Create a combined CI report.") + parser.add_argument( + "--actions-run-url", required=True, help="URL of the actions run" + ) + parser.add_argument( + "--pr-number", required=True, help="Pull request number for the S3 path" + ) + parser.add_argument( + "--commit-sha", required=True, help="Commit SHA for the S3 path" + ) + parser.add_argument( + "--no-upload", action="store_true", help="Do not upload the report" + ) + parser.add_argument( + "--known-fails", type=str, help="Path to the file with known fails" + ) + parser.add_argument( + "--cves", action="store_true", help="Get CVEs from Grype results" + ) + parser.add_argument( + "--mark-preview", action="store_true", help="Mark the report as a preview" + ) + return parser.parse_args() + + +def main(): + args = parse_args() + + db_client = Client( + host=os.getenv(DATABASE_HOST_VAR), + user=os.getenv(DATABASE_USER_VAR), + password=os.getenv(DATABASE_PASSWORD_VAR), + port=9440, + secure="y", + verify=False, + settings={"use_numpy": True}, + ) + + fail_results = { + "job_statuses": get_commit_statuses(args.commit_sha), + "checks_fails": get_checks_fails(db_client, args.actions_run_url), + "checks_known_fails": [], + "checks_errors": get_checks_errors(db_client, args.actions_run_url), + "regression_fails": get_regression_fails(db_client, args.actions_run_url), + "docker_images_cves": ( + [] if not args.cves else get_cves(args.pr_number, args.commit_sha) + ), + } + + if args.known_fails: + if not os.path.exists(args.known_fails): + print(f"Known fails file {args.known_fails} not found.") + exit(1) + + with open(args.known_fails) as f: + known_fails = json.load(f) + + if known_fails: + fail_results["checks_known_fails"] = get_checks_known_fails( + db_client, args.actions_run_url, known_fails + ) + + if args.pr_number == "0": + pr_info_html = "Release" + else: + try: + pr_info = get_pr_info_from_number(args.pr_number) + pr_info_html = f""" + #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')}) {pr_info.get("title")} + """ + except Exception as e: + pr_info_html = e + + high_cve_count = 0 + if len(fail_results["docker_images_cves"]) > 0: + high_cve_count = ( + fail_results["docker_images_cves"]["severity"] + .str.lower() + .isin(("high", "critical")) + .sum() + ) + + title = "ClickHouse® CI Workflow Run Report" + + html_report = f""" + + + + + + +| Pull Request | {pr_info_html} | +
|---|---|
| Workflow Run | {args.actions_run_url.split('/')[-1]} | +
| Commit | {args.commit_sha} | +
| Date | {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC | +
This is a preview. FinishCheck has not completed.
' if args.mark_preview else ""} +Not Checked
" if not args.cves else format_results_as_html_table(fail_results['docker_images_cves'])} + +Not Checked
" if not args.known_fails else format_results_as_html_table(fail_results['checks_known_fails'])} + +{script} + + +""" + report_name = "ci_run_report.html" + report_path = Path(report_name) + report_path.write_text(html_report, encoding="utf-8") + + if args.no_upload: + print(f"Report saved to {report_path}") + exit(0) + + report_destination_key = f"{args.pr_number}/{args.commit_sha}/{report_name}" + + # Upload the report to S3 + s3_client = boto3.client("s3", endpoint_url=os.getenv("S3_URL")) + + try: + s3_client.put_object( + Bucket=S3_BUCKET, + Key=report_destination_key, + Body=html_report, + ContentType="text/html; charset=utf-8", + ) + except NoCredentialsError: + print("Credentials not available for S3 upload.") + + print(f"https://s3.amazonaws.com/{S3_BUCKET}/" + report_destination_key) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 10dc41627c07..c5ca0080ec4b 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -660,9 +660,9 @@ jobs: ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} shell: bash run: | - pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.2.0 + pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 - REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json) + REPORT_LINK=$(python3 .github/create_workflow_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves) IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') if [[ -n $IS_VALID_URL ]]; then From 7d628da15ece099e9805f563f0017afe4e382aeb Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 30 May 2025 14:14:10 -0400 Subject: [PATCH 3/3] update report to new format --- .github/create_workflow_report.py | 90 ++++++++++++++++++-------- .github/workflows/release_branches.yml | 4 +- 2 files changed, 67 insertions(+), 27 deletions(-) diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py index a7f30f72aedf..8c074abd76c9 100755 --- a/.github/create_workflow_report.py +++ b/.github/create_workflow_report.py @@ -253,11 +253,17 @@ def get_commit_statuses(sha: str) -> pd.DataFrame: for item in all_data ] - return ( - pd.DataFrame(parsed) - .sort_values(by=["job_status", "job_name"], ascending=[True, True]) - .reset_index(drop=True) - ) + # Create DataFrame + df = pd.DataFrame(parsed) + + # Drop duplicates keeping the first occurrence (newest status for each context) + # GitHub returns statuses in reverse chronological order + df = df.drop_duplicates(subset=["job_name"], keep="first") + + # Sort by status and job name + return df.sort_values( + by=["job_status", "job_name"], ascending=[True, True] + ).reset_index(drop=True) def get_pr_info_from_number(pr_number: str) -> dict: @@ -291,13 +297,23 @@ def get_checks_fails(client: Client, job_url: str): Get tests that did not succeed for the given job URL. Exclude checks that have status 'error' as they are counted in get_checks_errors. 
""" - columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url LIKE '{job_url}%' - AND test_status IN ('FAIL', 'ERROR') - AND check_status!='error' - ORDER BY check_name, test_name - """ + query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link + FROM ( + SELECT + argMax(check_status, check_start_time) as job_status, + check_name as job_name, + argMax(test_status, check_start_time) as status, + test_name, + report_url as results_link, + task_url + FROM `gh-data`.checks + GROUP BY check_name, test_name, report_url, task_url + ) + WHERE task_url LIKE '{job_url}%' + AND test_status IN ('FAIL', 'ERROR') + AND job_status!='error' + ORDER BY job_name, test_name + """ return client.query_dataframe(query) @@ -305,14 +321,26 @@ def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): """ Get tests that are known to fail for the given job URL. """ - assert len(known_fails) > 0, "cannot query the database with empty known fails" - columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url LIKE '{job_url}%' - AND test_status='BROKEN' - AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) - ORDER BY test_name, check_name - """ + if len(known_fails) == 0: + return pd.DataFrame() + + query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link + FROM ( + SELECT + argMax(check_status, check_start_time) as job_status, + check_name as job_name, + argMax(test_status, check_start_time) as status, + test_name, + report_url as results_link, + task_url + FROM `gh-data`.checks + GROUP BY check_name, test_name, report_url, task_url + ) + WHERE task_url LIKE '{job_url}%' + AND test_status='BROKEN' + AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) + ORDER BY job_name, test_name + """ df = client.query_dataframe(query) @@ -333,12 +361,22 @@ def get_checks_errors(client: Client, job_url: str): """ Get checks that have status 'error' for the given job URL. 
""" - columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link" - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url LIKE '{job_url}%' - AND check_status=='error' - ORDER BY check_name, test_name - """ + query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link + FROM ( + SELECT + argMax(check_status, check_start_time) as job_status, + check_name as job_name, + argMax(test_status, check_start_time) as status, + test_name, + report_url as results_link, + task_url + FROM `gh-data`.checks + GROUP BY check_name, test_name, report_url, task_url + ) + WHERE task_url LIKE '{job_url}%' + AND job_status=='error' + ORDER BY job_name, test_name + """ return client.query_dataframe(query) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index c5ca0080ec4b..7fb9f79da516 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -649,12 +649,13 @@ jobs: ${{ toJson(needs) }} EOF python3 ./tests/ci/ci_buddy.py --check-wf-status - - name: Create and upload combined report + - name: Create and upload report if: ${{ !cancelled() }} env: CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} CHECKS_DATABASE_USER: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }} CHECKS_DATABASE_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} PR_NUMBER: ${{ github.event.pull_request.number || 0 }} ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} @@ -663,6 +664,7 @@ jobs: pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 REPORT_LINK=$(python3 .github/create_workflow_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves) + echo $REPORT_LINK IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') if [[ -n $IS_VALID_URL ]]; then