From dc39e3e288d2bb718e62fe72451849e5a8aed54b Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sun, 21 May 2023 23:08:14 +0200 Subject: [PATCH] Switch build-info and build images to run on public runners We used to have a low number of public runners on GitHub Actions, and Apache Software Foundation projects were competing for them. That's why we switched the `build-images` workflow to run on self-hosted runners, and we had to add a workflow-embedded list of committers to make sure that build-info also uses self-hosted runners. Recently, however, the ASF has 900 runners and, if anything, it's the public runners that are in abundance, while self-hosted runners are limited, which sometimes leads to "Wait for CI images" jobs timing out while waiting for self-hosted runners to build the image. Those "wait-for-ci-images" jobs do almost nothing but wait, so using self-hosted runners for them is not needed; we can easily switch them to public runners as well. Also, "wait-for-ci-images" currently performs "test-pytest-collection" as an optimisation - preventing multiple "test" jobs from failing in case pytest collection fails. However, with public runners, it adds an extra 4 minutes to tests, and failing pytest collection is a rather rare case, so we can safely remove this step. 
This PR makes the following changes: * build-info jobs always run on public runners * wait-for-ci-images jobs always run on public runners * wait-for-ci-images does not run the test for pytest collection * all the other jobs run on either self-hosted or public runners depending on who the actor is (committers run on self-hosted, non-committers run on public runners) --- .github/workflows/build-images.yml | 4 +- .github/workflows/ci.yml | 70 +-------------- .../airflow_breeze/commands/ci_commands.py | 13 +++ .../commands/ci_commands_config.py | 9 +- .../src/airflow_breeze/global_constants.py | 54 ++++++++++- .../airflow_breeze/utils/selective_checks.py | 24 ++++- images/breeze/output-commands-hash.txt | 4 +- images/breeze/output-commands.svg | 90 +++++++++---------- images/breeze/output_ci_selective-check.svg | 60 ++++++++----- 9 files changed, 186 insertions(+), 142 deletions(-) diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index fa8686890d0fe..76073bbe90af1 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -50,11 +50,10 @@ jobs: build-info: timeout-minutes: 10 name: "Build Info" - runs-on: ${{ github.repository == 'apache/airflow' && 'self-hosted' || 'ubuntu-20.04' }} + runs-on: 'ubuntu-20.04' env: TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} outputs: - runs-on: ${{ github.repository == 'apache/airflow' && 'self-hosted' || 'ubuntu-20.04' }} python-versions: "${{ steps.selective-checks.python-versions }}" upgrade-to-newer-dependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} all-python-versions-list-as-string: >- @@ -66,6 +65,7 @@ jobs: cache-directive: ${{ steps.selective-checks.outputs.cache-directive }} default-branch: ${{ steps.selective-checks.outputs.default-branch }} default-constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }} + runs-on: ${{ steps.selective-checks.outputs.runs-on }} target-commit-sha: 
"${{steps.discover-pr-merge-commit.outputs.target-commit-sha || github.event.pull_request.head.sha || github.sha diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e31afad5d2408..3c19cf0217fbf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,64 +65,7 @@ jobs: # to find the list of authors and replace them, so any changes to the # formatting of the contains(fromJSON()) structure below will need to be # reflected in that workflow too. - runs-on: >- - ${{ ( - ( - github.event_name == 'push' || - github.event_name == 'schedule' || - contains(fromJSON('[ - "BasPH", - "Fokko", - "KevinYang21", - "Taragolis", - "XD-DENG", - "aijamalnk", - "alexvanboxel", - "aoen", - "artwr", - "ashb", - "bbovenzi", - "bolkedebruin", - "criccomini", - "dimberman", - "dstandish", - "eladkal", - "ephraimbuddy", - "feluelle", - "feng-tao", - "houqp", - "hussein-awala", - "jedcunningham", - "jgao54", - "jghoman", - "jhtimmins", - "jmcarp", - "josh-fell", - "kaxil", - "leahecole", - "malthe", - "mik-laj", - "milton0825", - "mistercrunch", - "msumit", - "o-nikolas", - "pierrejeambrun", - "pingzh", - "potiuk", - "r39132", - "ryanahamilton", - "ryw", - "saguziel", - "sekikn", - "turbaszek", - "uranusjr", - "vikramkoka", - "xinbinhuang", - "yuqian90", - "zhongjiajie" - ]'), github.event.pull_request.user.login) - ) && github.repository == 'apache/airflow' - ) && 'self-hosted' || 'ubuntu-20.04' }} + runs-on: "ubuntu-20.04" env: GITHUB_CONTEXT: ${{ toJson(github) }} outputs: @@ -171,11 +114,11 @@ jobs: skip-pre-commits: ${{ steps.selective-checks.outputs.skip-pre-commits }} helm-test-packages: ${{ steps.selective-checks.outputs.helm-test-packages }} debug-resources: ${{ steps.selective-checks.outputs.debug-resources }} + runs-on: ${{ steps.selective-checks.outputs.runs-on }} source-head-repo: ${{ steps.source-run-info.outputs.source-head-repo }} pull-request-labels: ${{ steps.source-run-info.outputs.pr-labels }} in-workflow-build: ${{ 
steps.source-run-info.outputs.in-workflow-build }} build-job-description: ${{ steps.source-run-info.outputs.build-job-description }} - runs-on: ${{ steps.source-run-info.outputs.runs-on }} canary-run: ${{ steps.source-run-info.outputs.canary-run }} run-coverage: ${{ steps.source-run-info.outputs.run-coverage }} steps: @@ -525,7 +468,7 @@ jobs: wait-for-ci-images: timeout-minutes: 120 name: "Wait for CI images" - runs-on: "${{needs.build-info.outputs.runs-on}}" + runs-on: "ubuntu-20.04" needs: [build-info, build-ci-images] if: needs.build-info.outputs.image-build == 'true' env: @@ -548,13 +491,6 @@ jobs: env: PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} - - name: "Tests Pytest collection" - run: breeze testing tests --run-in-parallel --collect-only - if: needs.build-info.outputs.run-tests == 'true' - env: - PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" - BACKEND: sqlite - PARALLEL_TEST_TYPES: "${{needs.build-info.outputs.parallel-test-types-list-as-string}}" - name: "Fix ownership" run: breeze ci fix-ownership if: always() diff --git a/dev/breeze/src/airflow_breeze/commands/ci_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_commands.py index 02692b3c2b3f1..5987cbb54a871 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_commands.py @@ -42,6 +42,7 @@ from airflow_breeze.utils.common_options import ( option_answer, option_dry_run, + option_github_repository, option_verbose, ) from airflow_breeze.utils.confirm import Answer, user_confirm @@ -220,6 +221,14 @@ def get_changed_files(commit_ref: str | None) -> tuple[str, ...]: envvar="GITHUB_EVENT_NAME", show_default=True, ) +@option_github_repository +@click.option( + "--github-actor", + help="Actor that triggered the event (Github user)", + envvar="GITHUB_ACTOR", + type=str, + default="", +) @option_verbose 
@option_dry_run def selective_check( @@ -228,6 +237,8 @@ def selective_check( default_branch: str, default_constraints_branch: str, github_event_name: str, + github_repository: str, + github_actor: str, ): from airflow_breeze.utils.selective_checks import SelectiveChecks @@ -243,6 +254,8 @@ def selective_check( default_constraints_branch=default_constraints_branch, pr_labels=tuple(ast.literal_eval(pr_labels)) if pr_labels else (), github_event=github_event, + github_repository=github_repository, + github_actor=github_actor, ) print(str(sc), file=sys.stderr) diff --git a/dev/breeze/src/airflow_breeze/commands/ci_commands_config.py b/dev/breeze/src/airflow_breeze/commands/ci_commands_config.py index 964e33bcb98ef..d2dda5ff28751 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_commands_config.py @@ -44,9 +44,16 @@ "--pr-labels", "--default-branch", "--default-constraints-branch", + ], + }, + { + "name": "Github parameters", + "options": [ "--github-event-name", + "--github-repository", + "--github-actor", ], - } + }, ], "breeze ci get-workflow-info": [ { diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index d1ccc8dab462b..eec14c2951ca0 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -193,7 +193,59 @@ def get_default_platform_machine() -> str: BREEZE_INIT_COMMAND = "" DRY_RUN_DOCKER = False INSTALL_AIRFLOW_VERSION = "" -SQLITE_URL = "sqlite:////root/airflow/airflow.db" + + +COMMITTERS = [ + "BasPH", + "Fokko", + "KevinYang21", + "Taragolis", + "XD-DENG", + "aijamalnk", + "alexvanboxel", + "aoen", + "artwr", + "ashb", + "bbovenzi", + "bolkedebruin", + "criccomini", + "dimberman", + "dstandish", + "eladkal", + "ephraimbuddy", + "feluelle", + "feng-tao", + "houqp", + "hussein-awala", + "jedcunningham", + "jgao54", + "jghoman", + "jhtimmins", + "jmcarp", + 
"josh-fell", + "kaxil", + "leahecole", + "malthe", + "mik-laj", + "milton0825", + "mistercrunch", + "msumit", + "o-nikolas", + "pierrejeambrun", + "pingzh", + "potiuk", + "r39132", + "ryanahamilton", + "ryw", + "saguziel", + "sekikn", + "turbaszek", + "uranusjr", + "vikramkoka", + "xinbinhuang", + "yuqian90", + "zhongjiajie", +] def get_airflow_version(): diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py index 04c90d5562bb3..0f23845ad517b 100644 --- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py +++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py @@ -43,10 +43,15 @@ from re import match from typing import Any, Dict, List, TypeVar -from typing_extensions import Literal +if sys.version_info >= (3, 9): + from typing import Literal +else: + from typing_extensions import Literal from airflow_breeze.global_constants import ( ALL_PYTHON_MAJOR_MINOR_VERSIONS, + APACHE_AIRFLOW_GITHUB_REPOSITORY, + COMMITTERS, CURRENT_KUBERNETES_VERSIONS, CURRENT_MSSQL_VERSIONS, CURRENT_MYSQL_VERSIONS, @@ -59,6 +64,8 @@ DEFAULT_PYTHON_MAJOR_MINOR_VERSION, HELM_VERSION, KIND_VERSION, + RUNS_ON_PUBLIC_RUNNER, + RUNS_ON_SELF_HOSTED_RUNNER, GithubEvents, SelectiveUnitTestTypes, all_helm_test_packages, @@ -295,6 +302,8 @@ def __init__( commit_ref: str | None = None, pr_labels: tuple[str, ...] 
= (), github_event: GithubEvents = GithubEvents.PULL_REQUEST, + github_repository: str = APACHE_AIRFLOW_GITHUB_REPOSITORY, + github_actor: str = "", ): self._files = files self._default_branch = default_branch @@ -302,6 +311,8 @@ def __init__( self._commit_ref = commit_ref self._pr_labels = pr_labels self._github_event = github_event + self._github_repository = github_repository + self._github_actor = github_actor def __important_attributes(self) -> tuple[Any, ...]: return tuple(getattr(self, f) for f in self.__HASHABLE_FIELDS) @@ -618,7 +629,7 @@ def _get_test_types_to_run(self) -> list[str]: @staticmethod def _extract_long_provider_tests(current_test_types: set[str]): """ - In case there are Provider tests in the list of test to run (either in the form of + In case there are Provider tests in the list of test to run - either in the form of Providers or Providers[...] we subtract them from the test type, and add them to the list of tests to run individually. @@ -765,3 +776,12 @@ def affected_providers_list_as_string(self) -> str | None: if affected_providers == "ALL_PROVIDERS": return _ALL_PROVIDERS_LIST return " ".join(sorted(affected_providers)) + + @cached_property + def runs_on(self) -> str: + if self._github_repository == APACHE_AIRFLOW_GITHUB_REPOSITORY: + if self._github_event in [GithubEvents.SCHEDULE, GithubEvents.PUSH]: + return RUNS_ON_SELF_HOSTED_RUNNER + if self._github_actor in COMMITTERS: + return RUNS_ON_SELF_HOSTED_RUNNER + return RUNS_ON_PUBLIC_RUNNER diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index bb1a3b5ca934d..519db1894e68e 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -7,8 +7,8 @@ ci:fix-ownership:fee2c9ec9ef19686792002ae054fecdd ci:free-space:47234aa0a60b0efd84972e6e797379f8 ci:get-workflow-info:01ee34c33ad62fa5dc33e0ac8773223f ci:resource-check:1d4fe47dff9fc64ac1648ec4beb2d85c -ci:selective-check:3a085894f24cb909812fbc8253a21e13 
-ci:e51cbc38a202b92b7dc6288f6344c412 +ci:selective-check:8a39978ee69d496dae2533d37a48b137 +ci:2868dbcdd482663e9d6ccd00055b9cac ci-image:build:3ffe4dd24ae7090415543e27d8504955 ci-image:pull:c16c6e57c748bfe9b365b4ffafb18472 ci-image:verify:aee88f55e8837028d19316356e29b009 diff --git a/images/breeze/output-commands.svg b/images/breeze/output-commands.svg index 876133e651b5c..ba821bd664d4e 100644 --- a/images/breeze/output-commands.svg +++ b/images/breeze/output-commands.svg @@ -35,8 +35,8 @@ .breeze-help-r1 { fill: #c5c8c6;font-weight: bold } .breeze-help-r2 { fill: #c5c8c6 } .breeze-help-r3 { fill: #d0b344;font-weight: bold } -.breeze-help-r4 { fill: #68a0b3;font-weight: bold } -.breeze-help-r5 { fill: #868887 } +.breeze-help-r4 { fill: #868887 } +.breeze-help-r5 { fill: #68a0b3;font-weight: bold } .breeze-help-r6 { fill: #98a84b;font-weight: bold } .breeze-help-r7 { fill: #8d7b39 } @@ -190,50 +190,50 @@ -Usage: breeze [OPTIONSCOMMAND [ARGS]... +Usage: breeze [OPTIONS] COMMAND [ARGS]... -╭─ Basic flags ────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10) -[default: 3.7]                                               ---backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] ---postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11] ---mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] ---mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] ---integrationIntegration(s) to enable when running (can be more than one).                             -(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot |      -statsd | statsd | trino)                                                                  ---forward-credentials-fForward local credentials to container when running. 
---db-reset-dReset DB when entering the container. ---max-timeMaximum time that the command should take - if it takes longer, the command will fail. -(INTEGER RANGE)                                                                        ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---answer-aForce answer to questions.(y | n | q | yes | no | quit) ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Basic developer commands ───────────────────────────────────────────────────────────────────────────────────────────╮ -start-airflow     Enter breeze environment and starts all Airflow components in the tmux session. Compile assets   -if contents of www directory changed.                                                            -static-checks     Run static checks.                                                                               -build-docs        Build documentation in the container.                                                            -down              Stop running breeze environment.                                                                 -shell             Enter breeze environment. this is the default command use when no other is selected.             -exec              Joins the interactive shell of running airflow container.                                        -compile-www-assetsCompiles www assets.                                                                             
-cleanup           Cleans the cache of parameters, docker cache and optionally built CI/PROD images.                -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Advanced command groups ────────────────────────────────────────────────────────────────────────────────────────────╮ -testing                Tools that developers can use to run tests                                                  -ci-image               Tools that developers can use to manually manage CI images                                  -k8s                    Tools that developers use to run Kubernetes tests                                           -prod-image             Tools that developers can use to manually manage PROD images                                -setup                  Tools that developers can use to configure Breeze                                           -release-management     Tools that release managers can use to prepare and manage Airflow releases                  -ci                     Tools that CI workflows use to cleanup/manage CI environment                                -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Basic flags ────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10) +[default: 3.7]                                               +--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] +--postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11] +--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] +--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] +--integrationIntegration(s) to enable when running (can be more than one).              
               +(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot |      +statsd | statsd | trino)                                                                  +--forward-credentials-fForward local credentials to container when running. +--db-reset-dReset DB when entering the container. +--max-timeMaximum time that the command should take - if it takes longer, the command will fail. +(INTEGER RANGE)                                                                        +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--answer-aForce answer to questions.(y | n | q | yes | no | quit) +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Basic developer commands ───────────────────────────────────────────────────────────────────────────────────────────╮ +start-airflow     Enter breeze environment and starts all Airflow components in the tmux session. Compile assets   +if contents of www directory changed.                                                            +static-checks     Run static checks.                                                                               +build-docs        Build documentation in the container.                                                            +down              Stop running breeze environment.                                                                 +shell             Enter breeze environment. 
this is the default command use when no other is selected.             +exec              Joins the interactive shell of running airflow container.                                        +compile-www-assetsCompiles www assets.                                                                             +cleanup           Cleans the cache of parameters, docker cache and optionally built CI/PROD images.                +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Advanced command groups ────────────────────────────────────────────────────────────────────────────────────────────╮ +testing                Tools that developers can use to run tests                                                  +ci-image               Tools that developers can use to manually manage CI images                                  +k8s                    Tools that developers use to run Kubernetes tests                                           +prod-image             Tools that developers can use to manually manage PROD images                                +setup                  Tools that developers can use to configure Breeze                                           +release-management     Tools that release managers can use to prepare and manage Airflow releases                  +ci                     Tools that CI workflows use to cleanup/manage CI environment                                +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_ci_selective-check.svg b/images/breeze/output_ci_selective-check.svg index 0d1250f1e20cf..186e49cce9925 100644 --- a/images/breeze/output_ci_selective-check.svg +++ b/images/breeze/output_ci_selective-check.svg @@ -1,4 +1,4 @@ - + - + @@ -105,9 +105,21 @@ + + + + + + + + + + + + - Command: ci selective-check + Command: ci selective-check @@ -118,26 +130,30 @@ 
-Usage: breeze ci selective-check [OPTIONS] +Usage: breeze ci selective-check [OPTIONS] Checks what kind of tests should be run for an incoming commit. -╭─ Selective check flags ──────────────────────────────────────────────────────────────────────────────────────────────╮ ---commit-refCommit-ish reference to the commit that should be checked(TEXT) ---pr-labelsPython array formatted PR labels assigned to the PR(TEXT) ---default-branchBranch against which the PR should be run(TEXT)[default: main] ---default-constraints-branchConstraints Branch against which the PR should be run(TEXT) -[default: constraints-main]                           ---github-event-nameName of the GitHub event that triggered the check                                    -(pull_request | pull_request_review | pull_request_target | pull_request_workflow |  -push | schedule | workflow_dispatch | workflow_run)                                  -[default: pull_request]                                                              -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---help-hShow this message and exit. 
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Selective check flags ──────────────────────────────────────────────────────────────────────────────────────────────╮ +--commit-refCommit-ish reference to the commit that should be checked(TEXT) +--pr-labelsPython array formatted PR labels assigned to the PR(TEXT) +--default-branchBranch against which the PR should be run(TEXT)[default: main] +--default-constraints-branchConstraints Branch against which the PR should be run(TEXT) +[default: constraints-main]                           +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Github parameters ──────────────────────────────────────────────────────────────────────────────────────────────────╮ +--github-event-nameName of the GitHub event that triggered the check                                           +(pull_request | pull_request_review | pull_request_target | pull_request_workflow | push |  +schedule | workflow_dispatch | workflow_run)                                                +[default: pull_request]                                                                     +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--github-actorActor that triggered the event (Github user)(TEXT) +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯