From b1691d3daa1f3a06aac2c9887cf83d8ddf6ced45 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Wed, 22 Nov 2023 14:39:16 +0100 Subject: [PATCH] Consolidate environment variable calculation in ShellParams in Breeze Historically (when Breeze was moved from Bash) the environment variables for docker or docker-compose commands were set in a few places - some were set directly in the code, some were retrieved from shell params and some were set from default values or hardcoded constants. This all happened in various modules, it was scattered around the code, and it was difficult to grasp what was going on. This PR consolidates it so that all variables are set in the ShellParams object. * the attributes have been reviewed and None/"" default values were set where needed * the attributes were sorted * calculation of dynamic properties was moved to ShellParams * missing properties were added to ShellParams, so that all variables have corresponding properties * the get_env_variables_for_docker_commands is now a method of the ShellParams object; the mapping is done explicitly from self.ATTRIBUTE, and default values are applied in this single place when the variables are not set and not retrieved from elsewhere * we use ShellParams in all places where we execute docker commands (previously we sometimes needlessly used BuildCiParams) * tests are added to cover the mapping of "attribute" + "incoming env var" to the env vars passed to Docker Compose/Docker Most importantly, the docker and docker-compose env files are now automatically generated and git-ignored, so that we only need to maintain the list of variables to pass in a single place - the ShellParams `env_variables_for_docker_commands` method. --- .pre-commit-config.yaml | 18 +- .../commands/developer_commands.py | 39 +- .../commands/release_management_commands.py | 23 +- .../commands/testing_commands.py | 94 +++-- .../src/airflow_breeze/global_constants.py | 1 + .../src/airflow_breeze/params/shell_params.py | 355 +++++++++++++++--- .../utils/docker_command_utils.py | 201 +--------- .../src/airflow_breeze/utils/path_utils.py | 4 + .../src/airflow_breeze/utils/reinstall.py | 2 +- .../src/airflow_breeze/utils/run_utils.py | 5 +- dev/breeze/tests/test_shell_params.py | 198 ++++++++++ scripts/ci/docker-compose/.gitignore | 1 + scripts/ci/docker-compose/_docker.env | 85 ----- scripts/ci/docker-compose/base.yml | 74 +--- .../pre_commit_check_provider_yaml_files.py | 8 +- .../pre_commit_migration_reference.py | 7 +- scripts/ci/pre_commit/pre_commit_mypy.py | 13 +- .../pre_commit_update_er_diagram.py | 20 +- tests/cli/commands/test_celery_command.py | 4 +- 19 files changed, 641 insertions(+), 511 deletions(-) create mode 100644 dev/breeze/tests/test_shell_params.py create mode 100644 scripts/ci/docker-compose/.gitignore delete mode 100644 scripts/ci/docker-compose/_docker.env diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f7e886afd1313..bf8f53483ae5e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -934,7 +934,7 @@ repos: ^generated/provider_dependencies.json$ require_serial: true pass_filenames: false - additional_dependencies: ['rich>=12.4.4', 'rich-click>=1.7.0', 'inputimeout', 'pyyaml', 'packaging'] + additional_dependencies: ['rich>=12.4.4', 'rich-click>=1.7.0', 'inputimeout', 'pyyaml', 'packaging', 'filelock'] - id: check-example-dags-urls name: Check that example dags url include provider versions entry: ./scripts/ci/pre_commit/pre_commit_update_example_dags_paths.py @@ -1025,7 +1025,7 @@ repos: entry: 
./scripts/ci/pre_commit/pre_commit_mypy.py files: ^dev/.*\.py$ require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema', 'filelock', 'markdown-it-py'] - id: mypy-core name: Run mypy for core language: python @@ -1033,7 +1033,7 @@ repos: files: \.py$ exclude: ^.*/.*_vendor/|^airflow/migrations|^airflow/providers|^dev|^docs|^provider_packages|^tests/providers|^tests/system/providers|^tests/dags/test_imports.py require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema', 'filelock', 'markdown-it-py'] - id: mypy-providers name: Run mypy for providers language: python @@ -1041,7 +1041,7 @@ repos: files: ^airflow/providers/.*\.py$|^tests/providers/.*\.py$|^tests/system/providers/.*\.py$ exclude: ^.*/.*_vendor/ require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema', 'filelock', 'markdown-it-py'] - id: mypy-docs name: Run mypy for /docs/ folder language: python @@ -1049,13 +1049,13 @@ repos: files: ^docs/.*\.py$ exclude: ^docs/rtd-deprecation require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] - - id: check-provider-yaml-valid + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema', 'filelock', 'markdown-it-py'] + - id: check-provider-yaml-valid name: Validate provider.yaml files entry: ./scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py language: python files: ^airflow/providers/.*/provider\.yaml$ - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'markdown-it-py'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema', 'filelock', 'markdown-it-py'] require_serial: true - id: update-migration-references name: Update migration ref doc @@ -1063,12 +1063,12 @@ repos: entry: ./scripts/ci/pre_commit/pre_commit_migration_reference.py pass_filenames: false files: ^airflow/migrations/versions/.*\.py$|^docs/apache-airflow/migrations-ref\.rst$ - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'markdown-it-py'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema', 'filelock', 'markdown-it-py'] - id: update-er-diagram name: Update ER diagram language: python entry: ./scripts/ci/pre_commit/pre_commit_update_er_diagram.py pass_filenames: false files: ^airflow/migrations/versions/.*\.py$|^docs/apache-airflow/migrations-ref\.rst$ - additional_dependencies: ['rich>=12.4.4'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema', 'filelock', 'markdown-it-py'] ## ONLY ADD PRE-COMMITS HERE THAT REQUIRE CI IMAGE diff --git a/dev/breeze/src/airflow_breeze/commands/developer_commands.py b/dev/breeze/src/airflow_breeze/commands/developer_commands.py index a8c257a2df345..83abfb4575466 100644 --- a/dev/breeze/src/airflow_breeze/commands/developer_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/developer_commands.py @@ -85,7 +85,6 @@ from airflow_breeze.utils.docker_command_utils import ( check_docker_resources, fix_ownership_using_docker, - get_env_variables_for_docker_commands, get_extra_docker_flags, perform_environment_checks, ) @@ -431,17 +430,17 @@ def build_docs( perform_environment_checks() fix_ownership_using_docker() 
cleanup_python_generated_files() - params = BuildCiParams( + build_params = BuildCiParams( github_repository=github_repository, python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, builder=builder ) - rebuild_or_pull_ci_image_if_needed(command_params=params) + rebuild_or_pull_ci_image_if_needed(command_params=build_params) if clean_build: docs_dir = AIRFLOW_SOURCES_ROOT / "docs" for dir_name in ["_build", "_doctrees", "_inventory_cache", "_api"]: for directory in docs_dir.rglob(dir_name): get_console().print(f"[info]Removing {directory}") shutil.rmtree(directory, ignore_errors=True) - ci_image_name = params.airflow_image_name + ci_image_name = build_params.airflow_image_name doc_builder = DocBuildParams( package_filter=package_filter, docs_only=docs_only, @@ -450,8 +449,11 @@ def build_docs( skip_environment_initialization=True, short_doc_packages=expand_all_provider_packages(doc_packages), ) - extra_docker_flags = get_extra_docker_flags(MOUNT_SELECTED) - env = get_env_variables_for_docker_commands(params) + shell_params = ShellParams( + github_repository=github_repository, + python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, + ) + extra_docker_flags = get_extra_docker_flags(mount_sources=MOUNT_SELECTED) cmd = [ "docker", "run", @@ -463,7 +465,7 @@ def build_docs( "/opt/airflow/scripts/in_container/run_docs_build.sh", *doc_builder.args_doc_builder, ] - process = run_command(cmd, text=True, env=env, check=False) + process = run_command(cmd, text=True, check=False, env=shell_params.env_variables_for_docker_commands) fix_ownership_using_docker() if process.returncode == 0: get_console().print( @@ -714,11 +716,10 @@ def down(preserve_volumes: bool, cleanup_mypy_cache: bool): if not preserve_volumes: command_to_execute.append("--volumes") shell_params = ShellParams(backend="all", include_mypy_volume=True) - env_variables = get_env_variables_for_docker_commands(shell_params) - run_command(command_to_execute, env=env_variables) + run_command(command_to_execute, env=shell_params.env_variables_for_docker_commands) if cleanup_mypy_cache: command_to_execute = ["docker", "volume", "rm", "--force", "mypy-cache-volume"] - run_command(command_to_execute, env=env_variables) + run_command(command_to_execute) @main.command(name="exec", help="Joins the interactive shell of running airflow container.") @@ -796,9 +797,8 @@ def enter_shell(**kwargs) -> RunCommandResult: if shell_params.include_mypy_volume: create_mypy_volume_if_needed() shell_params.print_badge_info() - cmd = ["docker", "compose", "run", "--service-ports", "-e", "BREEZE", "--rm", "airflow"] + cmd = ["docker", "compose", "run", "--service-ports", "--rm", "airflow"] cmd_added = shell_params.command_passed - env_variables = get_env_variables_for_docker_commands(shell_params) if cmd_added is not None: cmd.extend(["-c", cmd_added]) if "arm64" in DOCKER_DEFAULT_PLATFORM: @@ -817,7 +817,11 @@ def enter_shell(**kwargs) -> RunCommandResult: sys.exit(1) command_result = run_command( - cmd, env=env_variables, text=True, check=False, output_outside_the_group=True + cmd, + text=True, + check=False, + env=shell_params.env_variables_for_docker_commands, + output_outside_the_group=True, ) if command_result.returncode == 0: return command_result @@ -834,13 +838,12 @@ def stop_exec_on_error(returncode: int): def find_airflow_container() -> str | None: - exec_shell_params = ShellParams() - check_docker_resources(exec_shell_params.airflow_image_name) - exec_shell_params.print_badge_info() - env_variables = get_env_variables_for_docker_commands(exec_shell_params) + shell_params = 
ShellParams() + check_docker_resources(shell_params.airflow_image_name) + shell_params.print_badge_info() cmd = ["docker", "compose", "ps", "--all", "--filter", "status=running", "airflow"] docker_compose_ps_command = run_command( - cmd, text=True, capture_output=True, env=env_variables, check=False + cmd, text=True, capture_output=True, check=False, env=shell_params.env_variables_for_docker_commands ) if get_dry_run(): return "CONTAINER_ID" diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 732475d5dfafc..226509132c485 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -97,7 +97,6 @@ from airflow_breeze.utils.docker_command_utils import ( check_remote_ghcr_io_commands, fix_ownership_using_docker, - get_env_variables_for_docker_commands, get_extra_docker_flags, perform_environment_checks, ) @@ -149,15 +148,15 @@ def run_docker_command_with_debug( - params: ShellParams, + shell_params: ShellParams, command: list[str], debug: bool, enable_input: bool = False, output_outside_the_group: bool = False, **kwargs, ) -> RunCommandResult: - env_variables = get_env_variables_for_docker_commands(params) - extra_docker_flags = get_extra_docker_flags(mount_sources=params.mount_sources) + env = shell_params.env_variables_for_docker_commands + extra_docker_flags = get_extra_docker_flags(mount_sources=shell_params.mount_sources) if enable_input or debug: term_flag = "-it" else: @@ -169,7 +168,7 @@ def run_docker_command_with_debug( *extra_docker_flags, "--pull", "never", - params.airflow_image_name_with_tag, + shell_params.airflow_image_name_with_tag, ] if debug: cmd_string = " ".join([shlex.quote(s) for s in command if s != "-c"]) @@ -187,16 +186,16 @@ def run_docker_command_with_debug( ) return run_command( base_command, - env=env_variables, output_outside_the_group=output_outside_the_group, + env=env, **kwargs, ) else: base_command.extend(command) return run_command( base_command, - env=env_variables, check=False, + env=env, output_outside_the_group=output_outside_the_group, **kwargs, ) @@ -610,7 +609,7 @@ def run_generate_constraints( "/opt/airflow/scripts/in_container/run_generate_constraints.sh", ] generate_constraints_result = run_docker_command_with_debug( - params=shell_params, + shell_params=shell_params, command=cmd_to_run, debug=debug, output=output, @@ -825,7 +824,7 @@ def _run_command_for_providers( ) -> tuple[int, str]: shell_params.install_selected_providers = " ".join(list_of_providers) result_command = run_docker_command_with_debug( - params=shell_params, + shell_params=shell_params, command=cmd_to_run, debug=False, output=output, @@ -957,7 +956,7 @@ def install_provider_packages( ) else: result_command = run_docker_command_with_debug( - params=shell_params, + shell_params=shell_params, command=cmd_to_run, debug=debug, output_outside_the_group=True, @@ -999,6 +998,8 @@ def verify_provider_packages( fix_ownership_using_docker() cleanup_python_generated_files() shell_params = ShellParams( + backend="sqlite", + executor="SequentialExecutor", mount_sources=MOUNT_SELECTED, github_repository=github_repository, python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, @@ -1015,7 +1016,7 @@ def verify_provider_packages( "python /opt/airflow/scripts/in_container/verify_providers.py", ] result_command = run_docker_command_with_debug( - params=shell_params, + shell_params=shell_params, 
command=cmd_to_run, debug=debug, output_outside_the_group=True, diff --git a/dev/breeze/src/airflow_breeze/commands/testing_commands.py b/dev/breeze/src/airflow_breeze/commands/testing_commands.py index fdd712087b21d..3c07d619bf932 100644 --- a/dev/breeze/src/airflow_breeze/commands/testing_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/testing_commands.py @@ -71,11 +71,9 @@ from airflow_breeze.utils.custom_param_types import BetterChoice from airflow_breeze.utils.docker_command_utils import ( fix_ownership_using_docker, - get_env_variables_for_docker_commands, perform_environment_checks, remove_docker_networks, ) -from airflow_breeze.utils.packages import get_suspended_provider_folders from airflow_breeze.utils.parallel import ( GenericRegexpProgressMatcher, SummarizeAfter, @@ -147,7 +145,7 @@ def docker_compose_tests( def _run_test( - exec_shell_params: ShellParams, + shell_params: ShellParams, extra_pytest_args: tuple, db_reset: bool, output: Output | None, @@ -155,23 +153,16 @@ def _run_test( output_outside_the_group: bool = False, skip_docker_compose_down: bool = False, ) -> tuple[int, str]: - env_variables = get_env_variables_for_docker_commands(exec_shell_params) - env_variables["RUN_TESTS"] = "true" - if db_reset: - env_variables["DB_RESET"] = "true" - env_variables["TEST_TYPE"] = exec_shell_params.test_type - env_variables["COLLECT_ONLY"] = str(exec_shell_params.collect_only).lower() - env_variables["REMOVE_ARM_PACKAGES"] = str(exec_shell_params.remove_arm_packages).lower() - env_variables["SUSPENDED_PROVIDERS_FOLDERS"] = " ".join(get_suspended_provider_folders()).strip() - if "[" in exec_shell_params.test_type and not exec_shell_params.test_type.startswith("Providers"): + shell_params.run_tests = True + shell_params.db_reset = db_reset + if "[" in shell_params.test_type and not shell_params.test_type.startswith("Providers"): get_console(output=output).print( "[error]Only 'Providers' test type can specify actual tests with \\[\\][/]" ) sys.exit(1) - project_name = file_name_from_test_type(exec_shell_params.test_type) + project_name = file_name_from_test_type(shell_params.test_type) compose_project_name = f"airflow-test-{project_name}" - # This is needed for Docker-compose 1 compatibility - env_variables["COMPOSE_PROJECT_NAME"] = compose_project_name + env = shell_params.env_variables_for_docker_commands down_cmd = [ "docker", "compose", @@ -181,7 +172,7 @@ def _run_test( "--remove-orphans", "--volumes", ] - run_command(down_cmd, env=env_variables, output=output, check=False) + run_command(down_cmd, output=output, check=False, env=env) run_cmd = [ "docker", "compose", @@ -195,17 +186,17 @@ def _run_test( ] run_cmd.extend( generate_args_for_pytest( - test_type=exec_shell_params.test_type, + test_type=shell_params.test_type, test_timeout=test_timeout, - skip_provider_tests=exec_shell_params.skip_provider_tests, - skip_db_tests=exec_shell_params.skip_db_tests, - run_db_tests_only=exec_shell_params.run_db_tests_only, - backend=exec_shell_params.backend, - use_xdist=exec_shell_params.use_xdist, - enable_coverage=exec_shell_params.enable_coverage, - collect_only=exec_shell_params.collect_only, - parallelism=exec_shell_params.parallelism, - parallel_test_types_list=exec_shell_params.parallel_test_types_list, + skip_provider_tests=shell_params.skip_provider_tests, + skip_db_tests=shell_params.skip_db_tests, + run_db_tests_only=shell_params.run_db_tests_only, + backend=shell_params.backend, + use_xdist=shell_params.use_xdist, + enable_coverage=shell_params.enable_coverage, + 
collect_only=shell_params.collect_only, + parallelism=shell_params.parallelism, + parallel_test_types_list=shell_params.parallel_test_types_list, helm_test_package=None, ) ) @@ -214,10 +205,10 @@ def _run_test( remove_docker_networks(networks=[f"{compose_project_name}_default"]) result = run_command( run_cmd, - env=env_variables, output=output, check=False, output_outside_the_group=output_outside_the_group, + env=env, ) if os.environ.get("CI") == "true" and result.returncode != 0: ps_result = run_command( @@ -257,19 +248,19 @@ def _run_test( "--force", "-v", ], - env=env_variables, output=output, check=False, + env=env, verbose_override=False, ) remove_docker_networks(networks=[f"{compose_project_name}_default"]) - return result.returncode, f"Test: {exec_shell_params.test_type}" + return result.returncode, f"Test: {shell_params.test_type}" def _run_tests_in_pool( tests_to_run: list[str], parallelism: int, - exec_shell_params: ShellParams, + shell_params: ShellParams, extra_pytest_args: tuple, test_timeout: int, db_reset: bool, @@ -316,7 +307,7 @@ def _run_tests_in_pool( pool.apply_async( _run_test, kwds={ - "exec_shell_params": exec_shell_params.clone_with_test(test_type=test_type), + "shell_params": shell_params.clone_with_test(test_type=test_type), "extra_pytest_args": extra_pytest_args, "db_reset": db_reset, "output": outputs[index], @@ -339,7 +330,7 @@ def _run_tests_in_pool( def run_tests_in_parallel( - exec_shell_params: ShellParams, + shell_params: ShellParams, extra_pytest_args: tuple, db_reset: bool, test_timeout: int, @@ -350,9 +341,9 @@ def run_tests_in_parallel( skio_docker_compose_down: bool, ) -> None: _run_tests_in_pool( - tests_to_run=exec_shell_params.parallel_test_types_list, + tests_to_run=shell_params.parallel_test_types_list, parallelism=parallelism, - exec_shell_params=exec_shell_params, + shell_params=shell_params, extra_pytest_args=extra_pytest_args, test_timeout=test_timeout, db_reset=db_reset, @@ -567,7 +558,7 @@ def _run_test_command( test_list = [test for test in test_list if test not in excluded_test_list] if skip_provider_tests or "Providers" in excluded_test_list: test_list = [test for test in test_list if not test.startswith("Providers")] - exec_shell_params = ShellParams( + shell_params = ShellParams( python=python, backend=backend, integration=integration, @@ -592,7 +583,7 @@ def _run_test_command( skip_provider_tests=skip_provider_tests, parallel_test_types_list=test_list, ) - rebuild_or_pull_ci_image_if_needed(command_params=exec_shell_params) + rebuild_or_pull_ci_image_if_needed(command_params=shell_params) fix_ownership_using_docker() cleanup_python_generated_files() perform_environment_checks() @@ -604,7 +595,7 @@ def _run_test_command( ) sys.exit(1) run_tests_in_parallel( - exec_shell_params=exec_shell_params, + shell_params=shell_params, extra_pytest_args=extra_pytest_args, db_reset=db_reset, test_timeout=test_timeout, @@ -615,15 +606,15 @@ def _run_test_command( skio_docker_compose_down=skip_docker_compose_down, ) else: - if exec_shell_params.test_type == "Default": + if shell_params.test_type == "Default": if any([arg.startswith("tests") for arg in extra_pytest_args]): # in case some tests are specified as parameters, do not pass "tests" as default - exec_shell_params.test_type = "None" - exec_shell_params.parallel_test_types_list = [] + shell_params.test_type = "None" + shell_params.parallel_test_types_list = [] else: - exec_shell_params.test_type = "All" + shell_params.test_type = "All" returncode, _ = _run_test( - 
exec_shell_params=exec_shell_params, + shell_params=shell_params, extra_pytest_args=extra_pytest_args, db_reset=db_reset, output=None, @@ -682,7 +673,7 @@ def integration_tests( ): docker_filesystem = get_filesystem_type("/var/lib/docker") get_console().print(f"Docker filesystem: {docker_filesystem}") - exec_shell_params = ShellParams( + shell_params = ShellParams( python=python, backend=backend, integration=integration, @@ -701,7 +692,7 @@ def integration_tests( cleanup_python_generated_files() perform_environment_checks() returncode, _ = _run_test( - exec_shell_params=exec_shell_params, + shell_params=shell_params, extra_pytest_args=extra_pytest_args, db_reset=db_reset, output=None, @@ -744,16 +735,17 @@ def helm_tests( parallelism: int, use_xdist: bool, ): - exec_shell_params = ShellParams( + if helm_test_package == "all": + helm_test_package = "" + shell_params = ShellParams( image_tag=image_tag, mount_sources=mount_sources, github_repository=github_repository, + run_tests=True, + test_type="Helm", + helm_test_package=helm_test_package, ) - env_variables = get_env_variables_for_docker_commands(exec_shell_params) - env_variables["RUN_TESTS"] = "true" - env_variables["TEST_TYPE"] = "Helm" - if helm_test_package != "all": - env_variables["HELM_TEST_PACKAGE"] = helm_test_package + env = shell_params.env_variables_for_docker_commands perform_environment_checks() fix_ownership_using_docker() cleanup_python_generated_files() @@ -772,6 +764,6 @@ def helm_tests( helm_test_package=helm_test_package, ) cmd = ["docker", "compose", "run", "--service-ports", "--rm", "airflow", *pytest_args, *extra_pytest_args] - result = run_command(cmd, env=env_variables, check=False, output_outside_the_group=True) + result = run_command(cmd, check=False, env=env, output_outside_the_group=True) fix_ownership_using_docker() sys.exit(result.returncode) diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 83ded0f66aa0d..f712dc8470fea 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -200,6 +200,7 @@ def get_default_platform_machine() -> str: MSSQL_HOST_PORT = "21433" FLOWER_HOST_PORT = "25555" REDIS_HOST_PORT = "26379" +CELERY_BROKER_URLS_MAP = {"rabbitmq": "amqp://guest:guest@rabbitmq:5672", "redis": "redis://redis:6379/0"} SQLITE_URL = "sqlite:////root/airflow/sqlite/airflow.db" PYTHONDONTWRITEBYTECODE = True diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index 182befccdda7d..233dfb933e4ca 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -19,6 +19,8 @@ import os from copy import deepcopy from dataclasses import dataclass, field +from functools import cached_property +from os import _Environ from pathlib import Path from airflow_breeze.branch_defaults import AIRFLOW_BRANCH, DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH @@ -32,26 +34,39 @@ ALLOWED_POSTGRES_VERSIONS, ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS, APACHE_AIRFLOW_GITHUB_REPOSITORY, + CELERY_BROKER_URLS_MAP, DEFAULT_CELERY_BROKER, DOCKER_DEFAULT_PLATFORM, + FLOWER_HOST_PORT, MOUNT_ALL, MOUNT_REMOVE, MOUNT_SELECTED, MOUNT_SKIP, + MSSQL_HOST_PORT, + MYSQL_HOST_PORT, + POSTGRES_HOST_PORT, + REDIS_HOST_PORT, + SSH_PORT, START_AIRFLOW_DEFAULT_ALLOWED_EXECUTORS, TESTABLE_INTEGRATIONS, + WEBSERVER_HOST_PORT, get_airflow_version, ) from airflow_breeze.utils.console import get_console +from 
airflow_breeze.utils.host_info_utils import get_host_group_id, get_host_os, get_host_user_id +from airflow_breeze.utils.packages import get_suspended_provider_folders from airflow_breeze.utils.path_utils import ( AIRFLOW_SOURCES_ROOT, BUILD_CACHE_DIR, + GENERATED_DOCKER_COMPOSE_ENV_FILE, + GENERATED_DOCKER_ENV_FILE, + GENERATED_DOCKER_LOCK_FILE, MSSQL_TMP_DIR_NAME, SCRIPTS_CI_DIR, ) from airflow_breeze.utils.run_tests import file_name_from_test_type -from airflow_breeze.utils.run_utils import get_filesystem_type, run_command -from airflow_breeze.utils.shared_options import get_verbose +from airflow_breeze.utils.run_utils import commit_sha, get_filesystem_type, run_command +from airflow_breeze.utils.shared_options import get_forced_answer, get_verbose DOCKER_COMPOSE_DIR = SCRIPTS_CI_DIR / "docker-compose" @@ -64,6 +79,29 @@ def add_mssql_compose_file(compose_file_list: list[Path]): compose_file_list.append(DOCKER_COMPOSE_DIR / "backend-mssql-docker-volume.yml") +def _set_var(env: dict[str, str], variable: str, attribute: str | bool | None, default: str | None = None): + """Set variable in env dict. + + Priorities: + 1. the attribute comes first if it is not None + 2. then the environment variable if it is set + 3. then the non-None default value if the environment variable is not set + 4. if the default is None, the key is not set in the dictionary at all + + """ + if attribute is not None: + if isinstance(attribute, bool): + env[variable] = str(attribute).lower() + else: + env[variable] = str(attribute) + else: + os_variable_value = os.environ.get(variable) + if os_variable_value is not None: + env[variable] = os_variable_value + elif default is not None: + env[variable] = default + + @dataclass class ShellParams: """ @@ -71,26 +109,33 @@ class ShellParams: airflow_branch: str = os.environ.get("DEFAULT_BRANCH", AIRFLOW_BRANCH) - default_constraints_branch: str = os.environ.get( - "DEFAULT_CONSTRAINTS_BRANCH", DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH - ) + airflow_constraints_mode: str = ALLOWED_CONSTRAINTS_MODES_CI[0] airflow_constraints_reference: str = DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH airflow_extras: str = "" backend: str = ALLOWED_BACKENDS[0] base_branch: str = "main" builder: str = "autodetect" - ci: bool = False + celery_broker: str = DEFAULT_CELERY_BROKER + celery_flower: bool = False collect_only: bool = False + database_isolation: bool = False db_reset: bool = False + default_constraints_branch: str = os.environ.get( + "DEFAULT_CONSTRAINTS_BRANCH", DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH + ) dev_mode: bool = False + downgrade_sqlalchemy: bool = False + dry_run: bool = False + enable_coverage: bool = False + executor: str = START_AIRFLOW_DEFAULT_ALLOWED_EXECUTORS extra_args: tuple = () force_build: bool = False - forward_ports: bool = True forward_credentials: str = "false" - airflow_constraints_mode: str = ALLOWED_CONSTRAINTS_MODES_CI[0] + forward_ports: bool = True github_actions: str = os.environ.get("GITHUB_ACTIONS", "false") github_repository: str = APACHE_AIRFLOW_GITHUB_REPOSITORY github_token: str = os.environ.get("GITHUB_TOKEN", "") + helm_test_package: str | None = None image_tag: str | None = None include_mypy_volume: bool = False install_airflow_version: str = "" @@ -104,89 +149,97 @@ class ShellParams: mssql_version: str = ALLOWED_MSSQL_VERSIONS[0] mysql_version: str = ALLOWED_MYSQL_VERSIONS[0] num_runs: str = "" - run_db_tests_only: bool = False - skip_db_tests: bool = False + only_min_version_update: bool = False package_format: str = ALLOWED_INSTALLATION_PACKAGE_FORMATS[0] + parallel_test_types_list: list[str] = 
field(default_factory=list) + parallelism: int = 0 platform: str = DOCKER_DEFAULT_PLATFORM postgres_version: str = ALLOWED_POSTGRES_VERSIONS[0] python: str = ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS[0] + regenerate_missing_docs: bool = False remove_arm_packages: bool = False - skip_environment_initialization: bool = False + run_db_tests_only: bool = False + run_system_tests: bool = os.environ.get("RUN_SYSTEM_TESTS", "false") == "true" + run_tests: bool = False skip_constraints: bool = False + skip_db_tests: bool = False + skip_environment_initialization: bool = False + skip_provider_dependencies_check: bool = False skip_provider_tests: bool = False + skip_ssh_setup: bool = os.environ.get("SKIP_SSH_SETUP", "false") == "true" + standalone_dag_processor: bool = False start_airflow: str = "false" test_type: str | None = None + upgrade_boto: bool = False use_airflow_version: str | None = None use_packages_from_dist: bool = False - version_suffix_for_pypi: str = "" - dry_run: bool = False - verbose: bool = False - upgrade_boto: bool = False - downgrade_sqlalchemy: bool = False - executor: str = START_AIRFLOW_DEFAULT_ALLOWED_EXECUTORS - celery_broker: str = DEFAULT_CELERY_BROKER - celery_flower: bool = False - only_min_version_update: bool = False - regenerate_missing_docs: bool = False - skip_provider_dependencies_check: bool = False - standalone_dag_processor: bool = False - database_isolation: bool = False use_xdist: bool = False - enable_coverage: bool = False - parallelism: int = 0 - parallel_test_types_list: list[str] = field(default_factory=list) + verbose: bool = False + version_suffix_for_pypi: str = "" def clone_with_test(self, test_type: str) -> ShellParams: new_params = deepcopy(self) new_params.test_type = test_type return new_params - @property + @cached_property + def host_user_id(self) -> str: + return get_host_user_id() + + @cached_property + def host_group_id(self) -> str: + return get_host_group_id() + + @cached_property + def host_os(self) -> str: + return get_host_os() + + @cached_property def airflow_version(self): return get_airflow_version() - @property + @cached_property def airflow_version_for_production_image(self): cmd = ["docker", "run", "--entrypoint", "/bin/bash", f"{self.airflow_image_name}"] cmd.extend(["-c", 'echo "${AIRFLOW_VERSION}"']) output = run_command(cmd, capture_output=True, text=True) return output.stdout.strip() if output.stdout else "UNKNOWN_VERSION" - @property + @cached_property def airflow_base_image_name(self) -> str: image = f"ghcr.io/{self.github_repository.lower()}" return image - @property + @cached_property def airflow_image_name(self) -> str: """Construct CI image link""" image = f"{self.airflow_base_image_name}/{self.airflow_branch}/ci/python{self.python}" return image - @property + @cached_property def airflow_image_name_with_tag(self) -> str: image = self.airflow_image_name return image if not self.image_tag else image + f":{self.image_tag}" - @property + @cached_property def airflow_image_kubernetes(self) -> str: image = f"{self.airflow_base_image_name}/{self.airflow_branch}/kubernetes/python{self.python}" return image - @property + @cached_property def airflow_sources(self): return AIRFLOW_SOURCES_ROOT - @property + @cached_property def image_type(self) -> str: return "CI" - @property + @cached_property def md5sum_cache_dir(self) -> Path: cache_dir = Path(BUILD_CACHE_DIR, self.airflow_branch, self.python, self.image_type) return cache_dir - @property + @cached_property def backend_version(self) -> str: version = "" if self.backend == 
"postgres": @@ -197,10 +250,9 @@ def backend_version(self) -> str: version = self.mssql_version return version - @property + @cached_property def sqlite_url(self) -> str: - sqlite_url = "sqlite:////root/airflow/sqlite/airflow.db" - return sqlite_url + return "sqlite:////root/airflow/sqlite/airflow.db" def print_badge_info(self): if get_verbose(): @@ -218,7 +270,7 @@ def get_backend_compose_files(self, backend: str) -> list[Path]: return [backend_docker_compose_file] return [backend_docker_compose_file, DOCKER_COMPOSE_DIR / f"backend-{backend}-port.yml"] - @property + @cached_property def compose_file(self) -> str: compose_file_list: list[Path] = [] backend_files: list[Path] = [] @@ -279,12 +331,29 @@ def compose_file(self) -> str: compose_file_list.append(DOCKER_COMPOSE_DIR / "integration-kerberos.yml") return os.pathsep.join([os.fspath(f) for f in compose_file_list]) - @property + @cached_property def command_passed(self): - cmd = str(self.extra_args[0]) if self.extra_args else None - return cmd + return str(self.extra_args[0]) if self.extra_args else None + + @cached_property + def airflow_celery_broker_url(self) -> str: + if not self.celery_broker: + return "" + broker_url = CELERY_BROKER_URLS_MAP.get(self.celery_broker) + if not broker_url: + get_console().print( + f"[warning]The broker {self.celery_broker} should " + f"be one of {CELERY_BROKER_URLS_MAP.keys()}" + ) + return "" + # Map from short form (rabbitmq/redis) to actual urls + return broker_url - @property + @cached_property + def suspended_providers_folders(self): + return " ".join(get_suspended_provider_folders()).strip() + + @cached_property def mssql_data_volume(self) -> str: docker_filesystem = get_filesystem_type("/var/lib/docker") # Make sure the test type is not too long to be used as a volume name in docker-compose @@ -299,3 +368,199 @@ def mssql_data_volume(self) -> str: else: # mssql_data_volume variable is only used in case of tmpfs return "" + + @cached_property + def rootless_docker(self) -> bool: + try: + response = run_command( + ["docker", "info", "-f", "{{println .SecurityOptions}}"], + capture_output=True, + check=False, + text=True, + ) + if response.returncode == 0 and "rootless" in response.stdout.strip(): + get_console().print("[info]Docker is running in rootless mode.[/]\n") + return True + except FileNotFoundError: + # we ignore if docker is missing + pass + return False + + @cached_property + def env_variables_for_docker_commands(self) -> _Environ: + """ + Constructs environment variables needed by the docker-compose command, based on Shell parameters + passed to it. + + This is the only place where you need to add environment variables if you want to pass them to + docker or docker-compose. 
+ + :return: dictionary of env variables to use for docker-compose and docker command + """ + + _env: dict[str, str] = {} + _set_var(_env, "AIRFLOW_CI_IMAGE", self.airflow_image_name) + _set_var(_env, "AIRFLOW_CI_IMAGE_WITH_TAG", self.airflow_image_name_with_tag) + _set_var( + _env, "AIRFLOW_CONSTRAINTS_MODE", self.airflow_constraints_mode, "constraints-source-providers" + ) + _set_var( + _env, + "AIRFLOW_CONSTRAINTS_REFERENCE", + self.airflow_constraints_reference, + "constraints-source-providers", + ) + _set_var(_env, "AIRFLOW_ENABLE_AIP_44", None, "true") + _set_var(_env, "AIRFLOW_ENV", "development") + _set_var(_env, "AIRFLOW_EXTRAS", self.airflow_extras) + _set_var(_env, "AIRFLOW_IMAGE_KUBERNETES", self.airflow_image_kubernetes) + _set_var(_env, "AIRFLOW_VERSION", self.airflow_version) + _set_var(_env, "AIRFLOW__CELERY__BROKER_URL", self.airflow_celery_broker_url) + _set_var(_env, "AIRFLOW__CORE__EXECUTOR", self.executor) + _set_var(_env, "ANSWER", get_forced_answer() or "") + _set_var(_env, "BACKEND", self.backend) + _set_var(_env, "BASE_BRANCH", self.base_branch, "main") + _set_var(_env, "BREEZE", "true") + _set_var(_env, "BREEZE_INIT_COMMAND", None, "") + _set_var(_env, "CELERY_FLOWER", self.celery_flower) + _set_var(_env, "CI", None, "false") + _set_var(_env, "CI_BUILD_ID", None, "0") + _set_var(_env, "CI_EVENT_TYPE", None, "pull_request") + _set_var(_env, "CI_JOB_ID", None, "0") + _set_var(_env, "CI_TARGET_BRANCH", self.airflow_branch) + _set_var(_env, "CI_TARGET_REPO", self.github_repository) + _set_var(_env, "COLLECT_ONLY", self.collect_only) + _set_var(_env, "COMMIT_SHA", None, commit_sha()) + _set_var(_env, "COMPOSE_FILE", self.compose_file) + _set_var(_env, "DATABASE_ISOLATION", self.database_isolation) + _set_var(_env, "DB_RESET", self.db_reset) + _set_var(_env, "DEFAULT_BRANCH", self.airflow_branch) + _set_var(_env, "DEFAULT_CONSTRAINTS_BRANCH", self.default_constraints_branch) + _set_var(_env, "DEV_MODE", self.dev_mode) + _set_var(_env, "DOCKER_IS_ROOTLESS", self.rootless_docker) + _set_var(_env, "DOWNGRADE_SQLALCHEMY", self.downgrade_sqlalchemy) + _set_var(_env, "ENABLED_SYSTEMS", None, "") + _set_var(_env, "FLOWER_HOST_PORT", None, FLOWER_HOST_PORT) + _set_var(_env, "GITHUB_ACTIONS", self.github_actions) + _set_var(_env, "HELM_TEST_PACKAGE", self.helm_test_package, "") + _set_var(_env, "HOST_GROUP_ID", self.host_group_id) + _set_var(_env, "HOST_OS", self.host_os) + _set_var(_env, "HOST_USER_ID", self.host_user_id) + _set_var(_env, "INIT_SCRIPT_FILE", None, "init.sh") + _set_var(_env, "INSTALL_AIRFLOW_VERSION", self.install_airflow_version) + _set_var(_env, "INSTALL_PROVIDERS_FROM_SOURCES", self.install_providers_from_sources) + _set_var(_env, "INSTALL_SELECTED_PROVIDERS", self.install_selected_providers) + _set_var(_env, "ISSUE_ID", self.issue_id) + _set_var(_env, "LOAD_DEFAULT_CONNECTIONS", self.load_default_connections) + _set_var(_env, "LOAD_EXAMPLES", self.load_example_dags) + _set_var(_env, "MSSQL_DATA_VOLUME", self.mssql_data_volume) + _set_var(_env, "MSSQL_HOST_PORT", None, MSSQL_HOST_PORT) + _set_var(_env, "MSSQL_VERSION", self.mssql_version) + _set_var(_env, "MYSQL_HOST_PORT", None, MYSQL_HOST_PORT) + _set_var(_env, "MYSQL_VERSION", self.mysql_version) + _set_var(_env, "NUM_RUNS", self.num_runs) + _set_var(_env, "ONLY_MIN_VERSION_UPDATE", self.only_min_version_update) + _set_var(_env, "PACKAGE_FORMAT", self.package_format) + _set_var(_env, "POSTGRES_HOST_PORT", None, POSTGRES_HOST_PORT) + _set_var(_env, "POSTGRES_VERSION", self.postgres_version) + 
_set_var(_env, "PYTHONDONTWRITEBYTECODE", "true") + _set_var(_env, "PYTHON_MAJOR_MINOR_VERSION", self.python) + _set_var(_env, "REDIS_HOST_PORT", None, REDIS_HOST_PORT) + _set_var(_env, "REGENERATE_MISSING_DOCS", self.regenerate_missing_docs) + _set_var(_env, "REMOVE_ARM_PACKAGES", self.remove_arm_packages) + _set_var(_env, "RUN_SYSTEM_TESTS", self.run_system_tests) + _set_var(_env, "RUN_TESTS", self.run_tests) + _set_var(_env, "SKIP_CONSTRAINTS", self.skip_constraints) + _set_var(_env, "SKIP_ENVIRONMENT_INITIALIZATION", self.skip_environment_initialization) + _set_var(_env, "SKIP_SSH_SETUP", self.skip_ssh_setup) + _set_var(_env, "SQLITE_URL", self.sqlite_url) + _set_var(_env, "SSH_PORT", None, SSH_PORT) + _set_var(_env, "STANDALONE_DAG_PROCESSOR", self.standalone_dag_processor) + _set_var(_env, "START_AIRFLOW", self.start_airflow) + _set_var(_env, "SUSPENDED_PROVIDERS_FOLDERS", self.suspended_providers_folders) + _set_var(_env, "TEST_TYPE", self.test_type, "") + _set_var(_env, "UPGRADE_BOTO", self.upgrade_boto) + _set_var(_env, "USE_AIRFLOW_VERSION", self.use_airflow_version, "") + _set_var(_env, "USE_PACKAGES_FROM_DIST", self.use_packages_from_dist) + _set_var(_env, "USE_XDIST", self.use_xdist) + _set_var(_env, "VERBOSE", get_verbose()) + _set_var(_env, "VERBOSE_COMMANDS", None, "false") + _set_var(_env, "VERSION_SUFFIX_FOR_PYPI", self.version_suffix_for_pypi) + _set_var(_env, "WEBSERVER_HOST_PORT", None, WEBSERVER_HOST_PORT) + _set_var(_env, "_AIRFLOW_RUN_DB_TESTS_ONLY", self.run_db_tests_only) + _set_var(_env, "_AIRFLOW_SKIP_DB_TESTS", self.skip_db_tests) + + self._generate_env_for_docker_compose_file_if_needed(_env) + + target_environment = deepcopy(os.environ) + target_environment.update(_env) + return target_environment + + @staticmethod + def _generate_env_for_docker_compose_file_if_needed(env: dict[str, str]): + """ + Generates docker-compose env file if needed. + + :param env: dictionary of env variables to use for docker-compose and docker env files. + + Writes env files for docker and docker compose to make sure the envs will be passed + to docker-compose/docker when running commands. + + The list of variables might change over time, and we want to keep the list updated only in + one place (above env_variables_for_docker_commands method). So we need to regenerate the env + files automatically when new variable is added to the list or removed. + + Docker-Compose based tests can start in parallel, so we want to make sure we generate it once + per invocation of breeze command otherwise there could be nasty race condition that + the file would be empty while another compose tries to use it when starting. + + Also, it means that we need to pass the values through environment rather than writing + them to the file directly, because they might differ between different parallel runs + of compose or docker. + + Unfortunately docker and docker-compose do not share the same env files any more as of Compose V2 + format for passing variables from the environment, so we need to generate both files. + Documentation is a bit vague about this. + + The docker file contain simply list of all variables that should be passed to docker. + + See https://docs.docker.com/engine/reference/commandline/run/#env + + > When running the command, the Docker CLI client checks the value the variable has in + > your local environment and passes it to the container. If no = is provided and that + > variable is not exported in your local environment, the variable isn't set in the container. 
+ + The docker-compose file should instead contain VARIABLE=${VARIABLE} for each variable + that should be passed to docker compose. + + From https://docs.docker.com/compose/compose-file/05-services/#env_file + + > VAL may be omitted, in such cases the variable value is an empty string. =VAL may be omitted, + > in such cases the variable is unset. + + """ + from filelock import FileLock + + with FileLock(GENERATED_DOCKER_LOCK_FILE): + if GENERATED_DOCKER_ENV_FILE.exists(): + generated_keys = GENERATED_DOCKER_ENV_FILE.read_text().splitlines() + if set(env.keys()) == set(generated_keys): + # we check if the set of env variables has not changed since the last run + # if so - cool, we do not need to do anything else + return + else: + if get_verbose(): + get_console().print( + f"[info]The keys have changed vs the last run. Regenerating[/]: " + f"{GENERATED_DOCKER_ENV_FILE} and {GENERATED_DOCKER_COMPOSE_ENV_FILE}" + ) + if get_verbose(): + get_console().print(f"[info]Generating new docker env file [/]: {GENERATED_DOCKER_ENV_FILE}") + GENERATED_DOCKER_ENV_FILE.write_text("\n".join(sorted(env.keys()))) + if get_verbose(): + get_console().print( + f"[info]Generating new docker compose env file [/]: {GENERATED_DOCKER_COMPOSE_ENV_FILE}" + ) + GENERATED_DOCKER_COMPOSE_ENV_FILE.write_text( + "\n".join([f"{k}=${{{k}}}" for k in sorted(env.keys())]) + ) diff --git a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py index 78a8d69754ba0..9a7945333944d 100644 --- a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py @@ -20,17 +20,15 @@ import copy import json import os -import random import re import sys from subprocess import DEVNULL, CalledProcessError, CompletedProcess from typing import TYPE_CHECKING from airflow_breeze.params.build_prod_params import BuildProdParams -from airflow_breeze.utils.host_info_utils import get_host_group_id, get_host_os, get_host_user_id +from airflow_breeze.utils.host_info_utils import get_host_os from airflow_breeze.utils.image import find_available_ci_image -from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT -from airflow_breeze.utils.shared_options import get_forced_answer +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, GENERATED_DOCKER_ENV_FILE try: from packaging import version @@ -38,38 +36,26 @@ # We handle the ImportError so that autocomplete works with just click installed version = None # type: ignore[assignment] -from airflow_breeze.branch_defaults import AIRFLOW_BRANCH from airflow_breeze.global_constants import ( ALLOWED_CELERY_BROKERS, ALLOWED_DEBIAN_VERSIONS, - ALLOWED_PACKAGE_FORMATS, APACHE_AIRFLOW_GITHUB_REPOSITORY, DEFAULT_PYTHON_MAJOR_MINOR_VERSION, - FLOWER_HOST_PORT, MIN_DOCKER_COMPOSE_VERSION, MIN_DOCKER_VERSION, MOUNT_ALL, MOUNT_REMOVE, MOUNT_SELECTED, - MSSQL_HOST_PORT, - MYSQL_HOST_PORT, - POSTGRES_HOST_PORT, - REDIS_HOST_PORT, - SSH_PORT, - WEBSERVER_HOST_PORT, ) from airflow_breeze.utils.console import Output, get_console from airflow_breeze.utils.run_utils import ( RunCommandResult, check_if_buildx_plugin_installed, - commit_sha, run_command, ) if TYPE_CHECKING: - from airflow_breeze.params.build_ci_params import BuildCiParams from airflow_breeze.params.common_build_params import CommonBuildParams - from airflow_breeze.params.shell_params import ShellParams # Those are volumes that are mounted when MOUNT_SELECTED is chosen (which is the default when # entering Breeze. 
MOUNT_SELECTED prevents mounting the files that you may have accidentally added @@ -118,7 +104,7 @@ def get_extra_docker_flags(mount_sources: str, include_mypy_volume: bool = False Returns extra docker flags based on the type of mounting we want to do for sources. :param mount_sources: type of mounting we want to have - :param include_mypy_volume: include mypy_volume + :param include_mypy_volume: whether to include the mypy volume :return: extra flag as list of strings """ extra_docker_flags = [] @@ -141,22 +128,10 @@ def get_extra_docker_flags(mount_sources: str, include_mypy_volume: bool = False extra_docker_flags.extend(["--mount", f"type=bind,src={AIRFLOW_SOURCES_ROOT / 'files'},dst=/files"]) extra_docker_flags.extend(["--mount", f"type=bind,src={AIRFLOW_SOURCES_ROOT / 'dist'},dst=/dist"]) extra_docker_flags.extend(["--rm"]) - extra_docker_flags.extend( - ["--env-file", f"{AIRFLOW_SOURCES_ROOT / 'scripts' / 'ci' / 'docker-compose' / '_docker.env' }"] - ) + extra_docker_flags.extend(["--env-file", GENERATED_DOCKER_ENV_FILE.as_posix()]) return extra_docker_flags -def is_docker_rootless(): - response = run_command( - ["docker", "info", "-f", "{{println .SecurityOptions}}"], capture_output=True, check=True, text=True - ) - if "rootless" in response.stdout.strip(): - get_console().print("[info]Docker is running in rootless mode.[/]\n") - return True - return False - - def check_docker_resources(airflow_image_name: str) -> RunCommandResult: """ Check if we have enough resources to run docker. This is done via running script embedded in our image. @@ -378,16 +353,6 @@ def check_docker_compose_version(): sys.exit(1) -def get_env_variable_value(arg_name: str, params: CommonBuildParams | ShellParams): - raw_value = getattr(params, arg_name, None) - value = str(raw_value) if raw_value is not None else "" - value = "true" if raw_value is True else value - value = "false" if raw_value is False else value - if arg_name == "upgrade_to_newer_dependencies" and value == "true": - value = f"{random.randrange(2**32):x}" - return value - - def prepare_docker_build_cache_command( image_params: CommonBuildParams, ) -> list[str]: @@ -524,155 +489,6 @@ def set_value_to_default_if_not_set(env: dict[str, str], name: str, default: str env[name] = os.environ.get(name, default) -def update_expected_environment_variables(env: dict[str, str]) -> None: - """ - Updates default values for unset environment variables. - - :param env: environment variables to update with missing values if not set. 
- """ - answer = get_forced_answer() - set_value_to_default_if_not_set(env, "AIRFLOW_CONSTRAINTS_MODE", "constraints-source-providers") - set_value_to_default_if_not_set(env, "AIRFLOW_CONSTRAINTS_REFERENCE", "constraints-source-providers") - set_value_to_default_if_not_set(env, "AIRFLOW_EXTRAS", "") - set_value_to_default_if_not_set(env, "AIRFLOW_ENABLE_AIP_44", "true") - set_value_to_default_if_not_set(env, "AIRFLOW_ENV", "development") - set_value_to_default_if_not_set(env, "ANSWER", answer or "") - set_value_to_default_if_not_set(env, "BASE_BRANCH", "main") - set_value_to_default_if_not_set(env, "BREEZE", "true") - set_value_to_default_if_not_set(env, "BREEZE_INIT_COMMAND", "") - set_value_to_default_if_not_set(env, "CI", "false") - set_value_to_default_if_not_set(env, "CI_BUILD_ID", "0") - set_value_to_default_if_not_set(env, "CI_EVENT_TYPE", "pull_request") - set_value_to_default_if_not_set(env, "CI_JOB_ID", "0") - set_value_to_default_if_not_set(env, "CI_TARGET_BRANCH", AIRFLOW_BRANCH) - set_value_to_default_if_not_set(env, "CI_TARGET_REPO", APACHE_AIRFLOW_GITHUB_REPOSITORY) - set_value_to_default_if_not_set(env, "COMMIT_SHA", commit_sha()) - set_value_to_default_if_not_set(env, "COLLECT_ONLY", "false") - set_value_to_default_if_not_set(env, "DB_RESET", "false") - set_value_to_default_if_not_set(env, "DEFAULT_BRANCH", AIRFLOW_BRANCH) - set_value_to_default_if_not_set(env, "DOCKER_IS_ROOTLESS", "false") - set_value_to_default_if_not_set(env, "ENABLED_SYSTEMS", "") - set_value_to_default_if_not_set(env, "HELM_TEST_PACKAGE", "") - set_value_to_default_if_not_set(env, "HOST_GROUP_ID", get_host_group_id()) - set_value_to_default_if_not_set(env, "HOST_OS", get_host_os()) - set_value_to_default_if_not_set(env, "HOST_USER_ID", get_host_user_id()) - set_value_to_default_if_not_set(env, "INIT_SCRIPT_FILE", "init.sh") - set_value_to_default_if_not_set(env, "INSTALL_PACKAGES_FROM_CONTEXT", "false") - set_value_to_default_if_not_set(env, "INSTALL_PROVIDERS_FROM_SOURCES", "true") - set_value_to_default_if_not_set(env, "LOAD_DEFAULT_CONNECTIONS", "false") - set_value_to_default_if_not_set(env, "LOAD_EXAMPLES", "false") - set_value_to_default_if_not_set(env, "ONLY_MIN_VERSION_UPDATE", "false") - set_value_to_default_if_not_set(env, "PACKAGE_FORMAT", ALLOWED_PACKAGE_FORMATS[0]) - set_value_to_default_if_not_set(env, "PYTHONDONTWRITEBYTECODE", "true") - set_value_to_default_if_not_set(env, "REGENERATE_MISSING_DOCS", "false") - set_value_to_default_if_not_set(env, "REMOVE_ARM_PACKAGES", "false") - set_value_to_default_if_not_set(env, "RUN_SYSTEM_TESTS", "false") - set_value_to_default_if_not_set(env, "RUN_TESTS", "false") - set_value_to_default_if_not_set(env, "SKIP_ENVIRONMENT_INITIALIZATION", "false") - set_value_to_default_if_not_set(env, "SKIP_SSH_SETUP", "false") - set_value_to_default_if_not_set(env, "SUSPENDED_PROVIDERS_FOLDERS", "") - set_value_to_default_if_not_set(env, "TEST_TYPE", "") - set_value_to_default_if_not_set(env, "UPGRADE_BOTO", "false") - set_value_to_default_if_not_set(env, "DOWNGRADE_SQLALCHEMY", "false") - set_value_to_default_if_not_set(env, "UPGRADE_TO_NEWER_DEPENDENCIES", "false") - set_value_to_default_if_not_set(env, "USE_PACKAGES_FROM_DIST", "false") - set_value_to_default_if_not_set(env, "VERBOSE", "false") - set_value_to_default_if_not_set(env, "VERBOSE_COMMANDS", "false") - set_value_to_default_if_not_set(env, "VERSION_SUFFIX_FOR_PYPI", "") - - -DERIVE_ENV_VARIABLES_FROM_ATTRIBUTES = { - "_AIRFLOW_RUN_DB_TESTS_ONLY": "run_db_tests_only", - "_AIRFLOW_SKIP_DB_TESTS": 
"skip_db_tests", - "AIRFLOW_CI_IMAGE": "airflow_image_name", - "AIRFLOW_CI_IMAGE_WITH_TAG": "airflow_image_name_with_tag", - "AIRFLOW_CONSTRAINTS_MODE": "airflow_constraints_mode", - "AIRFLOW_CONSTRAINTS_REFERENCE": "airflow_constraints_reference", - "AIRFLOW_EXTRAS": "airflow_extras", - "AIRFLOW_IMAGE_KUBERNETES": "airflow_image_kubernetes", - "AIRFLOW_PROD_IMAGE": "airflow_image_name", - "AIRFLOW_SOURCES": "airflow_sources", - "AIRFLOW_VERSION": "airflow_version", - "AIRFLOW__CORE__EXECUTOR": "executor", - "BACKEND": "backend", - "BASE_BRANCH": "base_branch", - "COMPOSE_FILE": "compose_file", - "DATABASE_ISOLATION": "database_isolation", - "DB_RESET": "db_reset", - "DEV_MODE": "dev_mode", - "DEFAULT_CONSTRAINTS_BRANCH": "default_constraints_branch", - "GITHUB_ACTIONS": "github_actions", - "INSTALL_AIRFLOW_VERSION": "install_airflow_version", - "INSTALL_PROVIDERS_FROM_SOURCES": "install_providers_from_sources", - "INSTALL_SELECTED_PROVIDERS": "install_selected_providers", - "ISSUE_ID": "issue_id", - "LOAD_DEFAULT_CONNECTIONS": "load_default_connections", - "LOAD_EXAMPLES": "load_example_dags", - "MSSQL_DATA_VOLUME": "mssql_data_volume", - "MSSQL_VERSION": "mssql_version", - "MYSQL_VERSION": "mysql_version", - "NUM_RUNS": "num_runs", - "ONLY_MIN_VERSION_UPDATE": "only_min_version_update", - "PACKAGE_FORMAT": "package_format", - "POSTGRES_VERSION": "postgres_version", - "PYTHON_MAJOR_MINOR_VERSION": "python", - "REGENERATE_MISSING_DOCS": "regenerate_missing_docs", - "SKIP_CONSTRAINTS": "skip_constraints", - "SKIP_ENVIRONMENT_INITIALIZATION": "skip_environment_initialization", - "SQLITE_URL": "sqlite_url", - "START_AIRFLOW": "start_airflow", - "UPGRADE_BOTO": "upgrade_boto", - "USE_XDIST": "use_xdist", - "DOWNGRADE_SQLALCHEMY": "downgrade_sqlalchemy", - "USE_AIRFLOW_VERSION": "use_airflow_version", - "USE_PACKAGES_FROM_DIST": "use_packages_from_dist", - "VERSION_SUFFIX_FOR_PYPI": "version_suffix_for_pypi", - "CELERY_FLOWER": "celery_flower", - "STANDALONE_DAG_PROCESSOR": "standalone_dag_processor", -} - - -DOCKER_VARIABLE_CONSTANTS = { - "FLOWER_HOST_PORT": FLOWER_HOST_PORT, - "MSSQL_HOST_PORT": MSSQL_HOST_PORT, - "MYSQL_HOST_PORT": MYSQL_HOST_PORT, - "POSTGRES_HOST_PORT": POSTGRES_HOST_PORT, - "REDIS_HOST_PORT": REDIS_HOST_PORT, - "SSH_PORT": SSH_PORT, - "WEBSERVER_HOST_PORT": WEBSERVER_HOST_PORT, - "CELERY_BROKER_URLS": "amqp://guest:guest@rabbitmq:5672,redis://redis:6379/0", -} - - -def get_env_variables_for_docker_commands(params: ShellParams | BuildCiParams) -> dict[str, str]: - """ - Constructs environment variables needed by the docker-compose command, based on Shell parameters - passed to it. - - * It checks if appropriate params are defined for all the needed docker compose environment variables - * It sets the environment values from the parameters passed - * For the constant parameters that we do not have parameters for, we only override the constant values - if the env variable that we run with does not have it. - * Updates all other environment variables that docker-compose expects with default values if missing - - :param params: shell parameters passed. 
- :return: dictionary of env variables to set - """ - env_variables: dict[str, str] = os.environ.copy() - for variable in DERIVE_ENV_VARIABLES_FROM_ATTRIBUTES: - param_name = DERIVE_ENV_VARIABLES_FROM_ATTRIBUTES[variable] - param_value = get_env_variable_value(param_name, params=params) - env_variables[variable] = str(param_value) if param_value is not None else "" - # Set constant defaults if not defined - for variable in DOCKER_VARIABLE_CONSTANTS: - constant_param_value = DOCKER_VARIABLE_CONSTANTS[variable] - if not env_variables.get(variable): - env_variables[variable] = str(constant_param_value) - prepare_broker_url(params, env_variables) - update_expected_environment_variables(env_variables) - return env_variables - - def prepare_broker_url(params, env_variables): """Prepare broker url for celery executor""" urls = env_variables["CELERY_BROKER_URLS"].split(",") @@ -687,8 +503,6 @@ def prepare_broker_url(params, env_variables): def perform_environment_checks(): check_docker_is_running() check_docker_version() - if is_docker_rootless(): - os.environ["DOCKER_IS_ROOTLESS"] = "true" check_docker_compose_version() @@ -743,8 +557,7 @@ def fix_ownership_using_docker(): shell_params = find_available_ci_image( github_repository=APACHE_AIRFLOW_GITHUB_REPOSITORY, ) - extra_docker_flags = get_extra_docker_flags(MOUNT_ALL) - env = get_env_variables_for_docker_commands(shell_params) + extra_docker_flags = get_extra_docker_flags(mount_sources=MOUNT_ALL) cmd = [ "docker", "run", @@ -753,7 +566,7 @@ def fix_ownership_using_docker(): OWNERSHIP_CLEANUP_DOCKER_TAG, "/opt/airflow/scripts/in_container/run_fix_ownership.py", ] - run_command(cmd, text=True, env=env, check=False) + run_command(cmd, text=True, check=False, env=shell_params.env_variables_for_docker_commands) def remove_docker_networks(networks: list[str] | None = None) -> None: diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py b/dev/breeze/src/airflow_breeze/utils/path_utils.py index 3c8ca42fcce13..ced497229eb56 100644 --- a/dev/breeze/src/airflow_breeze/utils/path_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py @@ -290,6 +290,10 @@ def find_airflow_sources_root_to_operate_on() -> Path: DIST_DIR = AIRFLOW_SOURCES_ROOT / "dist" DOCS_DIR = AIRFLOW_SOURCES_ROOT / "docs" SCRIPTS_CI_DIR = AIRFLOW_SOURCES_ROOT / "scripts" / "ci" +SCRIPTS_CI_DOCKER_COMPOSE_DIR = SCRIPTS_CI_DIR / "docker-compose" +GENERATED_DOCKER_COMPOSE_ENV_FILE = SCRIPTS_CI_DOCKER_COMPOSE_DIR / "_generated_docker_compose.env" +GENERATED_DOCKER_ENV_FILE = SCRIPTS_CI_DOCKER_COMPOSE_DIR / "_generated_docker.env" +GENERATED_DOCKER_LOCK_FILE = SCRIPTS_CI_DOCKER_COMPOSE_DIR / "_generated.lock" DOCKER_CONTEXT_DIR = AIRFLOW_SOURCES_ROOT / "docker-context-files" CACHE_TMP_FILE_DIR = tempfile.TemporaryDirectory() OUTPUT_LOG = Path(CACHE_TMP_FILE_DIR.name, "out.log") diff --git a/dev/breeze/src/airflow_breeze/utils/reinstall.py b/dev/breeze/src/airflow_breeze/utils/reinstall.py index 68a0310eaa294..228db55654ce4 100644 --- a/dev/breeze/src/airflow_breeze/utils/reinstall.py +++ b/dev/breeze/src/airflow_breeze/utils/reinstall.py @@ -39,7 +39,7 @@ def reinstall_breeze(breeze_sources: Path, re_run: bool = True): if re_run: # Make sure we don't loop forever if the metadata hash hasn't been updated yet (else it is tricky to # run pre-commit checks via breeze!) 
- os.environ["SKIP_UPGRADE_CHECK"] = "1" + os.environ["SKIP_UPGRADE_CHECK"] = "true" os.execl(sys.executable, sys.executable, *sys.argv) get_console().print(f"\n[info]Breeze has been reinstalled from {breeze_sources}. Exiting now.[/]\n\n") sys.exit(0) diff --git a/dev/breeze/src/airflow_breeze/utils/run_utils.py b/dev/breeze/src/airflow_breeze/utils/run_utils.py index 2553e75ce06d2..8cc1f70809320 100644 --- a/dev/breeze/src/airflow_breeze/utils/run_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/run_utils.py @@ -250,7 +250,10 @@ def get_filesystem_type(filepath: str): :return: type of filesystem """ # We import it locally so that click autocomplete works - import psutil + try: + import psutil + except ImportError: + return "unknown" root_type = "unknown" for part in psutil.disk_partitions(all=True): diff --git a/dev/breeze/tests/test_shell_params.py b/dev/breeze/tests/test_shell_params.py new file mode 100644 index 0000000000000..4a39126d555a6 --- /dev/null +++ b/dev/breeze/tests/test_shell_params.py @@ -0,0 +1,198 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +from unittest.mock import patch + +import pytest +from rich.console import Console + +from airflow_breeze.params.shell_params import ShellParams + +console = Console(width=400, color_system="standard") + + +@pytest.mark.parametrize( + "env_vars, kwargs, expected_vars", + [ + pytest.param( + {}, + {"python": 3.12}, + { + "DEFAULT_BRANCH": "main", + "AIRFLOW_CI_IMAGE": "ghcr.io/apache/airflow/main/ci/python3.12", + "AIRFLOW_CI_IMAGE_WITH_TAG": "ghcr.io/apache/airflow/main/ci/python3.12", + "PYTHON_MAJOR_MINOR_VERSION": "3.12", + }, + id="python3.12", + ), + pytest.param( + {}, + {"python": 3.9}, + { + "AIRFLOW_CI_IMAGE": "ghcr.io/apache/airflow/main/ci/python3.9", + "AIRFLOW_CI_IMAGE_WITH_TAG": "ghcr.io/apache/airflow/main/ci/python3.9", + "PYTHON_MAJOR_MINOR_VERSION": "3.9", + }, + id="python3.9", + ), + pytest.param( + {}, + {"python": 3.9, "image_tag": "a_tag"}, + { + "AIRFLOW_CI_IMAGE": "ghcr.io/apache/airflow/main/ci/python3.9", + "AIRFLOW_CI_IMAGE_WITH_TAG": "ghcr.io/apache/airflow/main/ci/python3.9:a_tag", + "PYTHON_MAJOR_MINOR_VERSION": "3.9", + }, + id="With tag", + ), + pytest.param( + {}, + {"airflow_branch": "v2-7-test"}, + { + "DEFAULT_BRANCH": "v2-7-test", + "AIRFLOW_CI_IMAGE": "ghcr.io/apache/airflow/v2-7-test/ci/python3.8", + "PYTHON_MAJOR_MINOR_VERSION": "3.8", + }, + id="With release branch", + ), + pytest.param( + {"DEFAULT_BRANCH": "v2-8-test"}, + {}, + { + "DEFAULT_BRANCH": "main", # DEFAULT_BRANCH is overridden from sources + "AIRFLOW_CI_IMAGE": "ghcr.io/apache/airflow/main/ci/python3.8", + "PYTHON_MAJOR_MINOR_VERSION": "3.8", + }, + id="Branch variable from sources not from original env", + ), + pytest.param( + {}, + {}, + { + "FLOWER_HOST_PORT": "25555", + }, + id="Default flower port", + ), + pytest.param( + {"FLOWER_HOST_PORT": "1234"}, + {}, + { + "FLOWER_HOST_PORT": "1234", + }, + id="Overridden flower host", + ), + pytest.param( + {}, + {"celery_broker": "redis"}, + { + "AIRFLOW__CELERY__BROKER_URL": "redis://redis:6379/0", + }, + id="Celery executor with redis broker", + ), + pytest.param( + {}, + {"celery_broker": "unknown"}, + { + "AIRFLOW__CELERY__BROKER_URL": "", + }, + id="No URL for celery if bad broker specified", + ), + pytest.param( + {}, + {}, + { + "CI_EVENT_TYPE": "pull_request", + }, + id="Default CI event type", + ), + pytest.param( + {"CI_EVENT_TYPE": "push"}, + {}, + { + "CI_EVENT_TYPE": "push", + }, + id="Override CI event type by variable", + ), + pytest.param( + {}, + {}, + { + "INIT_SCRIPT_FILE": "init.sh", + }, + id="Default init script file", + ), + pytest.param( + {"INIT_SCRIPT_FILE": "my_init.sh"}, + {}, + { + "INIT_SCRIPT_FILE": "my_init.sh", + }, + id="Override init script file by variable", + ), + pytest.param( + {}, + {}, + { + "CI": "false", + }, + id="CI false by default", + ), + pytest.param( + {"CI": "true"}, + {}, + { + "CI": "true", + }, + id="Unless it's overridden by environment variable", + ), + pytest.param( + {}, + {}, + { + "ENABLED_SYSTEMS": "", + }, + id="ENABLED_SYSTEMS empty by default even if they are None in ShellParams", + ), + ], +) +def test_shell_params_to_env_var_conversion( + env_vars: dict[str, str], kwargs: dict[str, str | bool], expected_vars: dict[str, str] +): + with patch("os.environ", env_vars): + shell_params = ShellParams(**kwargs) + env_vars = shell_params.env_variables_for_docker_commands + error = False + for expected_key, expected_value in expected_vars.items(): + if expected_key not in env_vars: + if expected_value is not None: + console.print(f"[red] 
Expected variable {expected_key} missing.[/]\nVariables retrieved:")
+                    console.print(env_vars)
+                    error = True
+            elif expected_value is None:
+                console.print(f"[red] The variable {expected_key} is not expected to be set.[/]\nVariables retrieved:")
+                console.print(env_vars)
+                error = True
+            elif env_vars[expected_key] != expected_value:
+                console.print(
+                    f"[red] The variable {expected_key} has value '{env_vars[expected_key]}' but '{expected_value}' was expected.[/]\n"
+                    f"Variables retrieved:"
+                )
+                console.print(env_vars)
+                error = True
+        assert not error, "Some values are not as expected."
diff --git a/scripts/ci/docker-compose/.gitignore b/scripts/ci/docker-compose/.gitignore
new file mode 100644
index 0000000000000..2f218456b281b
--- /dev/null
+++ b/scripts/ci/docker-compose/.gitignore
@@ -0,0 +1 @@
+_generated*
diff --git a/scripts/ci/docker-compose/_docker.env b/scripts/ci/docker-compose/_docker.env
deleted file mode 100644
index ae3c567bfdc5b..0000000000000
--- a/scripts/ci/docker-compose/_docker.env
+++ /dev/null
@@ -1,85 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-_AIRFLOW_RUN_DB_TESTS_ONLY -_AIRFLOW_SKIP_DB_TESTS -AIRFLOW_CI_IMAGE -AIRFLOW_EXTRAS -AIRFLOW_ENABLE_AIP_44 -AIRFLOW_ENV -AIRFLOW_CONSTRAINTS_REFERENCE -ANSWER -BACKEND -BASE_BRANCH -BREEZE -BREEZE_INIT_COMMAND -CI -CI_BUILD_ID -CI_JOB_ID -CI_EVENT_TYPE -CI_TARGET_REPO -CI_TARGET_BRANCH -COLLECT_ONLY -COMMIT_SHA -# Needed for docker-compose 1 compatibility -COMPOSE_PROJECT_NAME -DATABASE_ISOLATION -DB_RESET -DEFAULT_BRANCH -DEFAULT_CONSTRAINTS_BRANCH -DEV_MODE -DOCKER_IS_ROOTLESS -ENABLED_SYSTEMS -GITHUB_ACTIONS -HELM_TEST_PACKAGE -HOST_USER_ID -HOST_GROUP_ID -HOST_OS -INIT_SCRIPT_FILE -INSTALL_AIRFLOW_VERSION -AIRFLOW_CONSTRAINTS_MODE -INSTALL_PROVIDERS_FROM_SOURCES -INSTALL_SELECTED_PROVIDERS -USE_AIRFLOW_VERSION -USE_PACKAGES_FROM_DIST -ISSUE_ID -LOAD_DEFAULT_CONNECTIONS -LOAD_EXAMPLES -MYSQL_VERSION -NUM_RUNS -ONLY_MIN_VERSION_UPDATE -PACKAGE_FORMAT -POSTGRES_VERSION -PYTHONDONTWRITEBYTECODE -PYTHON_MAJOR_MINOR_VERSION -REGENERATE_MISSING_DOCS -REMOVE_ARM_PACKAGES -RUN_TESTS -RUN_SYSTEM_TESTS -SKIP_CONSTRAINTS -SKIP_ENVIRONMENT_INITIALIZATION -SKIP_SSH_SETUP -STANDALONE_DAG_PROCESSOR -START_AIRFLOW -SUSPENDED_PROVIDERS_FOLDERS -TEST_TYPE -UPGRADE_BOTO -UPGRADE_TO_NEWER_DEPENDENCIES -USE_XDIST -DOWNGRADE_SQLALCHEMY -VERBOSE -VERBOSE_COMMANDS -VERSION_SUFFIX_FOR_PYPI diff --git a/scripts/ci/docker-compose/base.yml b/scripts/ci/docker-compose/base.yml index 76e41adfcf039..dbed87be120df 100644 --- a/scripts/ci/docker-compose/base.yml +++ b/scripts/ci/docker-compose/base.yml @@ -23,78 +23,10 @@ services: - USER=root - ADDITIONAL_PATH=~/.local/bin - KUBECONFIG=/files/.kube/config - # We need all those env variables here because docker-compose-v2 does not really work well - # With env files and there are many problems with it: - - _AIRFLOW_RUN_DB_TESTS_ONLY=${_AIRFLOW_RUN_DB_TESTS_ONLY} - - _AIRFLOW_SKIP_DB_TESTS=${_AIRFLOW_SKIP_DB_TESTS} - - AIRFLOW_CI_IMAGE=${AIRFLOW_CI_IMAGE} - - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS} - - AIRFLOW_ENABLE_AIP_44=${AIRFLOW_ENABLE_AIP_44} - - AIRFLOW_ENV=${AIRFLOW_ENV} - - AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} - - AIRFLOW__CORE__EXECUTOR=${AIRFLOW__CORE__EXECUTOR} - - ANSWER=${ANSWER} - - BACKEND=${BACKEND} - - BASE_BRANCH=${BASE_BRANCH} - - BREEZE=${BREEZE} - - BREEZE_INIT_COMMAND=${BREEZE_INIT_COMMAND} - - CI=${CI} - - CI_BUILD_ID=${CI_BUILD_ID} - - CI_JOB_ID=${CI_JOB_ID} - - CI_EVENT_TYPE=${CI_EVENT_TYPE} - - CI_TARGET_REPO=${CI_TARGET_REPO} - - CI_TARGET_BRANCH=${CI_TARGET_BRANCH} - - COMMIT_SHA=${COMMIT_SHA} - - COLLECT_ONLY=${COLLECT_ONLY} - - DATABASE_ISOLATION=${DATABASE_ISOLATION} - - DB_RESET=${DB_RESET} - - DEFAULT_BRANCH=${DEFAULT_BRANCH} - - DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} - - DEV_MODE=${DEV_MODE} - - DOCKER_IS_ROOTLESS=${DOCKER_IS_ROOTLESS} - - ENABLED_SYSTEMS=${ENABLED_SYSTEMS} - - GITHUB_ACTIONS=${GITHUB_ACTIONS} - - HELM_TEST_PACKAGE=${HELM_TEST_PACKAGE} - - HOST_USER_ID=${HOST_USER_ID} - - HOST_GROUP_ID=${HOST_GROUP_ID} - - HOST_OS=${HOST_OS} - - INIT_SCRIPT_FILE=${INIT_SCRIPT_FILE} - - INSTALL_AIRFLOW_VERSION=${INSTALL_AIRFLOW_VERSION} - - AIRFLOW_CONSTRAINTS_MODE=${AIRFLOW_CONSTRAINTS_MODE} - - INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} - - INSTALL_SELECTED_PROVIDERS=${INSTALL_SELECTED_PROVIDERS} - - USE_AIRFLOW_VERSION=${USE_AIRFLOW_VERSION} - - USE_PACKAGES_FROM_DIST=${USE_PACKAGES_FROM_DIST} - - ISSUE_ID=${ISSUE_ID} - - LOAD_DEFAULT_CONNECTIONS=${LOAD_DEFAULT_CONNECTIONS} - - LOAD_EXAMPLES=${LOAD_EXAMPLES} - - MYSQL_VERSION=${MYSQL_VERSION} - - NUM_RUNS=${NUM_RUNS} - - 
ONLY_MIN_VERSION_UPDATE=${ONLY_MIN_VERSION_UPDATE} - - PACKAGE_FORMAT=${PACKAGE_FORMAT} - - POSTGRES_VERSION=${POSTGRES_VERSION} - - PYTHONDONTWRITEBYTECODE=${PYTHONDONTWRITEBYTECODE} - - PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION} - - REGENERATE_MISSING_DOCS=${REGENERATE_MISSING_DOCS} - - REMOVE_ARM_PACKAGES=${REMOVE_ARM_PACKAGES} - - RUN_TESTS=${RUN_TESTS} - - RUN_SYSTEM_TESTS=${RUN_SYSTEM_TESTS} - - SKIP_CONSTRAINTS=${SKIP_CONSTRAINTS} - - SKIP_ENVIRONMENT_INITIALIZATION=${SKIP_ENVIRONMENT_INITIALIZATION} - - SKIP_SSH_SETUP=${SKIP_SSH_SETUP} - - STANDALONE_DAG_PROCESSOR=${STANDALONE_DAG_PROCESSOR} - - START_AIRFLOW=${START_AIRFLOW} - - SUSPENDED_PROVIDERS_FOLDERS=${SUSPENDED_PROVIDERS_FOLDERS} - - TEST_TYPE=${TEST_TYPE} - - UPGRADE_BOTO=${UPGRADE_BOTO} - - USE_XDIST=${USE_XDIST} - - DOWNGRADE_SQLALCHEMY=${DOWNGRADE_SQLALCHEMY} - - UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} - - VERBOSE=${VERBOSE} - - VERBOSE_COMMANDS=${VERBOSE_COMMANDS} - - VERSION_SUFFIX_FOR_PYPI=${VERSION_SUFFIX_FOR_PYPI} + env_file: + - _generated_docker_compose.env volumes: - # Pass docker to inside of the container so that Kind and Moto tests can use it. + # Pass docker to inside the container so that Kind and Moto tests can use it. # NOTE! Even if we are using "desktop-linux" context where "/var/run/docker.sock" is not used, # Docker engine works fine because "/var/run/docker.sock" is mounted at the VM and there # the /var/run/docker.sock is available. See https://github.com/docker/for-mac/issues/6545 diff --git a/scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py b/scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py index 400b958065ff7..c5126237d31d3 100755 --- a/scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py +++ b/scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py @@ -34,11 +34,14 @@ if __name__ == "__main__": sys.path.insert(0, str(AIRFLOW_SOURCES / "dev" / "breeze" / "src")) - from airflow_breeze.global_constants import MOUNT_SELECTED + from airflow_breeze.global_constants import DEFAULT_PYTHON_MAJOR_MINOR_VERSION, MOUNT_SELECTED + from airflow_breeze.params.shell_params import ShellParams from airflow_breeze.utils.console import get_console from airflow_breeze.utils.docker_command_utils import get_extra_docker_flags from airflow_breeze.utils.run_utils import get_ci_image_for_pre_commits, run_command + shell_params = ShellParams(python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION) + cmd = "python3 /opt/airflow/scripts/in_container/run_provider_yaml_files_check.py" if len(sys.argv) > 1: cmd = cmd + " " + " ".join([shlex.quote(f) for f in sys.argv[1:]]) @@ -48,7 +51,7 @@ "docker", "run", "-t", - *get_extra_docker_flags(MOUNT_SELECTED), + *get_extra_docker_flags(mount_sources=MOUNT_SELECTED), "-e", "SKIP_ENVIRONMENT_INITIALIZATION=true", "-e", @@ -60,6 +63,7 @@ cmd, ], check=False, + env=shell_params.env_variables_for_docker_commands, ) if cmd_result.returncode != 0: get_console().print( diff --git a/scripts/ci/pre_commit/pre_commit_migration_reference.py b/scripts/ci/pre_commit/pre_commit_migration_reference.py index 2f508aaf3a252..9dec2e33d94e5 100755 --- a/scripts/ci/pre_commit/pre_commit_migration_reference.py +++ b/scripts/ci/pre_commit/pre_commit_migration_reference.py @@ -33,18 +33,20 @@ if __name__ == "__main__": sys.path.insert(0, str(AIRFLOW_SOURCES / "dev" / "breeze" / "src")) - from airflow_breeze.global_constants import MOUNT_SELECTED + from airflow_breeze.global_constants import DEFAULT_PYTHON_MAJOR_MINOR_VERSION, MOUNT_SELECTED + 
from airflow_breeze.params.shell_params import ShellParams from airflow_breeze.utils.console import get_console from airflow_breeze.utils.docker_command_utils import get_extra_docker_flags from airflow_breeze.utils.run_utils import get_ci_image_for_pre_commits, run_command + shell_params = ShellParams(python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, db_reset=True, backend="none") airflow_image = get_ci_image_for_pre_commits() cmd_result = run_command( [ "docker", "run", "-t", - *get_extra_docker_flags(MOUNT_SELECTED), + *get_extra_docker_flags(mount_sources=MOUNT_SELECTED), "-e", "SKIP_ENVIRONMENT_INITIALIZATION=true", "--pull", @@ -54,6 +56,7 @@ "python3 /opt/airflow/scripts/in_container/run_migration_reference.py", ], check=False, + env=shell_params.env_variables_for_docker_commands, ) if cmd_result.returncode != 0: get_console().print( diff --git a/scripts/ci/pre_commit/pre_commit_mypy.py b/scripts/ci/pre_commit/pre_commit_mypy.py index d478a3c5eea02..35f8a687e80ac 100755 --- a/scripts/ci/pre_commit/pre_commit_mypy.py +++ b/scripts/ci/pre_commit/pre_commit_mypy.py @@ -39,17 +39,17 @@ from common_precommit_utils import filter_out_providers_on_non_main_branch sys.path.insert(0, str(AIRFLOW_SOURCES / "dev" / "breeze" / "src")) - from airflow_breeze.global_constants import MOUNT_SELECTED # isort: skip + from airflow_breeze.global_constants import DEFAULT_PYTHON_MAJOR_MINOR_VERSION, MOUNT_SELECTED + from airflow_breeze.params.shell_params import ShellParams from airflow_breeze.utils.console import get_console # isort: skip from airflow_breeze.utils.docker_command_utils import get_extra_docker_flags # isort: skip from airflow_breeze.utils.path_utils import create_mypy_volume_if_needed # isort: skip - from airflow_breeze.utils.packages import get_suspended_provider_folders from airflow_breeze.utils.run_utils import ( get_ci_image_for_pre_commits, run_command, ) - suspended_providers_folders = get_suspended_provider_folders() + shell_params = ShellParams(python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, backend="none") files_to_test = filter_out_providers_on_non_main_branch(sys.argv[1:]) if files_to_test == ["--namespace-packages"]: @@ -62,13 +62,9 @@ "docker", "run", "-t", - *get_extra_docker_flags(MOUNT_SELECTED, include_mypy_volume=True), + *get_extra_docker_flags(mount_sources=MOUNT_SELECTED, include_mypy_volume=True), "-e", "SKIP_ENVIRONMENT_INITIALIZATION=true", - "-e", - f"SUSPENDED_PROVIDERS_FOLDERS={' '.join(suspended_providers_folders)}", - "-e", - "BACKEND=sqlite", "--pull", "never", airflow_image, @@ -76,6 +72,7 @@ *files_to_test, ], check=False, + env=shell_params.env_variables_for_docker_commands, ) if cmd_result.returncode != 0: upgrading = os.environ.get("UPGRADE_TO_NEWER_DEPENDENCIES", "false") != "false" diff --git a/scripts/ci/pre_commit/pre_commit_update_er_diagram.py b/scripts/ci/pre_commit/pre_commit_update_er_diagram.py index d333f008e057c..89f3111bf92e3 100755 --- a/scripts/ci/pre_commit/pre_commit_update_er_diagram.py +++ b/scripts/ci/pre_commit/pre_commit_update_er_diagram.py @@ -27,31 +27,29 @@ f"To run this script, run the ./{__file__} command" ) -AIRFLOW_SOURCES = Path(__file__).parents[3].resolve() -GITHUB_REPOSITORY = os.environ.get("GITHUB_REPOSITORY", "apache/airflow") -os.environ["SKIP_GROUP_OUTPUT"] = "true" if __name__ == "__main__": + AIRFLOW_SOURCES = Path(__file__).parents[3].resolve() sys.path.insert(0, str(AIRFLOW_SOURCES / "dev" / "breeze" / "src")) - from airflow_breeze.global_constants import MOUNT_SELECTED + GITHUB_REPOSITORY = os.environ.get("GITHUB_REPOSITORY", 
"apache/airflow") + os.environ["SKIP_GROUP_OUTPUT"] = "true" + os.environ["SKIP_UPGRADE_CHECK"] = "true" + from airflow_breeze.global_constants import DEFAULT_PYTHON_MAJOR_MINOR_VERSION, MOUNT_SELECTED + from airflow_breeze.params.shell_params import ShellParams from airflow_breeze.utils.console import get_console from airflow_breeze.utils.docker_command_utils import ( get_extra_docker_flags, - update_expected_environment_variables, ) from airflow_breeze.utils.run_utils import get_ci_image_for_pre_commits, run_command - env = os.environ.copy() - env["DB_RESET"] = "true" - env["AIRFLOW__DATABASE__SQL_ALCHEMY_CONN"] = "sqlite:////root/airflow/airflow.db" - update_expected_environment_variables(env) + shell_params = ShellParams(python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, db_reset=True, backend="none") airflow_image = get_ci_image_for_pre_commits() cmd_result = run_command( [ "docker", "run", "-t", - *get_extra_docker_flags(MOUNT_SELECTED), + *get_extra_docker_flags(mount_sources=MOUNT_SELECTED), "-e", "AIRFLOW__DATABASE__SQL_ALCHEMY_CONN", "--pull", @@ -60,8 +58,8 @@ "-c", "python3 /opt/airflow/scripts/in_container/run_prepare_er_diagram.py", ], - env=env, check=False, + env=shell_params.env_variables_for_docker_commands, ) if cmd_result.returncode != 0: get_console().print( diff --git a/tests/cli/commands/test_celery_command.py b/tests/cli/commands/test_celery_command.py index 9527adf9be680..cbd87fea588d3 100644 --- a/tests/cli/commands/test_celery_command.py +++ b/tests/cli/commands/test_celery_command.py @@ -258,7 +258,7 @@ def test_run_command(self, mock_celery_app): mock_celery_app.start.assert_called_once_with( [ "flower", - "amqp://guest:guest@rabbitmq:5672/", + conf.get("celery", "BROKER_URL"), "--address=my-hostname", "--port=3333", "--broker-api=http://username:password@rabbitmq-server-name:15672/api/", @@ -313,7 +313,7 @@ def test_run_command_daemon(self, mock_celery_app, mock_daemon, mock_setup_locat mock_celery_app.start.assert_called_once_with( [ "flower", - "amqp://guest:guest@rabbitmq:5672/", + conf.get("celery", "BROKER_URL"), "--address=my-hostname", "--port=3333", "--broker-api=http://username:password@rabbitmq-server-name:15672/api/",