diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py index 124d6ca31809b..2eecbfff3ca81 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py @@ -455,7 +455,10 @@ def should_we_run_the_build(build_ci_params: BuildCiParams) -> bool: # We import those locally so that click autocomplete works from inputimeout import TimeoutOccurred - if not md5sum_check_if_build_is_needed(md5sum_cache_dir=build_ci_params.md5sum_cache_dir): + if not md5sum_check_if_build_is_needed( + md5sum_cache_dir=build_ci_params.md5sum_cache_dir, + skip_provider_dependencies_check=build_ci_params.skip_provider_dependencies_check, + ): return False try: answer = user_confirm( @@ -631,6 +634,7 @@ def rebuild_or_pull_ci_image_if_needed(command_params: ShellParams | BuildCiPara image_tag=command_params.image_tag, platform=command_params.platform, force_build=command_params.force_build, + skip_provider_dependencies_check=command_params.skip_provider_dependencies_check, ) if command_params.image_tag is not None and command_params.image_tag != "latest": return_code, message = run_pull_image( diff --git a/dev/breeze/src/airflow_breeze/commands/developer_commands.py b/dev/breeze/src/airflow_breeze/commands/developer_commands.py index 40fc0954780e9..671ced7d5a649 100644 --- a/dev/breeze/src/airflow_breeze/commands/developer_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/developer_commands.py @@ -515,6 +515,9 @@ def static_checks( force_build=force_build, image_tag=image_tag, github_repository=github_repository, + # for static checks we do not want to regenerate dependencies before pre-commits are run + # we want the pre-commit to do it for us (and detect the case the dependencies are updated) + skip_provider_dependencies_check=True, ) if not skip_image_check: rebuild_or_pull_ci_image_if_needed(command_params=build_params) diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index c0740801d59c0..6e07343aecc77 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -323,13 +323,13 @@ def get_airflow_extras(): "setup.cfg", "Dockerfile.ci", ".dockerignore", + "generated/provider_dependencies.json", "scripts/docker/common.sh", "scripts/docker/install_additional_dependencies.sh", "scripts/docker/install_airflow.sh", "scripts/docker/install_airflow_dependencies_from_branch_tip.sh", "scripts/docker/install_from_docker_context_files.sh", "scripts/docker/install_mysql.sh", - *ALL_PROVIDER_YAML_FILES, ] ENABLED_SYSTEMS = "" diff --git a/dev/breeze/src/airflow_breeze/params/build_ci_params.py b/dev/breeze/src/airflow_breeze/params/build_ci_params.py index 1f0663ec9f95c..1f49f0597b53c 100644 --- a/dev/breeze/src/airflow_breeze/params/build_ci_params.py +++ b/dev/breeze/src/airflow_breeze/params/build_ci_params.py @@ -37,6 +37,7 @@ class BuildCiParams(CommonBuildParams): airflow_pre_cached_pip_packages: bool = True force_build: bool = False eager_upgrade_additional_requirements: str = "" + skip_provider_dependencies_check: bool = False @property def airflow_version(self): diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index a70a404d0ed64..99bc8c4d3cfd5 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -125,6 +125,7 @@ class ShellParams: celery_flower: bool = False only_min_version_update: bool = False regenerate_missing_docs: bool = False + skip_provider_dependencies_check: bool = False def clone_with_test(self, test_type: str) -> ShellParams: new_params = deepcopy(self) diff --git a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py index 235fbf22495b4..54b46c99164c0 100644 --- a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py +++ b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py @@ -20,11 +20,14 @@ from __future__ import annotations import hashlib +import os +import sys from pathlib import Path -from airflow_breeze.global_constants import FILES_FOR_REBUILD_CHECK +from airflow_breeze.global_constants import ALL_PROVIDER_YAML_FILES, FILES_FOR_REBUILD_CHECK from airflow_breeze.utils.console import get_console from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT +from airflow_breeze.utils.run_utils import run_command def check_md5checksum_in_cache_modified(file_hash: str, cache_path: Path, update: bool) -> bool: @@ -59,8 +62,19 @@ def generate_md5(filename, file_size: int = 65536): return hash_md5.hexdigest() +def check_md5_sum_for_file(file_to_check: str, md5sum_cache_dir: Path, update: bool): + file_to_get_md5 = AIRFLOW_SOURCES_ROOT / file_to_check + md5_checksum = generate_md5(file_to_get_md5) + sub_dir_name = file_to_get_md5.parts[-2] + actual_file_name = file_to_get_md5.parts[-1] + cache_file_name = Path(md5sum_cache_dir, sub_dir_name + "-" + actual_file_name + ".md5sum") + file_content = md5_checksum + " " + str(file_to_get_md5) + "\n" + is_modified = check_md5checksum_in_cache_modified(file_content, cache_file_name, update=update) + return is_modified + + def calculate_md5_checksum_for_files( - md5sum_cache_dir: Path, update: bool = False + md5sum_cache_dir: Path, update: bool = False, skip_provider_dependencies_check: bool = False ) -> tuple[list[str], list[str]]: """ Calculates checksums for all interesting files and stores the hashes in the md5sum_cache_dir. @@ -68,35 +82,63 @@ def calculate_md5_checksum_for_files( :param md5sum_cache_dir: directory where to store cached information :param update: whether to update the hashes + :param skip_provider_dependencies_check: whether to skip regeneration of the provider dependencies :return: Tuple of two lists: modified and not-modified files """ not_modified_files = [] modified_files = [] - for calculate_md5_file in FILES_FOR_REBUILD_CHECK: - file_to_get_md5 = AIRFLOW_SOURCES_ROOT / calculate_md5_file - md5_checksum = generate_md5(file_to_get_md5) - sub_dir_name = file_to_get_md5.parts[-2] - actual_file_name = file_to_get_md5.parts[-1] - cache_file_name = Path(md5sum_cache_dir, sub_dir_name + "-" + actual_file_name + ".md5sum") - file_content = md5_checksum + " " + str(file_to_get_md5) + "\n" - is_modified = check_md5checksum_in_cache_modified(file_content, cache_file_name, update=update) + if not skip_provider_dependencies_check: + modified_provider_yaml_files = [] + for file in ALL_PROVIDER_YAML_FILES: + # Only check provider yaml files once and save the result immediately. + # If we need to regenerate the dependencies and they are not modified then + # all is fine and we can save checksums for the new files + if check_md5_sum_for_file(file, md5sum_cache_dir, True): + modified_provider_yaml_files.append(file) + if modified_provider_yaml_files: + get_console().print( + "[info]Attempting to generate provider dependencies. " + "Provider yaml files changed since last check:[/]" + ) + get_console().print( + [os.fspath(file.relative_to(AIRFLOW_SOURCES_ROOT)) for file in modified_provider_yaml_files] + ) + # Regenerate provider_dependencies.json + run_command( + [ + sys.executable, + os.fspath( + AIRFLOW_SOURCES_ROOT + / "scripts" + / "ci" + / "pre_commit" + / "pre_commit_update_providers_dependencies.py" + ), + ], + cwd=AIRFLOW_SOURCES_ROOT, + ) + for file in FILES_FOR_REBUILD_CHECK: + is_modified = check_md5_sum_for_file(file, md5sum_cache_dir, update) if is_modified: - modified_files.append(calculate_md5_file) + modified_files.append(file) else: - not_modified_files.append(calculate_md5_file) + not_modified_files.append(file) return modified_files, not_modified_files -def md5sum_check_if_build_is_needed(md5sum_cache_dir: Path) -> bool: +def md5sum_check_if_build_is_needed(md5sum_cache_dir: Path, skip_provider_dependencies_check: bool) -> bool: """ Checks if build is needed based on whether important files were modified. :param md5sum_cache_dir: directory where cached md5 sums are stored + :param skip_provider_dependencies_check: whether to skip regeneration of the provider dependencies :return: True if build is needed. """ build_needed = False - modified_files, not_modified_files = calculate_md5_checksum_for_files(md5sum_cache_dir, update=False) + modified_files, not_modified_files = calculate_md5_checksum_for_files( + md5sum_cache_dir, update=False, skip_provider_dependencies_check=skip_provider_dependencies_check + ) if modified_files: get_console().print( f"[warning]The following important files are modified in {AIRFLOW_SOURCES_ROOT} " diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py index e2fc0ca713362..b63a60b14b224 100755 --- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py +++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py @@ -209,12 +209,28 @@ def check_if_different_provider_used(file_path: Path) -> None: console.print("[red]Errors found during verification. Exiting!") console.print() sys.exit(1) - DEPENDENCIES_JSON_FILE_PATH.write_text(json.dumps(unique_sorted_dependencies, indent=2) + "\n") - console.print( - f"[yellow]If you see changes to the {DEPENDENCIES_JSON_FILE_PATH} file - " - f"do not modify the file manually. Let pre-commit do the job!" - ) - console.print() - console.print("[green]Verification complete! Success!\n") - console.print(f"Written {DEPENDENCIES_JSON_FILE_PATH}") - console.print() + old_dependencies = DEPENDENCIES_JSON_FILE_PATH.read_text() + new_dependencies = json.dumps(unique_sorted_dependencies, indent=2) + "\n" + if new_dependencies != old_dependencies: + DEPENDENCIES_JSON_FILE_PATH.write_text(json.dumps(unique_sorted_dependencies, indent=2) + "\n") + if os.environ.get("CI"): + console.print() + console.print(f"[info]Written {DEPENDENCIES_JSON_FILE_PATH}") + console.print( + f"[yellow]You will need to run breeze locally and commit " + f"{DEPENDENCIES_JSON_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)}!\n" + ) + console.print() + else: + console.print() + console.print( + f"[yellow]Regenerated new dependencies. Please commit " + f"{DEPENDENCIES_JSON_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)}!\n" + ) + console.print(f"[info]Written {DEPENDENCIES_JSON_FILE_PATH}") + console.print() + else: + console.print( + "[green]No need to regenerate dependencies!\n[/]" + f"The {DEPENDENCIES_JSON_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)} is up to date!\n" + )