From 463737910a92505a2bf6ad7390daade3e6f7b345 Mon Sep 17 00:00:00 2001
From: Jarek Potiuk
Date: Sun, 12 Nov 2023 20:40:44 +0100
Subject: [PATCH] Move prepare-provider-packages to be run entirely in Breeze Python

This is a follow-up after #35586 and depends on it. It moves the whole
functionality of preparing provider packages to Breeze, removing the need
to run it in the Breeze CI image. Since Python Breeze has its own
environment managed via `pipx`, we can make sure that all the necessary
packages are installed in that environment and run package building in the
same environment Breeze uses.

Previously we ran all package building inside the CI image for two reasons:

* we could rely on the same version of the build tools (wheel/setuptools)
  being installed in the CI image
* security of the provider package preparation: it used the pre-PEP-517
  setuptools way of building packages, which executed setup.py code, and we
  needed to isolate execution of potentially arbitrary setup.py code from
  the HOST environment in CI, where the host might have access to secrets
  and tokens that would allow breaking out of the sandbox for PRs coming
  from forks. The setup.py file was prepared by Breeze using Jinja
  templates, but it was potentially possible to manipulate the provider
  package directory structure and get Python injected into the generated
  setup.py, so it was safer to run the build in the isolated Breeze CI
  environment.

This PR makes it secure to run the build in the host environment: instead
of generating setup.cfg and setup.py we generate pyproject.toml with all
the necessary information and build provider packages in a PEP-517
compliant way, so no arbitrary code can be executed via setup.py on the
host. Because the generated pyproject.toml is declarative rather than
imperative like setup.py, we can safely run the build process on the host
without fear of executing arbitrary code.

We use flit as the build tool - one of the popular build tools, created by
the Python Packaging team. It is simple and not too opinionated, and it
supports PEP-517 as well as PEP-621, so most of the project metadata in
pyproject.toml can be placed in the PEP-621 compliant "project" section
(see the illustrative sketch at the end of this message).

Together with this change we improve the process of generating the
extracted sources for the providers. Originally we copied the whole Airflow
sources to a single directory (provider_packages) and built provider
packages sequentially from that single directory, which made it impossible
to parallelise the builds. We change the approach now: instead of copying
all Airflow sources once to a single directory, we build providers in
separate subdirectories of files/provider_packages/PROVIDER_ID and only
copy the relevant sources there (i.e. only the provider's subfolder from
"airflow/providers"). This is quite a bit faster (each provider is built
from its own sources only, so just scanning the directory is faster) and it
also allows running package preparation in parallel, because each provider
is fully isolated from the others.

This PR also removes the no-longer-needed `prepare_provider_packages.py`
and the `provider_packages` folder previously used to prepare providers, as
well as the bash script that built the providers and some unused bash
functions.
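As an illustration of the new flow (a sketch only - values in angle
brackets are placeholders; the real content comes from the provider
metadata and the pyproject_TEMPLATE.toml.jinja2 template added in this
PR), the generated pyproject.toml and the build step look roughly like:

    # generated pyproject.toml (simplified)
    [build-system]
    requires = ["flit_core >=3.2,<4"]
    build-backend = "flit_core.buildapi"

    [project]
    name = "apache-airflow-providers-<PROVIDER>"
    version = "<RELEASE><VERSION_SUFFIX>"

    [tool.flit.module]
    name = "airflow.providers.<PROVIDER_ID>"

    # build command run by Breeze in the per-provider build directory
    # (--format sdist/wheel is added when only one package format is requested)
    python -m flit build --no-setup-py --no-use-vcs

Since flit_core only reads this declarative metadata, the build does not
execute any project-provided Python code, which is what makes it safe to
run on the host.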
--- .rat-excludes | 3 + NOTICE | 2 +- airflow/www/webpack.config.js | 2 +- chart/NOTICE | 2 +- dev/breeze/README.md | 2 +- dev/breeze/pyproject.toml | 13 +- .../commands/release_management_commands.py | 226 ++- .../release_management_commands_config.py | 4 +- .../provider_documentation.py | 217 +-- .../prepare_providers/provider_packages.py | 245 ++++ .../templates/CHANGELOG_TEMPLATE.rst.jinja2 | 6 - .../PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 | 8 +- .../PROVIDER_COMMITS_TEMPLATE.rst.jinja2 | 14 +- .../PROVIDER_INDEX_TEMPLATE.rst.jinja2 | 3 - .../PROVIDER_README_TEMPLATE.rst.jinja2 | 12 +- .../PROVIDER__INIT__PY_TEMPLATE.py.jinja2 | 3 +- .../UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 | 6 - .../get_provider_info_TEMPLATE.py.jinja2 | 23 +- .../templates/pyproject_TEMPLATE.toml.jinja2 | 109 ++ .../src/airflow_breeze/utils/packages.py | 243 +++- .../src/airflow_breeze/utils/versions.py | 6 + dev/breeze/tests/test_packages.py | 193 ++- .../tests/test_provider_documentation.py | 102 +- .../MANIFEST_TEMPLATE.in.jinja2 | 36 - .../PROVIDER_README_TEMPLATE.rst.jinja2 | 109 -- .../SETUP_TEMPLATE.cfg.jinja2 | 84 -- .../SETUP_TEMPLATE.py.jinja2 | 47 - .../prepare_provider_packages.py | 1277 ----------------- ...e-management_prepare-provider-packages.svg | 46 +- ...e-management_prepare-provider-packages.txt | 2 +- provider_packages/.gitignore | 7 - provider_packages/INSTALL | 15 - provider_packages/LICENSE | 201 --- provider_packages/NOTICE | 6 - provider_packages/dist | 1 - provider_packages/pyproject.toml | 1 - scripts/in_container/_in_container_utils.sh | 96 -- .../run_prepare_provider_packages.sh | 149 -- 38 files changed, 1024 insertions(+), 2497 deletions(-) create mode 100644 dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py rename dev/{provider_packages => breeze/src/airflow_breeze/templates}/get_provider_info_TEMPLATE.py.jinja2 (50%) create mode 100644 dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 delete mode 100644 dev/provider_packages/MANIFEST_TEMPLATE.in.jinja2 delete mode 100644 dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 delete mode 100644 dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2 delete mode 100644 dev/provider_packages/SETUP_TEMPLATE.py.jinja2 delete mode 100755 dev/provider_packages/prepare_provider_packages.py delete mode 100644 provider_packages/.gitignore delete mode 100644 provider_packages/INSTALL delete mode 100644 provider_packages/LICENSE delete mode 100644 provider_packages/NOTICE delete mode 120000 provider_packages/dist delete mode 120000 provider_packages/pyproject.toml delete mode 100755 scripts/in_container/run_prepare_provider_packages.sh diff --git a/.rat-excludes b/.rat-excludes index b37f97594cc84..751742b1afc59 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -142,3 +142,6 @@ doap_airflow.rdf # nvm (Node Version Manager) .nvmrc + +# PKG-INFO file +PKG-INFO diff --git a/NOTICE b/NOTICE index 84c77cd4fc12c..33371e44a76a4 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Apache Airflow -Copyright 2016-2021 The Apache Software Foundation +Copyright 2016-2023 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
diff --git a/airflow/www/webpack.config.js b/airflow/www/webpack.config.js index fc5c6a6497679..6ac1f3a208890 100644 --- a/airflow/www/webpack.config.js +++ b/airflow/www/webpack.config.js @@ -40,7 +40,7 @@ const BUILD_DIR = path.resolve(__dirname, "./static/dist"); // Convert licenses json into a standard format for LICENSES.txt const formatLicenses = (packages) => { let text = `Apache Airflow -Copyright 2016-2021 The Apache Software Foundation +Copyright 2016-2023 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/chart/NOTICE b/chart/NOTICE index 3f68897ba6559..ff6e647c9ebed 100644 --- a/chart/NOTICE +++ b/chart/NOTICE @@ -1,5 +1,5 @@ Apache Airflow -Copyright 2016-2021 The Apache Software Foundation +Copyright 2016-2023 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/dev/breeze/README.md b/dev/breeze/README.md index 06ddef4a32e9e..932e490678407 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. --------------------------------------------------------------------------------------------------------- -Package config hash: 51d9c2ec8af90c2941d58cf28397e9972d31718bc5d74538eb0614ed9418310e7b1d14bb3ee11f4df6e8403390869838217dc641cdb1416a223b7cf69adf1b20 +Package config hash: 772a3746780de67ba5c0198928b5f6d7461eb6ae3b9537665dab9c504515290d975bb52ef6b3bacc04148702402ab9b02c6e529910f2f727771df56ef56c1554 --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/pyproject.toml b/dev/breeze/pyproject.toml index 47d7c04dc1734..c160576653925 100644 --- a/dev/breeze/pyproject.toml +++ b/dev/breeze/pyproject.toml @@ -48,26 +48,25 @@ dependencies = [ "black>=23.11.0", "click>=8.1.7", "filelock>=3.13.0", + "flit>=3.5.0", + "gitpython>=3.1.40", "inputimeout>=1.0.4", "jinja2>=3.1.0", + "jsonschema>=4.19.1", "packaging>=23.2", "pendulum>=2.1.2,<3", "pre-commit>=3.5.0", "psutil>=5.9.6", - "pytest>=7.4.0", + "pygithub>=2.1.1", "pytest-xdist>=3.3.1", + "pytest>=7.4.0", "pyyaml>=6.0.1", - "PyGithub>=2.1.1", "requests>=2.30.0", - "rich>=13.6.0", "rich-click>=1.7.1", - "gitpython>=3.1.40", + "rich>=13.6.0", "semver>=3.0.2", "tabulate>=0.9.0", "twine>=4.0.2", - "wheel>=0.41.3", - "setuptools>=68.2.2", - "jsonschema>=4.19.1", ] [project.scripts] diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index d9206496c7014..cdc40645ab01f 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -27,7 +27,7 @@ from datetime import datetime from pathlib import Path from subprocess import DEVNULL -from typing import IO, Generator, NamedTuple +from typing import IO, Any, Generator, NamedTuple import click from rich.progress import Progress @@ -45,6 +45,18 @@ MULTI_PLATFORM, ) from airflow_breeze.params.shell_params import ShellParams +from airflow_breeze.prepare_providers.provider_packages import ( + PrepareReleasePackageErrorBuildingPackageException, + PrepareReleasePackageTagExistException, + PrepareReleasePackageWrongSetupException, + build_provider_package, + cleanup_build_remnants, + copy_provider_sources_to_target, + generate_build_files, + 
get_packages_list_to_act_on, + move_built_packages_and_cleanup, + should_skip_the_package, +) from airflow_breeze.utils.add_back_references import ( start_generating_back_references, ) @@ -90,12 +102,13 @@ ) from airflow_breeze.utils.github import download_constraints_file, get_active_airflow_versions from airflow_breeze.utils.packages import ( + PackageSuspendedException, expand_all_provider_packages, find_matching_long_package_names, get_available_packages, get_provider_details, get_provider_packages_metadata, - get_removed_provider_ids, + make_sure_remote_apache_exists_and_fetch, ) from airflow_breeze.utils.parallel import ( GenericRegexpProgressMatcher, @@ -226,9 +239,9 @@ def prepare_airflow_packages( sys.exit(result_command.returncode) -def provider_documentation_summary(documentation: str, message_type: MessageType, packages: list[str]): +def provider_action_summary(description: str, message_type: MessageType, packages: list[str]): if packages: - get_console().print(f"{documentation}: {len(packages)}\n") + get_console().print(f"{description}: {len(packages)}\n") get_console().print(f"[{message_type.value}]{' '.join(packages)}") get_console().print() @@ -287,7 +300,6 @@ def prepare_provider_documentation( PrepareReleaseDocsNoChangesException, PrepareReleaseDocsUserQuitException, PrepareReleaseDocsUserSkippedException, - make_sure_remote_apache_exists_and_fetch, update_changelog, update_min_airflow_version, update_release_notes, @@ -300,7 +312,6 @@ def prepare_provider_documentation( if not skip_git_fetch: run_command(["git", "remote", "rm", "apache-https-for-providers"], check=False, stderr=DEVNULL) make_sure_remote_apache_exists_and_fetch(github_repository=github_repository) - provider_packages_metadata = get_provider_packages_metadata() no_changes_packages = [] doc_only_packages = [] error_packages = [] @@ -308,76 +319,62 @@ def prepare_provider_documentation( success_packages = [] suspended_packages = [] removed_packages = [] - for provider_package_id in provider_packages: - provider_metadata = provider_packages_metadata.get(provider_package_id) - if not provider_metadata: - get_console().print( - f"[error]The package {provider_package_id} is not a provider package. 
Exiting[/]" - ) - sys.exit(1) - if provider_metadata.get("removed", False): - get_console().print( - f"[warning]The package: {provider_package_id} is scheduled for removal, but " - f"since you asked for it, it will be built [/]\n" - ) - elif provider_metadata.get("suspended"): - get_console().print( - f"[warning]The package: {provider_package_id} is suspended " f"skipping it [/]\n" - ) - suspended_packages.append(provider_package_id) - continue + for provider_id in provider_packages: + provider_metadata = basic_provider_checks(provider_id) if os.environ.get("GITHUB_ACTIONS", "false") != "true": get_console().print("-" * get_console().width) try: with_breaking_changes = False maybe_with_new_features = False - with ci_group(f"Update release notes for package '{provider_package_id}' "): + with ci_group(f"Update release notes for package '{provider_id}' "): get_console().print("Updating documentation for the latest release version.") if not only_min_version_update: with_breaking_changes, maybe_with_new_features = update_release_notes( - provider_package_id, + provider_id, reapply_templates_only=reapply_templates_only, base_branch=base_branch, regenerate_missing_docs=reapply_templates_only, non_interactive=non_interactive, ) update_min_airflow_version( - provider_package_id=provider_package_id, + provider_package_id=provider_id, with_breaking_changes=with_breaking_changes, maybe_with_new_features=maybe_with_new_features, ) - with ci_group(f"Updates changelog for last release of package '{provider_package_id}'"): + with ci_group(f"Updates changelog for last release of package '{provider_id}'"): update_changelog( - package_id=provider_package_id, + package_id=provider_id, base_branch=base_branch, reapply_templates_only=reapply_templates_only, with_breaking_changes=with_breaking_changes, maybe_with_new_features=maybe_with_new_features, ) except PrepareReleaseDocsNoChangesException: - no_changes_packages.append(provider_package_id) + no_changes_packages.append(provider_id) except PrepareReleaseDocsChangesOnlyException: - doc_only_packages.append(provider_package_id) + doc_only_packages.append(provider_id) except PrepareReleaseDocsErrorOccurredException: - error_packages.append(provider_package_id) + error_packages.append(provider_id) except PrepareReleaseDocsUserSkippedException: - user_skipped_packages.append(provider_package_id) + user_skipped_packages.append(provider_id) + except PackageSuspendedException: + suspended_packages.append(provider_id) except PrepareReleaseDocsUserQuitException: break else: if provider_metadata.get("removed"): - removed_packages.append(provider_package_id) + removed_packages.append(provider_id) else: - success_packages.append(provider_package_id) + success_packages.append(provider_id) get_console().print() - get_console().print("\n[info]Summary of prepared packages:\n") - provider_documentation_summary("Success", MessageType.SUCCESS, success_packages) - provider_documentation_summary("Scheduled for removal", MessageType.SUCCESS, removed_packages) - provider_documentation_summary("Docs only", MessageType.SUCCESS, doc_only_packages) - provider_documentation_summary("Skipped on no changes", MessageType.WARNING, no_changes_packages) - provider_documentation_summary("Suspended", MessageType.WARNING, suspended_packages) - provider_documentation_summary("Skipped by user", MessageType.SPECIAL, user_skipped_packages) - provider_documentation_summary("Errors", MessageType.ERROR, error_packages) + get_console().print("\n[info]Summary of prepared documentation:\n") + 
provider_action_summary("Success", MessageType.SUCCESS, success_packages) + provider_action_summary("Scheduled for removal", MessageType.SUCCESS, removed_packages) + provider_action_summary("Docs only", MessageType.SUCCESS, doc_only_packages) + provider_action_summary("Skipped on no changes", MessageType.WARNING, no_changes_packages) + provider_action_summary("Suspended", MessageType.WARNING, suspended_packages) + provider_action_summary("Skipped by user", MessageType.SPECIAL, user_skipped_packages) + provider_action_summary("Errors", MessageType.ERROR, error_packages) if error_packages: get_console().print("\n[errors]There were errors when generating packages. Exiting!\n") sys.exit(1) @@ -386,10 +383,27 @@ def prepare_provider_documentation( sys.exit(0) get_console().print("\n[success]Successfully prepared documentation for packages!\n\n") get_console().print( - "\n[info]Please review the updated files, classify " "the changelog entries and commit the changes.\n" + "\n[info]Please review the updated files, classify the changelog entries and commit the changes.\n" ) +def basic_provider_checks(provider_package_id: str) -> dict[str, Any]: + provider_packages_metadata = get_provider_packages_metadata() + provider_metadata = provider_packages_metadata.get(provider_package_id) + if not provider_metadata: + get_console().print(f"[error]The package {provider_package_id} is not a provider package. Exiting[/]") + sys.exit(1) + if provider_metadata.get("removed", False): + get_console().print( + f"[warning]The package: {provider_package_id} is scheduled for removal, but " + f"since you asked for it, it will be built [/]\n" + ) + elif provider_metadata.get("suspended"): + get_console().print(f"[warning]The package: {provider_package_id} is suspended " f"skipping it [/]\n") + raise PackageSuspendedException() + return provider_metadata + + @release_management.command( name="prepare-provider-packages", help="Prepare sdist/whl packages of Airflow Providers.", @@ -401,48 +415,110 @@ def prepare_provider_documentation( type=click.File("rt"), help="Read list of packages from text file (one package per line).", ) -@option_debug_release_management -@argument_provider_packages +@click.option( + "--skip-tag-check", + default=False, + is_flag=True, + help="Skip checking if the tag already exists in the remote repository", +) +@click.option( + "--skip-deleting-generated-files", + default=False, + is_flag=True, + help="Skip deleting files that were used to generate provider package. Useful for debugging and " + "developing changes to the build process.", +) +@click.option( + "--clean-dist", + default=False, + is_flag=True, + help="Clean dist directory before building packages. 
Useful when you want to build multiple packages " + " in a clean environment", +) @option_github_repository +@argument_provider_packages @option_verbose @option_dry_run def prepare_provider_packages( package_format: str, version_suffix_for_pypi: str, - package_list_file: IO, - debug: bool, - provider_packages: tuple[str, ...], + package_list_file: IO | None, + skip_tag_check: bool, + skip_deleting_generated_files: bool, + clean_dist: bool, github_repository: str, + provider_packages: tuple[str, ...], ): perform_environment_checks() cleanup_python_generated_files() - packages_list = list(provider_packages) - - removed_provider_ids = get_removed_provider_ids() - if package_list_file: - packages_list.extend( - [ - package.strip() - for package in package_list_file.readlines() - if package.strip() not in removed_provider_ids - ] - ) - shell_params = ShellParams( - mount_sources=MOUNT_ALL, - github_repository=github_repository, - python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, - package_format=package_format, - skip_environment_initialization=True, - version_suffix_for_pypi=version_suffix_for_pypi, - ) - rebuild_or_pull_ci_image_if_needed(command_params=shell_params) - cmd_to_run = ["/opt/airflow/scripts/in_container/run_prepare_provider_packages.sh", *packages_list] - result_command = run_docker_command_with_debug( - params=shell_params, - command=cmd_to_run, - debug=debug, + packages_list = get_packages_list_to_act_on(package_list_file, provider_packages) + if not skip_tag_check: + run_command(["git", "remote", "rm", "apache-https-for-providers"], check=False, stderr=DEVNULL) + make_sure_remote_apache_exists_and_fetch(github_repository=github_repository) + success_packages = [] + skipped_as_already_released_packages = [] + suspended_packages = [] + wrong_setup_packages = [] + error_packages = [] + if clean_dist: + get_console().print("\n[warning]Cleaning dist directory before building packages[/]\n") + shutil.rmtree(DIST_DIR, ignore_errors=True) + DIST_DIR.mkdir(parents=True, exist_ok=True) + for provider_id in packages_list: + try: + basic_provider_checks(provider_id) + if not skip_tag_check and should_skip_the_package(provider_id, version_suffix_for_pypi): + continue + get_console().print() + with ci_group(f"Preparing provider package [special]{provider_id}"): + get_console().print() + target_provider_root_sources_path = copy_provider_sources_to_target(provider_id) + generate_build_files( + provider_id=provider_id, + version_suffix=version_suffix_for_pypi, + target_provider_root_sources_path=target_provider_root_sources_path, + ) + cleanup_build_remnants(target_provider_root_sources_path) + build_provider_package( + provider_id=provider_id, + version_suffix=version_suffix_for_pypi, + package_format=package_format, + target_provider_root_sources_path=target_provider_root_sources_path, + ) + move_built_packages_and_cleanup( + target_provider_root_sources_path, DIST_DIR, skip_cleanup=skip_deleting_generated_files + ) + except PrepareReleasePackageTagExistException: + skipped_as_already_released_packages.append(provider_id) + except PrepareReleasePackageWrongSetupException: + wrong_setup_packages.append(provider_id) + except PrepareReleasePackageErrorBuildingPackageException: + error_packages.append(provider_id) + except PackageSuspendedException: + suspended_packages.append(provider_id) + else: + get_console().print(f"\n[success]Generated package [special]{provider_id}") + success_packages.append(provider_id) + get_console().print() + get_console().print("\n[info]Summary of prepared packages:\n") + 
provider_action_summary("Success", MessageType.SUCCESS, success_packages) + provider_action_summary( + "Skipped as already released", MessageType.SUCCESS, skipped_as_already_released_packages ) - sys.exit(result_command.returncode) + provider_action_summary("Suspended", MessageType.WARNING, suspended_packages) + provider_action_summary("Wrong setup generated", MessageType.ERROR, wrong_setup_packages) + provider_action_summary("Errors", MessageType.ERROR, error_packages) + if error_packages or wrong_setup_packages: + get_console().print("\n[errors]There were errors when generating packages. Exiting!\n") + sys.exit(1) + if not success_packages and not skipped_as_already_released_packages: + get_console().print("\n[warning]No packages prepared!\n") + sys.exit(0) + get_console().print("\n[success]Successfully built packages!\n\n") + get_console().print("\n[info]Packages available in dist:\n") + for file in sorted(DIST_DIR.glob("apache*")): + get_console().print(file.name) + get_console().print() def run_generate_constraints( diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py index ad658d228ce62..a5b84869b416f 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py @@ -107,8 +107,10 @@ "options": [ "--package-format", "--version-suffix-for-pypi", + "--clean-dist", + "--skip-tag-check", + "--skip-deleting-generated-files", "--package-list-file", - "--debug", "--github-repository", ], } diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py index 5cddc80d921a1..3c0370f33e382 100644 --- a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py @@ -28,33 +28,27 @@ from enum import Enum from pathlib import Path from shutil import copyfile -from typing import Any, Iterable, NamedTuple +from typing import Any, NamedTuple import jinja2 import semver -from packaging.requirements import Requirement from rich.syntax import Syntax -from airflow_breeze.global_constants import PROVIDER_DEPENDENCIES from airflow_breeze.utils.black_utils import black_format from airflow_breeze.utils.confirm import Answer, user_confirm from airflow_breeze.utils.console import get_console from airflow_breeze.utils.packages import ( + HTTPS_REMOTE, ProviderPackageDetails, get_provider_details, get_provider_jinja_context, get_provider_packages_metadata, - get_provider_requirements, - get_removed_provider_ids, get_source_package_path, -) -from airflow_breeze.utils.path_utils import ( - BREEZE_SOURCES_ROOT, + render_template, ) from airflow_breeze.utils.run_utils import run_command from airflow_breeze.utils.shared_options import get_verbose - -HTTPS_REMOTE = "apache-https-for-providers" +from airflow_breeze.utils.versions import get_version_tag PR_PATTERN = re.compile(r".*\(#(\d+)\)") @@ -116,34 +110,6 @@ class Change(NamedTuple): pr: str | None -class PipRequirements(NamedTuple): - """Store details about python packages""" - - package: str - version_required: str - - @classmethod - def from_requirement(cls, requirement_string: str) -> PipRequirements: - req = Requirement(requirement_string) - - package = req.name - if req.extras: - # Sort extras by name - package += 
f"[{','.join(sorted(req.extras))}]" - - version_required = "" - if req.specifier: - # String representation of `packaging.specifiers.SpecifierSet` sorted by the operator - # which might not looking good, e.g. '>=5.3.0,<6,!=5.3.3,!=5.3.2' transform into the - # '!=5.3.3,!=5.3.2,<6,>=5.3.0'. Instead of that we sort by version and resulting string would be - # '>=5.3.0,!=5.3.2,!=5.3.3,<6' - version_required = ",".join(map(str, sorted(req.specifier, key=lambda spec: spec.version))) - if req.marker: - version_required += f"; {req.marker}" - - return cls(package=package, version_required=version_required.strip()) - - class TypeOfChange(Enum): DOCUMENTATION = "d" BUGFIX = "b" @@ -191,69 +157,6 @@ class PrepareReleaseDocsUserQuitException(Exception): } -def make_sure_remote_apache_exists_and_fetch(github_repository: str = "apache/airflow"): - """Make sure that apache remote exist in git. - - We need to take a log from the apache repository main branch - not locally because we might - not have the latest version. Also, the local repo might be shallow, so we need to - un-shallow it to see all the history. - - This will: - * check if the remote exists and add if it does not - * check if the local repo is shallow, mark it to un-shallow in this case - * fetch from the remote including all tags and overriding local tags in case - they are set differently - - """ - try: - run_command(["git", "remote", "get-url", HTTPS_REMOTE], text=True, capture_output=True) - except subprocess.CalledProcessError as ex: - if ex.returncode == 128 or ex.returncode == 2: - run_command( - [ - "git", - "remote", - "add", - HTTPS_REMOTE, - f"https://github.com/{github_repository}.git", - ], - check=True, - ) - else: - get_console().print( - f"[error]Error {ex}[/]\n" f"[error]When checking if {HTTPS_REMOTE} is set.[/]\n\n" - ) - sys.exit(1) - get_console().print("[info]Fetching full history and tags from remote.") - get_console().print("[info]This might override your local tags!") - result = run_command( - ["git", "rev-parse", "--is-shallow-repository"], - check=True, - capture_output=True, - text=True, - ) - is_shallow_repo = result.stdout.strip() == "true" - fetch_command = ["git", "fetch", "--tags", "--force", HTTPS_REMOTE] - if is_shallow_repo: - fetch_command.append("--unshallow") - try: - run_command(fetch_command) - except subprocess.CalledProcessError as e: - get_console().print( - f"[error]Error {e}[/]\n" - f"[error]When fetching tags from remote. Your tags might not be refreshed.[/]\n\n" - f'[warning]Please refresh the tags manually via:[/]\n\n"' - f'{" ".join(fetch_command)}\n\n' - ) - sys.exit(1) - - -def _get_version_tag(version: str, provider_package_id: str, version_suffix: str = ""): - if version_suffix is None: - version_suffix = "" - return f"providers-{provider_package_id.replace('.','-')}/{version}{version_suffix}" - - def _get_git_log_command(from_commit: str | None = None, to_commit: str | None = None) -> list[str]: """Get git command to run for the current repo from the current folder. 
@@ -369,7 +272,7 @@ def _get_all_changes_for_package( """ provider_details = get_provider_details(provider_package_id) current_version = provider_details.versions[0] - current_tag_no_suffix = _get_version_tag(current_version, provider_package_id) + current_tag_no_suffix = get_version_tag(current_version, provider_package_id) if get_verbose(): get_console().print(f"[info]Checking if tag '{current_tag_no_suffix}' exist.") result = run_command( @@ -449,7 +352,7 @@ def _get_all_changes_for_package( current_version = provider_details.versions[0] list_of_list_of_changes: list[list[Change]] = [] for version in provider_details.versions[1:]: - version_tag = _get_version_tag(version, provider_package_id) + version_tag = get_version_tag(version, provider_package_id) result = run_command( _get_git_log_command(next_version_tag, version_tag), cwd=provider_details.source_provider_package_path, @@ -583,60 +486,6 @@ def _verify_changelog_exists(package: str) -> Path: return changelog_path -def _convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool = True) -> str: - """ - Converts PIP requirement list to a Markdown table. - :param requirements: requirements list - :param markdown: if True, Markdown format is used else rst - :return: formatted table - """ - from tabulate import tabulate - - headers = ["PIP package", "Version required"] - table_data = [] - for dependency in requirements: - req = PipRequirements.from_requirement(dependency) - formatted_package = f"`{req.package}`" if markdown else f"``{req.package}``" - formatted_version = "" - if req.version_required: - formatted_version = f"`{req.version_required}`" if markdown else f"``{req.version_required}``" - table_data.append((formatted_package, formatted_version)) - return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") - - -def _convert_cross_package_dependencies_to_table( - cross_package_dependencies: list[str], - markdown: bool = True, -) -> str: - """ - Converts cross-package dependencies to a Markdown table - :param cross_package_dependencies: list of cross-package dependencies - :param markdown: if True, Markdown format is used else rst - :return: formatted table - """ - from tabulate import tabulate - - headers = ["Dependent package", "Extra"] - table_data = [] - prefix = "apache-airflow-providers-" - base_url = "https://airflow.apache.org/docs/" - for dependency in cross_package_dependencies: - pip_package_name = f"{prefix}{dependency.replace('.','-')}" - url_suffix = f"{dependency.replace('.','-')}" - if markdown: - url = f"[{pip_package_name}]({base_url}{url_suffix})" - else: - url = f"`{pip_package_name} <{base_url}{prefix}{url_suffix}>`_" - table_data.append((url, f"`{dependency}`" if markdown else f"``{dependency}``")) - return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") - - -def _get_cross_provider_dependent_packages(provider_package_id: str) -> list[str]: - if provider_package_id in get_removed_provider_ids(): - return [] - return PROVIDER_DEPENDENCIES[provider_package_id]["cross-providers-deps"] - - def _get_additional_package_info(provider_package_path: Path) -> str: """Returns additional info for the package. @@ -658,38 +507,6 @@ def _get_additional_package_info(provider_package_path: Path) -> str: return "" -def render_template( - template_name: str, - context: dict[str, Any], - extension: str, - autoescape: bool = True, - keep_trailing_newline: bool = False, -) -> str: - """ - Renders template based on its name. 
Reads the template from _TEMPLATE.md.jinja2 in current dir. - :param template_name: name of the template to use - :param context: Jinja2 context - :param extension: Target file extension - :param autoescape: Whether to autoescape HTML - :param keep_trailing_newline: Whether to keep the newline in rendered output - :return: rendered template - """ - import jinja2 - - template_loader = jinja2.FileSystemLoader( - searchpath=BREEZE_SOURCES_ROOT / "src" / "airflow_breeze" / "templates" - ) - template_env = jinja2.Environment( - loader=template_loader, - undefined=jinja2.StrictUndefined, - autoescape=autoescape, - keep_trailing_newline=keep_trailing_newline, - ) - template = template_env.get_template(f"{template_name}_TEMPLATE{extension}.jinja2") - content: str = template.render(context) - return content - - def replace_content(file_path: Path, old_text: str, new_text: str, provider_id: str): if new_text != old_text: _, temp_file_path = tempfile.mkstemp() @@ -1048,31 +865,17 @@ def get_provider_documentation_jinja_context( provider_id: str, with_breaking_changes: bool, maybe_with_new_features: bool ) -> dict[str, Any]: provider_details = get_provider_details(provider_id) - current_release_version = provider_details.versions[0] jinja_context = get_provider_jinja_context( provider_id=provider_id, - current_release_version=current_release_version, + current_release_version=provider_details.versions[0], version_suffix="", - with_breaking_changes=with_breaking_changes, - maybe_with_new_features=maybe_with_new_features, ) + jinja_context["WITH_BREAKING_CHANGES"] = with_breaking_changes + jinja_context["MAYBE_WITH_NEW_FEATURES"] = maybe_with_new_features + jinja_context["ADDITIONAL_INFO"] = ( _get_additional_package_info(provider_package_path=provider_details.source_provider_package_path), ) - jinja_context["CROSS_PROVIDERS_DEPENDENCIES"] = _get_cross_provider_dependent_packages(provider_id) - cross_providers_dependencies = _get_cross_provider_dependent_packages(provider_package_id=provider_id) - jinja_context["CROSS_PROVIDERS_DEPENDENCIES_TABLE"] = _convert_cross_package_dependencies_to_table( - cross_providers_dependencies - ) - jinja_context["CROSS_PROVIDERS_DEPENDENCIES_TABLE_RST"] = _convert_cross_package_dependencies_to_table( - cross_providers_dependencies, markdown=False - ) - jinja_context["PIP_REQUIREMENTS_TABLE"] = _convert_pip_requirements_to_table( - get_provider_requirements(provider_id) - ) - jinja_context["PIP_REQUIREMENTS_TABLE_RST"] = _convert_pip_requirements_to_table( - get_provider_requirements(provider_id), markdown=False - ) return jinja_context diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py new file mode 100644 index 0000000000000..5b004eb27d528 --- /dev/null +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py @@ -0,0 +1,245 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import shutil +import subprocess +import sys +from pathlib import Path +from shutil import copytree, rmtree +from stat import S_IRGRP, S_IROTH, S_IRUSR, S_IWUSR +from typing import IO, Any + +from airflow_breeze.utils.console import get_console +from airflow_breeze.utils.packages import ( + get_available_packages, + get_latest_provider_tag, + get_provider_details, + get_provider_jinja_context, + get_removed_provider_ids, + get_source_package_path, + get_target_root_for_copied_provider_sources, + render_template, + tag_exists_for_provider, +) +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT +from airflow_breeze.utils.run_utils import run_command + +LICENCE_RST = """ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +""" + + +class PrepareReleasePackageTagExistException(Exception): + """Tag already exist for the package.""" + + +class PrepareReleasePackageWrongSetupException(Exception): + """Wrong setup prepared for the package.""" + + +class PrepareReleasePackageErrorBuildingPackageException(Exception): + """Error when building the package.""" + + +def copy_provider_sources_to_target(provider_id: str) -> Path: + target_provider_root_path = get_target_root_for_copied_provider_sources(provider_id) + + if target_provider_root_path.exists() and not target_provider_root_path.is_dir(): + get_console().print( + f"[error]Target folder for {provider_id} sources is not a directory " + f"please delete {target_provider_root_path} and try again!" + ) + rmtree(target_provider_root_path, ignore_errors=True) + target_provider_root_path.mkdir(parents=True) + source_provider_sources_path = get_source_package_path(provider_id) + relative_provider_path = source_provider_sources_path.relative_to(AIRFLOW_SOURCES_ROOT) + target_providers_sub_folder = target_provider_root_path / relative_provider_path + get_console().print( + f"[info]Copying provider sources: " f"{source_provider_sources_path} -> {target_providers_sub_folder}" + ) + copytree(source_provider_sources_path, target_providers_sub_folder) + shutil.copy(AIRFLOW_SOURCES_ROOT / "LICENSE", target_providers_sub_folder / "LICENSE") + # We do not copy NOTICE from the top level source of Airflow because NOTICE only refers to + # Airflow sources - not to providers. 
If any of the providers is going to have a code that + # requires NOTICE, then it should be stored in the provider sources (airflow/providers/PROVIDER_ID) + # And it will be copied from there. + (target_providers_sub_folder / ".latest-doc-only-change.txt").unlink(missing_ok=True) + (target_providers_sub_folder / "CHANGELOG.rst").unlink(missing_ok=True) + (target_providers_sub_folder / "provider.yaml").unlink(missing_ok=True) + return target_provider_root_path + + +def get_provider_package_jinja_context(provider_id: str, version_suffix: str) -> dict[str, Any]: + provider_details = get_provider_details(provider_id) + jinja_context = get_provider_jinja_context( + provider_id=provider_id, + current_release_version=provider_details.versions[0], + version_suffix=version_suffix, + ) + return jinja_context + + +def _prepare_get_provider_info_py_file(context: dict[str, Any], provider_id: str, target_path: Path): + from airflow_breeze.utils.black_utils import black_format + + get_provider_template_name = "get_provider_info" + get_provider_content = render_template( + template_name=get_provider_template_name, + context=context, + extension=".py", + autoescape=False, + keep_trailing_newline=True, + ) + target_provider_specific_path = (target_path / "airflow" / "providers").joinpath(*provider_id.split(".")) + (target_provider_specific_path / "get_provider_info.py").write_text(black_format(get_provider_content)) + get_console().print(f"[info]Generated get_provider_info.py in {target_provider_specific_path}[/]") + + +def _prepare_pyproject_toml_file(context: dict[str, Any], target_path: Path): + manifest_content = render_template( + template_name="pyproject", + context=context, + extension=".toml", + autoescape=False, + keep_trailing_newline=True, + ) + (target_path / "pyproject.toml").write_text(manifest_content) + get_console().print(f"[info]Generated pyproject.toml in {target_path}[/]") + + +def _prepare_readme_file(context: dict[str, Any], target_path: Path): + readme_content = LICENCE_RST + render_template( + template_name="PROVIDER_README", context=context, extension=".rst" + ) + (target_path / "README.rst").write_text(readme_content) + get_console().print(f"[info]Generated README.rst in {target_path}[/]") + + +def generate_build_files(provider_id: str, version_suffix: str, target_provider_root_sources_path: Path): + get_console().print(f"\n[info]Generate build files for {provider_id}\n") + jinja_context = get_provider_package_jinja_context(provider_id=provider_id, version_suffix=version_suffix) + _prepare_get_provider_info_py_file(jinja_context, provider_id, target_provider_root_sources_path) + _prepare_pyproject_toml_file(jinja_context, target_provider_root_sources_path) + _prepare_readme_file(jinja_context, target_provider_root_sources_path) + get_console().print(f"\n[info]Generated package build files for {provider_id}[/]\n") + + +def should_skip_the_package(provider_id: str, version_suffix: str) -> bool: + """Return True if the package should be skipped. + + For RC and official releases we check if the "officially released" version exists + and skip the released if it was. This allows to skip packages that have not been + marked for release in this wave. For "dev" suffixes, we always build all packages. + """ + if version_suffix.startswith("rc") or version_suffix == "": + current_tag = get_latest_provider_tag(provider_id, version_suffix) + if tag_exists_for_provider(provider_id, current_tag): + get_console().print(f"[warning]The tag {current_tag} exists. 
Skipping the package.[/]") + return True + return False + + +def cleanup_build_remnants(target_provider_root_sources_path: Path): + get_console().print(f"\n[info]Cleaning remnants in {target_provider_root_sources_path}") + for file in target_provider_root_sources_path.glob("*.egg-info"): + shutil.rmtree(file, ignore_errors=True) + shutil.rmtree(target_provider_root_sources_path / "build", ignore_errors=True) + shutil.rmtree(target_provider_root_sources_path / "dist", ignore_errors=True) + get_console().print(f"[info]Cleaned remnants in {target_provider_root_sources_path}\n") + + +def build_provider_package( + provider_id: str, version_suffix: str, target_provider_root_sources_path: Path, package_format: str +): + get_console().print( + f"\n[info]Building provider package: {provider_id} in format {package_format} in " + f"{target_provider_root_sources_path}\n" + ) + command: list[str] = [sys.executable, "-m", "flit", "build", "--no-setup-py", "--no-use-vcs"] + if package_format != "both": + command.extend(["--format", package_format]) + try: + run_command(command, check=True, cwd=target_provider_root_sources_path) + except subprocess.CalledProcessError as ex: + get_console().print("[error]The command returned an error %s", ex) + raise PrepareReleasePackageErrorBuildingPackageException() + get_console().print( + f"\n[info]Prepared provider package {provider_id} in " f"format {package_format}[/]\n" + ) + + +def move_built_packages_and_cleanup( + target_provider_root_sources_path: Path, dist_folder: Path, skip_cleanup: bool +): + for file in (target_provider_root_sources_path / "dist").glob("apache*"): + get_console().print(f"[info]Moving {file} to {dist_folder}") + # Shutil can move packages also between filesystems + target_file = dist_folder / file.name + target_file.unlink(missing_ok=True) + # Change ownership to group/other to read the file + file.chmod(S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) + shutil.move(file.as_posix(), dist_folder.as_posix()) + + if skip_cleanup: + get_console().print( + f"[warning]NOT Cleaning up the {target_provider_root_sources_path} because " + f"it was requested by the user[/]\n" + f"\nYou can use the generated packages to work on the build" + f"process and bring the changes back to the templates in Breeze " + f"src/airflow_breeze/templates" + ) + else: + get_console().print(f"[info]Cleaning up {target_provider_root_sources_path}") + shutil.rmtree(target_provider_root_sources_path, ignore_errors=True) + get_console().print(f"[info]Cleaned up {target_provider_root_sources_path}") + + +def get_packages_list_to_act_on( + package_list_file: IO | None, provider_packages: tuple[str, ...] +) -> list[str]: + if package_list_file and provider_packages: + get_console().print( + "[error]You cannot specify individual provider packages when you specify package list file." 
+ ) + sys.exit(1) + if package_list_file: + removed_provider_ids = get_removed_provider_ids() + return [ + package.strip() + for package in package_list_file.readlines() + if package.strip() not in removed_provider_ids + ] + elif provider_packages: + return list(provider_packages) + return get_available_packages() diff --git a/dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 index d0d6e7cd32542..3a42506c0b7a1 100644 --- a/dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 @@ -16,12 +16,6 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - #} {{ version }} {{ version_header }} diff --git a/dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 index c1efba1ef053d..594379827e878 100644 --- a/dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 @@ -19,8 +19,6 @@ NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN WHEN PREPARING PACKAGES. - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY #} .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file @@ -39,4 +37,10 @@ specific language governing permissions and limitations under the License. + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE + OVERWRITTEN WHEN PREPARING PACKAGES. + + .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + .. include:: {{ CHANGELOG_RELATIVE_PATH }}/CHANGELOG.rst diff --git a/dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 index 5de862ac38108..f89b0913bc0e0 100644 --- a/dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 @@ -16,12 +16,6 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - #} .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file @@ -40,7 +34,13 @@ specific language governing permissions and limitations under the License. -.. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE + OVERWRITTEN WHEN PREPARING PACKAGES. + + .. 
IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_COMMITS_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + .. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! Package {{ PACKAGE_PIP_NAME }} ------------------------------------------------------ diff --git a/dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 index aa2e10194d6ca..4289016fde14a 100644 --- a/dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 @@ -19,9 +19,6 @@ NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN WHEN PREPARING PACKAGES. - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - #} .. toctree:: diff --git a/dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 index d91e53b805791..9bcff72fe85ee 100644 --- a/dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 @@ -16,12 +16,6 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - #} .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file @@ -40,6 +34,12 @@ specific language governing permissions and limitations under the License. + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE + OVERWRITTEN WHEN PREPARING PACKAGES. + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + Package ``{{ PACKAGE_PIP_NAME }}`` diff --git a/dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 index 6e0a1bf616649..9acce6226f111 100644 --- a/dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 @@ -16,8 +16,7 @@ specific language governing permissions and limitations under the License. --#} -# +#} # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information diff --git a/dev/breeze/src/airflow_breeze/templates/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 index 3482fc0339f6b..f38c103ae7e6f 100644 --- a/dev/breeze/src/airflow_breeze/templates/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 @@ -16,12 +16,6 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. 
- - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - #} .. Review and move the new changes to one of the sections above: {%- for change in new_changes %} diff --git a/dev/provider_packages/get_provider_info_TEMPLATE.py.jinja2 b/dev/breeze/src/airflow_breeze/templates/get_provider_info_TEMPLATE.py.jinja2 similarity index 50% rename from dev/provider_packages/get_provider_info_TEMPLATE.py.jinja2 rename to dev/breeze/src/airflow_breeze/templates/get_provider_info_TEMPLATE.py.jinja2 index b6e50ceeed520..5340dc9b76a14 100644 --- a/dev/provider_packages/get_provider_info_TEMPLATE.py.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/get_provider_info_TEMPLATE.py.jinja2 @@ -1,3 +1,22 @@ +{# + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +#} # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -18,8 +37,8 @@ # NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE # OVERWRITTEN WHEN PREPARING PACKAGES. # -# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE -# `get_provider_info_TEMPLATE.py.jinja2` IN the `provider_packages` DIRECTORY +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY def get_provider_info(): diff --git a/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 b/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 new file mode 100644 index 0000000000000..2965235c233ec --- /dev/null +++ b/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 @@ -0,0 +1,109 @@ +{# + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +#} +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE +# OVERWRITTEN WHEN PREPARING PACKAGES. + +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +# +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "{{ PACKAGE_PIP_NAME }}" +version = "{{RELEASE}}{{ VERSION_SUFFIX }}" +description = "Provider package {{ PACKAGE_PIP_NAME }} for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "{{ PROVIDER_ID }}", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + {%- for python_version in SUPPORTED_PYTHON_VERSIONS %} + "Programming Language :: Python :: {{ python_version }}", + {%- endfor %} + "Topic :: System :: Monitoring", +] +requires-python = "~=3.8" +dependencies = [ +{{- INSTALL_REQUIREMENTS }} +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}" +"Changelog" = "https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://twitter.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.{{ PROVIDER_ID }}.get_provider_info:get_provider_info" + +{%- if PLUGINS %} +[project.entry-points."airflow.plugins"] +{%- for plugin in PLUGINS %} +{{ plugin.name }} = "{{ plugin.package_name }}:{{ plugin.class_name }}" +{%- endfor %} +{%- endif %} + +{%- if EXTRAS_REQUIREMENTS %} +[project.optional-dependencies] +{%- for extra_name, dependencies_list in EXTRAS_REQUIREMENTS.items() %} +"{{ extra_name }}" = [ +{%- for dependency in dependencies_list %} + "{{ dependency }}", +{%- endfor %} +] +{%- endfor %} +{%- endif %} + +[tool.flit.module] +name = "airflow.providers.{{ PROVIDER_ID }}" diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py b/dev/breeze/src/airflow_breeze/utils/packages.py index 3f6d6dba06f9a..df2c4b1d51632 100644 --- a/dev/breeze/src/airflow_breeze/utils/packages.py +++ b/dev/breeze/src/airflow_breeze/utils/packages.py @@ -20,6 +20,8 @@ import fnmatch import json import os +import subprocess +import sys from enum import Enum from functools import 
lru_cache from pathlib import Path @@ -34,18 +36,20 @@ from airflow_breeze.utils.console import get_console from airflow_breeze.utils.path_utils import ( AIRFLOW_PROVIDERS_ROOT, + BREEZE_SOURCES_ROOT, + DIST_DIR, DOCS_ROOT, PROVIDER_DEPENDENCIES_JSON_FILE_PATH, ) from airflow_breeze.utils.publish_docs_helpers import ( - _filepath_to_module, - _filepath_to_system_tests, _load_schema, get_provider_yaml_paths, ) -from airflow_breeze.utils.versions import strip_leading_zeros_from_version +from airflow_breeze.utils.run_utils import run_command +from airflow_breeze.utils.versions import get_version_tag, strip_leading_zeros_from_version MIN_AIRFLOW_VERSION = "2.5.0" +HTTPS_REMOTE = "apache-https-for-providers" LONG_PROVIDERS_PREFIX = "apache-airflow-providers-" @@ -93,6 +97,40 @@ class ProviderPackageDetails(NamedTuple): removed: bool +class PackageSuspendedException(Exception): + """Exception raised when package is suspended.""" + + +class PipRequirements(NamedTuple): + """Store details about python packages""" + + package: str + version_required: str + + @classmethod + def from_requirement(cls, requirement_string: str) -> PipRequirements: + from packaging.requirements import Requirement + + req = Requirement(requirement_string) + + package = req.name + if req.extras: + # Sort extras by name + package += f"[{','.join(sorted(req.extras))}]" + + version_required = "" + if req.specifier: + # String representation of `packaging.specifiers.SpecifierSet` sorted by the operator + # which might not looking good, e.g. '>=5.3.0,<6,!=5.3.3,!=5.3.2' transform into the + # '!=5.3.3,!=5.3.2,<6,>=5.3.0'. Instead of that we sort by version and resulting string would be + # '>=5.3.0,!=5.3.2,!=5.3.3,<6' + version_required = ",".join(map(str, sorted(req.specifier, key=lambda spec: spec.version))) + if req.marker: + version_required += f"; {req.marker}" + + return cls(package=package, version_required=version_required.strip()) + + @lru_cache def get_provider_packages_metadata() -> dict[str, dict[str, Any]]: """ @@ -112,10 +150,6 @@ def get_provider_packages_metadata() -> dict[str, dict[str, Any]]: jsonschema.validate(provider, schema=schema) except jsonschema.ValidationError: raise Exception(f"Unable to parse: {provider_yaml_path}.") - provider_yaml_dir = os.path.dirname(provider_yaml_path) - provider["python-module"] = _filepath_to_module(provider_yaml_dir) - provider["package-dir"] = provider_yaml_dir - provider["system-tests-dir"] = _filepath_to_system_tests(provider_yaml_dir) result[get_short_package_name(provider["package-name"])] = provider return result @@ -189,6 +223,7 @@ def get_available_packages( """ Return provider ids for all packages that are available currently (not suspended). + :rtype: object :param include_non_provider_doc_packages: whether the non-provider doc packages should be included (packages like apache-airflow, helm-chart, docker-stack) :param include_all_providers: whether "all-providers" should be included ni the list. @@ -300,6 +335,10 @@ def get_documentation_package_path(provider_id: str) -> Path: return DOCS_ROOT / f"apache-airflow-providers-{provider_id.replace('.', '-')}" +def get_target_root_for_copied_provider_sources(provider_id: str) -> Path: + return (DIST_DIR / "provider_packages").joinpath(*provider_id.split(".")) + + def get_pip_package_name(provider_id: str) -> str: """ Returns PIP package name for the package id. 
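
The PipRequirements.from_requirement classmethod added above normalizes a raw requirement string with packaging.requirements.Requirement: extras are sorted alphabetically, version specifiers are sorted by version rather than by operator, and any environment marker is appended after a semicolon. Below is a minimal standalone sketch of that normalization; it is illustrative only (the normalize function is not part of the Breeze code), it assumes the packaging library is installed, and the expected outputs are the ones asserted by the new tests in dev/breeze/tests/test_packages.py.

from __future__ import annotations

from packaging.requirements import Requirement


def normalize(requirement_string: str) -> tuple[str, str]:
    """Illustrative re-implementation of PipRequirements.from_requirement."""
    req = Requirement(requirement_string)
    package = req.name
    if req.extras:
        # Extras are sorted alphabetically inside the brackets.
        package += f"[{','.join(sorted(req.extras))}]"
    version_required = ""
    if req.specifier:
        # Specifiers are sorted by version, not by operator.
        version_required = ",".join(map(str, sorted(req.specifier, key=lambda spec: spec.version)))
    if req.marker:
        version_required += f"; {req.marker}"
    return package, version_required.strip()


print(normalize("celery>=5.3.0,<6,!=5.3.3,!=5.3.2"))
# ('celery', '>=5.3.0,!=5.3.2,!=5.3.3,<6')
print(normalize("apache-airflow[amazon,google,microsoft.azure,docker]>2.7.0"))
# ('apache-airflow[amazon,docker,google,microsoft.azure]', '>2.7.0')
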
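Because the generated per-provider pyproject.toml above declares flit_core.buildapi as its PEP-517 build backend, the distributions can be produced by any standard build frontend without executing a setup.py. A hedged sketch of such a build follows, assuming the build package is installed and that the provider sources were copied under the directory returned by get_target_root_for_copied_provider_sources (the asana path is only an example, with DIST_DIR taken to be the repository's dist/ directory); the exact invocation Breeze uses may differ.

import subprocess
import sys
from pathlib import Path

# Hypothetical location of the copied provider sources plus the rendered
# pyproject.toml, mirroring get_target_root_for_copied_provider_sources()
# for the "asana" provider.
generated_sources = Path("dist/provider_packages/asana")

# PEP-517 build via the standard frontend: flit_core builds the sdist and
# wheel from declarative metadata, so no arbitrary setup.py code runs.
subprocess.run(
    [sys.executable, "-m", "build", "--sdist", "--wheel", "--outdir", "dist", str(generated_sources)],
    check=True,
)
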
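The apache_airflow_provider entry point declared in the generated pyproject.toml points at the also-generated get_provider_info() function, which is how an installed provider distribution advertises its metadata at runtime. A small sketch of that discovery path, assuming Python 3.10+ (for the group= selection API of importlib.metadata) and at least one provider distribution installed in the environment:

from importlib.metadata import entry_points

# Each installed provider exposes a "provider_info" entry point in the
# "apache_airflow_provider" group (see the generated pyproject.toml above).
for entry_point in entry_points(group="apache_airflow_provider"):
    get_provider_info = entry_point.load()
    provider_info = get_provider_info()
    print(provider_info["package-name"], provider_info["versions"][0])
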
@@ -346,7 +385,7 @@ def apply_version_suffix(install_clause: str) -> str: else: dependencies = PROVIDER_DEPENDENCIES.get(provider_id)["deps"] install_requires = [apply_version_suffix(clause) for clause in dependencies] - return "".join(f"\n {ir}" for ir in install_requires) + return "".join(f'\n "{ir}",' for ir in install_requires) def get_package_extras(provider_id: str) -> dict[str, list[str]]: @@ -436,12 +475,43 @@ def get_python_requires(provider_id: str) -> str: return python_requires +def convert_cross_package_dependencies_to_table( + cross_package_dependencies: list[str], + markdown: bool = True, +) -> str: + """ + Converts cross-package dependencies to a Markdown table + :param cross_package_dependencies: list of cross-package dependencies + :param markdown: if True, Markdown format is used else rst + :return: formatted table + """ + from tabulate import tabulate + + headers = ["Dependent package", "Extra"] + table_data = [] + prefix = "apache-airflow-providers-" + base_url = "https://airflow.apache.org/docs/" + for dependency in cross_package_dependencies: + pip_package_name = f"{prefix}{dependency.replace('.','-')}" + url_suffix = f"{dependency.replace('.','-')}" + if markdown: + url = f"[{pip_package_name}]({base_url}{url_suffix})" + else: + url = f"`{pip_package_name} <{base_url}{prefix}{url_suffix}>`_" + table_data.append((url, f"`{dependency}`" if markdown else f"``{dependency}``")) + return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") + + +def get_cross_provider_dependent_packages(provider_package_id: str) -> list[str]: + if provider_package_id in get_removed_provider_ids(): + return [] + return PROVIDER_DEPENDENCIES[provider_package_id]["cross-providers-deps"] + + def get_provider_jinja_context( provider_id: str, current_release_version: str, version_suffix: str, - with_breaking_changes: bool, - maybe_with_new_features: bool, ): provider_details = get_provider_details(provider_id=provider_id) release_version_no_leading_zeros = strip_leading_zeros_from_version(current_release_version) @@ -449,30 +519,20 @@ def get_provider_jinja_context( supported_python_versions = [ p for p in ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS if p not in provider_details.excluded_python_versions ] + cross_providers_dependencies = get_cross_provider_dependent_packages(provider_package_id=provider_id) context: dict[str, Any] = { - "WITH_BREAKING_CHANGES": with_breaking_changes, - "MAYBE_WITH_NEW_FEATURES": maybe_with_new_features, - "ENTITY_TYPES": list(EntityType), - "README_FILE": "README.rst", "PROVIDER_ID": provider_details.provider_id, "PACKAGE_PIP_NAME": get_pip_package_name(provider_details.provider_id), "PACKAGE_WHEEL_NAME": get_wheel_package_name(provider_details.provider_id), "FULL_PACKAGE_NAME": provider_details.full_package_name, - "PROVIDER_PATH": provider_details.full_package_name.replace(".", "/"), "RELEASE": current_release_version, "RELEASE_NO_LEADING_ZEROS": release_version_no_leading_zeros, - "VERSION_SUFFIX": version_suffix or "", + "VERSION_SUFFIX": f".{version_suffix}" if version_suffix else "", "PIP_REQUIREMENTS": get_provider_requirements(provider_details.provider_id), - "PROVIDER_TYPE": "Provider", - "PROVIDERS_FOLDER": "providers", "PROVIDER_DESCRIPTION": provider_details.provider_description, "INSTALL_REQUIREMENTS": get_install_requirements( provider_id=provider_details.provider_id, version_suffix=version_suffix ), - "SETUP_REQUIREMENTS": """ - setuptools - wheel -""", "EXTRAS_REQUIREMENTS": 
get_package_extras(provider_id=provider_details.provider_id), "CHANGELOG_RELATIVE_PATH": os.path.relpath( provider_details.source_provider_package_path, @@ -480,11 +540,146 @@ def get_provider_jinja_context( ), "CHANGELOG": changelog, "SUPPORTED_PYTHON_VERSIONS": supported_python_versions, - "PYTHON_REQUIRES": get_python_requires(provider_id), "PLUGINS": provider_details.plugins, "MIN_AIRFLOW_VERSION": get_min_airflow_version(provider_id), - "PREINSTALLED_PROVIDER": provider_details.provider_id in PREINSTALLED_PROVIDERS, "PROVIDER_REMOVED": provider_details.removed, "PROVIDER_INFO": get_provider_info_dict(provider_id), + "CROSS_PROVIDERS_DEPENDENCIES": get_cross_provider_dependent_packages(provider_id), + "CROSS_PROVIDERS_DEPENDENCIES_TABLE_RST": convert_cross_package_dependencies_to_table( + cross_providers_dependencies, markdown=False + ), + "PIP_REQUIREMENTS_TABLE_RST": convert_pip_requirements_to_table( + get_provider_requirements(provider_id), markdown=False + ), } return context + + +def render_template( + template_name: str, + context: dict[str, Any], + extension: str, + autoescape: bool = True, + keep_trailing_newline: bool = False, +) -> str: + """ + Renders template based on its name. Reads the template from _TEMPLATE.md.jinja2 in current dir. + :param template_name: name of the template to use + :param context: Jinja2 context + :param extension: Target file extension + :param autoescape: Whether to autoescape HTML + :param keep_trailing_newline: Whether to keep the newline in rendered output + :return: rendered template + """ + import jinja2 + + template_loader = jinja2.FileSystemLoader( + searchpath=BREEZE_SOURCES_ROOT / "src" / "airflow_breeze" / "templates" + ) + template_env = jinja2.Environment( + loader=template_loader, + undefined=jinja2.StrictUndefined, + autoescape=autoescape, + keep_trailing_newline=keep_trailing_newline, + ) + template = template_env.get_template(f"{template_name}_TEMPLATE{extension}.jinja2") + content: str = template.render(context) + return content + + +def make_sure_remote_apache_exists_and_fetch(github_repository: str = "apache/airflow"): + """Make sure that apache remote exist in git. + + We need to take a log from the apache repository main branch - not locally because we might + not have the latest version. Also, the local repo might be shallow, so we need to + un-shallow it to see all the history. 
+ + This will: + * check if the remote exists and add if it does not + * check if the local repo is shallow, mark it to un-shallow in this case + * fetch from the remote including all tags and overriding local tags in case + they are set differently + + """ + try: + run_command(["git", "remote", "get-url", HTTPS_REMOTE], text=True, capture_output=True) + except subprocess.CalledProcessError as ex: + if ex.returncode == 128 or ex.returncode == 2: + run_command( + [ + "git", + "remote", + "add", + HTTPS_REMOTE, + f"https://github.com/{github_repository}.git", + ], + check=True, + ) + else: + get_console().print( + f"[error]Error {ex}[/]\n" f"[error]When checking if {HTTPS_REMOTE} is set.[/]\n\n" + ) + sys.exit(1) + get_console().print("[info]Fetching full history and tags from remote.") + get_console().print("[info]This might override your local tags!") + result = run_command( + ["git", "rev-parse", "--is-shallow-repository"], + check=True, + capture_output=True, + text=True, + ) + is_shallow_repo = result.stdout.strip() == "true" + fetch_command = ["git", "fetch", "--tags", "--force", HTTPS_REMOTE] + if is_shallow_repo: + fetch_command.append("--unshallow") + try: + run_command(fetch_command) + except subprocess.CalledProcessError as e: + get_console().print( + f"[error]Error {e}[/]\n" + f"[error]When fetching tags from remote. Your tags might not be refreshed.[/]\n\n" + f'[warning]Please refresh the tags manually via:[/]\n\n"' + f'{" ".join(fetch_command)}\n\n' + ) + sys.exit(1) + + +def convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool = True) -> str: + """ + Converts PIP requirement list to a Markdown table. + :param requirements: requirements list + :param markdown: if True, Markdown format is used else rst + :return: formatted table + """ + from tabulate import tabulate + + headers = ["PIP package", "Version required"] + table_data = [] + for dependency in requirements: + req = PipRequirements.from_requirement(dependency) + formatted_package = f"`{req.package}`" if markdown else f"``{req.package}``" + formatted_version = "" + if req.version_required: + formatted_version = f"`{req.version_required}`" if markdown else f"``{req.version_required}``" + table_data.append((formatted_package, formatted_version)) + return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") + + +def tag_exists_for_provider(provider_id: str, current_tag: str) -> bool: + """Return true if the tag exists in the provider repository.""" + provider_details = get_provider_details(provider_id) + result = run_command( + ["git", "rev-parse", current_tag], + cwd=provider_details.source_provider_package_path, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + return result.returncode == 0 + + +def get_latest_provider_tag(provider_id: str, suffix: str) -> str: + """Returns latest tag for the provider.""" + provider_details = get_provider_details(provider_id) + current_version = provider_details.versions[0] + return get_version_tag(current_version, provider_id, suffix) diff --git a/dev/breeze/src/airflow_breeze/utils/versions.py b/dev/breeze/src/airflow_breeze/utils/versions.py index abb8d601f2907..88c5986f7d975 100644 --- a/dev/breeze/src/airflow_breeze/utils/versions.py +++ b/dev/breeze/src/airflow_breeze/utils/versions.py @@ -28,3 +28,9 @@ def strip_leading_zeros_from_version(version: str) -> str: :return: string with leading 0s after dot replaced. 
""" return ".".join(str(int(i)) for i in version.split(".")) + + +def get_version_tag(version: str, provider_package_id: str, version_suffix: str = ""): + if version_suffix is None: + version_suffix = "" + return f"providers-{provider_package_id.replace('.','-')}/{version}{version_suffix}" diff --git a/dev/breeze/tests/test_packages.py b/dev/breeze/tests/test_packages.py index b83e496e500c1..e50d03d0ce5dc 100644 --- a/dev/breeze/tests/test_packages.py +++ b/dev/breeze/tests/test_packages.py @@ -16,24 +16,36 @@ # under the License. from __future__ import annotations +from typing import Iterable + import pytest from airflow_breeze.global_constants import REGULAR_DOC_PACKAGES from airflow_breeze.utils.packages import ( + PipRequirements, + convert_cross_package_dependencies_to_table, + convert_pip_requirements_to_table, expand_all_provider_packages, find_matching_long_package_names, get_available_packages, + get_cross_provider_dependent_packages, get_documentation_package_path, get_install_requirements, get_long_package_name, + get_min_airflow_version, get_package_extras, + get_pip_package_name, get_provider_details, + get_provider_info_dict, + get_provider_jinja_context, get_provider_requirements, get_removed_provider_ids, get_short_package_name, get_source_package_path, get_suspended_provider_folders, get_suspended_provider_ids, + get_wheel_package_name, + validate_provider_info_with_runtime_schema, ) from airflow_breeze.utils.path_utils import AIRFLOW_PROVIDERS_ROOT, AIRFLOW_SOURCES_ROOT, DOCS_ROOT @@ -151,8 +163,8 @@ def test_get_install_requirements(): assert ( get_install_requirements("asana", "").strip() == """ - apache-airflow>=2.5.0 - asana>=0.10,<4.0.0 + "apache-airflow>=2.5.0", + "asana>=0.10,<4.0.0", """.strip() ) @@ -198,3 +210,180 @@ def test_get_provider_details(): assert provider_details.plugins == [] assert provider_details.changelog_path == provider_details.source_provider_package_path / "CHANGELOG.rst" assert not provider_details.removed + + +@pytest.mark.parametrize( + "provider_id, pip_package_name", + [ + ("asana", "apache-airflow-providers-asana"), + ("apache.hdfs", "apache-airflow-providers-apache-hdfs"), + ], +) +def test_get_pip_package_name(provider_id: str, pip_package_name: str): + assert get_pip_package_name(provider_id) == pip_package_name + + +@pytest.mark.parametrize( + "provider_id, wheel_package_name", + [ + ("asana", "apache_airflow_providers_asana"), + ("apache.hdfs", "apache_airflow_providers_apache_hdfs"), + ], +) +def test_get_wheel_package_name(provider_id: str, wheel_package_name: str): + assert get_wheel_package_name(provider_id) == wheel_package_name + + +@pytest.mark.parametrize( + "requirement_string, expected", + [ + pytest.param("apache-airflow", ("apache-airflow", ""), id="no-version-specifier"), + pytest.param( + "apache-airflow <2.7,>=2.5", ("apache-airflow", ">=2.5,<2.7"), id="range-version-specifier" + ), + pytest.param("watchtower~=3.0.1", ("watchtower", "~=3.0.1"), id="compat-version-specifier"), + pytest.param("PyGithub!=1.58", ("PyGithub", "!=1.58"), id="not-equal-version-specifier"), + pytest.param( + "apache-airflow[amazon,google,microsoft.azure,docker]>2.7.0", + ("apache-airflow[amazon,docker,google,microsoft.azure]", ">2.7.0"), + id="package-with-extra", + ), + pytest.param( + 'mysql-connector-python>=8.0.11; platform_machine != "aarch64"', + ("mysql-connector-python", '>=8.0.11; platform_machine != "aarch64"'), + id="version-with-platform-marker", + ), + pytest.param( + "backports.zoneinfo>=0.2.1;python_version<'3.9'", + 
("backports.zoneinfo", '>=0.2.1; python_version < "3.9"'), + id="version-with-python-marker", + ), + pytest.param( + "celery>=5.3.0,<6,!=5.3.3,!=5.3.2", + ("celery", ">=5.3.0,!=5.3.2,!=5.3.3,<6"), + id="complex-version-specifier", + ), + pytest.param( + "apache-airflow; python_version<'3.12' or platform_machine != 'i386'", + ("apache-airflow", '; python_version < "3.12" or platform_machine != "i386"'), + id="no-version-specifier-with-complex-marker", + ), + ], +) +def test_parse_pip_requirements_parse(requirement_string: str, expected: tuple[str, str]): + assert PipRequirements.from_requirement(requirement_string) == expected + + +@pytest.mark.parametrize( + "requirements, markdown, table", + [ + ( + ["apache-airflow>2.5.0", "apache-airflow-providers-http"], + False, + """ +================================= ================== +PIP package Version required +================================= ================== +``apache-airflow`` ``>2.5.0`` +``apache-airflow-providers-http`` +================================= ================== +""", + ), + ( + ["apache-airflow>2.5.0", "apache-airflow-providers-http"], + True, + """ +| PIP package | Version required | +|:--------------------------------|:-------------------| +| `apache-airflow` | `>2.5.0` | +| `apache-airflow-providers-http` | | +""", + ), + ], +) +def test_convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool, table: str): + assert convert_pip_requirements_to_table(requirements, markdown).strip() == table.strip() + + +def test_validate_provider_info_with_schema(): + for provider in get_available_packages(): + validate_provider_info_with_runtime_schema(get_provider_info_dict(provider)) + + +@pytest.mark.parametrize( + "provider_id, min_version", + [ + ("amazon", "2.5.0"), + ("common.io", "2.8.0"), + ], +) +def test_get_min_airflow_version(provider_id: str, min_version: str): + assert get_min_airflow_version(provider_id) == min_version + + +def test_convert_cross_package_dependencies_to_table(): + EXPECTED = """ +| Dependent package | Extra | +|:------------------------------------------------------------------------------------|:--------------| +| [apache-airflow-providers-common-sql](https://airflow.apache.org/docs/common-sql) | `common.sql` | +| [apache-airflow-providers-google](https://airflow.apache.org/docs/google) | `google` | +| [apache-airflow-providers-openlineage](https://airflow.apache.org/docs/openlineage) | `openlineage` | +""" + assert ( + convert_cross_package_dependencies_to_table(get_cross_provider_dependent_packages("trino")).strip() + == EXPECTED.strip() + ) + + +def test_get_provider_info_dict(): + provider_info_dict = get_provider_info_dict("amazon") + assert provider_info_dict["name"] == "Amazon" + assert provider_info_dict["package-name"] == "apache-airflow-providers-amazon" + assert "Amazon" in provider_info_dict["description"] + assert provider_info_dict["suspended"] is False + assert provider_info_dict["filesystems"] == ["airflow.providers.amazon.aws.fs.s3"] + assert len(provider_info_dict["versions"]) > 45 + assert len(provider_info_dict["dependencies"]) > 10 + assert len(provider_info_dict["integrations"]) > 35 + assert len(provider_info_dict["hooks"]) > 30 + assert len(provider_info_dict["triggers"]) > 15 + assert len(provider_info_dict["operators"]) > 20 + assert len(provider_info_dict["sensors"]) > 15 + assert len(provider_info_dict["transfers"]) > 15 + assert len(provider_info_dict["extra-links"]) > 5 + assert len(provider_info_dict["connection-types"]) > 3 + assert 
len(provider_info_dict["notifications"]) > 2 + assert len(provider_info_dict["secrets-backends"]) > 1 + assert len(provider_info_dict["logging"]) > 1 + assert len(provider_info_dict["additional-extras"]) > 3 + assert len(provider_info_dict["config"].keys()) > 1 + assert len(provider_info_dict["executors"]) > 0 + + +def test_provider_jinja_context(): + provider_info = get_provider_info_dict("amazon") + version = provider_info["versions"][0] + context = get_provider_jinja_context( + provider_id="amazon", current_release_version=version, version_suffix="rc1" + ) + expected = { + "PROVIDER_ID": "amazon", + "PACKAGE_PIP_NAME": "apache-airflow-providers-amazon", + "PACKAGE_WHEEL_NAME": "apache_airflow_providers_amazon", + "FULL_PACKAGE_NAME": "airflow.providers.amazon", + "RELEASE": version, + "RELEASE_NO_LEADING_ZEROS": version, + "VERSION_SUFFIX": ".rc1", + "PROVIDER_DESCRIPTION": "Amazon integration (including `Amazon Web Services (AWS) `__).\n", + "CHANGELOG_RELATIVE_PATH": "../../airflow/providers/amazon", + "SUPPORTED_PYTHON_VERSIONS": ["3.8", "3.9", "3.10", "3.11"], + "PLUGINS": [], + "MIN_AIRFLOW_VERSION": "2.5.0", + "PROVIDER_REMOVED": False, + "PROVIDER_INFO": provider_info, + } + + for key, value in expected.items(): + assert context[key] == value + assert context["EXTRAS_REQUIREMENTS"]["google"] == ["apache-airflow-providers-google"] + assert len(context["PIP_REQUIREMENTS"]) > 10 diff --git a/dev/breeze/tests/test_provider_documentation.py b/dev/breeze/tests/test_provider_documentation.py index 764038769d190..c11e73349bf48 100644 --- a/dev/breeze/tests/test_provider_documentation.py +++ b/dev/breeze/tests/test_provider_documentation.py @@ -16,23 +16,18 @@ # under the License. from __future__ import annotations -from typing import Iterable - import pytest from airflow_breeze.prepare_providers.provider_documentation import ( Change, - PipRequirements, _convert_git_changes_to_table, - _convert_pip_requirements_to_table, _find_insertion_index_for_version, _get_change_from_line, _get_changes_classified, _get_git_log_command, - _get_version_tag, _verify_changelog_exists, + get_version_tag, ) -from airflow_breeze.utils.packages import get_pip_package_name, get_wheel_package_name from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT CHANGELOG_CONTENT = """ @@ -94,7 +89,7 @@ def test_find_insertion_index_insert_new_changelog(): ], ) def test_get_version_tag(version: str, provider_id: str, suffix: str, tag: str): - assert _get_version_tag(version, provider_id, suffix) == tag + assert get_version_tag(version, provider_id, suffix) == tag @pytest.mark.parametrize( @@ -122,28 +117,6 @@ def test_get_git_log_command_wrong(): _get_git_log_command(None, "to_commit") -@pytest.mark.parametrize( - "provider_id, pip_package_name", - [ - ("asana", "apache-airflow-providers-asana"), - ("apache.hdfs", "apache-airflow-providers-apache-hdfs"), - ], -) -def test_get_pip_package_name(provider_id: str, pip_package_name: str): - assert get_pip_package_name(provider_id) == pip_package_name - - -@pytest.mark.parametrize( - "provider_id, wheel_package_name", - [ - ("asana", "apache_airflow_providers_asana"), - ("apache.hdfs", "apache_airflow_providers_apache_hdfs"), - ], -) -def test_get_wheel_package_name(provider_id: str, wheel_package_name: str): - assert get_wheel_package_name(provider_id) == wheel_package_name - - @pytest.mark.parametrize( "line, version, change", [ @@ -237,77 +210,6 @@ def test_convert_git_changes_to_table(input: str, output: str, markdown: bool, c assert list_of_changes[2].pr == 
"12346" -@pytest.mark.parametrize( - "requirement_string, expected", - [ - pytest.param("apache-airflow", ("apache-airflow", ""), id="no-version-specifier"), - pytest.param( - "apache-airflow <2.7,>=2.5", ("apache-airflow", ">=2.5,<2.7"), id="range-version-specifier" - ), - pytest.param("watchtower~=3.0.1", ("watchtower", "~=3.0.1"), id="compat-version-specifier"), - pytest.param("PyGithub!=1.58", ("PyGithub", "!=1.58"), id="not-equal-version-specifier"), - pytest.param( - "apache-airflow[amazon,google,microsoft.azure,docker]>2.7.0", - ("apache-airflow[amazon,docker,google,microsoft.azure]", ">2.7.0"), - id="package-with-extra", - ), - pytest.param( - 'mysql-connector-python>=8.0.11; platform_machine != "aarch64"', - ("mysql-connector-python", '>=8.0.11; platform_machine != "aarch64"'), - id="version-with-platform-marker", - ), - pytest.param( - "backports.zoneinfo>=0.2.1;python_version<'3.9'", - ("backports.zoneinfo", '>=0.2.1; python_version < "3.9"'), - id="version-with-python-marker", - ), - pytest.param( - "celery>=5.3.0,<6,!=5.3.3,!=5.3.2", - ("celery", ">=5.3.0,!=5.3.2,!=5.3.3,<6"), - id="complex-version-specifier", - ), - pytest.param( - "apache-airflow; python_version<'3.12' or platform_machine != 'i386'", - ("apache-airflow", '; python_version < "3.12" or platform_machine != "i386"'), - id="no-version-specifier-with-complex-marker", - ), - ], -) -def test_parse_pip_requirements_parse(requirement_string, expected): - assert PipRequirements.from_requirement(requirement_string) == expected - - -@pytest.mark.parametrize( - "requirements, markdown, table", - [ - ( - ["apache-airflow>2.5.0", "apache-airflow-providers-http"], - False, - """ -================================= ================== -PIP package Version required -================================= ================== -``apache-airflow`` ``>2.5.0`` -``apache-airflow-providers-http`` -================================= ================== -""", - ), - ( - ["apache-airflow>2.5.0", "apache-airflow-providers-http"], - True, - """ -| PIP package | Version required | -|:--------------------------------|:-------------------| -| `apache-airflow` | `>2.5.0` | -| `apache-airflow-providers-http` | | -""", - ), - ], -) -def test_convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool, table: str): - assert _convert_pip_requirements_to_table(requirements, markdown).strip() == table.strip() - - def test_verify_changelog_exists(): assert ( _verify_changelog_exists("asana") diff --git a/dev/provider_packages/MANIFEST_TEMPLATE.in.jinja2 b/dev/provider_packages/MANIFEST_TEMPLATE.in.jinja2 deleted file mode 100644 index 83013eefb4e7f..0000000000000 --- a/dev/provider_packages/MANIFEST_TEMPLATE.in.jinja2 +++ /dev/null @@ -1,36 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE! 
THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE -# OVERWRITTEN WHEN PREPARING PACKAGES. - -# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE -# `MANIFEST_TEMPLATE.py.jinja2` IN the `provider_packages` DIRECTORY - - -{% if PROVIDER_PACKAGE_ID == 'amazon' %} -include airflow/providers/amazon/aws/hooks/batch_waiters.json -include airflow/providers/amazon/aws/waiters/*.json -{% elif PROVIDER_PACKAGE_ID == 'cncf.kubernetes' %} -include airflow/providers/cncf/kubernetes/*.jinja2 -{% endif %} - -include NOTICE -include LICENSE -include CHANGELOG.rst -global-exclude __pycache__ *.pyc diff --git a/dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 b/dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 deleted file mode 100644 index 13e088aa94f03..0000000000000 --- a/dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 +++ /dev/null @@ -1,109 +0,0 @@ -{# - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/provider_packages` DIRECTORY - --#} - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - -Package ``{{ PACKAGE_PIP_NAME }}`` - -Release: ``{{ RELEASE }}{{ VERSION_SUFFIX }}`` - - -{{ PROVIDER_DESCRIPTION | safe }} - -Provider package ----------------- - -This is a provider package for ``{{PROVIDER_PACKAGE_ID}}`` provider. All classes for this provider package -are in ``{{FULL_PACKAGE_NAME}}`` python package. - -You can find package information and changelog for the provider -in the `documentation `_. - -{%- if PROVIDER_REMOVED %} - - .. warning:: - - This provider is not maintained anymore by the community. It has been removed and is not going to be - updated anymore. The removal was done according to the process described in - `Removing community providers `_ - - Feel free to contact Airflow Development Mailing List if you have any questions. 
- -{%- endif %} - -Installation ------------- - -You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below -for the minimum Airflow version supported) via -``pip install {{PACKAGE_PIP_NAME}}`` - -The package supports the following python versions: {{ ",".join(SUPPORTED_PYTHON_VERSIONS) }} - -{%- if PIP_REQUIREMENTS %} - -Requirements ------------- - -{{ PIP_REQUIREMENTS_TABLE_RST | safe }} - -{%- endif %} -{%- if CROSS_PROVIDERS_DEPENDENCIES %} - -Cross provider package dependencies ------------------------------------ - -Those are dependencies that might be needed in order to use all the features of the package. -You need to install the specified provider packages in order to use them. - -You can install such cross-provider dependencies when installing from PyPI. For example: - -.. code-block:: bash - - pip install {{ PACKAGE_PIP_NAME }}[{{ CROSS_PROVIDERS_DEPENDENCIES[0] }}] - - -{{ CROSS_PROVIDERS_DEPENDENCIES_TABLE_RST | safe }} - -{%- endif %} - -The changelog for the provider package can be found in the -`changelog `_. diff --git a/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2 b/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2 deleted file mode 100644 index c3fdba076b568..0000000000000 --- a/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2 +++ /dev/null @@ -1,84 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE -# OVERWRITTEN WHEN PREPARING PACKAGES. -# -# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE -# `SETUP_TEMPLATE.cfg.jinja2` IN the `dev/provider_packages` DIRECTORY - - -[metadata] -name = {{ PACKAGE_PIP_NAME }} -summary = {{ PROVIDER_TYPE }} for Apache Airflow. 
Implements {{ PACKAGE_PIP_NAME }} package -description = {{ PROVIDER_TYPE }} package {{ PACKAGE_PIP_NAME }} for Apache Airflow -long_description = file: README.rst -long_description_content_type = text/x-rst -author = Apache Software Foundation -author_email = dev@airflow.apache.org -url = https://airflow.apache.org/ -download_url = https://archive.apache.org/dist/airflow/{{ PROVIDERS_FOLDER }} -license = Apache License 2.0 -license_files = - LICENSE - NOTICE -classifiers = - Development Status :: 5 - Production/Stable - Environment :: Console - Environment :: Web Environment - Intended Audience :: Developers - Intended Audience :: System Administrators - Framework :: Apache Airflow - Framework :: Apache Airflow :: Provider - License :: OSI Approved :: Apache Software License -{%- for python_version in SUPPORTED_PYTHON_VERSIONS %} - Programming Language :: Python :: {{ python_version }} -{%- endfor %} - Topic :: System :: Monitoring -project_urls= - Documentation=https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}/ - Changelog=https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}/changelog.html - Bug Tracker=https://github.com/apache/airflow/issues - Source Code=https://github.com/apache/airflow - Slack Chat=https://s.apache.org/airflow-slack - Twitter=https://twitter.com/ApacheAirflow - YouTube=https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/ - -[bdist_wheel] -python_tag=py3 - -[options] -zip_safe = False -include_package_data = True -python_requires = ~=3.8 -packages = find: -setup_requires = {{ SETUP_REQUIREMENTS }} -install_requires = {{ INSTALL_REQUIREMENTS }} - -[options.entry_points] -apache_airflow_provider= - provider_info=airflow.providers.{{ PROVIDER_PACKAGE_ID }}.get_provider_info:get_provider_info -{%- if PLUGINS %} -airflow.plugins= -{%- for plugin in PLUGINS %} - {{ plugin.name }}={{ plugin.package_name }}:{{ plugin.class_name }} -{%- endfor %} -{%- endif %} - - -[files] -packages = airflow.providers.{{ PROVIDER_PACKAGE_ID }} diff --git a/dev/provider_packages/SETUP_TEMPLATE.py.jinja2 b/dev/provider_packages/SETUP_TEMPLATE.py.jinja2 deleted file mode 100644 index 4f4726532124c..0000000000000 --- a/dev/provider_packages/SETUP_TEMPLATE.py.jinja2 +++ /dev/null @@ -1,47 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE -# OVERWRITTEN WHEN PREPARING PACKAGES. 
-# -# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE -# `SETUP_TEMPLATE.py.jinja2` IN the `dev/provider_packages` DIRECTORY - -"""Setup.py for the {{ PACKAGE_PIP_NAME }} package.""" - -from setuptools import find_namespace_packages, setup - -version = '{{ RELEASE_NO_LEADING_ZEROS }}' - - -def do_setup(): - """Perform the package {{ PACKAGE_PIP_NAME }} setup.""" - setup( - version=version, - extras_require={{ EXTRAS_REQUIREMENTS }}, - packages=find_namespace_packages( - include=['airflow.providers.{{ PROVIDER_PACKAGE_ID }}', - 'airflow.providers.{{ PROVIDER_PACKAGE_ID }}.*', - 'airflow.providers.{{ PROVIDER_PACKAGE_ID }}_vendor', - 'airflow.providers.{{ PROVIDER_PACKAGE_ID }}_vendor.*'], - ), - ) - - -if __name__ == "__main__": - do_setup() diff --git a/dev/provider_packages/prepare_provider_packages.py b/dev/provider_packages/prepare_provider_packages.py deleted file mode 100755 index f018c30c21a73..0000000000000 --- a/dev/provider_packages/prepare_provider_packages.py +++ /dev/null @@ -1,1277 +0,0 @@ -#!/usr/bin/env python3 - -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Setup.py for the Provider packages of Airflow project.""" -from __future__ import annotations - -import glob -import json -import logging -import os -import re -import shutil -import subprocess -import sys -import tempfile -import textwrap -from collections import namedtuple -from contextlib import contextmanager -from datetime import datetime, timedelta -from enum import Enum -from functools import lru_cache -from pathlib import Path -from shutil import copyfile -from typing import Any, Generator, Iterable, NamedTuple - -import jinja2 -import jsonschema -import rich_click as click -import semver as semver -from black import Mode, TargetVersion, format_str, parse_pyproject_toml -from packaging.version import Version -from rich.console import Console -from rich.syntax import Syntax -from yaml import safe_load - -ALL_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] - -MIN_AIRFLOW_VERSION = "2.5.0" - -INITIAL_CHANGELOG_CONTENT = """ - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. 
See the License for the - specific language governing permissions and limitations - under the License. - -.. NOTE TO CONTRIBUTORS: - Please, only add notes to the Changelog just below the "Changelog" header when there - are some breaking changes and you want to add an explanation to the users on how they are supposed - to deal with them. The changelog is updated and maintained semi-automatically by release manager. - -``{{ package_name }}`` - -Changelog ---------- - -1.0.0 -..... - -Initial version of the provider. -""" - -HTTPS_REMOTE = "apache-https-for-providers" -HEAD_OF_HTTPS_REMOTE = f"{HTTPS_REMOTE}" - -MY_DIR_PATH = Path(__file__).parent -AIRFLOW_SOURCES_ROOT_PATH = MY_DIR_PATH.parents[1] -AIRFLOW_PATH = AIRFLOW_SOURCES_ROOT_PATH / "airflow" -DIST_PATH = AIRFLOW_SOURCES_ROOT_PATH / "dist" -PROVIDERS_PATH = AIRFLOW_PATH / "providers" -DOCUMENTATION_PATH = AIRFLOW_SOURCES_ROOT_PATH / "docs" - -DEPENDENCIES_JSON_FILE_PATH = AIRFLOW_SOURCES_ROOT_PATH / "generated" / "provider_dependencies.json" - -TARGET_PROVIDER_PACKAGES_PATH = AIRFLOW_SOURCES_ROOT_PATH / "provider_packages" -GENERATED_AIRFLOW_PATH = TARGET_PROVIDER_PACKAGES_PATH / "airflow" -GENERATED_PROVIDERS_PATH = GENERATED_AIRFLOW_PATH / "providers" - -PROVIDER_RUNTIME_DATA_SCHEMA_PATH = AIRFLOW_SOURCES_ROOT_PATH / "airflow" / "provider_info.schema.json" - -CROSS_PROVIDERS_DEPS = "cross-providers-deps" -DEPS = "deps" - -sys.path.insert(0, str(AIRFLOW_SOURCES_ROOT_PATH)) - - -ALL_DEPENDENCIES = json.loads(DEPENDENCIES_JSON_FILE_PATH.read_text()) - -# those imports need to come after the above sys.path.insert to make sure that Airflow -# sources are importable without having to add the airflow sources to the PYTHONPATH before -# running the script -from setup import PREINSTALLED_PROVIDERS, ALL_PROVIDERS # type: ignore[attr-defined] # isort:skip # noqa - -# Note - we do not test protocols as they are not really part of the official API of -# Apache Airflow - -logger = logging.getLogger(__name__) - -PY3 = sys.version_info[0] == 3 - -console = Console(width=400, color_system="standard") - - -class PluginInfo(NamedTuple): - name: str - package_name: str - class_name: str - - -class ProviderPackageDetails(NamedTuple): - provider_package_id: str - full_package_name: str - pypi_package_name: str - source_provider_package_path: str - documentation_provider_package_path: Path - provider_description: str - versions: list[str] - excluded_python_versions: list[str] - plugins: list[PluginInfo] - removed: bool - - -class EntityType(Enum): - Operators = "Operators" - Transfers = "Transfers" - Sensors = "Sensors" - Hooks = "Hooks" - Secrets = "Secrets" - - -@click.group(context_settings={"help_option_names": ["-h", "--help"], "max_content_width": 500}) -def cli(): - ... - - -option_skip_tag_check = click.option( - "--skip-tag-check/--no-skip-tag-check", - default=False, - is_flag=True, - help="Skip checking if the tag already exists in the remote repository", -) - -option_git_update = click.option( - "--git-update/--no-git-update", - default=True, - is_flag=True, - help=f"If the git remote {HTTPS_REMOTE} already exists, don't try to update it", -) - -option_package_format = click.option( - "--package-format", - type=click.Choice(["wheel", "sdist", "both"]), - help="Format of packages.", - default="wheel", - show_default=True, - envvar="PACKAGE_FORMAT", -) - -option_version_suffix = click.option( - "--version-suffix", - metavar="suffix", - help=textwrap.dedent( - """ - adds version suffix to version of the packages. 
- only useful when generating rc candidates for pypi.""" - ), -) -option_verbose = click.option( - "--verbose", - is_flag=True, - help="Print verbose information about performed steps", -) -argument_package_id = click.argument("package_id") - - -@contextmanager -def with_group(title: str) -> Generator[None, None, None]: - """ - If used in GitHub Action, creates an expandable group in the GitHub Action log. - Otherwise, display simple text groups. - - For more information, see: - https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-commands-for-github-actions#grouping-log-lines - """ - if os.environ.get("GITHUB_ACTIONS", "false") != "true": - console.print("#" * 10 + " [bright_blue]" + title + "[/] " + "#" * 10) - yield - return - console.print(f"::group::[bright_blue]{title}[/]") - yield - console.print("::endgroup::") - - -def get_source_airflow_folder() -> str: - """ - Returns source directory for whole airflow (from the main airflow project). - - :return: the folder path - """ - return os.path.abspath(AIRFLOW_SOURCES_ROOT_PATH) - - -def get_source_providers_folder() -> str: - """ - Returns source directory for providers (from the main airflow project). - - :return: the folder path - """ - return os.path.join(get_source_airflow_folder(), "airflow", "providers") - - -def get_target_folder() -> str: - """ - Returns target directory for providers (in the provider_packages folder) - - :return: the folder path - """ - return os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "provider_packages")) - - -def get_target_providers_folder() -> str: - """ - Returns target directory for providers (in the provider_packages folder) - - :return: the folder path - """ - return os.path.abspath(os.path.join(get_target_folder(), "airflow", "providers")) - - -def get_target_providers_package_folder(provider_package_id: str) -> str: - """ - Returns target package folder based on package_id - - :return: the folder path - """ - return os.path.join(get_target_providers_folder(), *provider_package_id.split(".")) - - -def get_pip_package_name(provider_package_id: str) -> str: - """ - Returns PIP package name for the package id. - - :param provider_package_id: id of the package - :return: the name of pip package - """ - return "apache-airflow-providers-" + provider_package_id.replace(".", "-") - - -def get_wheel_package_name(provider_package_id: str) -> str: - """ - Returns Wheel package name for the package id. - - :param provider_package_id: id of the package - :return: the name of pip package - """ - return "apache_airflow_providers_" + provider_package_id.replace(".", "_") - - -def get_install_requirements(provider_package_id: str, version_suffix: str) -> str: - """ - Returns install requirements for the package. - - :param provider_package_id: id of the provider package - :param version_suffix: optional version suffix for packages - - :return: install requirements of the package - """ - - def apply_version_suffix(install_clause: str) -> str: - if install_clause.startswith("apache-airflow") and ">=" in install_clause and version_suffix != "": - # This is workaround for `pip` way of handling `--pre` installation switch. It apparently does - # not modify the meaning of `install_requires` to include also pre-releases, so we need to - # modify our internal provider and airflow package version references to include all pre-releases - # including all development releases. 
When you specify dependency as >= X.Y.Z, and you - # have packages X.Y.Zdev0 or X.Y.Zrc1 in a local file, such package is not considered - # as fulfilling the requirement even if `--pre` switch is used. - return install_clause + ".dev0" - return install_clause - - if provider_package_id in get_removed_provider_ids(): - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - dependencies = provider_info["dependencies"] - else: - dependencies = ALL_DEPENDENCIES[provider_package_id][DEPS] - install_requires = [apply_version_suffix(clause) for clause in dependencies] - return "".join(f"\n {ir}" for ir in install_requires) - - -def get_setup_requirements() -> str: - """ - Returns setup requirements (common for all package for now). - :return: setup requirements - """ - return """ - setuptools - wheel -""" - - -def get_package_extras(provider_package_id: str) -> dict[str, list[str]]: - """ - Finds extras for the package specified. - - :param provider_package_id: id of the package - """ - if provider_package_id == "providers": - return {} - if provider_package_id in get_removed_provider_ids(): - return {} - extras_dict: dict[str, list[str]] = { - module: [get_pip_package_name(module)] - for module in ALL_DEPENDENCIES[provider_package_id][CROSS_PROVIDERS_DEPS] - } - provider_yaml_dict = get_provider_yaml(provider_package_id) - additional_extras = provider_yaml_dict.get("additional-extras") - if additional_extras: - for entry in additional_extras: - name = entry["name"] - dependencies = entry["dependencies"] - if name in extras_dict: - # remove non-versioned dependencies if versioned ones are coming - existing_dependencies = set(extras_dict[name]) - for new_dependency in dependencies: - for dependency in existing_dependencies: - # remove extra if exists as non-versioned one - if new_dependency.startswith(dependency): - extras_dict[name].remove(dependency) - break - extras_dict[name].append(new_dependency) - else: - extras_dict[name] = dependencies - return extras_dict - - -def render_template( - template_name: str, - context: dict[str, Any], - extension: str, - autoescape: bool = True, - keep_trailing_newline: bool = False, -) -> str: - """ - Renders template based on its name. Reads the template from _TEMPLATE.md.jinja2 in current dir. 
- :param template_name: name of the template to use - :param context: Jinja2 context - :param extension: Target file extension - :param autoescape: Whether to autoescape HTML - :param keep_trailing_newline: Whether to keep the newline in rendered output - :return: rendered template - """ - import jinja2 - - template_loader = jinja2.FileSystemLoader(searchpath=MY_DIR_PATH) - template_env = jinja2.Environment( - loader=template_loader, - undefined=jinja2.StrictUndefined, - autoescape=autoescape, - keep_trailing_newline=keep_trailing_newline, - ) - template = template_env.get_template(f"{template_name}_TEMPLATE{extension}.jinja2") - content: str = template.render(context) - return content - - -PR_PATTERN = re.compile(r".*\(#(\d+)\)") - - -class Change(NamedTuple): - """Stores details about commits""" - - full_hash: str - short_hash: str - date: str - version: str - message: str - message_without_backticks: str - pr: str | None - - -def get_change_from_line(line: str, version: str): - split_line = line.split(" ", maxsplit=3) - message = split_line[3] - pr = None - pr_match = PR_PATTERN.match(message) - if pr_match: - pr = pr_match.group(1) - return Change( - full_hash=split_line[0], - short_hash=split_line[1], - date=split_line[2], - version=version, - message=message, - message_without_backticks=message.replace("`", "'").replace("&39;", "'"), - pr=pr, - ) - - -def convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool = True) -> str: - """ - Converts PIP requirement list to a Markdown table. - :param requirements: requirements list - :param markdown: if True, Markdown format is used else rst - :return: formatted table - """ - from tabulate import tabulate - - headers = ["PIP package", "Version required"] - table_data = [] - for dependency in requirements: - found = re.match(r"(^[^<=>~]*)([^<=>~]?.*)$", dependency) - if found: - package = found.group(1) - version_required = found.group(2) - if version_required != "": - version_required = f"`{version_required}`" if markdown else f"``{version_required}``" - table_data.append((f"`{package}`" if markdown else f"``{package}``", version_required)) - else: - table_data.append((dependency, "")) - return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") - - -def convert_cross_package_dependencies_to_table( - cross_package_dependencies: list[str], - markdown: bool = True, -) -> str: - """ - Converts cross-package dependencies to a Markdown table - :param cross_package_dependencies: list of cross-package dependencies - :param markdown: if True, Markdown format is used else rst - :return: formatted table - """ - from tabulate import tabulate - - headers = ["Dependent package", "Extra"] - table_data = [] - prefix = "apache-airflow-providers-" - base_url = "https://airflow.apache.org/docs/" - for dependency in cross_package_dependencies: - pip_package_name = f"{prefix}{dependency.replace('.','-')}" - url_suffix = f"{dependency.replace('.','-')}" - if markdown: - url = f"[{pip_package_name}]({base_url}{url_suffix})" - else: - url = f"`{pip_package_name} <{base_url}{prefix}{url_suffix}>`_" - table_data.append((url, f"`{dependency}`" if markdown else f"``{dependency}``")) - return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") - - -LICENCE = """ -""" - -LICENCE_RST = """ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. 
See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -""" - -""" -Keeps information about historical releases. -""" -ReleaseInfo = namedtuple( - "ReleaseInfo", "release_version release_version_no_leading_zeros last_commit_hash content file_name" -) - - -def strip_leading_zeros(version: str) -> str: - """ - Strips leading zeros from version number. - - This converts 1974.04.03 to 1974.4.3 as the format with leading month and day zeros is not accepted - by PIP versioning. - - :param version: version number in CALVER format (potentially with leading 0s in date and month) - :return: string with leading 0s after dot replaced. - """ - return ".".join(str(int(i)) for i in version.split(".")) - - -def get_previous_release_info( - previous_release_version: str | None, past_releases: list[ReleaseInfo], current_release_version: str -) -> str | None: - """Find previous release. - - In case we are re-running current release, we assume that last release was - the previous one. This is needed so that we can generate list of changes - since the previous release. - - :param previous_release_version: known last release version - :param past_releases: list of past releases - :param current_release_version: release that we are working on currently - """ - previous_release = None - if previous_release_version == current_release_version: - # Re-running for current release - use previous release as base for git log - if len(past_releases) > 1: - previous_release = past_releases[1].last_commit_hash - else: - previous_release = past_releases[0].last_commit_hash if past_releases else None - return previous_release - - -def check_if_release_version_ok( - past_releases: list[ReleaseInfo], - current_release_version: str, -) -> tuple[str, str | None]: - """Check if the release version passed is not later than the last release version. - - :param past_releases: all past releases (if there are any) - :param current_release_version: release version to check - :return: Tuple of current/previous_release (previous might be None if there are no releases) - """ - previous_release_version = past_releases[0].release_version if past_releases else None - if current_release_version == "": - if previous_release_version: - current_release_version = previous_release_version - else: - current_release_version = (datetime.today() + timedelta(days=5)).strftime("%Y.%m.%d") - if previous_release_version: - if Version(current_release_version) < Version(previous_release_version): - console.print( - f"[red]The release {current_release_version} must be not less than " - f"{previous_release_version} - last release for the package[/]" - ) - raise Exception("Bad release version") - return current_release_version, previous_release_version - - -def get_cross_provider_dependent_packages(provider_package_id: str) -> list[str]: - """Returns cross-provider dependencies for the package. 
- - :param provider_package_id: package id - :return: list of cross-provider dependencies - """ - if provider_package_id in get_removed_provider_ids(): - return [] - return ALL_DEPENDENCIES[provider_package_id][CROSS_PROVIDERS_DEPS] - - -def make_current_directory_safe(verbose: bool): - """Makes current directory safe for Git. - - New git checks if git ownership for the folder is not manipulated with. We - are running this command only inside the container where the directory is - mounted from "regular" user to "root" user which is used inside the - container, so this is quite ok to assume the directory it is used is safe. - - It's also ok to leave it as safe - it is a global option inside the - container so it will disappear when we exit. - - :param verbose: whether to print commands being executed - """ - safe_dir_remove_command = ["git", "config", "--global", "--unset-all", "safe.directory"] - if verbose: - console.print(f"Running command: '{' '.join(safe_dir_remove_command)}'") - # we ignore result of this call - subprocess.call(safe_dir_remove_command) - safe_dir_add_command = ["git", "config", "--global", "--add", "safe.directory", "/opt/airflow"] - if verbose: - console.print(f"Running command: '{' '.join(safe_dir_add_command)}'") - subprocess.check_call(safe_dir_add_command) - - -def get_git_tag_check_command(tag: str) -> list[str]: - """Get git command to check if tag exits. - - :param tag: Tag to check - :return: git command to run - """ - return [ - "git", - "rev-parse", - tag, - ] - - -def get_source_package_path(provider_package_id: str) -> str: - """Retrieves source package path from package id. - - :param provider_package_id: id of the package - :return: path of the providers folder - """ - return os.path.join(PROVIDERS_PATH, *provider_package_id.split(".")) - - -def get_documentation_package_path(provider_package_id: str) -> Path: - """Retrieves documentation package path from package id. - - :param provider_package_id: id of the package - :return: path of the documentation folder - """ - return DOCUMENTATION_PATH / f"apache-airflow-providers-{provider_package_id.replace('.','-')}" - - -def get_generated_package_path(provider_package_id: str) -> str: - """Retrieves generated package path from package id. - - :param provider_package_id: id of the package - :return: path of the providers folder - """ - provider_package_path = os.path.join(GENERATED_PROVIDERS_PATH, *provider_package_id.split(".")) - return provider_package_path - - -def validate_provider_info_with_runtime_schema(provider_info: dict[str, Any]) -> None: - """Validates provider info against the runtime schema. - - This way we check if the provider info in the packages is future-compatible. - The Runtime Schema should only change when there is a major version change. - - :param provider_info: provider info to validate - """ - - with open(PROVIDER_RUNTIME_DATA_SCHEMA_PATH) as schema_file: - schema = json.load(schema_file) - try: - jsonschema.validate(provider_info, schema=schema) - except jsonschema.ValidationError as ex: - console.print("[red]Provider info not validated against runtime schema[/]") - raise Exception( - "Error when validating schema. The schema must be compatible with " - "airflow/provider_info.schema.json.", - ex, - ) - - -def get_provider_yaml(provider_package_id: str) -> dict[str, Any]: - """Retrieves provider info from the provider YAML file. - - The provider yaml file contains more information than provider_info that is - used at runtime. 
This method converts the full provider yaml file into - stripped-down provider info and validates it against deprecated 2.0.0 schema - and runtime schema. - - :param provider_package_id: package id to retrieve provider.yaml from - :return: provider_info dictionary - """ - provider_yaml_file_name = os.path.join(get_source_package_path(provider_package_id), "provider.yaml") - if not os.path.exists(provider_yaml_file_name): - raise Exception(f"The provider.yaml file is missing: {provider_yaml_file_name}") - with open(provider_yaml_file_name) as provider_file: - provider_yaml_dict = safe_load(provider_file) - return provider_yaml_dict - - -def get_provider_info_from_provider_yaml(provider_package_id: str) -> dict[str, Any]: - """Retrieves provider info from the provider yaml file. - - :param provider_package_id: package id to retrieve provider.yaml from - :return: provider_info dictionary - """ - provider_yaml_dict = get_provider_yaml(provider_package_id=provider_package_id) - validate_provider_info_with_runtime_schema(provider_yaml_dict) - return provider_yaml_dict - - -def get_version_tag(version: str, provider_package_id: str, version_suffix: str = ""): - if version_suffix is None: - version_suffix = "" - return f"providers-{provider_package_id.replace('.','-')}/{version}{version_suffix}" - - -def get_provider_details(provider_package_id: str) -> ProviderPackageDetails: - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - plugins: list[PluginInfo] = [] - if "plugins" in provider_info: - for plugin in provider_info["plugins"]: - package_name, class_name = plugin["plugin-class"].rsplit(".", maxsplit=1) - plugins.append( - PluginInfo( - name=plugin["name"], - package_name=package_name, - class_name=class_name, - ) - ) - return ProviderPackageDetails( - provider_package_id=provider_package_id, - full_package_name=f"airflow.providers.{provider_package_id}", - pypi_package_name=f"apache-airflow-providers-{provider_package_id.replace('.', '-')}", - source_provider_package_path=get_source_package_path(provider_package_id), - documentation_provider_package_path=get_documentation_package_path(provider_package_id), - provider_description=provider_info["description"], - versions=provider_info["versions"], - excluded_python_versions=provider_info.get("excluded-python-versions") or [], - plugins=plugins, - removed=provider_info.get("removed", False), - ) - - -def get_provider_requirements(provider_package_id: str) -> list[str]: - provider_yaml = get_provider_yaml(provider_package_id) - return provider_yaml["dependencies"] - - -def get_provider_jinja_context( - provider_info: dict[str, Any], - provider_details: ProviderPackageDetails, - current_release_version: str, - version_suffix: str, -): - verify_provider_package(provider_details.provider_package_id) - changelog_path = verify_changelog_exists(provider_details.provider_package_id) - cross_providers_dependencies = get_cross_provider_dependent_packages( - provider_package_id=provider_details.provider_package_id - ) - release_version_no_leading_zeros = strip_leading_zeros(current_release_version) - pip_requirements_table = convert_pip_requirements_to_table( - get_provider_requirements(provider_details.provider_package_id) - ) - pip_requirements_table_rst = convert_pip_requirements_to_table( - get_provider_requirements(provider_details.provider_package_id), markdown=False - ) - cross_providers_dependencies_table_rst = convert_cross_package_dependencies_to_table( - cross_providers_dependencies, markdown=False - ) - with 
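Two of the helpers removed above are one-liners worth keeping in mind when reading the new Breeze code: splitting a ``plugin-class`` dotted path into module and class name, and composing the provider release tag. A sketch (the ``google`` values are examples only)::

    from __future__ import annotations


    def split_plugin_class(plugin_class: str) -> tuple[str, str]:
        # "airflow.providers.google.common.GooglePlugin" -> ("airflow.providers.google.common", "GooglePlugin")
        package_name, class_name = plugin_class.rsplit(".", maxsplit=1)
        return package_name, class_name


    def get_version_tag(version: str, provider_id: str, suffix: str = "") -> str:
        # ("10.1.0", "google", "rc1") -> "providers-google/10.1.0rc1"
        return f"providers-{provider_id.replace('.', '-')}/{version}{suffix or ''}"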
open(changelog_path) as changelog_file: - changelog = changelog_file.read() - supported_python_versions = [ - p for p in ALL_PYTHON_VERSIONS if p not in provider_details.excluded_python_versions - ] - python_requires = "~=3.8" - for p in provider_details.excluded_python_versions: - python_requires += f", !={p}" - min_airflow_version = MIN_AIRFLOW_VERSION - for dependency in provider_info["dependencies"]: - if dependency.startswith("apache-airflow>="): - current_min_airflow_version = dependency.split(">=")[1] - if Version(current_min_airflow_version) > Version(min_airflow_version): - min_airflow_version = current_min_airflow_version - context: dict[str, Any] = { - "ENTITY_TYPES": list(EntityType), - "README_FILE": "README.rst", - "PROVIDER_PACKAGE_ID": provider_details.provider_package_id, - "PACKAGE_PIP_NAME": get_pip_package_name(provider_details.provider_package_id), - "PACKAGE_WHEEL_NAME": get_wheel_package_name(provider_details.provider_package_id), - "FULL_PACKAGE_NAME": provider_details.full_package_name, - "PROVIDER_PATH": provider_details.full_package_name.replace(".", "/"), - "RELEASE": current_release_version, - "RELEASE_NO_LEADING_ZEROS": release_version_no_leading_zeros, - "VERSION_SUFFIX": version_suffix or "", - "CROSS_PROVIDERS_DEPENDENCIES": cross_providers_dependencies, - "PIP_REQUIREMENTS": get_provider_requirements(provider_details.provider_package_id), - "PROVIDER_TYPE": "Provider", - "PROVIDERS_FOLDER": "providers", - "PROVIDER_DESCRIPTION": provider_details.provider_description, - "INSTALL_REQUIREMENTS": get_install_requirements( - provider_package_id=provider_details.provider_package_id, version_suffix=version_suffix - ), - "SETUP_REQUIREMENTS": get_setup_requirements(), - "EXTRAS_REQUIREMENTS": get_package_extras(provider_package_id=provider_details.provider_package_id), - "CROSS_PROVIDERS_DEPENDENCIES_TABLE_RST": cross_providers_dependencies_table_rst, - "PIP_REQUIREMENTS_TABLE": pip_requirements_table, - "PIP_REQUIREMENTS_TABLE_RST": pip_requirements_table_rst, - "PROVIDER_INFO": provider_info, - "CHANGELOG_RELATIVE_PATH": os.path.relpath( - provider_details.source_provider_package_path, - provider_details.documentation_provider_package_path, - ), - "CHANGELOG": changelog, - "SUPPORTED_PYTHON_VERSIONS": supported_python_versions, - "PYTHON_REQUIRES": python_requires, - "PLUGINS": provider_details.plugins, - "MIN_AIRFLOW_VERSION": min_airflow_version, - "PREINSTALLED_PROVIDER": provider_details.provider_package_id in PREINSTALLED_PROVIDERS, - "PROVIDER_REMOVED": provider_details.removed, - } - return context - - -def prepare_readme_file(context): - readme_content = LICENCE_RST + render_template( - template_name="PROVIDER_README", context=context, extension=".rst" - ) - readme_file_path = os.path.join(TARGET_PROVIDER_PACKAGES_PATH, "README.rst") - with open(readme_file_path, "w") as readme_file: - readme_file.write(readme_content) - - -def update_setup_files( - provider_package_id: str, - version_suffix: str, -): - """Updates generated setup.cfg/setup.py/manifest.in/provider_info for packages. 
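The context construction above derives two values from provider metadata: the ``python_requires`` specifier (base ``~=3.8`` plus one exclusion per unsupported interpreter) and the minimum Airflow version taken from any ``apache-airflow>=`` dependency pin. A minimal sketch (the default floor shown is illustrative, not the real constant)::

    from __future__ import annotations

    from packaging.version import Version

    DEFAULT_MIN_AIRFLOW_VERSION = "2.5.0"  # illustrative default


    def python_requires(excluded_python_versions: list[str]) -> str:
        # e.g. ["3.12"] -> "~=3.8, !=3.12"
        return "~=3.8" + "".join(f", !={p}" for p in excluded_python_versions)


    def min_airflow_version(dependencies: list[str]) -> str:
        result = DEFAULT_MIN_AIRFLOW_VERSION
        for dep in dependencies:
            if dep.startswith("apache-airflow>="):
                candidate = dep.split(">=", 1)[1]
                if Version(candidate) > Version(result):
                    result = candidate
        return result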
- - :param provider_package_id: id of the package - :param version_suffix: version suffix corresponding to the version in the code - :returns False if the package should be skipped, True if everything generated properly - """ - verify_provider_package(provider_package_id) - provider_details = get_provider_details(provider_package_id) - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - current_release_version = provider_details.versions[0] - jinja_context = get_provider_jinja_context( - provider_info=provider_info, - provider_details=provider_details, - current_release_version=current_release_version, - version_suffix=version_suffix, - ) - console.print() - console.print(f"Generating setup files for {provider_package_id}") - console.print() - prepare_setup_py_file(jinja_context) - prepare_setup_cfg_file(jinja_context) - prepare_get_provider_info_py_file(jinja_context, provider_package_id) - prepare_manifest_in_file(jinja_context) - prepare_readme_file(jinja_context) - return True - - -def replace_content(file_path, old_text, new_text, provider_package_id): - if new_text != old_text: - _, temp_file_path = tempfile.mkstemp() - try: - if os.path.isfile(file_path): - copyfile(file_path, temp_file_path) - with open(file_path, "w") as readme_file: - readme_file.write(new_text) - console.print() - console.print(f"Generated {file_path} file for the {provider_package_id} provider") - console.print() - if old_text != "": - subprocess.call(["diff", "--color=always", temp_file_path, file_path]) - finally: - os.remove(temp_file_path) - - -AUTOMATICALLY_GENERATED_MARKER = "AUTOMATICALLY GENERATED" -AUTOMATICALLY_GENERATED_CONTENT = ( - f".. THE REMAINDER OF THE FILE IS {AUTOMATICALLY_GENERATED_MARKER}. " - f"IT WILL BE OVERWRITTEN AT RELEASE TIME!" -) - - -# Taken from pygrep hooks we are using in pre-commit -# https://github.com/pre-commit/pygrep-hooks/blob/main/.pre-commit-hooks.yaml -BACKTICKS_CHECK = re.compile(r"^(?! ).*(^| )`[^`]+`([^_]|$)", re.MULTILINE) - - -def _update_file( - context: dict[str, Any], - template_name: str, - extension: str, - file_name: str, - provider_package_id: str, - target_path: Path, - regenerate_missing_docs: bool, -) -> bool: - file_path = target_path / file_name - if regenerate_missing_docs and file_path.exists(): - return True - new_text = render_template( - template_name=template_name, context=context, extension=extension, keep_trailing_newline=True - ) - file_path = target_path / file_name - old_text = "" - if os.path.isfile(file_path): - with open(file_path) as readme_file_read: - old_text = readme_file_read.read() - replace_content(file_path, old_text, new_text, provider_package_id) - index_path = target_path / "index.rst" - if not index_path.exists(): - console.print(f"[red]ERROR! The index must exist for the provider docs: {index_path}") - sys.exit(1) - - expected_link_in_index = f"<{file_name.split('.')[0]}>" - if expected_link_in_index not in index_path.read_text(): - console.print( - f"\n[red]ERROR! 
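The ``BACKTICKS_CHECK`` pattern above (borrowed from the pygrep pre-commit hooks) flags single-backtick markup that would render wrong in RST, while ignoring double-backtick literals and trailing-underscore link targets. A quick usage sketch::

    from __future__ import annotations

    import re

    BACKTICKS_CHECK = re.compile(r"^(?! ).*(^| )`[^`]+`([^_]|$)", re.MULTILINE)


    def find_single_backticks(text: str) -> str | None:
        match = BACKTICKS_CHECK.search(text)
        return match.group(0) if match else None


    print(find_single_backticks("Use `legacy` markup"))    # flagged
    print(find_single_backticks("Use ``proper`` markup"))  # None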
The {index_path} must contain " - f"link to the generated documentation:[/]\n\n" - f"[yellow]{expected_link_in_index}[/]\n\n" - f"[bright_blue]Please make sure to add it to {index_path}.\n" - ) - - console.print(f"Checking for backticks correctly generated in: {file_path}") - match = BACKTICKS_CHECK.search(file_path.read_text()) - if match: - console.print( - f"\n[red]ERROR: Single backticks (`) found in {file_path}:[/]\n\n" - f"[yellow]{match.group(0)}[/]\n\n" - f"[bright_blue]Please fix them by replacing with double backticks (``).[/]\n" - ) - return False - - # TODO: uncomment me. Linting revealed that our already generated provider docs have duplicate links - # in the generated files, we should fix those and uncomment linting as separate step - so that - # we do not hold current release for fixing the docs. - # console.print(f"Linting: {file_path}") - # errors = restructuredtext_lint.lint_file(file_path) - # real_errors = False - # if errors: - # for error in errors: - # # Skip known issue: linter with doc role similar to https://github.com/OCA/pylint-odoo/issues/38 - # if ( - # 'No role entry for "doc"' in error.message - # or 'Unknown interpreted text role "doc"' in error.message - # ): - # continue - # real_errors = True - # console.print(f"* [red] {error.message}") - # if real_errors: - # console.print(f"\n[red] Errors found in {file_path}") - # return False - - console.print(f"[green]Generated {file_path} for {provider_package_id} is OK[/]") - - return True - - -@lru_cache(maxsize=None) -def black_mode() -> Mode: - config = parse_pyproject_toml(os.path.join(AIRFLOW_SOURCES_ROOT_PATH, "pyproject.toml")) - target_versions = {TargetVersion[val.upper()] for val in config.get("target_version", ())} - return Mode( - target_versions=target_versions, - line_length=config.get("line_length", Mode.line_length), - ) - - -def black_format(content) -> str: - return format_str(content, mode=black_mode()) - - -def prepare_setup_py_file(context): - setup_py_template_name = "SETUP" - setup_py_file_path = os.path.abspath(os.path.join(get_target_folder(), "setup.py")) - setup_py_content = render_template( - template_name=setup_py_template_name, context=context, extension=".py", autoescape=False - ) - with open(setup_py_file_path, "w") as setup_py_file: - setup_py_file.write(black_format(setup_py_content)) - - -def prepare_setup_cfg_file(context): - setup_cfg_template_name = "SETUP" - setup_cfg_file_path = os.path.abspath(os.path.join(get_target_folder(), "setup.cfg")) - setup_cfg_content = render_template( - template_name=setup_cfg_template_name, - context=context, - extension=".cfg", - autoescape=False, - keep_trailing_newline=True, - ) - with open(setup_cfg_file_path, "w") as setup_cfg_file: - setup_cfg_file.write(setup_cfg_content) - - -def prepare_get_provider_info_py_file(context, provider_package_id: str): - get_provider_template_name = "get_provider_info" - get_provider_file_path = os.path.abspath( - os.path.join( - get_target_providers_package_folder(provider_package_id), - "get_provider_info.py", - ) - ) - get_provider_content = render_template( - template_name=get_provider_template_name, - context=context, - extension=".py", - autoescape=False, - keep_trailing_newline=True, - ) - with open(get_provider_file_path, "w") as get_provider_file: - get_provider_file.write(black_format(get_provider_content)) - - -def prepare_manifest_in_file(context): - target = os.path.abspath(os.path.join(get_target_folder(), "MANIFEST.in")) - content = render_template( - template_name="MANIFEST", - 
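Formatting the generated Python files goes through black's programmatic API rather than a subprocess. The removed helpers boil down to roughly this (the ``pyproject.toml`` path is shown as a plain relative path for illustration)::

    from functools import lru_cache

    from black import Mode, TargetVersion, format_str, parse_pyproject_toml


    @lru_cache(maxsize=None)
    def black_mode(pyproject_path: str = "pyproject.toml") -> Mode:
        # Reuse the repository's [tool.black] settings for generated code.
        config = parse_pyproject_toml(pyproject_path)
        return Mode(
            target_versions={TargetVersion[v.upper()] for v in config.get("target_version", ())},
            line_length=config.get("line_length", Mode.line_length),
        )


    def black_format(source: str) -> str:
        return format_str(source, mode=black_mode())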
context=context, - extension=".in", - autoescape=False, - keep_trailing_newline=True, - ) - with open(target, "w") as fh: - fh.write(content) - - -def get_all_providers() -> list[str]: - """Returns all providers for regular packages. - - :return: list of providers that are considered for provider packages - """ - return list(ALL_PROVIDERS) - - -def get_removed_provider_ids() -> list[str]: - """ - Yields the ids of suspended providers. - """ - import yaml - - removed_provider_ids = [] - for provider_path in PROVIDERS_PATH.rglob("provider.yaml"): - provider_yaml = yaml.safe_load(provider_path.read_text()) - package_name = provider_yaml.get("package-name") - if provider_yaml.get("removed", False): - if not provider_yaml.get("suspended"): - console.print( - f"[error]The provider {package_name} is marked for removal in provider.yaml, but " - f"not suspended. Please suspend the provider first before removing it.\n" - ) - sys.exit(1) - removed_provider_ids.append(package_name[len("apache-airflow-providers-") :].replace("-", ".")) - return removed_provider_ids - - -def verify_provider_package(provider_package_id: str) -> None: - """Verifies if the provider package is good. - - :param provider_package_id: package id to verify - """ - if provider_package_id not in get_all_providers(): - if provider_package_id in get_removed_provider_ids(): - console.print() - console.print( - f"[yellow]The package: {provider_package_id} is suspended, but " - f"since you asked for it, it will be built [/]" - ) - console.print() - else: - console.print(f"[red]Wrong package name: {provider_package_id}[/]") - console.print("Use one of:") - console.print(get_all_providers()) - console.print(f"[red]The package {provider_package_id} is not a provider package.") - sys.exit(1) - - -def verify_changelog_exists(package: str) -> str: - provider_details = get_provider_details(package) - changelog_path = os.path.join(provider_details.source_provider_package_path, "CHANGELOG.rst") - if not os.path.isfile(changelog_path): - console.print(f"\n[red]ERROR: Missing {changelog_path}[/]\n") - console.print("[info]Please add the file with initial content:") - console.print("----- START COPYING AFTER THIS LINE ------- ") - processed_changelog = jinja2.Template(INITIAL_CHANGELOG_CONTENT, autoescape=True).render( - package_name=provider_details.pypi_package_name, - ) - syntax = Syntax( - processed_changelog, - "rst", - theme="ansi_dark", - ) - console.print(syntax) - console.print("----- END COPYING BEFORE THIS LINE ------- ") - sys.exit(1) - return changelog_path - - -@cli.command() -def list_providers_packages(): - """List all provider packages.""" - providers = get_all_providers() - # if provider needs to be not considered in release add it here - # this is useful for cases where provider is WIP for a long period thus we don't want to release it yet. 
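``get_removed_provider_ids`` above is essentially a scan of every ``provider.yaml`` for a ``removed: true`` flag, converting the PyPI package name back into a dotted provider id. A standalone sketch (the providers root path is hypothetical here, and the suspension sanity check is omitted)::

    from __future__ import annotations

    from pathlib import Path

    import yaml

    PROVIDERS_PATH = Path("airflow") / "providers"  # hypothetical root for illustration


    def get_removed_provider_ids() -> list[str]:
        removed: list[str] = []
        for provider_yaml_path in PROVIDERS_PATH.rglob("provider.yaml"):
            provider_yaml = yaml.safe_load(provider_yaml_path.read_text())
            if provider_yaml.get("removed", False):
                package_name = provider_yaml["package-name"]
                # "apache-airflow-providers-apache-hive" -> "apache.hive"
                removed.append(package_name[len("apache-airflow-providers-"):].replace("-", "."))
        return removed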
- providers_to_remove_from_release = [] - for provider in providers: - if provider not in providers_to_remove_from_release: - console.print(provider) - - -def tag_exists_for_version(provider_package_id: str, current_tag: str, verbose: bool): - provider_details = get_provider_details(provider_package_id) - if verbose: - console.print(f"Checking if tag `{current_tag}` exists.") - if not subprocess.call( - get_git_tag_check_command(current_tag), - cwd=provider_details.source_provider_package_path, - stderr=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - ): - if verbose: - console.print(f"Tag `{current_tag}` exists.") - return True - if verbose: - console.print(f"Tag `{current_tag}` does not exist.") - return False - - -@cli.command() -@option_version_suffix -@option_git_update -@argument_package_id -@option_verbose -@option_skip_tag_check -def generate_setup_files( - version_suffix: str, git_update: bool, package_id: str, verbose: bool, skip_tag_check: bool -): - """Generates setup files for the package. - - See `list-providers-packages` subcommand for the possible PACKAGE_ID values. - """ - provider_package_id = package_id - with with_group(f"Generate setup files for '{provider_package_id}'"): - if not skip_tag_check: - current_tag = get_current_tag(provider_package_id, version_suffix, git_update, verbose) - if tag_exists_for_version(provider_package_id, current_tag, verbose): - console.print(f"[yellow]The tag {current_tag} exists. Not preparing the package.[/]") - sys.exit(64) - if update_setup_files(provider_package_id, version_suffix): - console.print(f"[green]Generated regular package setup files for {provider_package_id}[/]") - else: - sys.exit(64) - - -def get_current_tag(provider_package_id: str, suffix: str, git_update: bool, verbose: bool): - verify_provider_package(provider_package_id) - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - versions: list[str] = provider_info["versions"] - current_version = versions[0] - current_tag = get_version_tag(current_version, provider_package_id, suffix) - return current_tag - - -def cleanup_remnants(verbose: bool): - if verbose: - console.print("Cleaning remnants") - files = glob.glob("*.egg-info") - for file in files: - shutil.rmtree(file, ignore_errors=True) - files = glob.glob("build") - for file in files: - shutil.rmtree(file, ignore_errors=True) - - -def verify_setup_cfg_prepared(provider_package): - with open("setup.cfg") as f: - setup_content = f.read() - search_for = f"providers-{provider_package.replace('.','-')} for Apache Airflow" - if search_for not in setup_content: - console.print( - f"[red]The setup.py is probably prepared for another package. " - f"It does not contain [bold]{search_for}[/bold]![/]" - ) - console.print( - f"\nRun:\n\n[bold]./dev/provider_packages/prepare_provider_packages.py " - f"generate-setup-files {provider_package}[/bold]\n" - ) - raise Exception("Wrong setup!") - - -@cli.command() -@option_package_format -@option_git_update -@option_version_suffix -@argument_package_id -@option_verbose -@option_skip_tag_check -def build_provider_packages( - package_format: str, - git_update: bool, - version_suffix: str, - package_id: str, - verbose: bool, - skip_tag_check: bool, -): - """Builds provider package. - - See `list-providers-packages` subcommand for the possible PACKAGE_ID values. 
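``tag_exists_for_version`` above relies on the exit code of ``git rev-parse``: zero when the tag resolves, non-zero otherwise, with output silenced. The same check in isolation (the tag value is an example)::

    import subprocess


    def tag_exists(tag: str, repo_path: str = ".") -> bool:
        return (
            subprocess.call(
                ["git", "rev-parse", tag],
                cwd=repo_path,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            == 0
        )


    if tag_exists("providers-google/10.1.0"):
        print("Tag already exists - skipping package preparation")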
- """ - - import tempfile - - # we cannot use context managers because if the directory gets deleted (which bdist_wheel does), - # the context manager will throw an exception when trying to delete it again - tmp_build_dir = tempfile.TemporaryDirectory().name - tmp_dist_dir = tempfile.TemporaryDirectory().name - try: - provider_package_id = package_id - with with_group(f"Prepare provider package for '{provider_package_id}'"): - if not skip_tag_check and (version_suffix.startswith("rc") or version_suffix == ""): - # For RC and official releases we check if the "officially released" version exists - # and skip the released if it was. This allows to skip packages that have not been - # marked for release. For "dev" suffixes, we always build all packages - released_tag = get_current_tag(provider_package_id, "", git_update, verbose) - if tag_exists_for_version(provider_package_id, released_tag, verbose): - console.print(f"[yellow]The tag {released_tag} exists. Skipping the package.[/]") - return False - console.print(f"Changing directory to {TARGET_PROVIDER_PACKAGES_PATH}") - os.chdir(TARGET_PROVIDER_PACKAGES_PATH) - cleanup_remnants(verbose) - provider_package = package_id - verify_setup_cfg_prepared(provider_package) - - console.print(f"Building provider package: {provider_package} in format {package_format}") - command: list[str] = ["python3", "setup.py", "build", "--build-temp", tmp_build_dir] - if version_suffix is not None: - command.extend(["egg_info", "--tag-build", version_suffix]) - if package_format in ["sdist", "both"]: - command.append("sdist") - if package_format in ["wheel", "both"]: - command.extend(["bdist_wheel", "--bdist-dir", tmp_dist_dir]) - console.print(f"Executing command: '{' '.join(command)}'") - try: - subprocess.check_call(args=command, stdout=subprocess.DEVNULL) - except subprocess.CalledProcessError as ex: - console.print("[red]The command returned an error %s", ex) - sys.exit(ex.returncode) - console.print( - f"[green]Prepared provider package {provider_package} in format {package_format}[/]" - ) - finally: - shutil.rmtree(tmp_build_dir, ignore_errors=True) - shutil.rmtree(tmp_dist_dir, ignore_errors=True) - - -if __name__ == "__main__": - # The cli exit code is: - # * 0 in case of success - # * 1 in case of error - # * 64 in case of skipped package - # * 65 in case user decided to quit - # * 66 in case package has doc-only changes - try: - cli() - except KeyboardInterrupt: - print("Interrupted") - try: - sys.exit(65) - except SystemExit: - os._exit(65) diff --git a/images/breeze/output_release-management_prepare-provider-packages.svg b/images/breeze/output_release-management_prepare-provider-packages.svg index f465122c13d0d..9bdb76be04754 100644 --- a/images/breeze/output_release-management_prepare-provider-packages.svg +++ b/images/breeze/output_release-management_prepare-provider-packages.svg @@ -1,4 +1,4 @@ - +