diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 01cf502daa1e9..b840699fbad2c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -814,7 +814,7 @@ jobs: run: rm -fv ./dist/* - name: "Prepare provider documentation" run: > - breeze release-management prepare-provider-documentation + breeze release-management prepare-provider-documentation --non-interactive ${{ needs.build-info.outputs.affected-providers-list-as-string }} - name: "Prepare provider packages: wheel" run: > diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1a8b98f03f4cd..19e85aef8bd72 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1004,7 +1004,7 @@ repos: entry: ./scripts/ci/pre_commit/pre_commit_mypy.py files: ^dev/.*\.py$ require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] - id: mypy-core name: Run mypy for core language: python @@ -1012,7 +1012,7 @@ repos: files: \.py$ exclude: ^.*/.*_vendor/|^airflow/migrations|^airflow/providers|^dev|^docs|^provider_packages|^tests/providers|^tests/system/providers|^tests/dags/test_imports.py require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] - id: mypy-providers name: Run mypy for providers language: python @@ -1020,7 +1020,7 @@ repos: files: ^airflow/providers/.*\.py$|^tests/providers/.*\.py$|^tests/system/providers/.*\.py$ exclude: ^.*/.*_vendor/ require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] - id: mypy-docs name: Run mypy for /docs/ folder language: python @@ -1028,7 +1028,7 @@ repos: files: ^docs/.*\.py$ exclude: ^docs/rtd-deprecation require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 
'pyyaml'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] - id: check-provider-yaml-valid name: Validate provider.yaml files entry: ./scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py diff --git a/.rat-excludes b/.rat-excludes index b37f97594cc84..751742b1afc59 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -142,3 +142,6 @@ doap_airflow.rdf # nvm (Node Version Manager) .nvmrc + +# PKG-INFO file +PKG-INFO diff --git a/BREEZE.rst b/BREEZE.rst index b9f4136337fd2..04eecace44397 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -2047,12 +2047,6 @@ The below example perform documentation preparation for provider packages. breeze release-management prepare-provider-documentation -By default, the documentation preparation runs package verification to check if all packages are -importable, but you can add ``--skip-package-verification`` to skip it. - -.. code-block:: bash - - breeze release-management prepare-provider-documentation --skip-package-verification You can also add ``--answer yes`` to perform non-interactive build. diff --git a/NOTICE b/NOTICE index 84c77cd4fc12c..33371e44a76a4 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Apache Airflow -Copyright 2016-2021 The Apache Software Foundation +Copyright 2016-2023 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
diff --git a/airflow/www/webpack.config.js b/airflow/www/webpack.config.js index fc5c6a6497679..6ac1f3a208890 100644 --- a/airflow/www/webpack.config.js +++ b/airflow/www/webpack.config.js @@ -40,7 +40,7 @@ const BUILD_DIR = path.resolve(__dirname, "./static/dist"); // Convert licenses json into a standard format for LICENSES.txt const formatLicenses = (packages) => { let text = `Apache Airflow -Copyright 2016-2021 The Apache Software Foundation +Copyright 2016-2023 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/chart/NOTICE b/chart/NOTICE index 3f68897ba6559..ff6e647c9ebed 100644 --- a/chart/NOTICE +++ b/chart/NOTICE @@ -1,5 +1,5 @@ Apache Airflow -Copyright 2016-2021 The Apache Software Foundation +Copyright 2016-2023 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md index 9569c8db94866..62f26309a433b 100644 --- a/dev/README_RELEASE_PROVIDER_PACKAGES.md +++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md @@ -143,6 +143,10 @@ separately this command: breeze release-management prepare-provider-documentation qubole ``` +In case you want to **just** regenerate the documentation because you fixed something in the templates, add +`--reapply-templates` flag to the command above. If you want to just update min airflow version for all +packages, you can use `--only-min-version-update` flag - this will only update the min version in the +`__init__.py` files of the packages and package documentation without bumping the provider versions. This command will not only prepare documentation but will also help the release manager to review changes implemented in all providers, and determine which of the providers should be released. 
For each diff --git a/dev/breeze/README.md b/dev/breeze/README.md index a5d495bb1522b..1f4e7c65c9802 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. --------------------------------------------------------------------------------------------------------- -Package config hash: abef89e76b6c1cbfe37d4a083a9e75259d0169662c666c9e2549ca91ddf12d9f1274a4c7ab44e999619c0aaf9fdb56f299397e8c528fafbc94caf45f7cc70ad9 +Package config hash: 4b93f639e3365c7420475b6e7850624c74f24bc167c9cb5ce41971720942d2f1195ca4806cbd23b34dd4991a020b9549cdf4143e7c6173c42bda52b2911850ad --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/setup.cfg b/dev/breeze/setup.cfg index 9c163b288e348..e81ec3aa9de51 100644 --- a/dev/breeze/setup.cfg +++ b/dev/breeze/setup.cfg @@ -55,26 +55,28 @@ package_dir= =src packages = find: install_requires = + PyGithub>=2.1.1 + black>=23.11.0 click>=8.1.7 filelock>=3.13.0 + flit>=3.9.0 + gitpython>=3.1.40 inputimeout>=1.0.4 jinja2>=3.1.0 + jsonschema>=4.19.1 packaging>=23.2 pendulum>=2.1.2,<3 pre-commit>=3.5.0 psutil>=5.9.6 - pytest>=7.4.0 pytest-xdist>=3.3.1 + pytest>=7.4.0 pyyaml>=6.0.1 - PyGithub>=2.1.1 requests>=2.30.0 - rich>=13.6.0 rich-click>=1.7.1 - gitpython>=3.1.40 + rich>=13.6.0 + semver>=3.0.2 + tabulate>=0.9.0 twine>=4.0.2 - wheel>=0.41.3 - setuptools>=68.2.2 - jsonschema>=4.19.1 [options.packages.find] where=src diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 6ea971447e0a4..d61846811c46c 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -26,7 +26,8 @@ from copy import deepcopy from datetime import datetime from pathlib import Path -from typing import 
IO, Generator, NamedTuple +from subprocess import DEVNULL +from typing import IO, Any, Generator, NamedTuple import click from rich.progress import Progress @@ -44,6 +45,17 @@ MULTI_PLATFORM, ) from airflow_breeze.params.shell_params import ShellParams +from airflow_breeze.prepare_providers.provider_packages import ( + PrepareReleasePackageErrorBuildingPackageException, + PrepareReleasePackageTagExistException, + PrepareReleasePackageWrongSetupException, + build_provider_package, + cleanup_build_remnants, + copy_provider_sources_to_target, + generate_build_files, + move_built_packages_and_cleanup, + should_skip_the_package, +) from airflow_breeze.utils.add_back_references import ( start_generating_back_references, ) @@ -79,7 +91,7 @@ option_version_suffix_for_pypi, ) from airflow_breeze.utils.confirm import Answer, user_confirm -from airflow_breeze.utils.console import Output, get_console +from airflow_breeze.utils.console import MessageType, Output, get_console from airflow_breeze.utils.custom_param_types import BetterChoice from airflow_breeze.utils.docker_command_utils import ( check_remote_ghcr_io_commands, @@ -88,7 +100,16 @@ perform_environment_checks, ) from airflow_breeze.utils.github import download_constraints_file, get_active_airflow_versions -from airflow_breeze.utils.packages import convert_to_long_package_names, expand_all_provider_packages +from airflow_breeze.utils.packages import ( + PackageSuspendedException, + expand_all_provider_packages, + find_matching_long_package_names, + get_available_packages, + get_provider_details, + get_provider_packages_metadata, + get_removed_provider_ids, + make_sure_remote_apache_exists_and_fetch, +) from airflow_breeze.utils.parallel import ( GenericRegexpProgressMatcher, SummarizeAfter, @@ -115,8 +136,7 @@ run_command, run_compile_www_assets, ) -from airflow_breeze.utils.shared_options import get_dry_run, get_forced_answer, get_verbose -from airflow_breeze.utils.suspended_providers import get_removed_provider_ids 
+from airflow_breeze.utils.shared_options import get_dry_run, get_verbose option_debug_release_management = click.option( "--debug", @@ -219,11 +239,24 @@ def prepare_airflow_packages( sys.exit(result_command.returncode) +def provider_action_summary(description: str, message_type: MessageType, packages: list[str]): + if packages: + get_console().print(f"{description}: {len(packages)}\n") + get_console().print(f"[{message_type.value}]{' '.join(packages)}") + get_console().print() + + @release_management.command( name="prepare-provider-documentation", help="Prepare CHANGELOG, README and COMMITS information for providers.", ) -@option_debug_release_management +@click.option( + "--skip-git-fetch", + is_flag=True, + help="Skips removal and recreation of `apache-https-for-providers` remote in git. By default, the " + "remote is recreated and fetched to make sure that it's up to date and that recent commits " + "are not missing", +) @click.option( + "--base-branch", type=str, @@ -237,48 +270,138 @@ def prepare_airflow_packages( help="Only update minimum version in __init__.py files and regenerate corresponding documentation", ) @click.option( - "--regenerate-missing-docs", + "--reapply-templates-only", is_flag=True, - help="Only regenerate missing documentation, do not bump version. Useful if templates were added" + help="Only reapply templates, do not bump version. Useful if templates were added" " and you need to regenerate documentation.", ) +@click.option( + "--non-interactive", + is_flag=True, + help="Run in non-interactive mode. 
Provides random answers to the type of changes and confirms release" + "for providers prepared for release - useful to test the script in non-interactive mode in CI.", +) @argument_provider_packages @option_verbose @option_dry_run @option_answer def prepare_provider_documentation( github_repository: str, + skip_git_fetch: bool, base_branch: str, - debug: bool, - provider_packages: list[str], + provider_packages: tuple[str], only_min_version_update: bool, - regenerate_missing_docs: bool, + reapply_templates_only: bool, + non_interactive: bool, ): - perform_environment_checks() - check_remote_ghcr_io_commands() - cleanup_python_generated_files() - shell_params = ShellParams( - mount_sources=MOUNT_ALL, - github_repository=github_repository, - python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, - base_branch=base_branch, - only_min_version_update=only_min_version_update, - regenerate_missing_docs=regenerate_missing_docs, - skip_environment_initialization=True, + from airflow_breeze.prepare_providers.provider_documentation import ( + PrepareReleaseDocsChangesOnlyException, + PrepareReleaseDocsErrorOccurredException, + PrepareReleaseDocsNoChangesException, + PrepareReleaseDocsUserQuitException, + PrepareReleaseDocsUserSkippedException, + update_changelog, + update_min_airflow_version, + update_release_notes, ) - rebuild_or_pull_ci_image_if_needed(command_params=shell_params) - cmd_to_run = [ - "/opt/airflow/scripts/in_container/run_prepare_provider_documentation.sh", - *provider_packages, - ] - answer = get_forced_answer() - result_command = run_docker_command_with_debug( - params=shell_params, - command=cmd_to_run, - enable_input=answer is None or answer[0].lower() != "y", - debug=debug, + + cleanup_python_generated_files() + if not provider_packages: + provider_packages = get_available_packages() + + if not skip_git_fetch: + run_command(["git", "remote", "rm", "apache-https-for-providers"], check=False, stderr=DEVNULL) + 
make_sure_remote_apache_exists_and_fetch(github_repository=github_repository) + no_changes_packages = [] + doc_only_packages = [] + error_packages = [] + user_skipped_packages = [] + success_packages = [] + suspended_packages = [] + removed_packages = [] + for provider_id in provider_packages: + provider_metadata = basic_provider_checks(provider_id) + if os.environ.get("GITHUB_ACTIONS", "false") != "true": + get_console().print("-" * get_console().width) + try: + with_breaking_changes = False + maybe_with_new_features = False + with ci_group(f"Update release notes for package '{provider_id}' "): + get_console().print("Updating documentation for the latest release version.") + if not only_min_version_update: + with_breaking_changes, maybe_with_new_features = update_release_notes( + provider_id, + reapply_templates_only=reapply_templates_only, + base_branch=base_branch, + regenerate_missing_docs=reapply_templates_only, + non_interactive=non_interactive, + ) + update_min_airflow_version( + provider_package_id=provider_id, + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + with ci_group(f"Updates changelog for last release of package '{provider_id}'"): + update_changelog( + package_id=provider_id, + base_branch=base_branch, + reapply_templates_only=reapply_templates_only, + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + except PrepareReleaseDocsNoChangesException: + no_changes_packages.append(provider_id) + except PrepareReleaseDocsChangesOnlyException: + doc_only_packages.append(provider_id) + except PrepareReleaseDocsErrorOccurredException: + error_packages.append(provider_id) + except PrepareReleaseDocsUserSkippedException: + user_skipped_packages.append(provider_id) + except PackageSuspendedException: + suspended_packages.append(provider_id) + except PrepareReleaseDocsUserQuitException: + break + else: + if provider_metadata.get("removed"): + 
removed_packages.append(provider_id) + else: + success_packages.append(provider_id) + get_console().print() + get_console().print("\n[info]Summary of prepared documentation:\n") + provider_action_summary("Success", MessageType.SUCCESS, success_packages) + provider_action_summary("Scheduled for removal", MessageType.SUCCESS, removed_packages) + provider_action_summary("Docs only", MessageType.SUCCESS, doc_only_packages) + provider_action_summary("Skipped on no changes", MessageType.WARNING, no_changes_packages) + provider_action_summary("Suspended", MessageType.WARNING, suspended_packages) + provider_action_summary("Skipped by user", MessageType.SPECIAL, user_skipped_packages) + provider_action_summary("Errors", MessageType.ERROR, error_packages) + if error_packages: + get_console().print("\n[errors]There were errors when generating packages. Exiting!\n") + sys.exit(1) + if not success_packages and not doc_only_packages and not removed_packages: + get_console().print("\n[warning]No packages prepared!\n") + sys.exit(0) + get_console().print("\n[success]Successfully prepared documentation for packages!\n\n") + get_console().print( + "\n[info]Please review the updated files, classify the changelog entries and commit the changes.\n" ) - sys.exit(result_command.returncode) + + +def basic_provider_checks(provider_package_id: str) -> dict[str, Any]: + provider_packages_metadata = get_provider_packages_metadata() + provider_metadata = provider_packages_metadata.get(provider_package_id) + if not provider_metadata: + get_console().print(f"[error]The package {provider_package_id} is not a provider package. 
Exiting[/]") + sys.exit(1) + if provider_metadata.get("removed", False): + get_console().print( + f"[warning]The package: {provider_package_id} is scheduled for removal, but " + f"since you asked for it, it will be built [/]\n" + ) + elif provider_metadata.get("suspended"): + get_console().print(f"[warning]The package: {provider_package_id} is suspended " f"skipping it [/]\n") + raise PackageSuspendedException() + return provider_metadata @release_management.command( @@ -292,23 +415,49 @@ def prepare_provider_documentation( type=click.File("rt"), help="Read list of packages from text file (one package per line).", ) -@option_debug_release_management -@argument_provider_packages +@click.option( + "--skip-tag-check", + default=False, + is_flag=True, + help="Skip checking if the tag already exists in the remote repository", +) +@click.option( + "--skip-deleting-generated-files", + default=False, + is_flag=True, + help="Skip deleting files that were used to generate provider package. Useful for debugging and " + "developing changes to the build process.", +) +@click.option( + "--clean-dist", + default=False, + is_flag=True, + help="Clean dist directory before building packages. 
Useful when you want to build multiple packages " + " in a clean environment", +) @option_github_repository +@argument_provider_packages @option_verbose @option_dry_run def prepare_provider_packages( package_format: str, version_suffix_for_pypi: str, package_list_file: IO, - debug: bool, - provider_packages: tuple[str, ...], + skip_tag_check: bool, + skip_deleting_generated_files: bool, + clean_dist: bool, github_repository: str, + provider_packages: tuple[str, ...], ): perform_environment_checks() cleanup_python_generated_files() + if not provider_packages: + provider_packages = get_available_packages() packages_list = list(provider_packages) + if not skip_tag_check: + run_command(["git", "remote", "rm", "apache-https-for-providers"], check=False, stderr=DEVNULL) + make_sure_remote_apache_exists_and_fetch(github_repository=github_repository) removed_provider_ids = get_removed_provider_ids() if package_list_file: packages_list.extend( @@ -318,22 +467,64 @@ def prepare_provider_packages( if package.strip() not in removed_provider_ids ] ) - shell_params = ShellParams( - mount_sources=MOUNT_ALL, - github_repository=github_repository, - python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, - package_format=package_format, - skip_environment_initialization=True, - version_suffix_for_pypi=version_suffix_for_pypi, - ) - rebuild_or_pull_ci_image_if_needed(command_params=shell_params) - cmd_to_run = ["/opt/airflow/scripts/in_container/run_prepare_provider_packages.sh", *packages_list] - result_command = run_docker_command_with_debug( - params=shell_params, - command=cmd_to_run, - debug=debug, + success_packages = [] + skipped_as_already_released_packages = [] + suspended_packages = [] + wrong_setup_packages = [] + error_packages = [] + if clean_dist: + get_console().print("\n[warning]Cleaning dist directory before building packages[/]\n") + shutil.rmtree(DIST_DIR, ignore_errors=True) + DIST_DIR.mkdir(parents=True, exist_ok=True) + for provider_id in provider_packages: + try: + 
basic_provider_checks(provider_id) + if not skip_tag_check and should_skip_the_package(provider_id, version_suffix_for_pypi): + continue + if os.environ.get("GITHUB_ACTIONS", "false") != "true": + get_console().print("-" * get_console().width) + target_provider_root_sources_path = copy_provider_sources_to_target(provider_id) + generate_build_files( + provider_id=provider_id, + version_suffix=version_suffix_for_pypi, + target_provider_root_sources_path=target_provider_root_sources_path, + ) + cleanup_build_remnants(target_provider_root_sources_path) + build_provider_package( + provider_id=provider_id, + version_suffix=version_suffix_for_pypi, + package_format=package_format, + target_provider_root_sources_path=target_provider_root_sources_path, + ) + move_built_packages_and_cleanup( + target_provider_root_sources_path, DIST_DIR, skip_cleanup=skip_deleting_generated_files + ) + except PrepareReleasePackageTagExistException: + skipped_as_already_released_packages.append(provider_id) + except PrepareReleasePackageWrongSetupException: + wrong_setup_packages.append(provider_id) + except PrepareReleasePackageErrorBuildingPackageException: + error_packages.append(provider_id) + except PackageSuspendedException: + suspended_packages.append(provider_id) + else: + success_packages.append(provider_id) + get_console().print() + get_console().print("\n[info]Summary of prepared packages:\n") + provider_action_summary("Success", MessageType.SUCCESS, success_packages) + provider_action_summary( + "Skipped as already released", MessageType.SUCCESS, skipped_as_already_released_packages ) - sys.exit(result_command.returncode) + provider_action_summary("Suspended", MessageType.WARNING, suspended_packages) + provider_action_summary("Wrong setup generated", MessageType.ERROR, wrong_setup_packages) + provider_action_summary("Errors", MessageType.ERROR, error_packages) + if error_packages or wrong_setup_packages: + get_console().print("\n[errors]There were errors when generating packages. 
Exiting!\n") + sys.exit(1) + if not success_packages and not skipped_as_already_released_packages: + get_console().print("\n[warning]No packages prepared!\n") + sys.exit(0) + get_console().print("\n[success]Successfully built packages!\n\n") def run_generate_constraints( @@ -869,8 +1060,8 @@ def publish_docs( "Provide the path of cloned airflow-site repo\n" ) - current_packages = convert_to_long_package_names( - package_filters=package_filter, packages_short_form=expand_all_provider_packages(doc_packages) + current_packages = find_matching_long_package_names( + short_packages=expand_all_provider_packages(doc_packages), filters=package_filter ) print(f"Publishing docs for {len(current_packages)} package(s)") for pkg in current_packages: @@ -1112,46 +1303,34 @@ def is_package_in_dist(dist_files: list[str], package: str) -> bool: ) -def get_prs_for_package(package_id: str) -> list[int]: - import yaml - +def get_prs_for_package(provider_id: str) -> list[int]: pr_matcher = re.compile(r".*\(#([0-9]*)\)``$") - changelog_path = ( - AIRFLOW_SOURCES_ROOT / "airflow" / "providers" / package_id.replace(".", os.sep) / "CHANGELOG.rst" - ) - # load yaml from file - provider_yaml_dict = yaml.safe_load( - ( - AIRFLOW_SOURCES_ROOT - / "airflow" - / "providers" - / package_id.replace(r".", os.sep) - / "provider.yaml" - ).read_text() - ) - current_release_version = provider_yaml_dict["versions"][0] prs = [] - with open(changelog_path) as changelog_file: - changelog_lines = changelog_file.readlines() - extract_prs = False - skip_line = False - for line in changelog_lines: - if skip_line: - # Skip first "....." header - skip_line = False - elif line.strip() == current_release_version: - extract_prs = True - skip_line = True - elif extract_prs: - if len(line) > 1 and all(c == "." for c in line.strip()): - # Header for next version reached - break - if line.startswith(".. 
Below changes are excluded from the changelog"): - # The reminder of PRs is not important skipping it - break - match_result = pr_matcher.match(line.strip()) - if match_result: - prs.append(int(match_result.group(1))) + provider_yaml_dict = get_provider_packages_metadata().get(provider_id) + if not provider_yaml_dict: + raise RuntimeError(f"The provider id {provider_id} does not have provider.yaml file") + current_release_version = provider_yaml_dict["versions"][0] + provider_details = get_provider_details(provider_id) + changelog_lines = provider_details.changelog_path.read_text().splitlines() + extract_prs = False + skip_line = False + for line in changelog_lines: + if skip_line: + # Skip first "....." header + skip_line = False + elif line.strip() == current_release_version: + extract_prs = True + skip_line = True + elif extract_prs: + if len(line) > 1 and all(c == "." for c in line.strip()): + # Header for next version reached + break + if line.startswith(".. Below changes are excluded from the changelog"): + # The reminder of PRs is not important skipping it + break + match_result = pr_matcher.match(line.strip()) + if match_result: + prs.append(int(match_result.group(1))) return prs @@ -1208,18 +1387,18 @@ class ProviderPRInfo(NamedTuple): if only_available_in_dist: files_in_dist = os.listdir(str(AIRFLOW_SOURCES_ROOT / "dist")) prepared_package_ids = [] - for package_id in provider_packages: - if not only_available_in_dist or is_package_in_dist(files_in_dist, package_id): - get_console().print(f"Extracting PRs for provider {package_id}") - prepared_package_ids.append(package_id) + for provider_id in provider_packages: + if not only_available_in_dist or is_package_in_dist(files_in_dist, provider_id): + get_console().print(f"Extracting PRs for provider {provider_id}") + prepared_package_ids.append(provider_id) else: get_console().print( - f"Skipping extracting PRs for provider {package_id} as it is missing in dist" + f"Skipping extracting PRs for provider 
{provider_id} as it is missing in dist" ) continue - prs = get_prs_for_package(package_id) - provider_prs[package_id] = [pr for pr in prs if pr not in excluded_prs] - all_prs.update(provider_prs[package_id]) + prs = get_prs_for_package(provider_id) + provider_prs[provider_id] = [pr for pr in prs if pr not in excluded_prs] + all_prs.update(provider_prs[provider_id]) g = Github(github_token) repo = g.get_repo("apache/airflow") pull_requests: dict[int, PullRequest.PullRequest | Issue.Issue] = {} @@ -1239,21 +1418,21 @@ class ProviderPRInfo(NamedTuple): get_console().print(f"[red]The PR #{pr_number} could not be found[/]") progress.advance(task) providers: dict[str, ProviderPRInfo] = {} - for package_id in prepared_package_ids: - pull_request_list = [pull_requests[pr] for pr in provider_prs[package_id] if pr in pull_requests] + for provider_id in prepared_package_ids: + pull_request_list = [pull_requests[pr] for pr in provider_prs[provider_id] if pr in pull_requests] provider_yaml_dict = yaml.safe_load( ( AIRFLOW_SOURCES_ROOT / "airflow" / "providers" - / package_id.replace(".", os.sep) + / provider_id.replace(".", os.sep) / "provider.yaml" ).read_text() ) if pull_request_list: - providers[package_id] = ProviderPRInfo( + providers[provider_id] = ProviderPRInfo( version=provider_yaml_dict["versions"][0], - provider_package_id=package_id, + provider_package_id=provider_id, pypi_package_name=provider_yaml_dict["package-name"], pr_list=pull_request_list, ) diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py index a9ff84f5d6a9a..a5b84869b416f 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py @@ -107,8 +107,10 @@ "options": [ "--package-format", "--version-suffix-for-pypi", + "--clean-dist", + "--skip-tag-check", + 
"--skip-deleting-generated-files", "--package-list-file", - "--debug", "--github-repository", ], } @@ -117,11 +119,12 @@ { "name": "Provider documentation preparation flags", "options": [ - "--debug", "--github-repository", + "--skip-git-fetch", "--base-branch", "--only-min-version-update", - "--regenerate-missing-docs", + "--reapply-templates-only", + "--non-interactive", ], } ], diff --git a/dev/breeze/src/airflow_breeze/commands/testing_commands.py b/dev/breeze/src/airflow_breeze/commands/testing_commands.py index efbfa3667c342..66ffb593c356e 100644 --- a/dev/breeze/src/airflow_breeze/commands/testing_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/testing_commands.py @@ -74,6 +74,7 @@ perform_environment_checks, remove_docker_networks, ) +from airflow_breeze.utils.packages import get_suspended_provider_folders from airflow_breeze.utils.parallel import ( GenericRegexpProgressMatcher, SummarizeAfter, @@ -87,7 +88,6 @@ run_docker_compose_tests, ) from airflow_breeze.utils.run_utils import get_filesystem_type, run_command -from airflow_breeze.utils.suspended_providers import get_suspended_providers_folders LOW_MEMORY_CONDITION = 8 * 1024 * 1024 * 1024 @@ -161,7 +161,7 @@ def _run_test( env_variables["TEST_TYPE"] = exec_shell_params.test_type env_variables["COLLECT_ONLY"] = str(exec_shell_params.collect_only).lower() env_variables["REMOVE_ARM_PACKAGES"] = str(exec_shell_params.remove_arm_packages).lower() - env_variables["SUSPENDED_PROVIDERS_FOLDERS"] = " ".join(get_suspended_providers_folders()).strip() + env_variables["SUSPENDED_PROVIDERS_FOLDERS"] = " ".join(get_suspended_provider_folders()).strip() if "[" in exec_shell_params.test_type and not exec_shell_params.test_type.startswith("Providers"): get_console(output=output).print( "[error]Only 'Providers' test type can specify actual tests with \\[\\][/]" diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index a4ae86a7396b6..1ea794071287d 
100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -361,6 +361,7 @@ def get_airflow_extras(): "trino", ] ALL_PROVIDER_YAML_FILES = Path(AIRFLOW_SOURCES_ROOT, "airflow", "providers").rglob("provider.yaml") +PROVIDER_RUNTIME_DATA_SCHEMA_PATH = AIRFLOW_SOURCES_ROOT / "airflow" / "provider_info.schema.json" with Path(AIRFLOW_SOURCES_ROOT, "generated", "provider_dependencies.json").open() as f: PROVIDER_DEPENDENCIES = json.load(f) diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py new file mode 100644 index 0000000000000..3c0370f33e382 --- /dev/null +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py @@ -0,0 +1,981 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import difflib +import os +import random +import re +import subprocess +import sys +import tempfile +from copy import deepcopy +from enum import Enum +from pathlib import Path +from shutil import copyfile +from typing import Any, NamedTuple + +import jinja2 +import semver +from rich.syntax import Syntax + +from airflow_breeze.utils.black_utils import black_format +from airflow_breeze.utils.confirm import Answer, user_confirm +from airflow_breeze.utils.console import get_console +from airflow_breeze.utils.packages import ( + HTTPS_REMOTE, + ProviderPackageDetails, + get_provider_details, + get_provider_jinja_context, + get_provider_packages_metadata, + get_source_package_path, + render_template, +) +from airflow_breeze.utils.run_utils import run_command +from airflow_breeze.utils.shared_options import get_verbose +from airflow_breeze.utils.versions import get_version_tag + +PR_PATTERN = re.compile(r".*\(#(\d+)\)") + +AUTOMATICALLY_GENERATED_MARKER = "AUTOMATICALLY GENERATED" +AUTOMATICALLY_GENERATED_CONTENT = ( + f".. THE REMAINDER OF THE FILE IS {AUTOMATICALLY_GENERATED_MARKER}. " + f"IT WILL BE OVERWRITTEN AT RELEASE TIME!" +) + +# Taken from pygrep hooks we are using in pre-commit +# https://github.com/pre-commit/pygrep-hooks/blob/main/.pre-commit-hooks.yaml +BACKTICKS_CHECK = re.compile(r"^(?! {4}).*(^| )`[^`]+`([^_]|$)", re.MULTILINE) + + +INITIAL_CHANGELOG_CONTENT = """ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. 
class Change(NamedTuple):
    """Stores details about a single commit parsed from ``git log`` output."""

    full_hash: str
    short_hash: str
    date: str
    version: str
    message: str
    message_without_backticks: str
    pr: str | None


class TypeOfChange(Enum):
    """Single-letter answer the release manager gives to classify a release."""

    DOCUMENTATION = "d"
    BUGFIX = "b"
    FEATURE = "f"
    BREAKING_CHANGE = "x"
    SKIP = "s"


class ClassifiedChanges:
    """Stores lists of changes classified automatically"""

    def __init__(self):
        self.fixes: list[Change] = []
        self.features: list[Change] = []
        self.breaking_changes: list[Change] = []
        self.other: list[Change] = []


class PrepareReleaseDocsChangesOnlyException(Exception):
    """Raised when package has only documentation changes."""


class PrepareReleaseDocsNoChangesException(Exception):
    """Raised when package has no changes."""


class PrepareReleaseDocsErrorOccurredException(Exception):
    """Raised when error occurred when preparing packages changes."""


class PrepareReleaseDocsUserSkippedException(Exception):
    """Raised when user skipped package."""


class PrepareReleaseDocsUserQuitException(Exception):
    """Raised when user decided to quit."""


# Human-readable description for every TypeOfChange member. The mapping is now
# complete (SKIP was missing), so TYPE_OF_CHANGE_DESCRIPTION[type_of_change]
# can never raise KeyError regardless of where the lookup happens.
TYPE_OF_CHANGE_DESCRIPTION = {
    TypeOfChange.DOCUMENTATION: "Documentation only changes - no version change needed, "
    "only documentation needs to be updated",
    TypeOfChange.BUGFIX: "Bugfix/Misc changes only - bump in PATCHLEVEL version needed",
    TypeOfChange.FEATURE: "Feature changes - bump in MINOR version needed",
    TypeOfChange.BREAKING_CHANGE: "Breaking changes - bump in MAJOR version needed",
    TypeOfChange.SKIP: "Skip the provider - no version change and no documentation update needed",
}
# Matches a trailing "(#1234)" PR reference in a commit subject.
# (Same pattern as the module-level one; re-declared next to the parsing helpers.)
PR_PATTERN = re.compile(r".*\(#(\d+)\)")


def _get_git_log_command(from_commit: str | None = None, to_commit: str | None = None) -> list[str]:
    """Get git command to run for the current repo from the current folder.

    The current directory should always be the package folder.

    :param from_commit: if present - base commit from which to start the log from
    :param to_commit: if present - final commit which should be the start of the log
    :return: git command to run
    """
    git_cmd = [
        "git",
        "log",
        "--pretty=format:%H %h %cd %s",
        "--date=short",
    ]
    if from_commit and to_commit:
        git_cmd.append(f"{from_commit}...{to_commit}")
    elif from_commit:
        git_cmd.append(from_commit)
    elif to_commit:
        raise ValueError("It makes no sense to specify to_commit without from_commit.")
    # Restrict the log to the current directory (the provider package folder).
    git_cmd.extend(["--", "."])
    return git_cmd


def _get_change_from_line(line: str, version: str) -> Change:
    """Parse one ``%H %h %cd %s`` git-log line into a :class:`Change`.

    :param line: raw log line - full hash, short hash, date, then the subject
        (the subject may itself contain spaces, hence ``maxsplit=3``).
    :param version: provider version this commit belongs to.
    """
    split_line = line.split(" ", maxsplit=3)
    message = split_line[3]
    pr = None
    pr_match = PR_PATTERN.match(message)
    if pr_match:
        pr = pr_match.group(1)
    return Change(
        full_hash=split_line[0],
        short_hash=split_line[1],
        date=split_line[2],
        version=version,
        message=message,
        # Replace single backticks and HTML-escaped apostrophes so the subject can be
        # embedded in RST/Markdown tables. BUG FIX: "&#39;" is the HTML entity for an
        # apostrophe - the previous code used the garbled "&39;" which never matches.
        message_without_backticks=message.replace("`", "'").replace("&#39;", "'"),
        pr=pr,
    )


def _convert_git_changes_to_table(
    version: str, changes: str, base_url: str, markdown: bool = True
) -> tuple[str, list[Change]]:
    """
    Converts list of changes from its string form to markdown/RST table and array of change information

    The changes are in the form of multiple lines where each line consists of:
    FULL_COMMIT_HASH SHORT_COMMIT_HASH COMMIT_DATE COMMIT_SUBJECT

    The subject can contain spaces but none of the preceding values can, so we can make split
    3 times on spaces to break it up.

    :param version: Version from which the changes are
    :param changes: list of changes in a form of multiple-line string
    :param base_url: base url for the commit URL
    :param markdown: if True, Markdown format is used else rst
    :return: formatted table + list of changes (starting from the latest)
    """
    from tabulate import tabulate

    lines = changes.splitlines()
    headers = ["Commit", "Committed", "Subject"]
    table_data = []
    changes_list: list[Change] = []
    for line in lines:
        if line == "":
            continue
        change = _get_change_from_line(line, version)
        table_data.append(
            (
                f"[{change.short_hash}]({base_url}{change.full_hash})"
                if markdown
                else f"`{change.short_hash} <{base_url}{change.full_hash}>`_",
                change.date,
                f"`{change.message_without_backticks}`"
                if markdown
                else f"``{change.message_without_backticks}``",
            )
        )
        changes_list.append(change)
    header = ""
    if not table_data:
        return header, []
    table = tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst")
    if not markdown:
        # RST output gets a version header (underlined with dots) and the latest date.
        header += f"\n\n{version}\n" + "." * len(version) + "\n\n"
        release_date = table_data[0][1]
        header += f"Latest change: {release_date}\n\n"
    return header + table, changes_list
def _print_changes_table(changes_table):
    # Pretty-print the RST changes table with syntax highlighting.
    syntax = Syntax(changes_table, "rst", theme="ansi_dark")
    get_console().print(syntax)


def _get_all_changes_for_package(
    provider_package_id: str,
    base_branch: str,
    reapply_templates_only: bool,
) -> tuple[bool, list[list[Change]], str]:
    """Retrieves all changes for the package.

    :param provider_package_id: provider package id
    :param base_branch: base branch to check changes in apache remote for changes
    :param reapply_templates_only: whether to only reapply templates without bumping the version
    :return: tuple of:
        bool (whether to proceed with update)
        list of lists of changes for all past versions (might be empty)
        the same list converted to string RST table
    """
    provider_details = get_provider_details(provider_package_id)
    current_version = provider_details.versions[0]
    current_tag_no_suffix = get_version_tag(current_version, provider_package_id)
    if get_verbose():
        get_console().print(f"[info]Checking if tag '{current_tag_no_suffix}' exist.")
    # rev-parse succeeds (returncode 0) iff the current version already has a release tag.
    result = run_command(
        ["git", "rev-parse", current_tag_no_suffix],
        cwd=provider_details.source_provider_package_path,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    if not reapply_templates_only and result.returncode == 0:
        if get_verbose():
            get_console().print(f"[info]The tag {current_tag_no_suffix} exists.")
        # The tag already exists
        result = run_command(
            _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix),
            cwd=provider_details.source_provider_package_path,
            capture_output=True,
            text=True,
            check=True,
        )
        changes = result.stdout.strip()
        if changes:
            provider_details = get_provider_details(provider_package_id)
            # ".latest-doc-only-change.txt" stores the hash of the last release that was
            # classified as documentation-only (see _mark_latest_changes_as_documentation_only).
            doc_only_change_file = (
                provider_details.source_provider_package_path / ".latest-doc-only-change.txt"
            )
            if doc_only_change_file.exists():
                last_doc_only_hash = doc_only_change_file.read_text().strip()
                try:
                    result = run_command(
                        _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash),
                        cwd=provider_details.source_provider_package_path,
                        capture_output=True,
                        text=True,
                        check=True,
                    )
                    changes_since_last_doc_only_check = result.stdout.strip()
                    if not changes_since_last_doc_only_check:
                        get_console().print(
                            "\n[warning]The provider has doc-only changes since the last release. Skipping[/]"
                        )
                        raise PrepareReleaseDocsChangesOnlyException()
                    if len(changes.splitlines()) > len(changes_since_last_doc_only_check.splitlines()):
                        # if doc-only was released after previous release - use it as starting point
                        # but if before - stay with the releases from last tag.
                        changes = changes_since_last_doc_only_check
                except subprocess.CalledProcessError:
                    # ignore when the commit mentioned as last doc-only change is obsolete
                    pass
            get_console().print(
                f"[warning]The provider {provider_package_id} has {len(changes.splitlines())} "
                f"changes since last release[/]"
            )
            get_console().print(f"\n[info]Provider: {provider_package_id}[/]\n")
            changes_table, array_of_changes = _convert_git_changes_to_table(
                f"NEXT VERSION AFTER + {provider_details.versions[0]}",
                changes,
                base_url="https://github.com/apache/airflow/commit/",
                markdown=False,
            )
            _print_changes_table(changes_table)
            # proceed=False: the caller must first classify/bump before regenerating docs.
            return False, [array_of_changes], changes_table
        else:
            get_console().print(f"[info]No changes for {provider_package_id}")
            return False, [], ""
    if len(provider_details.versions) == 1:
        get_console().print(
            f"[info]The provider '{provider_package_id}' has never "
            f"been released but it is ready to release!\n"
        )
    else:
        get_console().print(
            f"[info]New version of the '{provider_package_id}' " f"package is ready to be released!\n"
        )
    # Walk version history newest -> oldest, collecting per-version change tables.
    next_version_tag = f"{HTTPS_REMOTE}/{base_branch}"
    changes_table = ""
    current_version = provider_details.versions[0]
    list_of_list_of_changes: list[list[Change]] = []
    for version in provider_details.versions[1:]:
        version_tag = get_version_tag(version, provider_package_id)
        result = run_command(
            _get_git_log_command(next_version_tag, version_tag),
            cwd=provider_details.source_provider_package_path,
            capture_output=True,
            text=True,
            check=True,
        )
        changes = result.stdout.strip()
        changes_table_for_version, array_of_changes_for_version = _convert_git_changes_to_table(
            current_version, changes, base_url="https://github.com/apache/airflow/commit/", markdown=False
        )
        changes_table += changes_table_for_version
        list_of_list_of_changes.append(array_of_changes_for_version)
        next_version_tag = version_tag
        current_version = version
    # Final (oldest) segment: everything from the first tag backwards.
    result = run_command(
        _get_git_log_command(next_version_tag),
        cwd=provider_details.source_provider_package_path,
        capture_output=True,
        text=True,
        check=True,
    )
    changes = result.stdout.strip()
    changes_table_for_version, array_of_changes_for_version = _convert_git_changes_to_table(
        current_version, changes, base_url="https://github.com/apache/airflow/commit/", markdown=False
    )
    changes_table += changes_table_for_version
    return True, list_of_list_of_changes, changes_table
def _ask_the_user_for_the_type_of_changes(non_interactive: bool) -> TypeOfChange:
    """Ask user to specify type of changes (case-insensitive).

    :return: Type of change.
    """
    # Until Python 3.11+ StrEnum is available we collect the values manually.
    valid_answers = [change.value for change in TypeOfChange]
    if non_interactive:
        # Simulate all possible non-terminal answers - this is useful for running on CI where we want to
        # Test all possibilities.
        return TypeOfChange(random.choice(valid_answers))
    display_answers = "/".join(valid_answers) + "/q"
    prompt = (
        "[warning]Type of change (d)ocumentation, (b)ugfix, (f)eature, (x)breaking "
        f"change, (s)kip, (q)uit [{display_answers}]?[/] "
    )
    while True:
        get_console().print(prompt, end="")
        try:
            answer = input("").lower()
        except KeyboardInterrupt:
            raise PrepareReleaseDocsUserQuitException()
        if answer == "q":
            raise PrepareReleaseDocsUserQuitException()
        if answer in valid_answers:
            return TypeOfChange(answer)
        get_console().print(
            f"[warning] Wrong answer given: '{answer}'. " f"Should be one of {display_answers}"
        )


def _mark_latest_changes_as_documentation_only(
    provider_package_id: str, list_of_list_of_latest_changes: list[list[Change]]
):
    """Persist the newest change hash as doc-only and abort the release flow for this provider."""
    newest_change = list_of_list_of_latest_changes[0][0]
    provider_details = get_provider_details(provider_id=provider_package_id)
    get_console().print(
        f"[special]Marking last change: {newest_change.short_hash} and all above "
        f"changes since the last release as doc-only changes!"
    )
    marker_file = provider_details.source_provider_package_path / ".latest-doc-only-change.txt"
    marker_file.write_text(newest_change.full_hash + "\n")
    raise PrepareReleaseDocsChangesOnlyException()
def _update_version_in_provider_yaml(
    provider_package_id: str,
    type_of_change: TypeOfChange,
) -> tuple[bool, bool]:
    """
    Updates provider version based on the type of change selected by the user

    :param type_of_change: type of change selected
    :param provider_package_id: provider package
    :return: tuple of two bools: (with_breaking_change, maybe_with_new_features)
    """
    provider_details = get_provider_details(provider_package_id)
    bumped = semver.VersionInfo.parse(provider_details.versions[0])
    with_breaking_changes = False
    maybe_with_new_features = False
    if type_of_change == TypeOfChange.BREAKING_CHANGE:
        bumped = bumped.bump_major()
        with_breaking_changes = True
        # we do not know, but breaking changes may also contain new features
        maybe_with_new_features = True
    elif type_of_change == TypeOfChange.FEATURE:
        bumped = bumped.bump_minor()
        maybe_with_new_features = True
    elif type_of_change == TypeOfChange.BUGFIX:
        bumped = bumped.bump_patch()
    provider_yaml_path = get_source_package_path(provider_package_id) / "provider.yaml"
    original_text = provider_yaml_path.read_text()
    # Insert the bumped version as the first entry of the "versions:" list (count=1:
    # only the first occurrence is touched).
    new_text = re.sub(r"versions:", f"versions:\n  - {bumped}", original_text, 1)
    provider_yaml_path.write_text(new_text)
    # IMPORTANT!!! Whenever we update provider.yaml files, we MUST clear cache for
    # get_provider_packages_metadata function, because otherwise anything next will not use it
    get_provider_packages_metadata.cache_clear()
    get_console().print(f"[special]Bumped version to {bumped}\n")
    return with_breaking_changes, maybe_with_new_features


def _verify_changelog_exists(package: str) -> Path:
    """Return the provider's CHANGELOG.rst path; print initial content and exit(1) if missing."""
    provider_details = get_provider_details(package)
    changelog_path = Path(provider_details.source_provider_package_path) / "CHANGELOG.rst"
    if changelog_path.is_file():
        return changelog_path
    get_console().print(f"\n[error]ERROR: Missing {changelog_path}[/]\n")
    get_console().print("[info]Please add the file with initial content:")
    get_console().print("----- START COPYING AFTER THIS LINE ------- ")
    rendered_changelog = jinja2.Template(INITIAL_CHANGELOG_CONTENT, autoescape=True).render(
        package_name=provider_details.pypi_package_name,
    )
    get_console().print(Syntax(rendered_changelog, "rst", theme="ansi_dark"))
    get_console().print("----- END COPYING BEFORE THIS LINE ------- ")
    sys.exit(1)
def _get_additional_package_info(provider_package_path: Path) -> str:
    """Returns additional info for the package.

    :param provider_package_path: path for the package
    :return: additional information for the path (empty string if missing)
    """
    info_path = provider_package_path / "ADDITIONAL_INFO.md"
    if not info_path.is_file():
        return ""
    collected: list[str] = []
    in_leading_comment = True
    for raw_line in info_path.read_text().splitlines(keepends=True):
        if raw_line.startswith(" -->"):
            # End of the leading license comment - real content starts on the next line.
            in_leading_comment = False
        elif not in_leading_comment:
            collected.append(raw_line)
    return "".join(collected)


def replace_content(file_path: Path, old_text: str, new_text: str, provider_id: str):
    """Write ``new_text`` to ``file_path`` and show a colored diff against the old content."""
    if new_text == old_text:
        return
    _, backup_path = tempfile.mkstemp()
    try:
        if file_path.is_file():
            copyfile(file_path, backup_path)
        file_path.write_text(new_text)
        get_console().print(f"\n[info]Generated {file_path} file for the {provider_id} provider\n")
        if old_text != "":
            run_command(["diff", "--color=always", backup_path, file_path.as_posix()], check=False)
    finally:
        os.unlink(backup_path)


def _update_file(
    context: dict[str, Any],
    template_name: str,
    extension: str,
    file_name: str,
    provider_package_id: str,
    target_path: Path,
    regenerate_missing_docs: bool,
) -> None:
    """Render one documentation file from its template into ``target_path`` and sanity-check it."""
    target_file_path = target_path / file_name
    if regenerate_missing_docs and target_file_path.exists():
        # In "regenerate missing docs" mode an already-existing file is left untouched.
        if get_verbose():
            get_console().print(
                f"[warnings]The {target_file_path} exists - not regenerating it "
                f"for the provider {provider_package_id}[/]"
            )
        return
    new_text = render_template(
        template_name=template_name, context=context, extension=extension, keep_trailing_newline=True
    )
    old_text = target_file_path.read_text() if target_file_path.is_file() else ""
    replace_content(target_file_path, old_text, new_text, provider_package_id)
    index_path = target_path / "index.rst"
    if not index_path.exists():
        get_console().print(f"[error]ERROR! The index must exist for the provider docs: {index_path}")
        raise PrepareReleaseDocsErrorOccurredException()

    expected_link_in_index = f"<{file_name.split('.')[0]}>"
    if expected_link_in_index not in index_path.read_text():
        # NOTE: this only warns - generation continues even when the link is missing.
        get_console().print(
            f"\n[error]ERROR! The {index_path} must contain "
            f"link to the generated documentation:[/]\n\n"
            f"[warning]{expected_link_in_index}[/]\n\n"
            f"[info]Please make sure to add it to {index_path}.\n"
        )

    get_console().print(f"[info]Checking for backticks correctly generated in: {target_file_path}")
    match = BACKTICKS_CHECK.search(target_file_path.read_text())
    if match:
        get_console().print(
            f"\n[error]ERROR: Single backticks (`) found in {target_file_path}:[/]\n\n"
            f"[warning]{match.group(0)}[/]\n\n"
            f"[info]Please fix them by replacing with double backticks (``).[/]\n"
        )
        raise PrepareReleaseDocsErrorOccurredException()

    # TODO: uncomment me. Linting revealed that our already generated provider docs have duplicate links
    # in the generated files, we should fix those and uncomment linting as separate step - so that
    # we do not hold current release for fixing the docs.
    # console.print(f"Linting: {file_path}")
    # errors = restructuredtext_lint.lint_file(file_path)
    # real_errors = False
    # if errors:
    #     for error in errors:
    #         # Skip known issue: linter with doc role similar to https://github.com/OCA/pylint-odoo/issues/38
    #         if (
    #             'No role entry for "doc"' in error.message
    #             or 'Unknown interpreted text role "doc"' in error.message
    #         ):
    #             continue
    #         real_errors = True
    #         console.print(f"* [red] {error.message}")
    #     if real_errors:
    #         console.print(f"\n[red] Errors found in {file_path}")
    #         raise PrepareReleaseDocsErrorOccurredException()

    get_console().print(f"[success]Generated {target_file_path} for {provider_package_id} is OK[/]")
def _update_changelog_rst(
    context: dict[str, Any],
    provider_package_id: str,
    target_path: Path,
    regenerate_missing_docs: bool,
) -> None:
    # Thin wrapper: renders the PROVIDER_CHANGELOG template into changelog.rst.
    _update_file(
        context=context,
        template_name="PROVIDER_CHANGELOG",
        extension=".rst",
        file_name="changelog.rst",
        provider_package_id=provider_package_id,
        target_path=target_path,
        regenerate_missing_docs=regenerate_missing_docs,
    )


def _update_commits_rst(
    context: dict[str, Any],
    provider_package_id: str,
    target_path: Path,
    regenerate_missing_docs: bool,
) -> None:
    # Thin wrapper: renders the PROVIDER_COMMITS template into commits.rst.
    _update_file(
        context=context,
        template_name="PROVIDER_COMMITS",
        extension=".rst",
        file_name="commits.rst",
        provider_package_id=provider_package_id,
        target_path=target_path,
        regenerate_missing_docs=regenerate_missing_docs,
    )


def update_release_notes(
    provider_package_id: str,
    reapply_templates_only: bool,
    base_branch: str,
    regenerate_missing_docs: bool,
    non_interactive: bool,
) -> tuple[bool, bool]:
    """Updates generated files.

    This includes the readme, changes, and/or setup.cfg/setup.py/manifest.in/provider_info.

    :param provider_package_id: id of the package
    :param reapply_templates_only: regenerate already released documentation only - without updating versions
    :param base_branch: base branch to check changes in apache remote for changes
    :param regenerate_missing_docs: whether to regenerate missing docs
    :param non_interactive: run in non-interactive mode (useful for CI)
    :return: tuple of two bools: (with_breaking_change, maybe_with_new_features)
    """
    proceed, list_of_list_of_changes, changes_as_table = _get_all_changes_for_package(
        provider_package_id=provider_package_id,
        base_branch=base_branch,
        reapply_templates_only=reapply_templates_only,
    )
    with_breaking_changes = False
    maybe_with_new_features = False
    if not reapply_templates_only:
        if proceed:
            # A version bump already happened - just confirm with the release manager.
            if non_interactive:
                answer = Answer.YES
            else:
                answer = user_confirm(f"Provider {provider_package_id} marked for release. Proceed?")
            if answer == Answer.NO:
                get_console().print(
                    f"\n[warning]Skipping provider: {provider_package_id} " f"on user request![/]\n"
                )
                raise PrepareReleaseDocsUserSkippedException()
            elif answer == Answer.QUIT:
                raise PrepareReleaseDocsUserQuitException()
        elif not list_of_list_of_changes:
            get_console().print(
                f"\n[warning]Provider: {provider_package_id} - "
                f"skipping documentation generation. No changes![/]\n"
            )
            raise PrepareReleaseDocsNoChangesException()
        else:
            # Unclassified changes exist - ask (or simulate) the classification and bump.
            type_of_change = _ask_the_user_for_the_type_of_changes(non_interactive=non_interactive)
            if type_of_change == TypeOfChange.SKIP:
                raise PrepareReleaseDocsUserSkippedException()
            get_console().print(
                f"[info]Provider {provider_package_id} has been classified as:[/]\n\n"
                f"[special]{TYPE_OF_CHANGE_DESCRIPTION[type_of_change]}"
            )
            get_console().print()
            if type_of_change == TypeOfChange.DOCUMENTATION:
                _mark_latest_changes_as_documentation_only(provider_package_id, list_of_list_of_changes)
            elif type_of_change in [TypeOfChange.BUGFIX, TypeOfChange.FEATURE, TypeOfChange.BREAKING_CHANGE]:
                with_breaking_changes, maybe_with_new_features = _update_version_in_provider_yaml(
                    provider_package_id=provider_package_id, type_of_change=type_of_change
                )
                # Re-read the changes after the version bump so tables reflect the new version.
                proceed, list_of_list_of_changes, changes_as_table = _get_all_changes_for_package(
                    provider_package_id=provider_package_id,
                    base_branch=base_branch,
                    reapply_templates_only=reapply_templates_only,
                )
    provider_details = get_provider_details(provider_package_id)
    _verify_changelog_exists(provider_details.provider_id)
    jinja_context = get_provider_documentation_jinja_context(
        provider_id=provider_package_id,
        with_breaking_changes=with_breaking_changes,
        maybe_with_new_features=maybe_with_new_features,
    )
    jinja_context["DETAILED_CHANGES_RST"] = changes_as_table
    jinja_context["DETAILED_CHANGES_PRESENT"] = bool(changes_as_table)
    _update_changelog_rst(
        jinja_context,
        provider_package_id,
        provider_details.documentation_provider_package_path,
        regenerate_missing_docs,
    )
    _update_commits_rst(
        jinja_context,
        provider_package_id,
        provider_details.documentation_provider_package_path,
        regenerate_missing_docs,
    )
    return with_breaking_changes, maybe_with_new_features
def _find_insertion_index_for_version(content: list[str], version: str) -> tuple[int, bool]:
    """Finds insertion index for the specified version from the .rst changelog content.

    :param content: changelog split into separate lines
    :param version: version to look for

    :return: A 2-tuple. The first item indicates the insertion index, while the
        second is a boolean indicating whether to append (False) or insert (True)
        to the changelog.
    """
    version_found = False
    skip_line = False
    idx = 0
    for idx, line in enumerate(content):
        if not version_found and line.strip() == version:
            # Found the version header - its dotted underline follows, skip it.
            version_found = True
            skip_line = True
        elif not skip_line and line and set(line) == {"."}:
            # Hit the underline of the *next* section header: insert two lines above it.
            return idx - 2, version_found
        else:
            skip_line = False
    return idx, version_found


def _get_changes_classified(
    changes: list[Change], with_breaking_changes: bool, maybe_with_new_features: bool
) -> ClassifiedChanges:
    """Pre-classifies changes based on commit message, it's wildly guessing now,

    The classification also includes the decision made by the release manager when classifying the release.

    However, if we switch to semantic commits, it could be automated. This list
    is supposed to be manually reviewed and re-classified by release manager
    anyway.

    :param changes: list of changes
    :return: list of changes classified semi-automatically to the fix/feature/breaking/other buckets
    """
    classified = ClassifiedChanges()
    for change in changes:
        lowered_message = change.message.lower()
        if "fix" in lowered_message:
            classified.fixes.append(change)
        elif maybe_with_new_features and "add" in lowered_message:
            classified.features.append(change)
        elif with_breaking_changes and "breaking" in lowered_message:
            classified.breaking_changes.append(change)
        else:
            classified.other.append(change)
    return classified
def _generate_new_changelog(
    package_id: str,
    provider_details: ProviderPackageDetails,
    changes: list[list[Change]],
    context: dict[str, Any],
    with_breaking_changes: bool,
    maybe_with_new_features: bool,
):
    """Insert a new version section (or append new commits) into the provider's CHANGELOG.rst.

    If the latest version already has a changelog section, only commits whose PR number
    is not yet mentioned are appended; otherwise a fresh classified section is generated.
    """
    latest_version = provider_details.versions[0]
    current_changelog = provider_details.changelog_path.read_text()
    current_changelog_lines = current_changelog.splitlines()
    insertion_index, append = _find_insertion_index_for_version(current_changelog_lines, latest_version)
    new_context = deepcopy(context)
    if append:
        # The latest version already has a section: append only changes not yet mentioned.
        if not changes:
            get_console().print(
                f"[success]The provider {package_id} changelog for `{latest_version}` "
                "has first release. Not updating the changelog.[/]"
            )
            return
        new_changes = [
            change for change in changes[0] if change.pr and "(#" + change.pr + ")" not in current_changelog
        ]
        if not new_changes:
            get_console().print(
                f"[success]The provider {package_id} changelog for `{latest_version}` "
                "has no new changes. Not updating the changelog.[/]"
            )
            return
        new_context["new_changes"] = new_changes
        generated_new_changelog = render_template(
            template_name="UPDATE_CHANGELOG", context=new_context, extension=".rst"
        )
    else:
        if changes:
            classified_changes = _get_changes_classified(
                changes[0],
                with_breaking_changes=with_breaking_changes,
                maybe_with_new_features=maybe_with_new_features,
            )
        else:
            # change log exist but without version 1.0.0 entry
            classified_changes = None

        new_context.update(
            {
                "version": latest_version,
                # RST underline (dots) must match the header length exactly.
                "version_header": "." * len(latest_version),
                "classified_changes": classified_changes,
            }
        )
        generated_new_changelog = render_template(
            template_name="CHANGELOG", context=new_context, extension=".rst"
        )
    # Splice the rendered section into the existing changelog at the computed index.
    new_changelog_lines = current_changelog_lines[0:insertion_index]
    new_changelog_lines.extend(generated_new_changelog.splitlines())
    new_changelog_lines.extend(current_changelog_lines[insertion_index:])
    diff = "\n".join(difflib.context_diff(current_changelog_lines, new_changelog_lines, n=5))
    syntax = Syntax(diff, "diff")
    get_console().print(syntax)
    if not append:
        get_console().print(
            f"[success]The provider {package_id} changelog for `{latest_version}` "
            "version is missing. Generating fresh changelog.[/]"
        )
    else:
        get_console().print(
            f"[success]Appending the provider {package_id} changelog for `{latest_version}` version.[/]"
        )
    provider_details.changelog_path.write_text("\n".join(new_changelog_lines) + "\n")
Generating fresh changelog.[/]" + ) + else: + get_console().print( + f"[success]Appending the provider {package_id} changelog for `{latest_version}` version.[/]" + ) + provider_details.changelog_path.write_text("\n".join(new_changelog_lines) + "\n") + + +def _update_index_rst( + context: dict[str, Any], + provider_package_id: str, + target_path: Path, +): + index_update = render_template( + template_name="PROVIDER_INDEX", context=context, extension=".rst", keep_trailing_newline=True + ) + index_file_path = target_path / "index.rst" + old_text = "" + if index_file_path.is_file(): + old_text = index_file_path.read_text() + new_text = deepcopy(old_text) + lines = old_text.splitlines(keepends=False) + for index, line in enumerate(lines): + if AUTOMATICALLY_GENERATED_MARKER in line: + new_text = "\n".join(lines[:index]) + new_text += "\n" + AUTOMATICALLY_GENERATED_CONTENT + "\n" + new_text += index_update + replace_content(index_file_path, old_text, new_text, provider_package_id) + + +def get_provider_documentation_jinja_context( + provider_id: str, with_breaking_changes: bool, maybe_with_new_features: bool +) -> dict[str, Any]: + provider_details = get_provider_details(provider_id) + jinja_context = get_provider_jinja_context( + provider_id=provider_id, + current_release_version=provider_details.versions[0], + version_suffix="", + ) + jinja_context["WITH_BREAKING_CHANGES"] = with_breaking_changes + jinja_context["MAYBE_WITH_NEW_FEATURES"] = maybe_with_new_features + + jinja_context["ADDITIONAL_INFO"] = ( + _get_additional_package_info(provider_package_path=provider_details.source_provider_package_path), + ) + return jinja_context + + +def update_changelog( + package_id: str, + base_branch: str, + reapply_templates_only: bool, + with_breaking_changes: bool, + maybe_with_new_features: bool, +): + """Internal update changelog method. 
def _generate_init_py_file_for_provider(
    context: dict[str, Any],
    target_path: Path,
):
    """Render the provider's ``__init__.py`` from its template and black-format it in place."""
    rendered = render_template(
        template_name="PROVIDER__INIT__PY",
        context=context,
        extension=".py",
        keep_trailing_newline=True,
    )
    (target_path / "__init__.py").write_text(black_format(rendered))


def _replace_min_airflow_version_in_provider_yaml(
    context: dict[str, Any],
    target_path: Path,
):
    """Rewrite the ``apache-airflow>=...`` dependency line in provider.yaml to MIN_AIRFLOW_VERSION."""
    provider_yaml_path = target_path / "provider.yaml"
    updated_yaml = re.sub(
        r" {2}- apache-airflow>=.*",
        f"  - apache-airflow>={context['MIN_AIRFLOW_VERSION']}",
        provider_yaml_path.read_text(),
    )
    provider_yaml_path.write_text(updated_yaml)
    # IMPORTANT!!! Whenever we update provider.yaml files, we MUST clear cache for
    # get_provider_packages_metadata function, because otherwise anything next will not use it
    get_provider_packages_metadata.cache_clear()
def update_min_airflow_version(
    provider_package_id: str, with_breaking_changes: bool, maybe_with_new_features: bool
):
    """Updates min airflow version in provider yaml and __init__.py

    :param provider_package_id: provider package id
    :param with_breaking_changes: whether there are any breaking changes
    :param maybe_with_new_features: whether there are any new features
    :return:
    """
    provider_details = get_provider_details(provider_package_id)
    context = get_provider_documentation_jinja_context(
        provider_id=provider_package_id,
        with_breaking_changes=with_breaking_changes,
        maybe_with_new_features=maybe_with_new_features,
    )
    source_path = provider_details.source_provider_package_path
    _generate_init_py_file_for_provider(context=context, target_path=source_path)
    _replace_min_airflow_version_in_provider_yaml(context=context, target_path=source_path)
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import shutil +import subprocess +import sys +from pathlib import Path +from shutil import copytree, rmtree +from typing import Any + +from airflow_breeze.utils.ci_group import ci_group +from airflow_breeze.utils.console import get_console +from airflow_breeze.utils.packages import ( + get_latest_provider_tag, + get_provider_details, + get_provider_jinja_context, + get_source_package_path, + get_target_root_for_copied_provider_sources, + render_template, + tag_exists_for_provider, +) +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT +from airflow_breeze.utils.run_utils import run_command +from airflow_breeze.utils.shared_options import get_verbose + +LICENCE_RST = """ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. +""" + + +class PrepareReleasePackageTagExistException(Exception): + """Tag already exist for the package.""" + + +class PrepareReleasePackageWrongSetupException(Exception): + """Wrong setup prepared for the package.""" + + +class PrepareReleasePackageErrorBuildingPackageException(Exception): + """Error when building the package.""" + + +def copy_provider_sources_to_target(provider_id: str) -> Path: + target_provider_root_path = get_target_root_for_copied_provider_sources(provider_id) + + if target_provider_root_path.exists() and not target_provider_root_path.is_dir(): + get_console().print( + f"[error]Target folder for {provider_id} sources is not a directory " + f"please delete {target_provider_root_path} and try again!" + ) + rmtree(target_provider_root_path, ignore_errors=True) + target_provider_root_path.mkdir(parents=True) + source_provider_sources_path = get_source_package_path(provider_id) + relative_provider_path = source_provider_sources_path.relative_to(AIRFLOW_SOURCES_ROOT) + target_providers_sub_folder = target_provider_root_path / relative_provider_path + get_console().print( + f"[info]Copying provider sources: " f"{source_provider_sources_path} -> {target_providers_sub_folder}" + ) + copytree(source_provider_sources_path, target_providers_sub_folder) + shutil.copy(AIRFLOW_SOURCES_ROOT / "LICENSE", target_providers_sub_folder / "LICENSE") + # We do not copy NOTICE from the top level source of Airflow because NOTICE only refers to + # Airflow sources - not to providers. If any of the providers is going to have a code that + # requires NOTICE, then it should be stored in the provider sources (airflow/providers/PROVIDER_ID) + # And it will be copied from there. 
+ (target_providers_sub_folder / ".latest-doc-only-change.txt").unlink(missing_ok=True) + (target_providers_sub_folder / "CHANGELOG.rst").unlink(missing_ok=True) + (target_providers_sub_folder / "provider.yaml").unlink(missing_ok=True) + return target_provider_root_path + + +def get_provider_package_jinja_context(provider_id: str, version_suffix: str) -> dict[str, Any]: + provider_details = get_provider_details(provider_id) + jinja_context = get_provider_jinja_context( + provider_id=provider_id, + current_release_version=provider_details.versions[0], + version_suffix=version_suffix, + ) + return jinja_context + + +def _prepare_get_provider_info_py_file(context: dict[str, Any], provider_id: str, target_path: Path): + from airflow_breeze.utils.black_utils import black_format + + get_provider_template_name = "get_provider_info" + get_provider_content = render_template( + template_name=get_provider_template_name, + context=context, + extension=".py", + autoescape=False, + keep_trailing_newline=True, + ) + target_provider_specific_path = (target_path / "airflow" / "providers").joinpath(*provider_id.split(".")) + (target_provider_specific_path / "get_provider_info.py").write_text(black_format(get_provider_content)) + get_console().print(f"[info]Generated get_provider_info.py in {target_provider_specific_path}[/]") + + +def _prepare_pyproject_toml_file(context: dict[str, Any], target_path: Path): + manifest_content = render_template( + template_name="pyproject", + context=context, + extension=".toml", + autoescape=False, + keep_trailing_newline=True, + ) + (target_path / "pyproject.toml").write_text(manifest_content) + get_console().print(f"[info]Generated pyproject.toml in {target_path}[/]") + + +def _prepare_readme_file(context: dict[str, Any], target_path: Path): + readme_content = LICENCE_RST + render_template( + template_name="PROVIDER_README", context=context, extension=".rst" + ) + (target_path / "README.rst").write_text(readme_content) + 
get_console().print(f"[info]Generated README.rst in {target_path}[/]") + + +def _update_build_files(provider_id: str, version_suffix: str, target_provider_root_sources_path: Path): + """Updates generated setup.cfg/setup.py/manifest.in/provider_info for packages. + + :param provider_id: id of the package + :param version_suffix: version suffix corresponding to the version in the code + :returns False if the package should be skipped, True if everything generated properly + """ + jinja_context = get_provider_package_jinja_context(provider_id=provider_id, version_suffix=version_suffix) + get_console().print() + get_console().print(f"Generating build files for {provider_id}") + get_console().print() + _prepare_get_provider_info_py_file(jinja_context, provider_id, target_provider_root_sources_path) + _prepare_pyproject_toml_file(jinja_context, target_provider_root_sources_path) + _prepare_readme_file(jinja_context, target_provider_root_sources_path) + + +def generate_build_files(provider_id: str, version_suffix: str, target_provider_root_sources_path: Path): + with ci_group(f"Generate setup files for '{provider_id}'"): + _update_build_files(provider_id, version_suffix, target_provider_root_sources_path) + get_console().print(f"[success]Generated regular package setup files for {provider_id}[/]\n") + + +def should_skip_the_package(provider_id: str, version_suffix: str) -> bool: + """Return True if the package should be skipped. + + For RC and official releases we check if the "officially released" version exists + and skip the released if it was. This allows to skip packages that have not been + marked for release in this wave. For "dev" suffixes, we always build all packages. + """ + if version_suffix.startswith("rc") or version_suffix == "": + current_tag = get_latest_provider_tag(provider_id, version_suffix) + if tag_exists_for_provider(provider_id, current_tag): + get_console().print(f"[warning]The tag {current_tag} exists. 
Skipping the package.[/]") + return True + return False + + +def cleanup_build_remnants(target_provider_root_sources_path: Path): + if get_verbose(): + get_console().print(f"Cleaning remnants in {target_provider_root_sources_path}") + for file in target_provider_root_sources_path.glob("*.egg-info"): + shutil.rmtree(file, ignore_errors=True) + shutil.rmtree(target_provider_root_sources_path / "build", ignore_errors=True) + shutil.rmtree(target_provider_root_sources_path / "dist", ignore_errors=True) + + +def build_provider_package( + provider_id: str, version_suffix: str, target_provider_root_sources_path: Path, package_format: str +): + get_console().print( + f"[info]Building provider package: {provider_id} in format {package_format} in " + f"{target_provider_root_sources_path}" + ) + command: list[str] = [sys.executable, "-m", "flit", "build", "--no-setup-py", "--no-use-vcs"] + if package_format != "both": + command.extend(["--format", package_format]) + try: + run_command(command, check=True, cwd=target_provider_root_sources_path) + except subprocess.CalledProcessError as ex: + get_console().print("[error]The command returned an error %s", ex) + raise PrepareReleasePackageErrorBuildingPackageException() + get_console().print( + f"\n[success]Prepared provider package {provider_id} in " f"format {package_format}[/]" + ) + + +def move_built_packages_and_cleanup( + target_provider_root_sources_path: Path, dist_folder: Path, skip_cleanup: bool +): + for file in (target_provider_root_sources_path / "dist").glob("apache*"): + get_console().print(f"[info] Moving {file} to {dist_folder}") + # Shutil can move packages also between filesystems + target_file = dist_folder / file.name + target_file.unlink(missing_ok=True) + shutil.move(file.as_posix(), dist_folder.as_posix()) + + if skip_cleanup: + get_console().print( + f"[warning]NOT Cleaning up the {target_provider_root_sources_path} because " + f"it was requested by the user[/]\n" + f"\nYou can use the generated packages 
to work on the build" + f"process and bring the changes back to the templates in Breeze " + f"src/airflow_breeze/templates" + ) + else: + get_console().print(f"[info]Cleaning up {target_provider_root_sources_path}") + shutil.rmtree(target_provider_root_sources_path, ignore_errors=True) diff --git a/dev/provider_packages/CHANGELOG_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 similarity index 56% rename from dev/provider_packages/CHANGELOG_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 index b8c5cbe2653ff..c0ba1da95535d 100644 --- a/dev/provider_packages/CHANGELOG_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 @@ -16,27 +16,50 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. +#} + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/provider_packages` DIRECTORY + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE + OVERWRITTEN WHEN PREPARING PACKAGES. + + .. 
IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY -#} {{ version }} {{ version_header }} +{%- if WITH_BREAKING_CHANGES %} + Breaking changes ~~~~~~~~~~~~~~~~ {% for breaking_change in classified_changes.breaking_changes %} * ``{{ breaking_change.message_without_backticks | safe }}`` {%- endfor %} +{%- endif %} + +{%- if MAYBE_WITH_NEW_FEATURES %} Features ~~~~~~~~ {% for feature in classified_changes.features %} * ``{{ feature.message_without_backticks | safe }}`` {%- endfor %} +{%- endif %} Bug Fixes ~~~~~~~~~ diff --git a/dev/provider_packages/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 similarity index 85% rename from dev/provider_packages/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 index 086599b381e25..38e1543bcf099 100644 --- a/dev/provider_packages/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 @@ -16,12 +16,7 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/provider_packages` DIRECTORY --#} +#} .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -39,4 +34,10 @@ specific language governing permissions and limitations under the License. + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE + OVERWRITTEN WHEN PREPARING PACKAGES. + + .. 
IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + .. include:: {{ CHANGELOG_RELATIVE_PATH }}/CHANGELOG.rst diff --git a/dev/provider_packages/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 similarity index 82% rename from dev/provider_packages/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 index fdf0d3ff1e204..6adb897305612 100644 --- a/dev/provider_packages/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 @@ -16,13 +16,7 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/provider_packages` DIRECTORY - --#} +#} .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -40,14 +34,21 @@ specific language governing permissions and limitations under the License. -.. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE + OVERWRITTEN WHEN PREPARING PACKAGES. + + .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_COMMITS_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + .. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! 
+ Package {{ PACKAGE_PIP_NAME }} ------------------------------------------------------ {{ PROVIDER_DESCRIPTION | safe }} -This is detailed commit list of changes for versions provider package: ``{{PROVIDER_PACKAGE_ID}}``. +This is detailed commit list of changes for versions provider package: ``{{PROVIDER_ID}}``. For high-level changelog, see :doc:`package information including changelog `. {%- if DETAILED_CHANGES_PRESENT %} diff --git a/dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 similarity index 74% rename from dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 index 4e7943ced2cb8..08d108474271c 100644 --- a/dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 @@ -16,13 +16,31 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. +-#} + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/provider_packages` DIRECTORY + .. http://www.apache.org/licenses/LICENSE-2.0 --#} + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE + OVERWRITTEN WHEN PREPARING PACKAGES. + + .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + .. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! .. toctree:: :hidden: @@ -42,7 +60,7 @@ Release: {{ RELEASE }}{{ VERSION_SUFFIX }} Provider package ---------------- -This is a provider package for ``{{PROVIDER_PACKAGE_ID}}`` provider. All classes for this provider package +This is a provider package for ``{{PROVIDER_ID}}`` provider. All classes for this provider package are in ``{{FULL_PACKAGE_NAME}}`` python package. {%- if PROVIDER_REMOVED %} diff --git a/dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 similarity index 90% rename from dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 index 13e088aa94f03..9bcff72fe85ee 100644 --- a/dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 @@ -16,13 +16,7 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/provider_packages` DIRECTORY - --#} +#} .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
See the NOTICE file distributed with this work for additional information @@ -40,6 +34,12 @@ specific language governing permissions and limitations under the License. + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE + OVERWRITTEN WHEN PREPARING PACKAGES. + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + Package ``{{ PACKAGE_PIP_NAME }}`` @@ -51,7 +51,7 @@ Release: ``{{ RELEASE }}{{ VERSION_SUFFIX }}`` Provider package ---------------- -This is a provider package for ``{{PROVIDER_PACKAGE_ID}}`` provider. All classes for this provider package +This is a provider package for ``{{PROVIDER_ID}}`` provider. All classes for this provider package are in ``{{FULL_PACKAGE_NAME}}`` python package. You can find package information and changelog for the provider diff --git a/dev/provider_packages/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 similarity index 88% rename from dev/provider_packages/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 rename to dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 index c057d63e3421b..9acce6226f111 100644 --- a/dev/provider_packages/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 @@ -16,8 +16,7 @@ specific language governing permissions and limitations under the License. --#} -# +#} # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -38,8 +37,8 @@ # NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE # OVERWRITTEN WHEN PREPARING DOCUMENTATION FOR THE PACKAGES. 
# -# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE -# `PROVIDER__INIT__PY_TEMPLATE.py.jinja2` IN the `dev/provider_packages` DIRECTORY +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `PROVIDER__INIT__PY_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY # from __future__ import annotations @@ -58,5 +57,5 @@ if packaging.version.parse(packaging.version.parse(airflow_version).base_version "{{ MIN_AIRFLOW_VERSION }}" ): raise RuntimeError( - f"The package `{{ PACKAGE_PIP_NAME }}:{__version__}` requires Apache Airflow {{ MIN_AIRFLOW_VERSION }}+" # NOQA: E501 + f"The package `{{ PACKAGE_PIP_NAME }}:{__version__}` needs Apache Airflow {{ MIN_AIRFLOW_VERSION }}+" ) diff --git a/dev/provider_packages/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 similarity index 80% rename from dev/provider_packages/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 index 421a96a3ebf58..f38c103ae7e6f 100644 --- a/dev/provider_packages/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 @@ -16,12 +16,6 @@ specific language governing permissions and limitations under the License. - NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/provider_packages` DIRECTORY - #} .. 
Review and move the new changes to one of the sections above: {%- for change in new_changes %} diff --git a/dev/breeze/src/airflow_breeze/templates/get_provider_info_TEMPLATE.py.jinja2 b/dev/breeze/src/airflow_breeze/templates/get_provider_info_TEMPLATE.py.jinja2 new file mode 100644 index 0000000000000..5340dc9b76a14 --- /dev/null +++ b/dev/breeze/src/airflow_breeze/templates/get_provider_info_TEMPLATE.py.jinja2 @@ -0,0 +1,45 @@ +{# + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +#} +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE +# OVERWRITTEN WHEN PREPARING PACKAGES. +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return {{ PROVIDER_INFO }} diff --git a/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 b/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 new file mode 100644 index 0000000000000..2965235c233ec --- /dev/null +++ b/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 @@ -0,0 +1,109 @@ +{# + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +#} +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE +# OVERWRITTEN WHEN PREPARING PACKAGES. + +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +# +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "{{ PACKAGE_PIP_NAME }}" +version = "{{RELEASE}}{{ VERSION_SUFFIX }}" +description = "Provider package {{ PACKAGE_PIP_NAME }} for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "{{ PROVIDER_ID }}", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + {%- for python_version in SUPPORTED_PYTHON_VERSIONS %} + "Programming Language :: Python :: {{ python_version }}", + {%- endfor %} + "Topic :: System :: Monitoring", +] +requires-python = "~=3.8" +dependencies = [ +{{- INSTALL_REQUIREMENTS }} +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}" +"Changelog" = "https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME 
}}/{{RELEASE}}/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://twitter.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.{{ PROVIDER_ID }}.get_provider_info:get_provider_info" + +{%- if PLUGINS %} +[project.entry-points."airflow.plugins"] +{%- for plugin in PLUGINS %} +{{ plugin.name }} = "{{ plugin.package_name }}:{{ plugin.class_name }}" +{%- endfor %} +{%- endif %} + +{%- if EXTRAS_REQUIREMENTS %} +[project.optional-dependencies] +{%- for extra_name, dependencies_list in EXTRAS_REQUIREMENTS.items() %} +"{{ extra_name }}" = [ +{%- for dependency in dependencies_list %} + "{{ dependency }}", +{%- endfor %} +] +{%- endfor %} +{%- endif %} + +[tool.flit.module] +name = "airflow.providers.{{ PROVIDER_ID }}" diff --git a/dev/breeze/src/airflow_breeze/utils/add_back_references.py b/dev/breeze/src/airflow_breeze/utils/add_back_references.py index 108c3599059ab..ba18679ecaf6d 100644 --- a/dev/breeze/src/airflow_breeze/utils/add_back_references.py +++ b/dev/breeze/src/airflow_breeze/utils/add_back_references.py @@ -128,25 +128,25 @@ def generate_back_references(link: str, base_path: Path): create_back_reference_html(relative_path, dest_file_path) -def start_generating_back_references(airflow_site_directory: Path, short_provider_package_ids: list[str]): +def start_generating_back_references(airflow_site_directory: Path, short_provider_ids: list[str]): docs_archive_path = airflow_site_directory / "docs-archive" airflow_docs_path = docs_archive_path / "apache-airflow" helm_docs_path = docs_archive_path / "helm-chart" - if "apache-airflow" in short_provider_package_ids: + if "apache-airflow" in short_provider_ids: generate_back_references(airflow_redirects_link, airflow_docs_path) - 
short_provider_package_ids.remove("apache-airflow") - if "helm-chart" in short_provider_package_ids: + short_provider_ids.remove("apache-airflow") + if "helm-chart" in short_provider_ids: generate_back_references(helm_redirects_link, helm_docs_path) - short_provider_package_ids.remove("helm-chart") - if "docker-stack" in short_provider_package_ids: + short_provider_ids.remove("helm-chart") + if "docker-stack" in short_provider_ids: get_console().print("[info]Skipping docker-stack package. No back-reference needed.") - short_provider_package_ids.remove("docker-stack") - if "apache-airflow-providers" in short_provider_package_ids: + short_provider_ids.remove("docker-stack") + if "apache-airflow-providers" in short_provider_ids: get_console().print("[info]Skipping apache-airflow-providers package. No back-reference needed.") - short_provider_package_ids.remove("apache-airflow-providers") - if short_provider_package_ids: + short_provider_ids.remove("apache-airflow-providers") + if short_provider_ids: all_providers = [ - f"apache-airflow-providers-{package.replace('.','-')}" for package in short_provider_package_ids + f"apache-airflow-providers-{package.replace('.','-')}" for package in short_provider_ids ] for p in all_providers: get_console().print(f"Processing airflow provider: {p}") diff --git a/dev/provider_packages/MANIFEST_TEMPLATE.in.jinja2 b/dev/breeze/src/airflow_breeze/utils/black_utils.py similarity index 54% rename from dev/provider_packages/MANIFEST_TEMPLATE.in.jinja2 rename to dev/breeze/src/airflow_breeze/utils/black_utils.py index 83013eefb4e7f..23891b8206c94 100644 --- a/dev/provider_packages/MANIFEST_TEMPLATE.in.jinja2 +++ b/dev/breeze/src/airflow_breeze/utils/black_utils.py @@ -1,4 +1,3 @@ -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,22 +14,25 @@ # KIND, either express or implied. 
See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations + +import os +from functools import lru_cache + +from black import Mode, TargetVersion, format_str, parse_pyproject_toml -# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE -# OVERWRITTEN WHEN PREPARING PACKAGES. +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT -# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE -# `MANIFEST_TEMPLATE.py.jinja2` IN the `provider_packages` DIRECTORY +@lru_cache(maxsize=None) +def _black_mode() -> Mode: + config = parse_pyproject_toml(os.path.join(AIRFLOW_SOURCES_ROOT, "pyproject.toml")) + target_versions = {TargetVersion[val.upper()] for val in config.get("target_version", ())} + return Mode( + target_versions=target_versions, + line_length=config.get("line_length", Mode.line_length), + ) -{% if PROVIDER_PACKAGE_ID == 'amazon' %} -include airflow/providers/amazon/aws/hooks/batch_waiters.json -include airflow/providers/amazon/aws/waiters/*.json -{% elif PROVIDER_PACKAGE_ID == 'cncf.kubernetes' %} -include airflow/providers/cncf/kubernetes/*.jinja2 -{% endif %} -include NOTICE -include LICENSE -include CHANGELOG.rst -global-exclude __pycache__ *.pyc +def black_format(content) -> str: + return format_str(content, mode=_black_mode()) diff --git a/dev/breeze/src/airflow_breeze/utils/console.py b/dev/breeze/src/airflow_breeze/utils/console.py index c1a14c86f5240..d579e58176f9b 100644 --- a/dev/breeze/src/airflow_breeze/utils/console.py +++ b/dev/breeze/src/airflow_breeze/utils/console.py @@ -43,6 +43,7 @@ def get_theme() -> Theme: "info": "bold", "warning": "italic", "error": "italic underline", + "special": "bold italic underline", } ) except ImportError: @@ -56,6 +57,7 @@ def get_theme() -> Theme: "info": "bright_blue", "warning": "bright_yellow", "error": "red", + "special": "magenta", } ) @@ -65,6 +67,7 @@ class MessageType(Enum): INFO = "info" WARNING = "warning" 
ERROR = "error" + SPECIAL = "special" def message_type_from_return_code(return_code: int) -> MessageType: diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py b/dev/breeze/src/airflow_breeze/utils/packages.py index d3b6b81e840eb..7d1927f108caa 100644 --- a/dev/breeze/src/airflow_breeze/utils/packages.py +++ b/dev/breeze/src/airflow_breeze/utils/packages.py @@ -19,18 +19,186 @@ import fnmatch import json -from typing import Iterable +import os +import re +import subprocess +import sys +from enum import Enum +from functools import lru_cache +from pathlib import Path +from typing import Any, Iterable, NamedTuple -from airflow_breeze.global_constants import REGULAR_DOC_PACKAGES -from airflow_breeze.utils.path_utils import PROVIDER_DEPENDENCIES_JSON_FILE_PATH -from airflow_breeze.utils.suspended_providers import get_removed_provider_ids +from airflow_breeze.global_constants import ( + ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS, + PROVIDER_DEPENDENCIES, + PROVIDER_RUNTIME_DATA_SCHEMA_PATH, + REGULAR_DOC_PACKAGES, +) +from airflow_breeze.utils.console import get_console +from airflow_breeze.utils.path_utils import ( + AIRFLOW_PROVIDERS_ROOT, + BREEZE_SOURCES_ROOT, + DIST_DIR, + DOCS_ROOT, + PROVIDER_DEPENDENCIES_JSON_FILE_PATH, +) +from airflow_breeze.utils.publish_docs_helpers import ( + _load_schema, + get_provider_yaml_paths, +) +from airflow_breeze.utils.run_utils import run_command +from airflow_breeze.utils.versions import get_version_tag, strip_leading_zeros_from_version + +MIN_AIRFLOW_VERSION = "2.5.0" +HTTPS_REMOTE = "apache-https-for-providers" LONG_PROVIDERS_PREFIX = "apache-airflow-providers-" +# TODO: use single source of truth for those +# for now we need to keep them in sync with the ones in setup.py +PREINSTALLED_PROVIDERS = [ + # Until we cut off the 2.8.0 branch and bump current airflow version to 2.9.0, we should + # Keep common.io commented out in order ot be able to generate PyPI constraints because + # The version from PyPI has requirement of 
apache-airflow>=2.8.0 + # "common.io", + "common.sql", + "ftp", + "http", + "imap", + "sqlite", +] + + +class EntityType(Enum): + Operators = "Operators" + Transfers = "Transfers" + Sensors = "Sensors" + Hooks = "Hooks" + Secrets = "Secrets" + + +class PluginInfo(NamedTuple): + name: str + package_name: str + class_name: str + + +class ProviderPackageDetails(NamedTuple): + provider_id: str + full_package_name: str + pypi_package_name: str + source_provider_package_path: Path + documentation_provider_package_path: Path + changelog_path: Path + provider_description: str + dependencies: list[str] + versions: list[str] + excluded_python_versions: list[str] + plugins: list[PluginInfo] + removed: bool + + +class PackageSuspendedException(Exception): + """Exception raised when package is suspended.""" + + +@lru_cache +def get_provider_packages_metadata() -> dict[str, dict[str, Any]]: + """ + Load all data from providers files + + :return: A list containing the contents of all provider.yaml files. + """ + import jsonschema + import yaml + + schema = _load_schema() + result: dict[str, dict[str, Any]] = {} + for provider_yaml_path in get_provider_yaml_paths(): + with open(provider_yaml_path) as yaml_file: + provider = yaml.safe_load(yaml_file) + try: + jsonschema.validate(provider, schema=schema) + except jsonschema.ValidationError: + raise Exception(f"Unable to parse: {provider_yaml_path}.") + result[get_short_package_name(provider["package-name"])] = provider + return result + + +def validate_provider_info_with_runtime_schema(provider_info: dict[str, Any]) -> None: + """Validates provider info against the runtime schema. + + This way we check if the provider info in the packages is future-compatible. + The Runtime Schema should only change when there is a major version change. 
+ + :param provider_info: provider info to validate + """ + import jsonschema + + schema = json.loads(PROVIDER_RUNTIME_DATA_SCHEMA_PATH.read_text()) + try: + jsonschema.validate(provider_info, schema=schema) + except jsonschema.ValidationError as ex: + get_console().print("[red]Provider info not validated against runtime schema[/]") + raise Exception( + "Error when validating schema. The schema must be compatible with " + "airflow/provider_info.schema.json.", + ex, + ) + + +def get_provider_info_dict(provider_id: str) -> dict[str, Any]: + """Retrieves provider info from the provider yaml file. + + :param provider_id: package id to retrieve provider.yaml from + :return: provider_info dictionary + """ + provider_yaml_dict = get_provider_packages_metadata().get(provider_id) + if provider_yaml_dict: + validate_provider_info_with_runtime_schema(provider_yaml_dict) + return provider_yaml_dict or {} + + +@lru_cache +def get_suspended_provider_ids() -> list[str]: + return [ + provider_id + for provider_id, provider_metadata in get_provider_packages_metadata().items() + if provider_metadata.get("suspended", False) + ] + + +@lru_cache +def get_suspended_provider_folders() -> list[str]: + return [provider_id.replace(".", "/") for provider_id in get_suspended_provider_ids()] + +@lru_cache +def get_removed_provider_ids() -> list[str]: + return [ + provider_id + for provider_id, provider_metadata in get_provider_packages_metadata().items() + if provider_metadata.get("removed", False) + ] + + +def get_provider_requirements(provider_id: str) -> list[str]: + package_metadata = get_provider_packages_metadata().get(provider_id) + return package_metadata["dependencies"] if package_metadata else [] + + +@lru_cache def get_available_packages( include_non_provider_doc_packages: bool = False, include_all_providers: bool = False ) -> list[str]: + """ + Return provider ids for all packages that are available currently (not suspended). 
+ + :param include_non_provider_doc_packages: whether the non-provider doc packages should be included + (packages like apache-airflow, helm-chart, docker-stack) + :param include_all_providers: whether "all-providers" should be included ni the list. + + """ provider_ids: list[str] = list(json.loads(PROVIDER_DEPENDENCIES_JSON_FILE_PATH.read_text()).keys()) available_packages = [] if include_non_provider_doc_packages: @@ -42,6 +210,7 @@ def get_available_packages( def expand_all_provider_packages(short_doc_packages: tuple[str, ...]) -> tuple[str, ...]: + """In case there are "all-providers" in the list, expand the list with all providers.""" if "all-providers" in short_doc_packages: packages = [package for package in short_doc_packages if package != "all-providers"] packages.extend(get_available_packages()) @@ -52,31 +221,58 @@ def expand_all_provider_packages(short_doc_packages: tuple[str, ...]) -> tuple[s def get_long_package_names(short_form_providers: Iterable[str]) -> tuple[str, ...]: providers: list[str] = [] for short_form_provider in short_form_providers: - if short_form_provider in REGULAR_DOC_PACKAGES: - providers.append(short_form_provider) - continue - short_form_provider.split(".") - parts = "-".join(short_form_provider.split(".")) - providers.append(LONG_PROVIDERS_PREFIX + parts) + long_package_name = get_long_package_name(short_form_provider) + providers.append(long_package_name) + return tuple(providers) + + +def get_long_package_name(short_form_provider: str) -> str: + if short_form_provider in REGULAR_DOC_PACKAGES: + long_package_name = short_form_provider + else: + long_package_name = LONG_PROVIDERS_PREFIX + "-".join(short_form_provider.split(".")) + return long_package_name + + +def get_short_package_names(long_form_providers: Iterable[str]) -> tuple[str, ...]: + providers: list[str] = [] + for long_form_provider in long_form_providers: + providers.append(get_short_package_name(long_form_provider)) return tuple(providers) -def 
convert_to_long_package_names( - package_filters: tuple[str, ...], packages_short_form: tuple[str, ...] +def get_short_package_name(long_form_provider: str) -> str: + if long_form_provider in REGULAR_DOC_PACKAGES: + return long_form_provider + else: + if not long_form_provider.startswith(LONG_PROVIDERS_PREFIX): + raise ValueError( + f"Invalid provider name: {long_form_provider}. " f"Should start with {LONG_PROVIDERS_PREFIX}" + ) + return long_form_provider[len(LONG_PROVIDERS_PREFIX) :].replace("-", ".") + + +def find_matching_long_package_names( + short_packages: tuple[str, ...], + filters: tuple[str, ...] | None = None, ) -> tuple[str, ...]: - """Filters the package list against a set of filters. + """Finds matching long package names based on short package name and package filters specified. + + The sequence of specified packages / filters is kept (filters first, packages next). In case there + are filters that do not match any of the packages error is raised. - A packet is returned if it matches at least one filter. The function keeps the order of the packages. 
+ :param short_packages: short forms of package names + :param filters: package filters specified """ available_doc_packages = list( get_long_package_names(get_available_packages(include_non_provider_doc_packages=True)) ) - if not package_filters and not packages_short_form: - available_doc_packages.extend(package_filters) + if not filters and not short_packages: + available_doc_packages.extend(filters or ()) return tuple(set(available_doc_packages)) - processed_package_filters = list(package_filters) - processed_package_filters.extend(get_long_package_names(packages_short_form)) + processed_package_filters = list(filters or ()) + processed_package_filters.extend(get_long_package_names(short_packages)) removed_packages: list[str] = [ f"apache-airflow-providers-{provider.replace('.','-')}" for provider in get_removed_provider_ids() @@ -99,3 +295,364 @@ def convert_to_long_package_names( if any(fnmatch.fnmatch(p, f) for f in processed_package_filters) ] ) + + +def get_source_package_path(provider_id: str) -> Path: + return AIRFLOW_PROVIDERS_ROOT.joinpath(*provider_id.split(".")) + + +def get_documentation_package_path(provider_id: str) -> Path: + return DOCS_ROOT / f"apache-airflow-providers-{provider_id.replace('.', '-')}" + + +def get_target_root_for_copied_provider_sources(provider_id: str) -> Path: + return (DIST_DIR / "provider_packages").joinpath(*provider_id.split(".")) + + +def get_pip_package_name(provider_id: str) -> str: + """ + Returns PIP package name for the package id. + + :param provider_id: id of the package + :return: the name of pip package + """ + return "apache-airflow-providers-" + provider_id.replace(".", "-") + + +def get_wheel_package_name(provider_id: str) -> str: + """ + Returns Wheel package name prefix for the package id. 
+ + :param provider_id: id of the package + :return: the name of wheel package prefix + """ + return "apache_airflow_providers_" + provider_id.replace(".", "_") + + +def get_install_requirements(provider_id: str, version_suffix: str) -> str: + """ + Returns install requirements for the package. + + :param provider_id: id of the provider package + :param version_suffix: optional version suffix for packages + + :return: install requirements of the package + """ + + def apply_version_suffix(install_clause: str) -> str: + if install_clause.startswith("apache-airflow") and ">=" in install_clause and version_suffix != "": + # This is workaround for `pip` way of handling `--pre` installation switch. It apparently does + # not modify the meaning of `install_requires` to include also pre-releases, so we need to + # modify our internal provider and airflow package version references to include all pre-releases + # including all development releases. When you specify dependency as >= X.Y.Z, and you + # have packages X.Y.Zdev0 or X.Y.Zrc1 in a local file, such package is not considered + # as fulfilling the requirement even if `--pre` switch is used. + return install_clause + ".dev0" + return install_clause + + if provider_id in get_removed_provider_ids(): + dependencies = get_provider_requirements(provider_id) + else: + dependencies = PROVIDER_DEPENDENCIES.get(provider_id)["deps"] + install_requires = [apply_version_suffix(clause) for clause in dependencies] + return "".join(f'\n "{ir}",' for ir in install_requires) + + +def get_package_extras(provider_id: str) -> dict[str, list[str]]: + """ + Finds extras for the package specified. 
+ + :param provider_id: id of the package + """ + if provider_id == "providers": + return {} + if provider_id in get_removed_provider_ids(): + return {} + extras_dict: dict[str, list[str]] = { + module: [get_pip_package_name(module)] + for module in PROVIDER_DEPENDENCIES.get(provider_id)["cross-providers-deps"] + } + provider_yaml_dict = get_provider_packages_metadata().get(provider_id) + additional_extras = provider_yaml_dict.get("additional-extras") if provider_yaml_dict else None + if additional_extras: + for entry in additional_extras: + name = entry["name"] + dependencies = entry["dependencies"] + if name in extras_dict: + # remove non-versioned dependencies if versioned ones are coming + existing_dependencies = set(extras_dict[name]) + for new_dependency in dependencies: + for dependency in existing_dependencies: + # remove extra if exists as non-versioned one + if new_dependency.startswith(dependency): + extras_dict[name].remove(dependency) + break + extras_dict[name].append(new_dependency) + else: + extras_dict[name] = dependencies + return extras_dict + + +def get_provider_details(provider_id: str) -> ProviderPackageDetails: + provider_info = get_provider_packages_metadata().get(provider_id) + if not provider_info: + raise RuntimeError(f"The provider {provider_id} has no provider.yaml defined.") + plugins: list[PluginInfo] = [] + if "plugins" in provider_info: + for plugin in provider_info["plugins"]: + package_name, class_name = plugin["plugin-class"].rsplit(".", maxsplit=1) + plugins.append( + PluginInfo( + name=plugin["name"], + package_name=package_name, + class_name=class_name, + ) + ) + return ProviderPackageDetails( + provider_id=provider_id, + full_package_name=f"airflow.providers.{provider_id}", + pypi_package_name=f"apache-airflow-providers-{provider_id.replace('.', '-')}", + source_provider_package_path=get_source_package_path(provider_id), + documentation_provider_package_path=get_documentation_package_path(provider_id), + 
changelog_path=get_source_package_path(provider_id) / "CHANGELOG.rst", + provider_description=provider_info["description"], + dependencies=provider_info["dependencies"], + versions=provider_info["versions"], + excluded_python_versions=provider_info.get("excluded-python-versions") or [], + plugins=plugins, + removed=provider_info.get("removed", False), + ) + + +def get_min_airflow_version(provider_id: str) -> str: + from packaging.version import Version as PackagingVersion + + provider_details = get_provider_details(provider_id=provider_id) + min_airflow_version = MIN_AIRFLOW_VERSION + for dependency in provider_details.dependencies: + if dependency.startswith("apache-airflow>="): + current_min_airflow_version = dependency.split(">=")[1] + if PackagingVersion(current_min_airflow_version) > PackagingVersion(MIN_AIRFLOW_VERSION): + min_airflow_version = current_min_airflow_version + return min_airflow_version + + +def get_python_requires(provider_id: str) -> str: + python_requires = "~=3.8" + provider_details = get_provider_details(provider_id=provider_id) + for p in provider_details.excluded_python_versions: + python_requires += f", !={p}" + return python_requires + + +def convert_cross_package_dependencies_to_table( + cross_package_dependencies: list[str], + markdown: bool = True, +) -> str: + """ + Converts cross-package dependencies to a Markdown table + :param cross_package_dependencies: list of cross-package dependencies + :param markdown: if True, Markdown format is used else rst + :return: formatted table + """ + from tabulate import tabulate + + headers = ["Dependent package", "Extra"] + table_data = [] + prefix = "apache-airflow-providers-" + base_url = "https://airflow.apache.org/docs/" + for dependency in cross_package_dependencies: + pip_package_name = f"{prefix}{dependency.replace('.','-')}" + url_suffix = f"{dependency.replace('.','-')}" + if markdown: + url = f"[{pip_package_name}]({base_url}{url_suffix})" + else: + url = f"`{pip_package_name} 
<{base_url}{prefix}{url_suffix}>`_" + table_data.append((url, f"`{dependency}`" if markdown else f"``{dependency}``")) + return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") + + +def get_cross_provider_dependent_packages(provider_package_id: str) -> list[str]: + if provider_package_id in get_removed_provider_ids(): + return [] + return PROVIDER_DEPENDENCIES[provider_package_id]["cross-providers-deps"] + + +def get_provider_jinja_context( + provider_id: str, + current_release_version: str, + version_suffix: str, +): + provider_details = get_provider_details(provider_id=provider_id) + release_version_no_leading_zeros = strip_leading_zeros_from_version(current_release_version) + changelog = provider_details.changelog_path.read_text() + supported_python_versions = [ + p for p in ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS if p not in provider_details.excluded_python_versions + ] + cross_providers_dependencies = get_cross_provider_dependent_packages(provider_package_id=provider_id) + context: dict[str, Any] = { + "PROVIDER_ID": provider_details.provider_id, + "PACKAGE_PIP_NAME": get_pip_package_name(provider_details.provider_id), + "PACKAGE_WHEEL_NAME": get_wheel_package_name(provider_details.provider_id), + "FULL_PACKAGE_NAME": provider_details.full_package_name, + "RELEASE": current_release_version, + "RELEASE_NO_LEADING_ZEROS": release_version_no_leading_zeros, + "VERSION_SUFFIX": version_suffix or "", + "PIP_REQUIREMENTS": get_provider_requirements(provider_details.provider_id), + "PROVIDER_DESCRIPTION": provider_details.provider_description, + "INSTALL_REQUIREMENTS": get_install_requirements( + provider_id=provider_details.provider_id, version_suffix=version_suffix + ), + "EXTRAS_REQUIREMENTS": get_package_extras(provider_id=provider_details.provider_id), + "CHANGELOG_RELATIVE_PATH": os.path.relpath( + provider_details.source_provider_package_path, + provider_details.documentation_provider_package_path, + ), + "CHANGELOG": changelog, + 
"SUPPORTED_PYTHON_VERSIONS": supported_python_versions, + "PLUGINS": provider_details.plugins, + "MIN_AIRFLOW_VERSION": get_min_airflow_version(provider_id), + "PROVIDER_REMOVED": provider_details.removed, + "PROVIDER_INFO": get_provider_info_dict(provider_id), + "CROSS_PROVIDERS_DEPENDENCIES": get_cross_provider_dependent_packages(provider_id), + "CROSS_PROVIDERS_DEPENDENCIES_TABLE_RST": convert_cross_package_dependencies_to_table( + cross_providers_dependencies, markdown=False + ), + "PIP_REQUIREMENTS_TABLE_RST": convert_pip_requirements_to_table( + get_provider_requirements(provider_id), markdown=False + ), + } + return context + + +def render_template( + template_name: str, + context: dict[str, Any], + extension: str, + autoescape: bool = True, + keep_trailing_newline: bool = False, +) -> str: + """ + Renders template based on its name. Reads the template from _TEMPLATE.md.jinja2 in current dir. + :param template_name: name of the template to use + :param context: Jinja2 context + :param extension: Target file extension + :param autoescape: Whether to autoescape HTML + :param keep_trailing_newline: Whether to keep the newline in rendered output + :return: rendered template + """ + import jinja2 + + template_loader = jinja2.FileSystemLoader( + searchpath=BREEZE_SOURCES_ROOT / "src" / "airflow_breeze" / "templates" + ) + template_env = jinja2.Environment( + loader=template_loader, + undefined=jinja2.StrictUndefined, + autoescape=autoescape, + keep_trailing_newline=keep_trailing_newline, + ) + template = template_env.get_template(f"{template_name}_TEMPLATE{extension}.jinja2") + content: str = template.render(context) + return content + + +def make_sure_remote_apache_exists_and_fetch(github_repository: str = "apache/airflow"): + """Make sure that apache remote exist in git. + + We need to take a log from the apache repository main branch - not locally because we might + not have the latest version. 
Also, the local repo might be shallow, so we need to + un-shallow it to see all the history. + + This will: + * check if the remote exists and add if it does not + * check if the local repo is shallow, mark it to un-shallow in this case + * fetch from the remote including all tags and overriding local tags in case + they are set differently + + """ + try: + run_command(["git", "remote", "get-url", HTTPS_REMOTE], text=True, capture_output=True) + except subprocess.CalledProcessError as ex: + if ex.returncode == 128 or ex.returncode == 2: + run_command( + [ + "git", + "remote", + "add", + HTTPS_REMOTE, + f"https://github.com/{github_repository}.git", + ], + check=True, + ) + else: + get_console().print( + f"[error]Error {ex}[/]\n" f"[error]When checking if {HTTPS_REMOTE} is set.[/]\n\n" + ) + sys.exit(1) + get_console().print("[info]Fetching full history and tags from remote.") + get_console().print("[info]This might override your local tags!") + result = run_command( + ["git", "rev-parse", "--is-shallow-repository"], + check=True, + capture_output=True, + text=True, + ) + is_shallow_repo = result.stdout.strip() == "true" + fetch_command = ["git", "fetch", "--tags", "--force", HTTPS_REMOTE] + if is_shallow_repo: + fetch_command.append("--unshallow") + try: + run_command(fetch_command) + except subprocess.CalledProcessError as e: + get_console().print( + f"[error]Error {e}[/]\n" + f"[error]When fetching tags from remote. Your tags might not be refreshed.[/]\n\n" + f'[warning]Please refresh the tags manually via:[/]\n\n"' + f'{" ".join(fetch_command)}\n\n' + ) + sys.exit(1) + + +def convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool = True) -> str: + """ + Converts PIP requirement list to a Markdown table. 
+ :param requirements: requirements list + :param markdown: if True, Markdown format is used else rst + :return: formatted table + """ + from tabulate import tabulate + + headers = ["PIP package", "Version required"] + table_data = [] + for dependency in requirements: + found = re.match(r"(^[^<=>~]*)([^<=>~]?.*)$", dependency) + if found: + package = found.group(1) + version_required = found.group(2) + if version_required != "": + version_required = f"`{version_required}`" if markdown else f"``{version_required}``" + table_data.append((f"`{package}`" if markdown else f"``{package}``", version_required)) + else: + table_data.append((dependency, "")) + return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") + + +def tag_exists_for_provider(provider_id: str, current_tag: str) -> bool: + """Return true if the tag exists in the provider repository.""" + provider_details = get_provider_details(provider_id) + result = run_command( + ["git", "rev-parse", current_tag], + cwd=provider_details.source_provider_package_path, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + return result.returncode == 0 + + +def get_latest_provider_tag(provider_id: str, suffix: str) -> str: + """Returns latest tag for the provider.""" + provider_details = get_provider_details(provider_id) + current_version = provider_details.versions[0] + return get_version_tag(current_version, provider_id, suffix) diff --git a/dev/breeze/src/airflow_breeze/utils/parallel.py b/dev/breeze/src/airflow_breeze/utils/parallel.py index ea4ef06030ad0..ad7e197d1c5dd 100644 --- a/dev/breeze/src/airflow_breeze/utils/parallel.py +++ b/dev/breeze/src/airflow_breeze/utils/parallel.py @@ -228,7 +228,7 @@ def get_single_tuple_array(title: str, t: NamedTuple) -> Table: for key, value in t._asdict().items(): table.add_column(header=key, header_style="info") row.append(get_printable_value(key, value)) - table.add_row(*row, style="magenta") + table.add_row(*row, 
style="special") return table @@ -245,7 +245,7 @@ def get_multi_tuple_array(title: str, tuples: list[tuple[NamedTuple, ...]]) -> T for named_tuple in t: for key, value in named_tuple._asdict().items(): row.append(get_printable_value(key, value)) - table.add_row(*row, style="magenta") + table.add_row(*row, style="special") return table diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py b/dev/breeze/src/airflow_breeze/utils/path_utils.py index 1abdddac97dd0..65a32286ced5e 100644 --- a/dev/breeze/src/airflow_breeze/utils/path_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py @@ -268,6 +268,7 @@ def find_airflow_sources_root_to_operate_on() -> Path: TESTS_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "tests" / "providers" SYSTEM_TESTS_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "tests" / "system" / "providers" AIRFLOW_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "airflow" / "providers" +DOCS_ROOT = AIRFLOW_SOURCES_ROOT / "docs" BUILD_CACHE_DIR = AIRFLOW_SOURCES_ROOT / ".build" GENERATED_DIR = AIRFLOW_SOURCES_ROOT / "generated" CONSTRAINTS_CACHE_DIR = BUILD_CACHE_DIR / "constraints" diff --git a/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py b/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py index b8e64d66f3615..c3b380b07b61c 100644 --- a/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py +++ b/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py @@ -28,7 +28,8 @@ from airflow_breeze.utils.console import Output, get_console from airflow_breeze.utils.docs_errors import DocBuildError, parse_sphinx_warnings from airflow_breeze.utils.helm_chart_utils import chart_version -from airflow_breeze.utils.publish_docs_helpers import load_package_data, pretty_format_path +from airflow_breeze.utils.packages import get_provider_packages_metadata, get_short_package_name +from airflow_breeze.utils.publish_docs_helpers import pretty_format_path from airflow_breeze.utils.spelling_checks import SpellingError, parse_spelling_warnings 
PROCESS_TIMEOUT = 15 * 60 @@ -95,8 +96,7 @@ def _current_version(self): if self.package_name == "apache-airflow": return get_airflow_version() if self.package_name.startswith("apache-airflow-providers-"): - all_providers_yaml = load_package_data(include_suspended=True) - provider = next(p for p in all_providers_yaml if p["package-name"] == self.package_name) + provider = get_provider_packages_metadata().get(get_short_package_name(self.package_name)) return provider["versions"][0] if self.package_name == "helm-chart": return chart_version() diff --git a/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py b/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py index 80116471f7739..8c5d63748cb74 100644 --- a/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py +++ b/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py @@ -23,8 +23,6 @@ from pathlib import Path from typing import Any -import yaml - CONSOLE_WIDTH = 180 ROOT_DIR = Path(__file__).parents[5].resolve() @@ -56,33 +54,6 @@ def get_provider_yaml_paths(): return sorted(glob(f"{ROOT_DIR}/airflow/providers/**/provider.yaml", recursive=True)) -def load_package_data(include_suspended: bool = False) -> list[dict[str, Any]]: - """ - Load all data from providers files - - :return: A list containing the contents of all provider.yaml files. 
- """ - import jsonschema - - schema = _load_schema() - result = [] - for provider_yaml_path in get_provider_yaml_paths(): - with open(provider_yaml_path) as yaml_file: - provider = yaml.safe_load(yaml_file) - try: - jsonschema.validate(provider, schema=schema) - except jsonschema.ValidationError: - raise Exception(f"Unable to parse: {provider_yaml_path}.") - if provider["suspended"] and not include_suspended: - continue - provider_yaml_dir = os.path.dirname(provider_yaml_path) - provider["python-module"] = _filepath_to_module(provider_yaml_dir) - provider["package-dir"] = provider_yaml_dir - provider["system-tests-dir"] = _filepath_to_system_tests(provider_yaml_dir) - result.append(provider) - return result - - def pretty_format_path(path: str, start: str) -> str: """Formats path nicely.""" relpath = os.path.relpath(path, start) diff --git a/dev/breeze/src/airflow_breeze/utils/run_tests.py b/dev/breeze/src/airflow_breeze/utils/run_tests.py index 042c578fd95af..d2ce641fe4188 100644 --- a/dev/breeze/src/airflow_breeze/utils/run_tests.py +++ b/dev/breeze/src/airflow_breeze/utils/run_tests.py @@ -23,9 +23,9 @@ from subprocess import DEVNULL from airflow_breeze.utils.console import Output, get_console +from airflow_breeze.utils.packages import get_suspended_provider_folders from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT from airflow_breeze.utils.run_utils import run_command -from airflow_breeze.utils.suspended_providers import get_suspended_providers_folders def verify_an_image( @@ -103,7 +103,7 @@ def test_paths(test_type: str, backend: str, helm_test_package: str | None) -> t def get_suspended_provider_args() -> list[str]: pytest_args = [] - suspended_folders = get_suspended_providers_folders() + suspended_folders = get_suspended_provider_folders() for providers in suspended_folders: pytest_args.extend( [ diff --git a/dev/breeze/src/airflow_breeze/utils/run_utils.py b/dev/breeze/src/airflow_breeze/utils/run_utils.py index 
d78e402a48e0b..7b1bcd073eef5 100644 --- a/dev/breeze/src/airflow_breeze/utils/run_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/run_utils.py @@ -172,6 +172,8 @@ def shorten_command(_index: int, _argument: str) -> str: get_console(output=output).print( "[error]========================= STDERR end ==============================[/]" ) + if check: + raise return ex diff --git a/dev/breeze/src/airflow_breeze/utils/suspended_providers.py b/dev/breeze/src/airflow_breeze/utils/suspended_providers.py deleted file mode 100644 index be918e356572a..0000000000000 --- a/dev/breeze/src/airflow_breeze/utils/suspended_providers.py +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -import sys - -from airflow_breeze.utils.console import get_console -from airflow_breeze.utils.path_utils import AIRFLOW_PROVIDERS_ROOT, AIRFLOW_SOURCES_ROOT - - -def get_suspended_providers_folders() -> list[str]: - """ - Returns a list of suspended providers folders that should be - skipped when running tests (without any prefix - for example apache/beam, yandex, google etc.). 
- """ - import yaml - - suspended_providers = [] - for provider_path in AIRFLOW_PROVIDERS_ROOT.rglob("provider.yaml"): - provider_yaml = yaml.safe_load(provider_path.read_text()) - if provider_yaml.get("suspended"): - suspended_providers.append( - provider_path.parent.relative_to(AIRFLOW_SOURCES_ROOT) - .as_posix() - .replace("airflow/providers/", "") - ) - return suspended_providers - - -def get_removed_provider_ids() -> list[str]: - """ - Yields the ids of suspended providers. - """ - import yaml - - removed_provider_ids = [] - for provider_path in AIRFLOW_PROVIDERS_ROOT.rglob("provider.yaml"): - provider_yaml = yaml.safe_load(provider_path.read_text()) - package_name = provider_yaml.get("package-name") - if provider_yaml.get("removed", False): - if not provider_yaml.get("suspended"): - get_console().print( - f"[error]The provider {package_name} is marked for removal in provider.yaml, but " - f"not suspended. Please suspend the provider first before removing it.\n" - ) - sys.exit(1) - removed_provider_ids.append(package_name[len("apache-airflow-providers-") :].replace("-", ".")) - return removed_provider_ids diff --git a/dev/breeze/src/airflow_breeze/utils/versions.py b/dev/breeze/src/airflow_breeze/utils/versions.py new file mode 100644 index 0000000000000..88c5986f7d975 --- /dev/null +++ b/dev/breeze/src/airflow_breeze/utils/versions.py @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + + +def strip_leading_zeros_from_version(version: str) -> str: + """ + Strips leading zeros from version number. + + This converts 1974.04.03 to 1974.4.3 as the format with leading month and day zeros is not accepted + by PIP versioning. + + :param version: version number in CALVER format (potentially with leading 0s in date and month) + :return: string with leading 0s after dot replaced. + """ + return ".".join(str(int(i)) for i in version.split(".")) + + +def get_version_tag(version: str, provider_package_id: str, version_suffix: str = ""): + if version_suffix is None: + version_suffix = "" + return f"providers-{provider_package_id.replace('.','-')}/{version}{version_suffix}" diff --git a/dev/breeze/tests/test_packages.py b/dev/breeze/tests/test_packages.py index f520c8328afdd..24b9b571a9a04 100644 --- a/dev/breeze/tests/test_packages.py +++ b/dev/breeze/tests/test_packages.py @@ -16,8 +16,31 @@ # under the License. 
from __future__ import annotations +from typing import Iterable + +import pytest + from airflow_breeze.global_constants import REGULAR_DOC_PACKAGES -from airflow_breeze.utils.packages import get_available_packages +from airflow_breeze.utils.packages import ( + convert_pip_requirements_to_table, + expand_all_provider_packages, + find_matching_long_package_names, + get_available_packages, + get_documentation_package_path, + get_install_requirements, + get_long_package_name, + get_package_extras, + get_pip_package_name, + get_provider_details, + get_provider_requirements, + get_removed_provider_ids, + get_short_package_name, + get_source_package_path, + get_suspended_provider_folders, + get_suspended_provider_ids, + get_wheel_package_name, +) +from airflow_breeze.utils.path_utils import AIRFLOW_PROVIDERS_ROOT, AIRFLOW_SOURCES_ROOT, DOCS_ROOT def test_get_available_packages(): @@ -25,6 +48,16 @@ def test_get_available_packages(): assert all(package not in REGULAR_DOC_PACKAGES for package in get_available_packages()) +def test_expand_all_provider_packages(): + assert len(expand_all_provider_packages(("all-providers",))) > 70 + + +def test_expand_all_provider_packages_deduplicate_with_other_packages(): + assert len(expand_all_provider_packages(("all-providers",))) == len( + expand_all_provider_packages(("all-providers", "amazon", "google")) + ) + + def test_get_available_packages_include_non_provider_doc_packages(): all_packages_including_regular_docs = get_available_packages(include_non_provider_doc_packages=True) for package in REGULAR_DOC_PACKAGES: @@ -41,3 +74,184 @@ def test_get_available_packages_include_non_provider_doc_packages_and_all_provid assert package in all_packages_including_regular_docs assert "all-providers" in all_packages_including_regular_docs + + +def test_get_short_package_name(): + assert get_short_package_name("apache-airflow") == "apache-airflow" + assert get_short_package_name("docker-stack") == "docker-stack" + assert 
get_short_package_name("apache-airflow-providers-amazon") == "amazon" + assert get_short_package_name("apache-airflow-providers-apache-hdfs") == "apache.hdfs" + + +def test_error_on_get_short_package_name(): + with pytest.raises(ValueError, match="Invalid provider name"): + get_short_package_name("wrong-provider-name") + + +def test_get_long_package_name(): + assert get_long_package_name("apache-airflow") == "apache-airflow" + assert get_long_package_name("docker-stack") == "docker-stack" + assert get_long_package_name("amazon") == "apache-airflow-providers-amazon" + assert get_long_package_name("apache.hdfs") == "apache-airflow-providers-apache-hdfs" + + +def test_get_provider_requirements(): + # update me when asana dependencies change + assert get_provider_requirements("asana") == ["apache-airflow>=2.5.0", "asana>=0.10,<4.0.0"] + + +def test_get_removed_providers(): + # Modify it every time we schedule provider for removal or remove it + assert ["qubole"] == get_removed_provider_ids() + + +def test_get_suspended_provider_ids(): + # Modify it every time we suspend/resume provider + assert ["qubole"] == get_suspended_provider_ids() + + +def test_get_suspended_provider_folders(): + # Modify it every time we suspend/resume provider + assert ["qubole"] == get_suspended_provider_folders() + + +@pytest.mark.parametrize( + "short_packages, filters, long_packages", + [ + (("amazon",), (), ("apache-airflow-providers-amazon",)), + (("apache.hdfs",), (), ("apache-airflow-providers-apache-hdfs",)), + (("amazon",), (), ("apache-airflow-providers-amazon",)), + ( + ("apache.hdfs",), + ("apache-airflow-providers-amazon",), + ("apache-airflow-providers-amazon", "apache-airflow-providers-apache-hdfs"), + ), + ( + ("apache.hdfs",), + ("apache-airflow-providers-ama*",), + ("apache-airflow-providers-amazon", "apache-airflow-providers-apache-hdfs"), + ), + ], +) +def test_find_matching_long_package_name( + short_packages: tuple[str, ...], filters: tuple[str, ...], long_packages: 
tuple[str, ...] +): + assert find_matching_long_package_names(short_packages=short_packages, filters=filters) == long_packages + + +def test_find_matching_long_package_name_bad_filter(): + with pytest.raises(SystemExit, match=r"Some filters did not find any package: \['bad-filter-\*"): + find_matching_long_package_names(short_packages=(), filters=("bad-filter-*",)) + + +def test_get_source_package_path(): + assert get_source_package_path("apache.hdfs") == AIRFLOW_PROVIDERS_ROOT / "apache" / "hdfs" + + +def test_get_documentation_package_path(): + assert get_documentation_package_path("apache.hdfs") == DOCS_ROOT / "apache-airflow-providers-apache-hdfs" + + +def test_get_install_requirements(): + assert ( + get_install_requirements("asana", "").strip() + == """ + "apache-airflow>=2.5.0", + "asana>=0.10,<4.0.0", +""".strip() + ) + + +def test_get_package_extras(): + assert get_package_extras("google") == { + "amazon": ["apache-airflow-providers-amazon>=2.6.0"], + "apache.beam": ["apache-airflow-providers-apache-beam", "apache-beam[gcp]"], + "apache.cassandra": ["apache-airflow-providers-apache-cassandra"], + "cncf.kubernetes": ["apache-airflow-providers-cncf-kubernetes>=7.2.0"], + "common.sql": ["apache-airflow-providers-common-sql"], + "facebook": ["apache-airflow-providers-facebook>=2.2.0"], + "leveldb": ["plyvel"], + "microsoft.azure": ["apache-airflow-providers-microsoft-azure"], + "microsoft.mssql": ["apache-airflow-providers-microsoft-mssql"], + "mysql": ["apache-airflow-providers-mysql"], + "openlineage": ["apache-airflow-providers-openlineage"], + "oracle": ["apache-airflow-providers-oracle>=3.1.0"], + "postgres": ["apache-airflow-providers-postgres"], + "presto": ["apache-airflow-providers-presto"], + "salesforce": ["apache-airflow-providers-salesforce"], + "sftp": ["apache-airflow-providers-sftp"], + "ssh": ["apache-airflow-providers-ssh"], + "trino": ["apache-airflow-providers-trino"], + } + + +def test_get_provider_details(): + provider_details = 
get_provider_details("asana") + assert provider_details.provider_id == "asana" + assert provider_details.full_package_name == "airflow.providers.asana" + assert provider_details.pypi_package_name == "apache-airflow-providers-asana" + assert ( + provider_details.source_provider_package_path + == AIRFLOW_SOURCES_ROOT / "airflow" / "providers" / "asana" + ) + assert ( + provider_details.documentation_provider_package_path == DOCS_ROOT / "apache-airflow-providers-asana" + ) + assert "Asana" in provider_details.provider_description + assert len(provider_details.versions) > 11 + assert provider_details.excluded_python_versions == [] + assert provider_details.plugins == [] + assert provider_details.changelog_path == provider_details.source_provider_package_path / "CHANGELOG.rst" + assert not provider_details.removed + + +@pytest.mark.parametrize( + "provider_id, pip_package_name", + [ + ("asana", "apache-airflow-providers-asana"), + ("apache.hdfs", "apache-airflow-providers-apache-hdfs"), + ], +) +def test_get_pip_package_name(provider_id: str, pip_package_name: str): + assert get_pip_package_name(provider_id) == pip_package_name + + +@pytest.mark.parametrize( + "provider_id, wheel_package_name", + [ + ("asana", "apache_airflow_providers_asana"), + ("apache.hdfs", "apache_airflow_providers_apache_hdfs"), + ], +) +def test_get_wheel_package_name(provider_id: str, wheel_package_name: str): + assert get_wheel_package_name(provider_id) == wheel_package_name + + +@pytest.mark.parametrize( + "requirements, markdown, table", + [ + ( + ["apache-airflow>2.5.0"], + False, + """ +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>2.5.0`` +================== ================== +""", + ), + ( + ["apache-airflow>2.5.0"], + True, + """ +| PIP package | Version required | +|:-----------------|:-------------------| +| `apache-airflow` | `>2.5.0` | +""", + ), + ], +) +def 
test_convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool, table: str): + print(convert_pip_requirements_to_table(requirements, markdown)) + assert convert_pip_requirements_to_table(requirements, markdown).strip() == table.strip() diff --git a/dev/breeze/tests/test_provider_documentation.py b/dev/breeze/tests/test_provider_documentation.py new file mode 100644 index 0000000000000..c11e73349bf48 --- /dev/null +++ b/dev/breeze/tests/test_provider_documentation.py @@ -0,0 +1,252 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pytest + +from airflow_breeze.prepare_providers.provider_documentation import ( + Change, + _convert_git_changes_to_table, + _find_insertion_index_for_version, + _get_change_from_line, + _get_changes_classified, + _get_git_log_command, + _verify_changelog_exists, + get_version_tag, +) +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT + +CHANGELOG_CONTENT = """ +Changelog +--------- + +5.0.0 +..... + +Breaking changes +~~~~~~~~~~~~~~~~ + +The ``offset`` parameter has been deprecated from ``list_jobs`` in favor of faster pagination with ``page_token`` similarly to `Databricks API `_. 
+ +* ``Remove offset-based pagination from 'list_jobs' function in 'DatabricksHook' (#34926)`` + +4.7.0 +..... + +Features +~~~~~~~~ + +* ``Add operator to create jobs in Databricks (#35156)`` + +.. Below changes are excluded from the changelog. Move them to + appropriate section above if needed. Do not delete the lines(!): + * ``Prepare docs 3rd wave of Providers October 2023 (#35187)`` + * ``Pre-upgrade 'ruff==0.0.292' changes in providers (#35053)`` + * ``D401 Support - Providers: DaskExecutor to Github (Inclusive) (#34935)`` + +4.6.0 +..... + +.. note:: + This release of provider is only available for Airflow 2.5+ as explained in the + `Apache Airflow providers support policy `_. + +""" + + +def test_find_insertion_index_append_to_found_changelog(): + index, append = _find_insertion_index_for_version(CHANGELOG_CONTENT.splitlines(), "5.0.0") + assert append + assert index == 13 + + +def test_find_insertion_index_insert_new_changelog(): + index, append = _find_insertion_index_for_version(CHANGELOG_CONTENT.splitlines(), "5.0.1") + assert not append + assert index == 3 + + +@pytest.mark.parametrize( + "version, provider_id, suffix, tag", + [ + ("1.0.1", "asana", "", "providers-asana/1.0.1"), + ("1.0.1", "asana", "rc1", "providers-asana/1.0.1rc1"), + ("1.0.1", "apache.hdfs", "beta1", "providers-apache-hdfs/1.0.1beta1"), + ], +) +def test_get_version_tag(version: str, provider_id: str, suffix: str, tag: str): + assert get_version_tag(version, provider_id, suffix) == tag + + +@pytest.mark.parametrize( + "from_commit, to_commit, git_command", + [ + (None, None, ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "--", "."]), + ( + "from_tag", + None, + ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag", "--", "."], + ), + ( + "from_tag", + "to_tag", + ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag...to_tag", "--", "."], + ), + ], +) +def test_get_git_log_command(from_commit: str | None, to_commit: str | None, 
git_command: list[str]): + assert _get_git_log_command(from_commit, to_commit) == git_command + + +def test_get_git_log_command_wrong(): + with pytest.raises(ValueError, match=r"to_commit without from_commit"): + _get_git_log_command(None, "to_commit") + + +@pytest.mark.parametrize( + "line, version, change", + [ + ( + "LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` no pr", + "1.0.1", + Change( + full_hash="LONG_HASH_123144", + short_hash="SHORT_HASH", + date="2023-01-01", + version="1.0.1", + message="Description `with` no pr", + message_without_backticks="Description 'with' no pr", + pr=None, + ), + ), + ( + "LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12345)", + "1.0.1", + Change( + full_hash="LONG_HASH_123144", + short_hash="SHORT_HASH", + date="2023-01-01", + version="1.0.1", + message="Description `with` pr (#12345)", + message_without_backticks="Description 'with' pr (#12345)", + pr="12345", + ), + ), + ], +) +def test_get_change_from_line(line: str, version: str, change: Change): + assert _get_change_from_line(line, version) == change + + +@pytest.mark.parametrize( + "input, output, markdown, changes_len", + [ + ( + """ +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` no pr +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12345) + +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12346) + +""", + """ +1.0.1 +..... 
+ +Latest change: 2023-01-01 + +============================================ =========== ================================== +Commit Committed Subject +============================================ =========== ================================== +`SHORT_HASH `_ 2023-01-01 ``Description 'with' no pr`` +`SHORT_HASH `_ 2023-01-01 ``Description 'with' pr (#12345)`` +`SHORT_HASH `_ 2023-01-01 ``Description 'with' pr (#12346)`` +============================================ =========== ==================================""", + False, + 3, + ), + ( + """ +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` no pr +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12345) + +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12346) + +""", + """ +| Commit | Committed | Subject | +|:-------------------------------------------|:------------|:---------------------------------| +| [SHORT_HASH](https://url/LONG_HASH_123144) | 2023-01-01 | `Description 'with' no pr` | +| [SHORT_HASH](https://url/LONG_HASH_123144) | 2023-01-01 | `Description 'with' pr (#12345)` | +| [SHORT_HASH](https://url/LONG_HASH_123144) | 2023-01-01 | `Description 'with' pr (#12346)` | +""", + True, + 3, + ), + ], +) +def test_convert_git_changes_to_table(input: str, output: str, markdown: bool, changes_len): + table, list_of_changes = _convert_git_changes_to_table( + version="1.0.1", changes=input, base_url="https://url/", markdown=markdown + ) + assert table.strip() == output.strip() + assert len(list_of_changes) == changes_len + assert list_of_changes[0].pr is None + assert list_of_changes[1].pr == "12345" + assert list_of_changes[2].pr == "12346" + + +def test_verify_changelog_exists(): + assert ( + _verify_changelog_exists("asana") + == AIRFLOW_SOURCES_ROOT / "airflow" / "providers" / "asana" / "CHANGELOG.rst" + ) + + +@pytest.mark.parametrize( + "descriptions, with_breaking_changes, maybe_with_new_features," + "breaking_count, feature_count, bugfix_count, other_count", + [ + 
(["Added feature x"], True, True, 0, 1, 0, 0), + (["Added feature x"], False, False, 0, 0, 0, 1), + (["Breaking change in"], True, True, 1, 0, 0, 0), + (["Breaking change in", "Added feature y"], True, True, 1, 1, 0, 0), + (["Fix change in", "Breaking feature y"], False, True, 0, 0, 1, 1), + (["Fix change in", "Breaking feature y"], False, True, 0, 0, 1, 1), + ], +) +def test_classify_changes_automatically( + descriptions: list[str], + with_breaking_changes: bool, + maybe_with_new_features: bool, + breaking_count: int, + feature_count: int, + bugfix_count: int, + other_count: int, +): + """Test simple automated classification of the changes based on their single-line description.""" + changes = [ + _get_change_from_line(f"LONG SHORT 2023-12-01 {description}", version="0.1.0") + for description in descriptions + ] + classified_changes = _get_changes_classified( + changes, with_breaking_changes=with_breaking_changes, maybe_with_new_features=maybe_with_new_features + ) + assert len(classified_changes.breaking_changes) == breaking_count + assert len(classified_changes.features) == feature_count + assert len(classified_changes.fixes) == bugfix_count + assert len(classified_changes.other) == other_count diff --git a/dev/provider_packages/get_provider_info_TEMPLATE.py.jinja2 b/dev/breeze/tests/test_versions.py similarity index 66% rename from dev/provider_packages/get_provider_info_TEMPLATE.py.jinja2 rename to dev/breeze/tests/test_versions.py index b6e50ceeed520..8ecf1118fe11f 100644 --- a/dev/provider_packages/get_provider_info_TEMPLATE.py.jinja2 +++ b/dev/breeze/tests/test_versions.py @@ -14,13 +14,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations -# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE -# OVERWRITTEN WHEN PREPARING PACKAGES. 
-# -# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE -# `get_provider_info_TEMPLATE.py.jinja2` IN the `provider_packages` DIRECTORY +import pytest + +from airflow_breeze.utils.versions import strip_leading_zeros_from_version -def get_provider_info(): - return {{ PROVIDER_INFO }} +@pytest.mark.parametrize( + "version,stripped_version", [("3.4.0", "3.4.0"), ("13.04.05", "13.4.5"), ("0003.00004.000005", "3.4.5")] +) +def test_strip_leading_versions(version: str, stripped_version): + assert stripped_version == strip_leading_zeros_from_version(version) diff --git a/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2 b/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2 deleted file mode 100644 index c3fdba076b568..0000000000000 --- a/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2 +++ /dev/null @@ -1,84 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE -# OVERWRITTEN WHEN PREPARING PACKAGES. -# -# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE -# `SETUP_TEMPLATE.cfg.jinja2` IN the `dev/provider_packages` DIRECTORY - - -[metadata] -name = {{ PACKAGE_PIP_NAME }} -summary = {{ PROVIDER_TYPE }} for Apache Airflow. 
Implements {{ PACKAGE_PIP_NAME }} package -description = {{ PROVIDER_TYPE }} package {{ PACKAGE_PIP_NAME }} for Apache Airflow -long_description = file: README.rst -long_description_content_type = text/x-rst -author = Apache Software Foundation -author_email = dev@airflow.apache.org -url = https://airflow.apache.org/ -download_url = https://archive.apache.org/dist/airflow/{{ PROVIDERS_FOLDER }} -license = Apache License 2.0 -license_files = - LICENSE - NOTICE -classifiers = - Development Status :: 5 - Production/Stable - Environment :: Console - Environment :: Web Environment - Intended Audience :: Developers - Intended Audience :: System Administrators - Framework :: Apache Airflow - Framework :: Apache Airflow :: Provider - License :: OSI Approved :: Apache Software License -{%- for python_version in SUPPORTED_PYTHON_VERSIONS %} - Programming Language :: Python :: {{ python_version }} -{%- endfor %} - Topic :: System :: Monitoring -project_urls= - Documentation=https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}/ - Changelog=https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}/changelog.html - Bug Tracker=https://github.com/apache/airflow/issues - Source Code=https://github.com/apache/airflow - Slack Chat=https://s.apache.org/airflow-slack - Twitter=https://twitter.com/ApacheAirflow - YouTube=https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/ - -[bdist_wheel] -python_tag=py3 - -[options] -zip_safe = False -include_package_data = True -python_requires = ~=3.8 -packages = find: -setup_requires = {{ SETUP_REQUIREMENTS }} -install_requires = {{ INSTALL_REQUIREMENTS }} - -[options.entry_points] -apache_airflow_provider= - provider_info=airflow.providers.{{ PROVIDER_PACKAGE_ID }}.get_provider_info:get_provider_info -{%- if PLUGINS %} -airflow.plugins= -{%- for plugin in PLUGINS %} - {{ plugin.name }}={{ plugin.package_name }}:{{ plugin.class_name }} -{%- endfor %} -{%- endif %} - - -[files] -packages = airflow.providers.{{ 
PROVIDER_PACKAGE_ID }} diff --git a/dev/provider_packages/SETUP_TEMPLATE.py.jinja2 b/dev/provider_packages/SETUP_TEMPLATE.py.jinja2 deleted file mode 100644 index 4f4726532124c..0000000000000 --- a/dev/provider_packages/SETUP_TEMPLATE.py.jinja2 +++ /dev/null @@ -1,47 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE -# OVERWRITTEN WHEN PREPARING PACKAGES. 
-# -# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE -# `SETUP_TEMPLATE.py.jinja2` IN the `dev/provider_packages` DIRECTORY - -"""Setup.py for the {{ PACKAGE_PIP_NAME }} package.""" - -from setuptools import find_namespace_packages, setup - -version = '{{ RELEASE_NO_LEADING_ZEROS }}' - - -def do_setup(): - """Perform the package {{ PACKAGE_PIP_NAME }} setup.""" - setup( - version=version, - extras_require={{ EXTRAS_REQUIREMENTS }}, - packages=find_namespace_packages( - include=['airflow.providers.{{ PROVIDER_PACKAGE_ID }}', - 'airflow.providers.{{ PROVIDER_PACKAGE_ID }}.*', - 'airflow.providers.{{ PROVIDER_PACKAGE_ID }}_vendor', - 'airflow.providers.{{ PROVIDER_PACKAGE_ID }}_vendor.*'], - ), - ) - - -if __name__ == "__main__": - do_setup() diff --git a/dev/provider_packages/prepare_provider_packages.py b/dev/provider_packages/prepare_provider_packages.py deleted file mode 100755 index 05789e65e9a76..0000000000000 --- a/dev/provider_packages/prepare_provider_packages.py +++ /dev/null @@ -1,2162 +0,0 @@ -#!/usr/bin/env python3 - -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Setup.py for the Provider packages of Airflow project.""" -from __future__ import annotations - -import difflib -import glob -import json -import logging -import os -import random -import re -import shutil -import subprocess -import sys -import tempfile -import textwrap -from collections import namedtuple -from contextlib import contextmanager -from copy import deepcopy -from datetime import datetime, timedelta -from enum import Enum -from functools import lru_cache -from pathlib import Path -from shutil import copyfile -from typing import Any, Generator, Iterable, NamedTuple - -import jinja2 -import jsonschema -import rich_click as click -import semver as semver -from black import Mode, TargetVersion, format_str, parse_pyproject_toml -from packaging.version import Version -from rich.console import Console -from rich.syntax import Syntax -from yaml import safe_load - -ALL_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] - -MIN_AIRFLOW_VERSION = "2.5.0" - -INITIAL_CHANGELOG_CONTENT = """ - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -.. 
NOTE TO CONTRIBUTORS: - Please, only add notes to the Changelog just below the "Changelog" header when there - are some breaking changes and you want to add an explanation to the users on how they are supposed - to deal with them. The changelog is updated and maintained semi-automatically by release manager. - -``{{ package_name }}`` - -Changelog ---------- - -1.0.0 -..... - -Initial version of the provider. -""" - -HTTPS_REMOTE = "apache-https-for-providers" -HEAD_OF_HTTPS_REMOTE = f"{HTTPS_REMOTE}" - -MY_DIR_PATH = Path(__file__).parent -AIRFLOW_SOURCES_ROOT_PATH = MY_DIR_PATH.parents[1] -AIRFLOW_PATH = AIRFLOW_SOURCES_ROOT_PATH / "airflow" -DIST_PATH = AIRFLOW_SOURCES_ROOT_PATH / "dist" -PROVIDERS_PATH = AIRFLOW_PATH / "providers" -DOCUMENTATION_PATH = AIRFLOW_SOURCES_ROOT_PATH / "docs" - -DEPENDENCIES_JSON_FILE_PATH = AIRFLOW_SOURCES_ROOT_PATH / "generated" / "provider_dependencies.json" - -TARGET_PROVIDER_PACKAGES_PATH = AIRFLOW_SOURCES_ROOT_PATH / "provider_packages" -GENERATED_AIRFLOW_PATH = TARGET_PROVIDER_PACKAGES_PATH / "airflow" -GENERATED_PROVIDERS_PATH = GENERATED_AIRFLOW_PATH / "providers" - -PROVIDER_RUNTIME_DATA_SCHEMA_PATH = AIRFLOW_SOURCES_ROOT_PATH / "airflow" / "provider_info.schema.json" - -CROSS_PROVIDERS_DEPS = "cross-providers-deps" -DEPS = "deps" - -sys.path.insert(0, str(AIRFLOW_SOURCES_ROOT_PATH)) - - -ALL_DEPENDENCIES = json.loads(DEPENDENCIES_JSON_FILE_PATH.read_text()) - -# those imports need to come after the above sys.path.insert to make sure that Airflow -# sources are importable without having to add the airflow sources to the PYTHONPATH before -# running the script -from setup import PREINSTALLED_PROVIDERS, ALL_PROVIDERS # type: ignore[attr-defined] # isort:skip # noqa - -# Note - we do not test protocols as they are not really part of the official API of -# Apache Airflow - -logger = logging.getLogger(__name__) - -PY3 = sys.version_info[0] == 3 - -console = Console(width=400, color_system="standard") - - -class 
PluginInfo(NamedTuple): - name: str - package_name: str - class_name: str - - -class ProviderPackageDetails(NamedTuple): - provider_package_id: str - full_package_name: str - pypi_package_name: str - source_provider_package_path: str - documentation_provider_package_path: Path - provider_description: str - versions: list[str] - excluded_python_versions: list[str] - plugins: list[PluginInfo] - removed: bool - - -class EntityType(Enum): - Operators = "Operators" - Transfers = "Transfers" - Sensors = "Sensors" - Hooks = "Hooks" - Secrets = "Secrets" - - -@click.group(context_settings={"help_option_names": ["-h", "--help"], "max_content_width": 500}) -def cli(): - ... - - -option_skip_tag_check = click.option( - "--skip-tag-check/--no-skip-tag-check", - default=False, - is_flag=True, - help="Skip checking if the tag already exists in the remote repository", -) - -option_git_update = click.option( - "--git-update/--no-git-update", - default=True, - is_flag=True, - help=f"If the git remote {HTTPS_REMOTE} already exists, don't try to update it", -) - -option_package_format = click.option( - "--package-format", - type=click.Choice(["wheel", "sdist", "both"]), - help="Format of packages.", - default="wheel", - show_default=True, - envvar="PACKAGE_FORMAT", -) - -option_version_suffix = click.option( - "--version-suffix", - metavar="suffix", - help=textwrap.dedent( - """ - adds version suffix to version of the packages. 
- only useful when generating rc candidates for pypi.""" - ), -) -option_verbose = click.option( - "--verbose", - is_flag=True, - help="Print verbose information about performed steps", -) -option_force = click.option( - "--force", - is_flag=True, - help="Forces regeneration of already generated documentation", -) -option_base_branch = click.option( - "--base-branch", - type=str, - default="main", -) -argument_package_id = click.argument("package_id") -argument_changelog_files = click.argument("changelog_files", nargs=-1) -argument_package_ids = click.argument("package_ids", nargs=-1) - - -@contextmanager -def with_group(title: str) -> Generator[None, None, None]: - """ - If used in GitHub Action, creates an expandable group in the GitHub Action log. - Otherwise, display simple text groups. - - For more information, see: - https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-commands-for-github-actions#grouping-log-lines - """ - if os.environ.get("GITHUB_ACTIONS", "false") != "true": - console.print("#" * 10 + " [bright_blue]" + title + "[/] " + "#" * 10) - yield - return - console.print(f"::group::[bright_blue]{title}[/]") - yield - console.print("::endgroup::") - - -def get_source_airflow_folder() -> str: - """ - Returns source directory for whole airflow (from the main airflow project). - - :return: the folder path - """ - return os.path.abspath(AIRFLOW_SOURCES_ROOT_PATH) - - -def get_source_providers_folder() -> str: - """ - Returns source directory for providers (from the main airflow project). 
- - :return: the folder path - """ - return os.path.join(get_source_airflow_folder(), "airflow", "providers") - - -def get_target_folder() -> str: - """ - Returns target directory for providers (in the provider_packages folder) - - :return: the folder path - """ - return os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "provider_packages")) - - -def get_target_providers_folder() -> str: - """ - Returns target directory for providers (in the provider_packages folder) - - :return: the folder path - """ - return os.path.abspath(os.path.join(get_target_folder(), "airflow", "providers")) - - -def get_target_providers_package_folder(provider_package_id: str) -> str: - """ - Returns target package folder based on package_id - - :return: the folder path - """ - return os.path.join(get_target_providers_folder(), *provider_package_id.split(".")) - - -def get_pip_package_name(provider_package_id: str) -> str: - """ - Returns PIP package name for the package id. - - :param provider_package_id: id of the package - :return: the name of pip package - """ - return "apache-airflow-providers-" + provider_package_id.replace(".", "-") - - -def get_wheel_package_name(provider_package_id: str) -> str: - """ - Returns PIP package name for the package id. - - :param provider_package_id: id of the package - :return: the name of pip package - """ - return "apache_airflow_providers_" + provider_package_id.replace(".", "_") - - -def get_long_description(provider_package_id: str) -> str: - """ - Gets long description of the package. 
- - :param provider_package_id: package id - :return: content of the description: README file - """ - package_folder = get_target_providers_package_folder(provider_package_id) - readme_file = os.path.join(package_folder, "README.md") - if not os.path.exists(readme_file): - return "" - with open(readme_file, encoding="utf-8") as file: - readme_contents = file.read() - copying = True - long_description = "" - for line in readme_contents.splitlines(keepends=True): - if line.startswith("**Table of contents**"): - copying = False - continue - header_line = "## Provider package" - if line.startswith(header_line): - copying = True - if copying: - long_description += line - return long_description - - -def get_install_requirements(provider_package_id: str, version_suffix: str) -> str: - """ - Returns install requirements for the package. - - :param provider_package_id: id of the provider package - :param version_suffix: optional version suffix for packages - - :return: install requirements of the package - """ - - def apply_version_suffix(install_clause: str) -> str: - if install_clause.startswith("apache-airflow") and ">=" in install_clause and version_suffix != "": - # This is workaround for `pip` way of handling `--pre` installation switch. It apparently does - # not modify the meaning of `install_requires` to include also pre-releases, so we need to - # modify our internal provider and airflow package version references to include all pre-releases - # including all development releases. When you specify dependency as >= X.Y.Z, and you - # have packages X.Y.Zdev0 or X.Y.Zrc1 in a local file, such package is not considered - # as fulfilling the requirement even if `--pre` switch is used. 
- return install_clause + ".dev0" - return install_clause - - if provider_package_id in get_removed_provider_ids(): - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - dependencies = provider_info["dependencies"] - else: - dependencies = ALL_DEPENDENCIES[provider_package_id][DEPS] - install_requires = [apply_version_suffix(clause) for clause in dependencies] - return "".join(f"\n {ir}" for ir in install_requires) - - -def get_setup_requirements() -> str: - """ - Returns setup requirements (common for all package for now). - :return: setup requirements - """ - return """ - setuptools - wheel -""" - - -def get_package_extras(provider_package_id: str) -> dict[str, list[str]]: - """ - Finds extras for the package specified. - - :param provider_package_id: id of the package - """ - if provider_package_id == "providers": - return {} - if provider_package_id in get_removed_provider_ids(): - return {} - extras_dict: dict[str, list[str]] = { - module: [get_pip_package_name(module)] - for module in ALL_DEPENDENCIES[provider_package_id][CROSS_PROVIDERS_DEPS] - } - provider_yaml_dict = get_provider_yaml(provider_package_id) - additional_extras = provider_yaml_dict.get("additional-extras") - if additional_extras: - for entry in additional_extras: - name = entry["name"] - dependencies = entry["dependencies"] - if name in extras_dict: - # remove non-versioned dependencies if versioned ones are coming - existing_dependencies = set(extras_dict[name]) - for new_dependency in dependencies: - for dependency in existing_dependencies: - # remove extra if exists as non-versioned one - if new_dependency.startswith(dependency): - extras_dict[name].remove(dependency) - break - extras_dict[name].append(new_dependency) - else: - extras_dict[name] = dependencies - return extras_dict - - -def render_template( - template_name: str, - context: dict[str, Any], - extension: str, - autoescape: bool = True, - keep_trailing_newline: bool = False, -) -> str: - """ - Renders 
template based on its name. Reads the template from _TEMPLATE.md.jinja2 in current dir. - :param template_name: name of the template to use - :param context: Jinja2 context - :param extension: Target file extension - :param autoescape: Whether to autoescape HTML - :param keep_trailing_newline: Whether to keep the newline in rendered output - :return: rendered template - """ - import jinja2 - - template_loader = jinja2.FileSystemLoader(searchpath=MY_DIR_PATH) - template_env = jinja2.Environment( - loader=template_loader, - undefined=jinja2.StrictUndefined, - autoescape=autoescape, - keep_trailing_newline=keep_trailing_newline, - ) - template = template_env.get_template(f"{template_name}_TEMPLATE{extension}.jinja2") - content: str = template.render(context) - return content - - -PR_PATTERN = re.compile(r".*\(#(\d+)\)") - - -class Change(NamedTuple): - """Stores details about commits""" - - full_hash: str - short_hash: str - date: str - version: str - message: str - message_without_backticks: str - pr: str | None - - -def get_change_from_line(line: str, version: str): - split_line = line.split(" ", maxsplit=3) - message = split_line[3] - pr = None - pr_match = PR_PATTERN.match(message) - if pr_match: - pr = pr_match.group(1) - return Change( - full_hash=split_line[0], - short_hash=split_line[1], - date=split_line[2], - version=version, - message=message, - message_without_backticks=message.replace("`", "'").replace("&39;", "'"), - pr=pr, - ) - - -def convert_git_changes_to_table( - version: str, changes: str, base_url: str, markdown: bool = True -) -> tuple[str, list[Change]]: - """ - Converts list of changes from its string form to markdown/RST table and array of change information - - The changes are in the form of multiple lines where each line consists of: - FULL_COMMIT_HASH SHORT_COMMIT_HASH COMMIT_DATE COMMIT_SUBJECT - - The subject can contain spaces but one of the preceding values can, so we can make split - 3 times on spaces to break it up. 
- :param version: Version from which the changes are - :param changes: list of changes in a form of multiple-line string - :param base_url: base url for the commit URL - :param markdown: if True, Markdown format is used else rst - :return: formatted table + list of changes (starting from the latest) - """ - from tabulate import tabulate - - lines = changes.splitlines() - headers = ["Commit", "Committed", "Subject"] - table_data = [] - changes_list: list[Change] = [] - for line in lines: - if line == "": - continue - change = get_change_from_line(line, version) - table_data.append( - ( - f"[{change.short_hash}]({base_url}{change.full_hash})" - if markdown - else f"`{change.short_hash} <{base_url}{change.full_hash}>`_", - change.date, - f"`{change.message_without_backticks}`" - if markdown - else f"``{change.message_without_backticks}``", - ) - ) - changes_list.append(change) - header = "" - if not table_data: - return header, [] - table = tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") - if not markdown: - header += f"\n\n{version}\n" + "." * len(version) + "\n\n" - release_date = table_data[0][1] - header += f"Latest change: {release_date}\n\n" - return header + table, changes_list - - -def convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool = True) -> str: - """ - Converts PIP requirement list to a Markdown table. 
- :param requirements: requirements list - :param markdown: if True, Markdown format is used else rst - :return: formatted table - """ - from tabulate import tabulate - - headers = ["PIP package", "Version required"] - table_data = [] - for dependency in requirements: - found = re.match(r"(^[^<=>~]*)([^<=>~]?.*)$", dependency) - if found: - package = found.group(1) - version_required = found.group(2) - if version_required != "": - version_required = f"`{version_required}`" if markdown else f"``{version_required}``" - table_data.append((f"`{package}`" if markdown else f"``{package}``", version_required)) - else: - table_data.append((dependency, "")) - return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") - - -def convert_cross_package_dependencies_to_table( - cross_package_dependencies: list[str], - markdown: bool = True, -) -> str: - """ - Converts cross-package dependencies to a Markdown table - :param cross_package_dependencies: list of cross-package dependencies - :param markdown: if True, Markdown format is used else rst - :return: formatted table - """ - from tabulate import tabulate - - headers = ["Dependent package", "Extra"] - table_data = [] - prefix = "apache-airflow-providers-" - base_url = "https://airflow.apache.org/docs/" - for dependency in cross_package_dependencies: - pip_package_name = f"{prefix}{dependency.replace('.','-')}" - url_suffix = f"{dependency.replace('.','-')}" - if markdown: - url = f"[{pip_package_name}]({base_url}{url_suffix})" - else: - url = f"`{pip_package_name} <{base_url}{prefix}{url_suffix}>`_" - table_data.append((url, f"`{dependency}`" if markdown else f"``{dependency}``")) - return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") - - -LICENCE = """ -""" - -LICENCE_RST = """ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. 
See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -""" - -""" -Keeps information about historical releases. -""" -ReleaseInfo = namedtuple( - "ReleaseInfo", "release_version release_version_no_leading_zeros last_commit_hash content file_name" -) - - -def strip_leading_zeros(version: str) -> str: - """ - Strips leading zeros from version number. - - This converts 1974.04.03 to 1974.4.3 as the format with leading month and day zeros is not accepted - by PIP versioning. - - :param version: version number in CALVER format (potentially with leading 0s in date and month) - :return: string with leading 0s after dot replaced. - """ - return ".".join(str(int(i)) for i in version.split(".")) - - -def get_previous_release_info( - previous_release_version: str | None, past_releases: list[ReleaseInfo], current_release_version: str -) -> str | None: - """Find previous release. - - In case we are re-running current release, we assume that last release was - the previous one. This is needed so that we can generate list of changes - since the previous release. 
- - :param previous_release_version: known last release version - :param past_releases: list of past releases - :param current_release_version: release that we are working on currently - """ - previous_release = None - if previous_release_version == current_release_version: - # Re-running for current release - use previous release as base for git log - if len(past_releases) > 1: - previous_release = past_releases[1].last_commit_hash - else: - previous_release = past_releases[0].last_commit_hash if past_releases else None - return previous_release - - -def check_if_release_version_ok( - past_releases: list[ReleaseInfo], - current_release_version: str, -) -> tuple[str, str | None]: - """Check if the release version passed is not later than the last release version. - - :param past_releases: all past releases (if there are any) - :param current_release_version: release version to check - :return: Tuple of current/previous_release (previous might be None if there are no releases) - """ - previous_release_version = past_releases[0].release_version if past_releases else None - if current_release_version == "": - if previous_release_version: - current_release_version = previous_release_version - else: - current_release_version = (datetime.today() + timedelta(days=5)).strftime("%Y.%m.%d") - if previous_release_version: - if Version(current_release_version) < Version(previous_release_version): - console.print( - f"[red]The release {current_release_version} must be not less than " - f"{previous_release_version} - last release for the package[/]" - ) - raise Exception("Bad release version") - return current_release_version, previous_release_version - - -def get_cross_provider_dependent_packages(provider_package_id: str) -> list[str]: - """Returns cross-provider dependencies for the package. 
- - :param provider_package_id: package id - :return: list of cross-provider dependencies - """ - if provider_package_id in get_removed_provider_ids(): - return [] - return ALL_DEPENDENCIES[provider_package_id][CROSS_PROVIDERS_DEPS] - - -def make_current_directory_safe(verbose: bool): - """Makes current directory safe for Git. - - New git checks if git ownership for the folder is not manipulated with. We - are running this command only inside the container where the directory is - mounted from "regular" user to "root" user which is used inside the - container, so this is quite ok to assume the directory it is used is safe. - - It's also ok to leave it as safe - it is a global option inside the - container so it will disappear when we exit. - - :param verbose: whether to print commands being executed - """ - safe_dir_remove_command = ["git", "config", "--global", "--unset-all", "safe.directory"] - if verbose: - console.print(f"Running command: '{' '.join(safe_dir_remove_command)}'") - # we ignore result of this call - subprocess.call(safe_dir_remove_command) - safe_dir_add_command = ["git", "config", "--global", "--add", "safe.directory", "/opt/airflow"] - if verbose: - console.print(f"Running command: '{' '.join(safe_dir_add_command)}'") - subprocess.check_call(safe_dir_add_command) - - -def make_sure_remote_apache_exists_and_fetch(git_update: bool, verbose: bool): - """Make sure that apache remote exist in git. - - We need to take a log from the apache repository - not locally. Also, the - local repo might be shallow, so we need to un-shallow it. 
- - This will: - * mark current directory as safe for ownership (it is run in the container) - * check if the remote exists and add if it does not - * check if the local repo is shallow, mark it to un-shallow in this case - * fetch from the remote including all tags and overriding local tags in case - they are set differently - - :param git_update: If the git remote already exists, should we try to update it - :param verbose: print verbose messages while fetching - """ - - make_current_directory_safe(verbose) - try: - check_remote_command = ["git", "remote", "get-url", HTTPS_REMOTE] - if verbose: - console.print(f"Running command: '{' '.join(check_remote_command)}'") - subprocess.check_call( - check_remote_command, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - - # Remote already exists, don't update it again! - if not git_update: - return - except subprocess.CalledProcessError as ex: - if ex.returncode == 128 or ex.returncode == 2: - remote_add_command = [ - "git", - "remote", - "add", - HTTPS_REMOTE, - "https://github.com/apache/airflow.git", - ] - if verbose: - console.print(f"Running command: '{' '.join(remote_add_command)}'") - try: - subprocess.check_call( - remote_add_command, - ) - except subprocess.CalledProcessError as ex: - console.print("[red]Error: when adding remote:[/]", ex) - sys.exit(128) - else: - raise - if verbose: - console.print("Fetching full history and tags from remote. ") - console.print("This might override your local tags!") - is_shallow_repo = ( - subprocess.check_output(["git", "rev-parse", "--is-shallow-repository"], stderr=subprocess.DEVNULL) - == "true" - ) - fetch_command = ["git", "fetch", "--tags", "--force", HTTPS_REMOTE] - if is_shallow_repo: - if verbose: - console.print( - "This will also un-shallow the repository, " - "making all history available and increasing storage!" 
- ) - fetch_command.append("--unshallow") - if verbose: - console.print(f"Running command: '{' '.join(fetch_command)}'") - try: - subprocess.check_call(fetch_command) - except subprocess.CalledProcessError as e: - console.print( - "[yellow]Error when fetching tags from remote. Your tags might not be refreshed. " - f'Please refresh the tags manually via {" ".join(fetch_command)}\n' - ) - console.print(f"[yellow]The error was: {e}") - - -def get_git_log_command( - verbose: bool, from_commit: str | None = None, to_commit: str | None = None -) -> list[str]: - """Get git command to run for the current repo from the current folder. - - The current directory should always be the package folder. - - :param verbose: whether to print verbose info while getting the command - :param from_commit: if present - base commit from which to start the log from - :param to_commit: if present - final commit which should be the start of the log - :return: git command to run - """ - git_cmd = [ - "git", - "log", - "--pretty=format:%H %h %cd %s", - "--date=short", - ] - if from_commit and to_commit: - git_cmd.append(f"{from_commit}...{to_commit}") - elif from_commit: - git_cmd.append(from_commit) - git_cmd.extend(["--", "."]) - if verbose: - console.print(f"Command to run: '{' '.join(git_cmd)}'") - return git_cmd - - -def get_git_tag_check_command(tag: str) -> list[str]: - """Get git command to check if tag exits. - - :param tag: Tag to check - :return: git command to run - """ - return [ - "git", - "rev-parse", - tag, - ] - - -def get_source_package_path(provider_package_id: str) -> str: - """Retrieves source package path from package id. - - :param provider_package_id: id of the package - :return: path of the providers folder - """ - return os.path.join(PROVIDERS_PATH, *provider_package_id.split(".")) - - -def get_documentation_package_path(provider_package_id: str) -> Path: - """Retrieves documentation package path from package id. 
- - :param provider_package_id: id of the package - :return: path of the documentation folder - """ - return DOCUMENTATION_PATH / f"apache-airflow-providers-{provider_package_id.replace('.','-')}" - - -def get_generated_package_path(provider_package_id: str) -> str: - """Retrieves generated package path from package id. - - :param provider_package_id: id of the package - :return: path of the providers folder - """ - provider_package_path = os.path.join(GENERATED_PROVIDERS_PATH, *provider_package_id.split(".")) - return provider_package_path - - -def get_additional_package_info(provider_package_path: str) -> str: - """Returns additional info for the package. - - :param provider_package_path: path for the package - :return: additional information for the path (empty string if missing) - """ - additional_info_file_path = os.path.join(provider_package_path, "ADDITIONAL_INFO.md") - if os.path.isfile(additional_info_file_path): - with open(additional_info_file_path) as additional_info_file: - additional_info = additional_info_file.read() - - additional_info_lines = additional_info.splitlines(keepends=True) - result = "" - skip_comment = True - for line in additional_info_lines: - if line.startswith(" -->"): - skip_comment = False - elif not skip_comment: - result += line - return result - return "" - - -def get_package_pip_name(provider_package_id: str): - return f"apache-airflow-providers-{provider_package_id.replace('.', '-')}" - - -def validate_provider_info_with_runtime_schema(provider_info: dict[str, Any]) -> None: - """Validates provider info against the runtime schema. - - This way we check if the provider info in the packages is future-compatible. - The Runtime Schema should only change when there is a major version change. 
- - :param provider_info: provider info to validate - """ - - with open(PROVIDER_RUNTIME_DATA_SCHEMA_PATH) as schema_file: - schema = json.load(schema_file) - try: - jsonschema.validate(provider_info, schema=schema) - except jsonschema.ValidationError as ex: - console.print("[red]Provider info not validated against runtime schema[/]") - raise Exception( - "Error when validating schema. The schema must be compatible with " - "airflow/provider_info.schema.json.", - ex, - ) - - -def get_provider_yaml(provider_package_id: str) -> dict[str, Any]: - """Retrieves provider info from the provider YAML file. - - The provider yaml file contains more information than provider_info that is - used at runtime. This method converts the full provider yaml file into - stripped-down provider info and validates it against deprecated 2.0.0 schema - and runtime schema. - - :param provider_package_id: package id to retrieve provider.yaml from - :return: provider_info dictionary - """ - provider_yaml_file_name = os.path.join(get_source_package_path(provider_package_id), "provider.yaml") - if not os.path.exists(provider_yaml_file_name): - raise Exception(f"The provider.yaml file is missing: {provider_yaml_file_name}") - with open(provider_yaml_file_name) as provider_file: - provider_yaml_dict = safe_load(provider_file) - return provider_yaml_dict - - -def get_provider_info_from_provider_yaml(provider_package_id: str) -> dict[str, Any]: - """Retrieves provider info from the provider yaml file. 
- - :param provider_package_id: package id to retrieve provider.yaml from - :return: provider_info dictionary - """ - provider_yaml_dict = get_provider_yaml(provider_package_id=provider_package_id) - validate_provider_info_with_runtime_schema(provider_yaml_dict) - return provider_yaml_dict - - -def get_version_tag(version: str, provider_package_id: str, version_suffix: str = ""): - if version_suffix is None: - version_suffix = "" - return f"providers-{provider_package_id.replace('.','-')}/{version}{version_suffix}" - - -def print_changes_table(changes_table): - syntax = Syntax(changes_table, "rst", theme="ansi_dark") - console.print(syntax) - - -def get_all_changes_for_package( - provider_package_id: str, - verbose: bool, - base_branch: str, - force: bool, -) -> tuple[bool, list[list[Change]] | Change | None, str]: - """Retrieves all changes for the package. - - :param provider_package_id: provider package id - :param verbose: whether to print verbose messages - :param base_branch: base branch to check changes in apache remote for changes - :param force: whether to force the check even if the tag exists - """ - provider_details = get_provider_details(provider_package_id) - current_version = provider_details.versions[0] - current_tag_no_suffix = get_version_tag(current_version, provider_package_id) - if verbose: - console.print(f"Checking if tag '{current_tag_no_suffix}' exist.") - if not force and not subprocess.call( - get_git_tag_check_command(current_tag_no_suffix), - cwd=provider_details.source_provider_package_path, - stderr=subprocess.DEVNULL, - ): - if verbose: - console.print(f"The tag {current_tag_no_suffix} exists.") - # The tag already exists - changes = subprocess.check_output( - get_git_log_command(verbose, f"{HEAD_OF_HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix), - cwd=provider_details.source_provider_package_path, - text=True, - ) - if changes: - provider_details = get_provider_details(provider_package_id) - doc_only_change_file = 
os.path.join( - provider_details.source_provider_package_path, ".latest-doc-only-change.txt" - ) - if os.path.exists(doc_only_change_file): - with open(doc_only_change_file) as f: - last_doc_only_hash = f.read().strip() - try: - changes_since_last_doc_only_check = subprocess.check_output( - get_git_log_command( - verbose, f"{HEAD_OF_HTTPS_REMOTE}/{base_branch}", last_doc_only_hash - ), - cwd=provider_details.source_provider_package_path, - text=True, - ) - if not changes_since_last_doc_only_check: - console.print() - console.print( - "[yellow]The provider has doc-only changes since the last release. Skipping[/]" - ) - # Returns 66 in case of doc-only changes - sys.exit(66) - if len(changes) > len(changes_since_last_doc_only_check): - # if doc-only was released after previous release - use it as starting point - # but if before - stay with the releases from last tag. - changes = changes_since_last_doc_only_check - except subprocess.CalledProcessError: - # ignore when the commit mentioned as last doc-only change is obsolete - pass - - console.print(f"[yellow]The provider {provider_package_id} has changes since last release[/]") - console.print() - console.print(f"[bright_blue]Provider: {provider_package_id}[/]\n") - changes_table, array_of_changes = convert_git_changes_to_table( - f"NEXT VERSION AFTER + {provider_details.versions[0]}", - changes, - base_url="https://github.com/apache/airflow/commit/", - markdown=False, - ) - print_changes_table(changes_table) - return False, array_of_changes[0], changes_table - else: - console.print(f"No changes for {provider_package_id}") - return False, None, "" - if verbose: - console.print("The tag does not exist. 
") - if len(provider_details.versions) == 1: - console.print( - f"The provider '{provider_package_id}' has never been released but it is ready to release!\n" - ) - else: - console.print(f"New version of the '{provider_package_id}' package is ready to be released!\n") - next_version_tag = f"{HEAD_OF_HTTPS_REMOTE}/{base_branch}" - changes_table = "" - current_version = provider_details.versions[0] - list_of_list_of_changes: list[list[Change]] = [] - for version in provider_details.versions[1:]: - version_tag = get_version_tag(version, provider_package_id) - changes = subprocess.check_output( - get_git_log_command(verbose, next_version_tag, version_tag), - cwd=provider_details.source_provider_package_path, - text=True, - ) - changes_table_for_version, array_of_changes_for_version = convert_git_changes_to_table( - current_version, changes, base_url="https://github.com/apache/airflow/commit/", markdown=False - ) - changes_table += changes_table_for_version - list_of_list_of_changes.append(array_of_changes_for_version) - next_version_tag = version_tag - current_version = version - changes = subprocess.check_output( - get_git_log_command(verbose, next_version_tag), - cwd=provider_details.source_provider_package_path, - text=True, - ) - changes_table_for_version, array_of_changes_for_version = convert_git_changes_to_table( - current_version, changes, base_url="https://github.com/apache/airflow/commit/", markdown=False - ) - changes_table += changes_table_for_version - if verbose: - print_changes_table(changes_table) - return True, list_of_list_of_changes or None, changes_table - - -def get_provider_details(provider_package_id: str) -> ProviderPackageDetails: - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - plugins: list[PluginInfo] = [] - if "plugins" in provider_info: - for plugin in provider_info["plugins"]: - package_name, class_name = plugin["plugin-class"].rsplit(".", maxsplit=1) - plugins.append( - PluginInfo( - name=plugin["name"], - 
package_name=package_name, - class_name=class_name, - ) - ) - return ProviderPackageDetails( - provider_package_id=provider_package_id, - full_package_name=f"airflow.providers.{provider_package_id}", - pypi_package_name=f"apache-airflow-providers-{provider_package_id.replace('.', '-')}", - source_provider_package_path=get_source_package_path(provider_package_id), - documentation_provider_package_path=get_documentation_package_path(provider_package_id), - provider_description=provider_info["description"], - versions=provider_info["versions"], - excluded_python_versions=provider_info.get("excluded-python-versions") or [], - plugins=plugins, - removed=provider_info.get("removed", False), - ) - - -def get_provider_requirements(provider_package_id: str) -> list[str]: - provider_yaml = get_provider_yaml(provider_package_id) - return provider_yaml["dependencies"] - - -def get_provider_jinja_context( - provider_info: dict[str, Any], - provider_details: ProviderPackageDetails, - current_release_version: str, - version_suffix: str, -): - verify_provider_package(provider_details.provider_package_id) - changelog_path = verify_changelog_exists(provider_details.provider_package_id) - cross_providers_dependencies = get_cross_provider_dependent_packages( - provider_package_id=provider_details.provider_package_id - ) - release_version_no_leading_zeros = strip_leading_zeros(current_release_version) - pip_requirements_table = convert_pip_requirements_to_table( - get_provider_requirements(provider_details.provider_package_id) - ) - pip_requirements_table_rst = convert_pip_requirements_to_table( - get_provider_requirements(provider_details.provider_package_id), markdown=False - ) - cross_providers_dependencies_table = convert_cross_package_dependencies_to_table( - cross_providers_dependencies - ) - cross_providers_dependencies_table_rst = convert_cross_package_dependencies_to_table( - cross_providers_dependencies, markdown=False - ) - with open(changelog_path) as changelog_file: - 
changelog = changelog_file.read() - supported_python_versions = [ - p for p in ALL_PYTHON_VERSIONS if p not in provider_details.excluded_python_versions - ] - python_requires = "~=3.8" - for p in provider_details.excluded_python_versions: - python_requires += f", !={p}" - min_airflow_version = MIN_AIRFLOW_VERSION - for dependency in provider_info["dependencies"]: - if dependency.startswith("apache-airflow>="): - current_min_airflow_version = dependency.split(">=")[1] - if Version(current_min_airflow_version) > Version(min_airflow_version): - min_airflow_version = current_min_airflow_version - context: dict[str, Any] = { - "ENTITY_TYPES": list(EntityType), - "README_FILE": "README.rst", - "PROVIDER_PACKAGE_ID": provider_details.provider_package_id, - "PACKAGE_PIP_NAME": get_pip_package_name(provider_details.provider_package_id), - "PACKAGE_WHEEL_NAME": get_wheel_package_name(provider_details.provider_package_id), - "FULL_PACKAGE_NAME": provider_details.full_package_name, - "PROVIDER_PATH": provider_details.full_package_name.replace(".", "/"), - "RELEASE": current_release_version, - "RELEASE_NO_LEADING_ZEROS": release_version_no_leading_zeros, - "VERSION_SUFFIX": version_suffix or "", - "ADDITIONAL_INFO": get_additional_package_info( - provider_package_path=provider_details.source_provider_package_path - ), - "CROSS_PROVIDERS_DEPENDENCIES": cross_providers_dependencies, - "PIP_REQUIREMENTS": get_provider_requirements(provider_details.provider_package_id), - "PROVIDER_TYPE": "Provider", - "PROVIDERS_FOLDER": "providers", - "PROVIDER_DESCRIPTION": provider_details.provider_description, - "INSTALL_REQUIREMENTS": get_install_requirements( - provider_package_id=provider_details.provider_package_id, version_suffix=version_suffix - ), - "SETUP_REQUIREMENTS": get_setup_requirements(), - "EXTRAS_REQUIREMENTS": get_package_extras(provider_package_id=provider_details.provider_package_id), - "CROSS_PROVIDERS_DEPENDENCIES_TABLE": cross_providers_dependencies_table, - 
"CROSS_PROVIDERS_DEPENDENCIES_TABLE_RST": cross_providers_dependencies_table_rst, - "PIP_REQUIREMENTS_TABLE": pip_requirements_table, - "PIP_REQUIREMENTS_TABLE_RST": pip_requirements_table_rst, - "PROVIDER_INFO": provider_info, - "CHANGELOG_RELATIVE_PATH": os.path.relpath( - provider_details.source_provider_package_path, - provider_details.documentation_provider_package_path, - ), - "CHANGELOG": changelog, - "SUPPORTED_PYTHON_VERSIONS": supported_python_versions, - "PYTHON_REQUIRES": python_requires, - "PLUGINS": provider_details.plugins, - "MIN_AIRFLOW_VERSION": min_airflow_version, - "PREINSTALLED_PROVIDER": provider_details.provider_package_id in PREINSTALLED_PROVIDERS, - "PROVIDER_REMOVED": provider_details.removed, - } - return context - - -def prepare_readme_file(context): - readme_content = LICENCE_RST + render_template( - template_name="PROVIDER_README", context=context, extension=".rst" - ) - readme_file_path = os.path.join(TARGET_PROVIDER_PACKAGES_PATH, "README.rst") - with open(readme_file_path, "w") as readme_file: - readme_file.write(readme_content) - - -def confirm(message: str, answer: str | None = None) -> bool: - """Ask user to confirm (case-insensitive). - - :param message: message to display - :param answer: force answer if set - :return: True if the answer is any form of y/yes. Exits with 65 exit code if - any form of q/quit is chosen. 
- """ - given_answer = answer.lower() if answer is not None else "" - while given_answer not in ["y", "n", "q", "yes", "no", "quit"]: - console.print(f"[yellow]{message}[y/n/q]?[/] ", end="") - try: - given_answer = input("").lower() - except KeyboardInterrupt: - given_answer = "q" - if given_answer.lower() in ["q", "quit"]: - # Returns 65 in case user decided to quit - sys.exit(65) - return given_answer in ["y", "yes"] - - -class TypeOfChange(Enum): - DOCUMENTATION = "d" - BUGFIX = "b" - FEATURE = "f" - BREAKING_CHANGE = "x" - SKIP = "s" - - -def get_type_of_changes(answer: str | None) -> TypeOfChange: - """Ask user to specify type of changes (case-insensitive). - - :return: Type of change. - """ - given_answer = "" - if answer and answer.lower() in ["yes", "y"]: - # Simulate all possible non-terminal answers - return random.choice( - [ - TypeOfChange.DOCUMENTATION, - TypeOfChange.BUGFIX, - TypeOfChange.FEATURE, - TypeOfChange.BREAKING_CHANGE, - TypeOfChange.SKIP, - ] - ) - while given_answer not in [*[t.value for t in TypeOfChange], "q"]: - console.print( - "[yellow]Type of change (d)ocumentation, (b)ugfix, (f)eature, (x)breaking " - "change, (s)kip, (q)uit [d/b/f/x/s/q]?[/] ", - end="", - ) - try: - given_answer = input("").lower() - except KeyboardInterrupt: - given_answer = "q" - if given_answer == "q": - # Returns 65 in case user decided to quit - sys.exit(65) - return TypeOfChange(given_answer) - - -def mark_latest_changes_as_documentation_only(provider_package_id: str, latest_change: Change): - provider_details = get_provider_details(provider_package_id=provider_package_id) - console.print( - f"Marking last change: {latest_change.short_hash} and all above changes since the last release " - "as doc-only changes!" 
- ) - with open( - os.path.join(provider_details.source_provider_package_path, ".latest-doc-only-change.txt"), "w" - ) as f: - f.write(latest_change.full_hash + "\n") - # exit code 66 marks doc-only change marked - sys.exit(66) - - -def add_new_version(type_of_change: TypeOfChange, provider_package_id: str): - provider_details = get_provider_details(provider_package_id) - version = provider_details.versions[0] - v = semver.VersionInfo.parse(version) - if type_of_change == TypeOfChange.BREAKING_CHANGE: - v = v.bump_major() - elif type_of_change == TypeOfChange.FEATURE: - v = v.bump_minor() - elif type_of_change == TypeOfChange.BUGFIX: - v = v.bump_patch() - provider_yaml_path = Path(get_source_package_path(provider_package_id)) / "provider.yaml" - original_text = provider_yaml_path.read_text() - new_text = re.sub(r"versions:", f"versions:\n - {v}", original_text, 1) - provider_yaml_path.write_text(new_text) - console.print() - console.print(f"[bright_blue]Bumped version to {v}") - - -def update_release_notes( - provider_package_id: str, - version_suffix: str, - force: bool, - verbose: bool, - answer: str | None, - base_branch: str, - regenerate_missing_docs: bool, -) -> bool: - """Updates generated files. - - This includes the readme, changes, and/or setup.cfg/setup.py/manifest.in/provider_info). - - :param provider_package_id: id of the package - :param version_suffix: version suffix corresponding to the version in the code - :param force: regenerate already released documentation - :param verbose: whether to print verbose messages - :param answer: force answer to question if set. 
- :param base_branch: base branch to check changes in apache remote for changes - :param regenerate_missing_docs: whether to regenerate missing docs - :returns False if the package should be skipped, True if everything generated properly - """ - verify_provider_package(provider_package_id) - proceed, latest_change, changes = get_all_changes_for_package( - provider_package_id, verbose, base_branch, force - ) - if not force: - if proceed: - if not confirm("Provider marked for release. Proceed", answer=answer): - return False - elif not latest_change: - console.print() - console.print( - f"[yellow]Provider: {provider_package_id} - skipping documentation generation. No changes![/]" - ) - console.print() - return False - else: - type_of_change = get_type_of_changes(answer=answer) - if type_of_change == TypeOfChange.DOCUMENTATION: - if isinstance(latest_change, Change): - mark_latest_changes_as_documentation_only(provider_package_id, latest_change) - else: - raise ValueError( - "Expected only one change to be present to mark changes " - f"in provider {provider_package_id} as docs-only. " - f"Received {len(latest_change)}." 
- ) - elif type_of_change == TypeOfChange.SKIP: - return False - elif type_of_change in [TypeOfChange.BUGFIX, TypeOfChange.FEATURE, TypeOfChange.BREAKING_CHANGE]: - add_new_version(type_of_change, provider_package_id) - proceed, latest_change, changes = get_all_changes_for_package( - provider_package_id, verbose, base_branch, force - ) - provider_details = get_provider_details(provider_package_id) - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - jinja_context = get_provider_jinja_context( - provider_info=provider_info, - provider_details=provider_details, - current_release_version=provider_details.versions[0], - version_suffix=version_suffix, - ) - jinja_context["DETAILED_CHANGES_RST"] = changes - jinja_context["DETAILED_CHANGES_PRESENT"] = bool(changes) - errors = False - if not update_changelog_rst( - jinja_context, - provider_package_id, - provider_details.documentation_provider_package_path, - regenerate_missing_docs, - ): - errors = True - if not force: - if not update_commits_rst( - jinja_context, - provider_package_id, - provider_details.documentation_provider_package_path, - regenerate_missing_docs, - ): - errors = True - if errors: - console.print("[red]There were errors when generating documentation[/]") - sys.exit(1) - return True - - -def update_min_airflow_version(provider_package_id: str, version_suffix: str): - provider_details = get_provider_details(provider_package_id) - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - jinja_context = get_provider_jinja_context( - provider_info=provider_info, - provider_details=provider_details, - current_release_version=provider_details.versions[0], - version_suffix=version_suffix, - ) - generate_init_py_file_for_provider( - context=jinja_context, - target_path=provider_details.source_provider_package_path, - ) - replace_min_airflow_version_in_provider_yaml( - context=jinja_context, target_path=provider_details.source_provider_package_path - ) - - -def 
update_setup_files( - provider_package_id: str, - version_suffix: str, -): - """Updates generated setup.cfg/setup.py/manifest.in/provider_info for packages. - - :param provider_package_id: id of the package - :param version_suffix: version suffix corresponding to the version in the code - :returns False if the package should be skipped, True if everything generated properly - """ - verify_provider_package(provider_package_id) - provider_details = get_provider_details(provider_package_id) - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - current_release_version = provider_details.versions[0] - jinja_context = get_provider_jinja_context( - provider_info=provider_info, - provider_details=provider_details, - current_release_version=current_release_version, - version_suffix=version_suffix, - ) - console.print() - console.print(f"Generating setup files for {provider_package_id}") - console.print() - prepare_setup_py_file(jinja_context) - prepare_setup_cfg_file(jinja_context) - prepare_get_provider_info_py_file(jinja_context, provider_package_id) - prepare_manifest_in_file(jinja_context) - prepare_readme_file(jinja_context) - return True - - -def replace_content(file_path, old_text, new_text, provider_package_id): - if new_text != old_text: - _, temp_file_path = tempfile.mkstemp() - try: - if os.path.isfile(file_path): - copyfile(file_path, temp_file_path) - with open(file_path, "w") as readme_file: - readme_file.write(new_text) - console.print() - console.print(f"Generated {file_path} file for the {provider_package_id} provider") - console.print() - if old_text != "": - subprocess.call(["diff", "--color=always", temp_file_path, file_path]) - finally: - os.remove(temp_file_path) - - -AUTOMATICALLY_GENERATED_MARKER = "AUTOMATICALLY GENERATED" -AUTOMATICALLY_GENERATED_CONTENT = ( - f".. THE REMAINDER OF THE FILE IS {AUTOMATICALLY_GENERATED_MARKER}. " - f"IT WILL BE OVERWRITTEN AT RELEASE TIME!" 
-) - - -def update_index_rst( - context: dict[str, Any], - provider_package_id: str, - target_path: Path, -): - index_update = render_template( - template_name="PROVIDER_INDEX", context=context, extension=".rst", keep_trailing_newline=True - ) - index_file_path = os.path.join(target_path, "index.rst") - old_text = "" - if os.path.isfile(index_file_path): - with open(index_file_path) as readme_file_read: - old_text = readme_file_read.read() - new_text = deepcopy(old_text) - lines = old_text.splitlines(keepends=False) - for index, line in enumerate(lines): - if AUTOMATICALLY_GENERATED_MARKER in line: - new_text = "\n".join(lines[:index]) - new_text += "\n" + AUTOMATICALLY_GENERATED_CONTENT + "\n" - new_text += index_update - replace_content(index_file_path, old_text, new_text, provider_package_id) - - -# Taken from pygrep hooks we are using in pre-commit -# https://github.com/pre-commit/pygrep-hooks/blob/main/.pre-commit-hooks.yaml -BACKTICKS_CHECK = re.compile(r"^(?! ).*(^| )`[^`]+`([^_]|$)", re.MULTILINE) - - -def _update_file( - context: dict[str, Any], - template_name: str, - extension: str, - file_name: str, - provider_package_id: str, - target_path: Path, - regenerate_missing_docs: bool, -) -> bool: - file_path = target_path / file_name - if regenerate_missing_docs and file_path.exists(): - return True - new_text = render_template( - template_name=template_name, context=context, extension=extension, keep_trailing_newline=True - ) - file_path = target_path / file_name - old_text = "" - if os.path.isfile(file_path): - with open(file_path) as readme_file_read: - old_text = readme_file_read.read() - replace_content(file_path, old_text, new_text, provider_package_id) - index_path = target_path / "index.rst" - if not index_path.exists(): - console.print(f"[red]ERROR! 
The index must exist for the provider docs: {index_path}") - sys.exit(1) - - expected_link_in_index = f"<{file_name.split('.')[0]}>" - if expected_link_in_index not in index_path.read_text(): - console.print( - f"\n[red]ERROR! The {index_path} must contain " - f"link to the generated documentation:[/]\n\n" - f"[yellow]{expected_link_in_index}[/]\n\n" - f"[bright_blue]Please make sure to add it to {index_path}.\n" - ) - - console.print(f"Checking for backticks correctly generated in: {file_path}") - match = BACKTICKS_CHECK.search(file_path.read_text()) - if match: - console.print( - f"\n[red]ERROR: Single backticks (`) found in {file_path}:[/]\n\n" - f"[yellow]{match.group(0)}[/]\n\n" - f"[bright_blue]Please fix them by replacing with double backticks (``).[/]\n" - ) - return False - - # TODO: uncomment me. Linting revealed that our already generated provider docs have duplicate links - # in the generated files, we should fix those and uncomment linting as separate step - so that - # we do not hold current release for fixing the docs. 
- # console.print(f"Linting: {file_path}") - # errors = restructuredtext_lint.lint_file(file_path) - # real_errors = False - # if errors: - # for error in errors: - # # Skip known issue: linter with doc role similar to https://github.com/OCA/pylint-odoo/issues/38 - # if ( - # 'No role entry for "doc"' in error.message - # or 'Unknown interpreted text role "doc"' in error.message - # ): - # continue - # real_errors = True - # console.print(f"* [red] {error.message}") - # if real_errors: - # console.print(f"\n[red] Errors found in {file_path}") - # return False - - console.print(f"[green]Generated {file_path} for {provider_package_id} is OK[/]") - - return True - - -def update_changelog_rst( - context: dict[str, Any], - provider_package_id: str, - target_path: Path, - regenerate_missing_docs: bool, -) -> bool: - return _update_file( - context=context, - template_name="PROVIDER_CHANGELOG", - extension=".rst", - file_name="changelog.rst", - provider_package_id=provider_package_id, - target_path=target_path, - regenerate_missing_docs=regenerate_missing_docs, - ) - - -def update_commits_rst( - context: dict[str, Any], - provider_package_id: str, - target_path: Path, - regenerate_missing_docs: bool, -) -> bool: - return _update_file( - context=context, - template_name="PROVIDER_COMMITS", - extension=".rst", - file_name="commits.rst", - provider_package_id=provider_package_id, - target_path=target_path, - regenerate_missing_docs=regenerate_missing_docs, - ) - - -def replace_min_airflow_version_in_provider_yaml( - context: dict[str, Any], - target_path: str, -): - provider_yaml_path = os.path.join(target_path, "provider.yaml") - with open(provider_yaml_path) as provider_yaml_file: - provider_yaml_txt = provider_yaml_file.read() - provider_yaml_txt = re.sub( - r" - apache-airflow>=.*", f" - apache-airflow>={context['MIN_AIRFLOW_VERSION']}", provider_yaml_txt - ) - with open(provider_yaml_path, "w") as provider_yaml_file: - provider_yaml_file.write(provider_yaml_txt) - - -def 
generate_init_py_file_for_provider( - context: dict[str, Any], - target_path: str, -): - init_py_content = black_format( - render_template( - template_name="PROVIDER__INIT__PY", - context=context, - extension=".py", - keep_trailing_newline=True, - ) - ) - init_py_path = os.path.join(target_path, "__init__.py") - with open(init_py_path, "w") as init_py_file: - init_py_file.write(init_py_content) - - -@lru_cache(maxsize=None) -def black_mode() -> Mode: - config = parse_pyproject_toml(os.path.join(AIRFLOW_SOURCES_ROOT_PATH, "pyproject.toml")) - target_versions = {TargetVersion[val.upper()] for val in config.get("target_version", ())} - return Mode( - target_versions=target_versions, - line_length=config.get("line_length", Mode.line_length), - ) - - -def black_format(content) -> str: - return format_str(content, mode=black_mode()) - - -def prepare_setup_py_file(context): - setup_py_template_name = "SETUP" - setup_py_file_path = os.path.abspath(os.path.join(get_target_folder(), "setup.py")) - setup_py_content = render_template( - template_name=setup_py_template_name, context=context, extension=".py", autoescape=False - ) - with open(setup_py_file_path, "w") as setup_py_file: - setup_py_file.write(black_format(setup_py_content)) - - -def prepare_setup_cfg_file(context): - setup_cfg_template_name = "SETUP" - setup_cfg_file_path = os.path.abspath(os.path.join(get_target_folder(), "setup.cfg")) - setup_cfg_content = render_template( - template_name=setup_cfg_template_name, - context=context, - extension=".cfg", - autoescape=False, - keep_trailing_newline=True, - ) - with open(setup_cfg_file_path, "w") as setup_cfg_file: - setup_cfg_file.write(setup_cfg_content) - - -def prepare_get_provider_info_py_file(context, provider_package_id: str): - get_provider_template_name = "get_provider_info" - get_provider_file_path = os.path.abspath( - os.path.join( - get_target_providers_package_folder(provider_package_id), - "get_provider_info.py", - ) - ) - get_provider_content = 
render_template( - template_name=get_provider_template_name, - context=context, - extension=".py", - autoescape=False, - keep_trailing_newline=True, - ) - with open(get_provider_file_path, "w") as get_provider_file: - get_provider_file.write(black_format(get_provider_content)) - - -def prepare_manifest_in_file(context): - target = os.path.abspath(os.path.join(get_target_folder(), "MANIFEST.in")) - content = render_template( - template_name="MANIFEST", - context=context, - extension=".in", - autoescape=False, - keep_trailing_newline=True, - ) - with open(target, "w") as fh: - fh.write(content) - - -def get_all_providers() -> list[str]: - """Returns all providers for regular packages. - - :return: list of providers that are considered for provider packages - """ - return list(ALL_PROVIDERS) - - -def get_removed_provider_ids() -> list[str]: - """ - Yields the ids of suspended providers. - """ - import yaml - - removed_provider_ids = [] - for provider_path in PROVIDERS_PATH.rglob("provider.yaml"): - provider_yaml = yaml.safe_load(provider_path.read_text()) - package_name = provider_yaml.get("package-name") - if provider_yaml.get("removed", False): - if not provider_yaml.get("suspended"): - console.print( - f"[error]The provider {package_name} is marked for removal in provider.yaml, but " - f"not suspended. Please suspend the provider first before removing it.\n" - ) - sys.exit(1) - removed_provider_ids.append(package_name[len("apache-airflow-providers-") :].replace("-", ".")) - return removed_provider_ids - - -def verify_provider_package(provider_package_id: str) -> None: - """Verifies if the provider package is good. 
- - :param provider_package_id: package id to verify - """ - if provider_package_id not in get_all_providers(): - if provider_package_id in get_removed_provider_ids(): - console.print() - console.print( - f"[yellow]The package: {provider_package_id} is suspended, but " - f"since you asked for it, it will be built [/]" - ) - console.print() - else: - console.print(f"[red]Wrong package name: {provider_package_id}[/]") - console.print("Use one of:") - console.print(get_all_providers()) - console.print(f"[red]The package {provider_package_id} is not a provider package.") - sys.exit(1) - - -def verify_changelog_exists(package: str) -> str: - provider_details = get_provider_details(package) - changelog_path = os.path.join(provider_details.source_provider_package_path, "CHANGELOG.rst") - if not os.path.isfile(changelog_path): - console.print(f"\n[red]ERROR: Missing {changelog_path}[/]\n") - console.print("[info]Please add the file with initial content:") - console.print("----- START COPYING AFTER THIS LINE ------- ") - processed_changelog = jinja2.Template(INITIAL_CHANGELOG_CONTENT, autoescape=True).render( - package_name=provider_details.pypi_package_name, - ) - syntax = Syntax( - processed_changelog, - "rst", - theme="ansi_dark", - ) - console.print(syntax) - console.print("----- END COPYING BEFORE THIS LINE ------- ") - sys.exit(1) - return changelog_path - - -@cli.command() -def list_providers_packages(): - """List all provider packages.""" - providers = get_all_providers() - # if provider needs to be not considered in release add it here - # this is useful for cases where provider is WIP for a long period thus we don't want to release it yet. 
- providers_to_remove_from_release = [] - for provider in providers: - if provider not in providers_to_remove_from_release: - console.print(provider) - - -@cli.command() -@option_version_suffix -@option_git_update -@argument_package_id -@option_force -@option_verbose -@option_base_branch -@click.option( - "-a", - "--answer", - type=click.Choice(["y", "n", "q", "yes", "no", "quit"]), - help="Force answer to questions.", - envvar="ANSWER", -) -def update_package_documentation( - version_suffix: str, - git_update: bool, - answer: str | None, - package_id: str, - force: bool, - verbose: bool, - base_branch: str, -): - """Updates package documentation. - - See `list-providers-packages` subcommand for the possible PACKAGE_ID values. - """ - provider_package_id = package_id - verify_provider_package(provider_package_id) - with with_group(f"Update release notes for package '{provider_package_id}' "): - console.print("Updating documentation for the latest release version.") - make_sure_remote_apache_exists_and_fetch(git_update, verbose) - only_min_version_upgrade = os.environ.get("ONLY_MIN_VERSION_UPDATE", "false").lower() == "true" - regenerate_missing_docs = os.environ.get("REGENERATE_MISSING_DOCS", "false").lower() == "true" - if not only_min_version_upgrade: - if not update_release_notes( - provider_package_id, - version_suffix, - force=force or regenerate_missing_docs, - verbose=verbose, - answer=answer, - base_branch=base_branch, - regenerate_missing_docs=regenerate_missing_docs, - ): - # Returns 64 in case of skipped package - sys.exit(64) - update_min_airflow_version(provider_package_id=provider_package_id, version_suffix=version_suffix) - - -def tag_exists_for_version(provider_package_id: str, current_tag: str, verbose: bool): - provider_details = get_provider_details(provider_package_id) - if verbose: - console.print(f"Checking if tag `{current_tag}` exists.") - if not subprocess.call( - get_git_tag_check_command(current_tag), - 
cwd=provider_details.source_provider_package_path, - stderr=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - ): - if verbose: - console.print(f"Tag `{current_tag}` exists.") - return True - if verbose: - console.print(f"Tag `{current_tag}` does not exist.") - return False - - -@cli.command() -@option_version_suffix -@option_git_update -@argument_package_id -@option_verbose -@option_skip_tag_check -def generate_setup_files( - version_suffix: str, git_update: bool, package_id: str, verbose: bool, skip_tag_check: bool -): - """Generates setup files for the package. - - See `list-providers-packages` subcommand for the possible PACKAGE_ID values. - """ - provider_package_id = package_id - with with_group(f"Generate setup files for '{provider_package_id}'"): - if not skip_tag_check: - current_tag = get_current_tag(provider_package_id, version_suffix, git_update, verbose) - if tag_exists_for_version(provider_package_id, current_tag, verbose): - console.print(f"[yellow]The tag {current_tag} exists. 
Not preparing the package.[/]") - sys.exit(64) - if update_setup_files(provider_package_id, version_suffix): - console.print(f"[green]Generated regular package setup files for {provider_package_id}[/]") - else: - sys.exit(64) - - -def get_current_tag(provider_package_id: str, suffix: str, git_update: bool, verbose: bool): - verify_provider_package(provider_package_id) - make_sure_remote_apache_exists_and_fetch(git_update, verbose) - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - versions: list[str] = provider_info["versions"] - current_version = versions[0] - current_tag = get_version_tag(current_version, provider_package_id, suffix) - return current_tag - - -def cleanup_remnants(verbose: bool): - if verbose: - console.print("Cleaning remnants") - files = glob.glob("*.egg-info") - for file in files: - shutil.rmtree(file, ignore_errors=True) - files = glob.glob("build") - for file in files: - shutil.rmtree(file, ignore_errors=True) - - -def verify_setup_cfg_prepared(provider_package): - with open("setup.cfg") as f: - setup_content = f.read() - search_for = f"providers-{provider_package.replace('.','-')} for Apache Airflow" - if search_for not in setup_content: - console.print( - f"[red]The setup.py is probably prepared for another package. " - f"It does not contain [bold]{search_for}[/bold]![/]" - ) - console.print( - f"\nRun:\n\n[bold]./dev/provider_packages/prepare_provider_packages.py " - f"generate-setup-files {provider_package}[/bold]\n" - ) - raise Exception("Wrong setup!") - - -@cli.command() -@option_package_format -@option_git_update -@option_version_suffix -@argument_package_id -@option_verbose -@option_skip_tag_check -def build_provider_packages( - package_format: str, - git_update: bool, - version_suffix: str, - package_id: str, - verbose: bool, - skip_tag_check: bool, -): - """Builds provider package. - - See `list-providers-packages` subcommand for the possible PACKAGE_ID values. 
- """ - - import tempfile - - # we cannot use context managers because if the directory gets deleted (which bdist_wheel does), - # the context manager will throw an exception when trying to delete it again - tmp_build_dir = tempfile.TemporaryDirectory().name - tmp_dist_dir = tempfile.TemporaryDirectory().name - try: - provider_package_id = package_id - with with_group(f"Prepare provider package for '{provider_package_id}'"): - if not skip_tag_check and (version_suffix.startswith("rc") or version_suffix == ""): - # For RC and official releases we check if the "officially released" version exists - # and skip the released if it was. This allows to skip packages that have not been - # marked for release. For "dev" suffixes, we always build all packages - released_tag = get_current_tag(provider_package_id, "", git_update, verbose) - if tag_exists_for_version(provider_package_id, released_tag, verbose): - console.print(f"[yellow]The tag {released_tag} exists. Skipping the package.[/]") - return False - console.print(f"Changing directory to {TARGET_PROVIDER_PACKAGES_PATH}") - os.chdir(TARGET_PROVIDER_PACKAGES_PATH) - cleanup_remnants(verbose) - provider_package = package_id - verify_setup_cfg_prepared(provider_package) - - console.print(f"Building provider package: {provider_package} in format {package_format}") - command: list[str] = ["python3", "setup.py", "build", "--build-temp", tmp_build_dir] - if version_suffix is not None: - command.extend(["egg_info", "--tag-build", version_suffix]) - if package_format in ["sdist", "both"]: - command.append("sdist") - if package_format in ["wheel", "both"]: - command.extend(["bdist_wheel", "--bdist-dir", tmp_dist_dir]) - console.print(f"Executing command: '{' '.join(command)}'") - try: - subprocess.check_call(args=command, stdout=subprocess.DEVNULL) - except subprocess.CalledProcessError as ex: - console.print("[red]The command returned an error %s", ex) - sys.exit(ex.returncode) - console.print( - f"[green]Prepared provider 
package {provider_package} in format {package_format}[/]" - ) - finally: - shutil.rmtree(tmp_build_dir, ignore_errors=True) - shutil.rmtree(tmp_dist_dir, ignore_errors=True) - - -def find_insertion_index_for_version(content: list[str], version: str) -> tuple[int, bool]: - """Finds insertion index for the specified version from the .rst changelog content. - - :param content: changelog split into separate lines - :param version: version to look for - - :return: A 2-tuple. The first item indicates the insertion index, while the - second is a boolean indicating whether to append (False) or insert (True) - to the changelog. - """ - changelog_found = False - skip_next_line = False - index = 0 - for index, line in enumerate(content): - if not changelog_found and line.strip() == version: - changelog_found = True - skip_next_line = True - elif not skip_next_line and line and all(char == "." for char in line): - return index - 2, changelog_found - else: - skip_next_line = False - return index, changelog_found - - -class ClassifiedChanges(NamedTuple): - """Stores lists of changes classified automatically""" - - fixes: list[Change] = [] - features: list[Change] = [] - breaking_changes: list[Change] = [] - other: list[Change] = [] - - -def get_changes_classified(changes: list[Change]) -> ClassifiedChanges: - """Pre-classifies changes based on commit message, it's wildly guessing now, - - However, if we switch to semantic commits, it could be automated. This list - is supposed to be manually reviewed and re-classified by release manager - anyway. 
- - :param changes: list of changes - :return: list of changes classified semi-automatically to the fix/feature/breaking/other buckets - """ - classified_changes = ClassifiedChanges() - for change in changes: - if "fix" in change.message.lower(): - classified_changes.fixes.append(change) - elif "add" in change.message.lower(): - classified_changes.features.append(change) - elif "breaking" in change.message.lower(): - classified_changes.breaking_changes.append(change) - else: - classified_changes.other.append(change) - return classified_changes - - -@cli.command() -@argument_package_id -@option_base_branch -@option_verbose -def update_changelog(package_id: str, base_branch: str, verbose: bool): - """Updates changelog for the provider.""" - if _update_changelog(package_id, base_branch, verbose, True): - sys.exit(64) - - -def _update_changelog(package_id: str, base_branch: str, verbose: bool, force: bool) -> bool: - """Internal update changelog method. - - :param package_id: package id - :param base_branch: base branch to check changes in apache remote for changes - :param verbose: verbose flag - :return: true if package is skipped - """ - with with_group("Updates changelog for last release"): - verify_provider_package(package_id) - provider_details = get_provider_details(package_id) - provider_info = get_provider_info_from_provider_yaml(package_id) - current_release_version = provider_details.versions[0] - jinja_context = get_provider_jinja_context( - provider_info=provider_info, - provider_details=provider_details, - current_release_version=current_release_version, - version_suffix="", - ) - changelog_path = os.path.join(provider_details.source_provider_package_path, "CHANGELOG.rst") - proceed, changes, _ = get_all_changes_for_package(package_id, verbose, base_branch, force) - if not proceed: - if force: - console.print( - f"[info]The provider {package_id} is not being release but we regenerate docs for it " - f"(except commits).[/]" - ) - else: - console.print( - 
f"[yellow]The provider {package_id} is not being released. Skipping the package.[/]" - ) - return True - if os.environ.get("REGENERATE_MISSING_DOCS", "false").lower() == "true": - console.print("[info]REGENERATE_MISSING_DOCS is set to true, skipping changelog update[/]") - else: - generate_new_changelog(package_id, provider_details, changelog_path, changes) - console.print() - console.print(f"Update index.rst for {package_id}") - console.print() - update_index_rst(jinja_context, package_id, provider_details.documentation_provider_package_path) - return False - - -def generate_new_changelog(package_id, provider_details, changelog_path, changes): - latest_version = provider_details.versions[0] - with open(changelog_path) as changelog: - current_changelog = changelog.read() - current_changelog_lines = current_changelog.splitlines() - insertion_index, append = find_insertion_index_for_version(current_changelog_lines, latest_version) - if append: - if not changes: - console.print( - f"[green]The provider {package_id} changelog for `{latest_version}` " - "has first release. Not updating the changelog.[/]" - ) - return - new_changes = [ - change for change in changes[0] if change.pr and "(#" + change.pr + ")" not in current_changelog - ] - if not new_changes: - console.print( - f"[green]The provider {package_id} changelog for `{latest_version}` " - "has no new changes. Not updating the changelog.[/]" - ) - return - context = {"new_changes": new_changes} - generated_new_changelog = render_template( - template_name="UPDATE_CHANGELOG", context=context, extension=".rst" - ) - else: - if changes: - classified_changes = get_changes_classified(changes[0]) - else: - # change log exist but without version 1.0.0 entry - classified_changes = None - - context = { - "version": latest_version, - "version_header": "." 
* len(latest_version), - "classified_changes": classified_changes, - } - generated_new_changelog = render_template( - template_name="CHANGELOG", context=context, extension=".rst" - ) - new_changelog_lines = current_changelog_lines[0:insertion_index] - new_changelog_lines.extend(generated_new_changelog.splitlines()) - new_changelog_lines.extend(current_changelog_lines[insertion_index:]) - diff = "\n".join(difflib.context_diff(current_changelog_lines, new_changelog_lines, n=5)) - syntax = Syntax(diff, "diff") - console.print(syntax) - if not append: - console.print( - f"[green]The provider {package_id} changelog for `{latest_version}` " - "version is missing. Generating fresh changelog.[/]" - ) - else: - console.print( - f"[green]Appending the provider {package_id} changelog for `{latest_version}` version.[/]" - ) - with open(changelog_path, "w") as changelog: - changelog.write("\n".join(new_changelog_lines)) - changelog.write("\n") - - -def get_package_from_changelog(changelog_path: str): - folder = Path(changelog_path).parent - package = "" - separator = "" - while not os.path.basename(folder) == "providers": - package = os.path.basename(folder) + separator + package - separator = "." 
- folder = Path(folder).parent - return package - - -@cli.command() -@argument_changelog_files -@option_git_update -@option_base_branch -@option_verbose -def update_changelogs(changelog_files: list[str], git_update: bool, base_branch: str, verbose: bool): - """Updates changelogs for multiple packages.""" - if git_update: - make_sure_remote_apache_exists_and_fetch(git_update, verbose) - for changelog_file in changelog_files: - package_id = get_package_from_changelog(changelog_file) - _update_changelog(package_id=package_id, base_branch=base_branch, verbose=verbose, force=True) - - -if __name__ == "__main__": - # The cli exit code is: - # * 0 in case of success - # * 1 in case of error - # * 64 in case of skipped package - # * 65 in case user decided to quit - # * 66 in case package has doc-only changes - try: - cli() - except KeyboardInterrupt: - print("Interrupted") - try: - sys.exit(65) - except SystemExit: - os._exit(65) diff --git a/images/breeze/output_release-management_prepare-provider-documentation.svg b/images/breeze/output_release-management_prepare-provider-documentation.svg index b468885881cee..7643439f991b3 100644 --- a/images/breeze/output_release-management_prepare-provider-documentation.svg +++ b/images/breeze/output_release-management_prepare-provider-documentation.svg @@ -1,4 +1,4 @@ - +