From fddf030a607beab08705b83097578749967df37a Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Thu, 9 Nov 2023 19:35:12 +0100 Subject: [PATCH 1/2] Move "prepare-provider-documentation" to Breeze This PR moves the functionality of preparing provider documentation from a python script inside Breeze CI image to breeze Python package. This is the first of the series of moves that will simplify the way provider packages are being built and prepared with the aim of improving security of the supply chain and making it easier to debug and modify the release process. Historically, release process has been run inside of Breeze for several reasons: isolation of running package preparation from Host environment, the need to keep separate virtualenv and because we run verification of provider packages during release process - which requires the CI environment with all its dependencies. So far the process looked like this: * bash breeze parsed the arguments * bash breeze started the docker bash script with packages as parameters * the bash script in CI image looped over the packages and ran python prepare_provider_packages.py (twice) to generate docs and update changelog (this is interactive process where release manager makes decision on bumping versions). Those python scripts performed verification on provider.yaml files * the bash script summarized the packages and displayed status of preparation However after moving to Python based breeze, we can simplify it all and run all those steps in Python breeze internal code - no need to go to docker and use bash scripts. We also do not have to do verification of provider.yaml files, because it is already done extensively in pre-commit. This PR moves all this logic to inside Breeze. 
There is still duplicated code remaining in the original in-container `prepare_provider_packages.py`; this duplication will be removed by subsequent PRs where other release management commands for provider packages will also be moved to Breeze as follow-up of this PR. This PR has the following changes: * move the provider documentation code from `dev/provider_packages` to `dev/breeze/` (and from in-container to in-breeze-venv execution) * completely removed the intermediate bash script and calling Python scripts from it - moving the logic to Breeze entirely * added better diagnostics of what happens when packages are classified with particular types of changes (added `special` style to show it) * cleaned and clarified `prepare-provider-documentation` command line flags * introduce explicit "non-interactive" mode that is used to run and test the command in CI and to test it locally * replace str with Path where files were used in the moved code * add unit tests covering unit-testable parts of the moved code * refactored the moved code to use utils available in Breeze * split the code into packages and versions (reusable utils) and specific code for preparing package documentation * cached provider.yaml information retrieved from providers * move provider documentation templates to Breeze * better error handling - errors are now regular exceptions in Python process rather than interpreting the exit_codes passed from python sub-scripts returning the codes to Bash * when release manager classifies package, only relevant sections are generated (Features/Breaking changes) based on decision and changes are automatically "guessed" only if release manager chose the section where they would fall in --- .github/workflows/ci.yml | 2 +- .pre-commit-config.yaml | 8 +- BREEZE.rst | 6 - dev/README_RELEASE_PROVIDER_PACKAGES.md | 4 + dev/breeze/README.md | 2 +- dev/breeze/setup.cfg | 3 + .../commands/release_management_commands.py | 269 ++-- 
.../release_management_commands_config.py | 5 +- .../commands/testing_commands.py | 4 +- .../src/airflow_breeze/global_constants.py | 1 + .../provider_documentation.py | 1152 +++++++++++++++++ .../templates}/CHANGELOG_TEMPLATE.rst.jinja2 | 6 + .../PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 | 2 +- .../PROVIDER_COMMITS_TEMPLATE.rst.jinja2 | 4 +- .../PROVIDER_INDEX_TEMPLATE.rst.jinja2 | 2 +- .../PROVIDER_README_TEMPLATE.rst.jinja2 | 109 ++ .../PROVIDER__INIT__PY_TEMPLATE.py.jinja2 | 2 +- .../UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 | 0 .../utils/add_back_references.py | 22 +- .../src/airflow_breeze/utils/black_utils.py | 38 + .../src/airflow_breeze/utils/console.py | 3 + .../src/airflow_breeze/utils/packages.py | 425 +++++- .../src/airflow_breeze/utils/parallel.py | 4 +- .../src/airflow_breeze/utils/path_utils.py | 1 + .../utils/publish_docs_builder.py | 6 +- .../utils/publish_docs_helpers.py | 29 - .../src/airflow_breeze/utils/run_tests.py | 4 +- .../src/airflow_breeze/utils/run_utils.py | 2 + .../utils/suspended_providers.py | 62 - .../src/airflow_breeze/utils/versions.py | 30 + dev/breeze/tests/test_packages.py | 159 ++- .../tests/test_provider_documentation.py | 308 +++++ dev/breeze/tests/test_versions.py | 28 + .../prepare_provider_packages.py | 887 +------------ ...agement_prepare-provider-documentation.svg | 58 +- ...agement_prepare-provider-documentation.txt | 2 +- scripts/ci/pre_commit/pre_commit_mypy.py | 4 +- .../run_prepare_provider_documentation.sh | 139 -- setup.py | 3 +- 39 files changed, 2512 insertions(+), 1283 deletions(-) create mode 100644 dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py rename dev/{provider_packages => breeze/src/airflow_breeze/templates}/CHANGELOG_TEMPLATE.rst.jinja2 (94%) rename dev/{provider_packages => breeze/src/airflow_breeze/templates}/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 (99%) rename dev/{provider_packages => breeze/src/airflow_breeze/templates}/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 (98%) rename 
dev/{provider_packages => breeze/src/airflow_breeze/templates}/PROVIDER_INDEX_TEMPLATE.rst.jinja2 (97%) create mode 100644 dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 rename dev/{provider_packages => breeze/src/airflow_breeze/templates}/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 (94%) rename dev/{provider_packages => breeze/src/airflow_breeze/templates}/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 (100%) create mode 100644 dev/breeze/src/airflow_breeze/utils/black_utils.py delete mode 100644 dev/breeze/src/airflow_breeze/utils/suspended_providers.py create mode 100644 dev/breeze/src/airflow_breeze/utils/versions.py create mode 100644 dev/breeze/tests/test_provider_documentation.py create mode 100644 dev/breeze/tests/test_versions.py delete mode 100755 scripts/in_container/run_prepare_provider_documentation.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 01cf502daa1e9..b840699fbad2c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -814,7 +814,7 @@ jobs: run: rm -fv ./dist/* - name: "Prepare provider documentation" run: > - breeze release-management prepare-provider-documentation + breeze release-management prepare-provider-documentation --non-interactive ${{ needs.build-info.outputs.affected-providers-list-as-string }} - name: "Prepare provider packages: wheel" run: > diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1a8b98f03f4cd..19e85aef8bd72 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1004,7 +1004,7 @@ repos: entry: ./scripts/ci/pre_commit/pre_commit_mypy.py files: ^dev/.*\.py$ require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] - id: mypy-core name: Run mypy for core language: python @@ -1012,7 +1012,7 @@ repos: files: \.py$ exclude: 
^.*/.*_vendor/|^airflow/migrations|^airflow/providers|^dev|^docs|^provider_packages|^tests/providers|^tests/system/providers|^tests/dags/test_imports.py require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] - id: mypy-providers name: Run mypy for providers language: python @@ -1020,7 +1020,7 @@ repos: files: ^airflow/providers/.*\.py$|^tests/providers/.*\.py$|^tests/system/providers/.*\.py$ exclude: ^.*/.*_vendor/ require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] - id: mypy-docs name: Run mypy for /docs/ folder language: python @@ -1028,7 +1028,7 @@ repos: files: ^docs/.*\.py$ exclude: ^docs/rtd-deprecation require_serial: true - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 'jsonschema'] - id: check-provider-yaml-valid name: Validate provider.yaml files entry: ./scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py diff --git a/BREEZE.rst b/BREEZE.rst index b9f4136337fd2..04eecace44397 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -2047,12 +2047,6 @@ The below example perform documentation preparation for provider packages. breeze release-management prepare-provider-documentation -By default, the documentation preparation runs package verification to check if all packages are -importable, but you can add ``--skip-package-verification`` to skip it. - -.. code-block:: bash - - breeze release-management prepare-provider-documentation --skip-package-verification You can also add ``--answer yes`` to perform non-interactive build. 
diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md index 9569c8db94866..62f26309a433b 100644 --- a/dev/README_RELEASE_PROVIDER_PACKAGES.md +++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md @@ -143,6 +143,10 @@ separately this command: breeze release-management prepare-provider-documentation qubole ``` +In case you want to **just** regenerate the documentation because you fixed something in the templates, add +`--reapply-templates-only` flag to the command above. If you want to just update min airflow version for all +packages, you can use `--only-min-version-update` flag - this will only update the min version in the +`__init__.py` files of the packages and package documentation without bumping the provider versions. This command will not only prepare documentation but will also help the release manager to review changes implemented in all providers, and determine which of the providers should be released. For each diff --git a/dev/breeze/README.md b/dev/breeze/README.md index a5d495bb1522b..1421958fa9aee 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. 
--------------------------------------------------------------------------------------------------------- -Package config hash: abef89e76b6c1cbfe37d4a083a9e75259d0169662c666c9e2549ca91ddf12d9f1274a4c7ab44e999619c0aaf9fdb56f299397e8c528fafbc94caf45f7cc70ad9 +Package config hash: 7b512fa3a81a967c22fc4ccccf052a4c4dbcafd5c014adea775d45f0034d03e1c63d7d1e3df723e93724924ed3cfa92a5848c994c247dfd326c0a6300e282f88 --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/setup.cfg b/dev/breeze/setup.cfg index 9c163b288e348..db53c94ebb23a 100644 --- a/dev/breeze/setup.cfg +++ b/dev/breeze/setup.cfg @@ -55,6 +55,7 @@ package_dir= =src packages = find: install_requires = + black>=23.11.0 click>=8.1.7 filelock>=3.13.0 inputimeout>=1.0.4 @@ -71,6 +72,8 @@ install_requires = rich>=13.6.0 rich-click>=1.7.1 gitpython>=3.1.40 + semver>=3.0.2 + tabulate>=0.9.0 twine>=4.0.2 wheel>=0.41.3 setuptools>=68.2.2 diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 6ea971447e0a4..408f9100f8644 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -26,6 +26,7 @@ from copy import deepcopy from datetime import datetime from pathlib import Path +from subprocess import DEVNULL from typing import IO, Generator, NamedTuple import click @@ -79,7 +80,7 @@ option_version_suffix_for_pypi, ) from airflow_breeze.utils.confirm import Answer, user_confirm -from airflow_breeze.utils.console import Output, get_console +from airflow_breeze.utils.console import MessageType, Output, get_console from airflow_breeze.utils.custom_param_types import BetterChoice from airflow_breeze.utils.docker_command_utils import ( check_remote_ghcr_io_commands, @@ -88,7 +89,14 @@ perform_environment_checks, ) from airflow_breeze.utils.github 
import download_constraints_file, get_active_airflow_versions -from airflow_breeze.utils.packages import convert_to_long_package_names, expand_all_provider_packages +from airflow_breeze.utils.packages import ( + expand_all_provider_packages, + find_matching_long_package_names, + get_available_packages, + get_provider_details, + get_provider_packages_metadata, + get_removed_provider_ids, +) from airflow_breeze.utils.parallel import ( GenericRegexpProgressMatcher, SummarizeAfter, @@ -115,8 +123,7 @@ run_command, run_compile_www_assets, ) -from airflow_breeze.utils.shared_options import get_dry_run, get_forced_answer, get_verbose -from airflow_breeze.utils.suspended_providers import get_removed_provider_ids +from airflow_breeze.utils.shared_options import get_dry_run, get_verbose option_debug_release_management = click.option( "--debug", @@ -219,11 +226,24 @@ def prepare_airflow_packages( sys.exit(result_command.returncode) +def provider_documentation_summary(documentation: str, message_type: MessageType, packages: list[str]): + if packages: + get_console().print(f"{documentation}: {len(packages)}\n") + get_console().print(f"[{message_type.value}]{' '.join(packages)}") + get_console().print() + + @release_management.command( name="prepare-provider-documentation", help="Prepare CHANGELOG, README and COMMITS information for providers.", ) -@option_debug_release_management +@click.option( + "--skip-git-fetch", + is_flag=True, + help="Skips removal and recreation of `apache-https-rof-providers` remote in git. 
By default, the " + "remote is recreated and fetched to make sure that it's up to date and that recent commits " + "are not missing", +) @click.option( "--base-branch", type=str, @@ -237,48 +257,137 @@ def prepare_airflow_packages( help="Only update minimum version in __init__.py files and regenerate corresponding documentation", ) @click.option( - "--regenerate-missing-docs", + "--reapply-templates-only", is_flag=True, - help="Only regenerate missing documentation, do not bump version. Useful if templates were added" + help="Only reapply templates, do not bump version. Useful if templates were added" " and you need to regenerate documentation.", ) +@click.option( + "--non-interactive", + is_flag=True, + help="Run in non-interactive mode. Provides random answers to the type of changes and confirms release" + "for providers prepared for release - useful to test the script in non-interactive mode in CI.", +) @argument_provider_packages @option_verbose @option_dry_run @option_answer def prepare_provider_documentation( github_repository: str, + skip_git_fetch: bool, base_branch: str, - debug: bool, - provider_packages: list[str], + provider_packages: tuple[str], only_min_version_update: bool, - regenerate_missing_docs: bool, + reapply_templates_only: bool, + non_interactive: bool, ): - perform_environment_checks() - check_remote_ghcr_io_commands() - cleanup_python_generated_files() - shell_params = ShellParams( - mount_sources=MOUNT_ALL, - github_repository=github_repository, - python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION, - base_branch=base_branch, - only_min_version_update=only_min_version_update, - regenerate_missing_docs=regenerate_missing_docs, - skip_environment_initialization=True, + from airflow_breeze.prepare_providers.provider_documentation import ( + PrepareReleaseDocsChangesOnlyException, + PrepareReleaseDocsErrorOccurredException, + PrepareReleaseDocsNoChangesException, + PrepareReleaseDocsUserQuitException, + PrepareReleaseDocsUserSkippedException, + 
make_sure_remote_apache_exists_and_fetch, + update_changelog, + update_min_airflow_version, + update_release_notes, ) - rebuild_or_pull_ci_image_if_needed(command_params=shell_params) - cmd_to_run = [ - "/opt/airflow/scripts/in_container/run_prepare_provider_documentation.sh", - *provider_packages, - ] - answer = get_forced_answer() - result_command = run_docker_command_with_debug( - params=shell_params, - command=cmd_to_run, - enable_input=answer is None or answer[0].lower() != "y", - debug=debug, + + cleanup_python_generated_files() + if not provider_packages: + provider_packages = get_available_packages() + + if not skip_git_fetch: + run_command(["git", "remote", "rm", "apache-https-for-providers"], check=False, stderr=DEVNULL) + make_sure_remote_apache_exists_and_fetch(github_repository=github_repository) + provider_packages_metadata = get_provider_packages_metadata() + no_changes_packages = [] + doc_only_packages = [] + error_packages = [] + user_skipped_packages = [] + success_packages = [] + suspended_packages = [] + removed_packages = [] + for provider_package_id in provider_packages: + provider_metadata = provider_packages_metadata.get(provider_package_id) + if not provider_metadata: + get_console().print( + f"[error]The package {provider_package_id} is not a provider package. 
Exiting[/]" + ) + sys.exit(1) + if provider_metadata.get("removed", False): + get_console().print( + f"[warning]The package: {provider_package_id} is scheduled for removal, but " + f"since you asked for it, it will be built [/]\n" + ) + elif provider_metadata.get("suspended"): + get_console().print( + f"[warning]The package: {provider_package_id} is suspended " f"skipping it [/]\n" + ) + suspended_packages.append(provider_package_id) + continue + if os.environ.get("GITHUB_ACTIONS", "false") != "true": + get_console().print("-" * get_console().width) + try: + with_breaking_changes = False + maybe_with_new_features = False + with ci_group(f"Update release notes for package '{provider_package_id}' "): + get_console().print("Updating documentation for the latest release version.") + if not only_min_version_update: + with_breaking_changes, maybe_with_new_features = update_release_notes( + provider_package_id, + reapply_templates_only=reapply_templates_only, + base_branch=base_branch, + regenerate_missing_docs=reapply_templates_only, + non_interactive=non_interactive, + ) + update_min_airflow_version( + provider_package_id=provider_package_id, + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + with ci_group(f"Updates changelog for last release of package '{provider_package_id}'"): + update_changelog( + package_id=provider_package_id, + base_branch=base_branch, + reapply_templates_only=reapply_templates_only, + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + except PrepareReleaseDocsNoChangesException: + no_changes_packages.append(provider_package_id) + except PrepareReleaseDocsChangesOnlyException: + doc_only_packages.append(provider_package_id) + except PrepareReleaseDocsErrorOccurredException: + error_packages.append(provider_package_id) + except PrepareReleaseDocsUserSkippedException: + user_skipped_packages.append(provider_package_id) + except 
PrepareReleaseDocsUserQuitException: + break + else: + if provider_metadata.get("removed"): + removed_packages.append(provider_package_id) + else: + success_packages.append(provider_package_id) + get_console().print() + get_console().print("\n[info]Summary of prepared packages:\n") + provider_documentation_summary("Success", MessageType.SUCCESS, success_packages) + provider_documentation_summary("Scheduled for removal", MessageType.SUCCESS, removed_packages) + provider_documentation_summary("Docs only", MessageType.SUCCESS, doc_only_packages) + provider_documentation_summary("Skipped on no changes", MessageType.WARNING, no_changes_packages) + provider_documentation_summary("Suspended", MessageType.WARNING, suspended_packages) + provider_documentation_summary("Skipped by user", MessageType.SPECIAL, user_skipped_packages) + provider_documentation_summary("Errors", MessageType.ERROR, error_packages) + if error_packages: + get_console().print("\n[errors]There were errors when generating packages. 
Exiting!\n") + sys.exit(1) + if not success_packages and not doc_only_packages and not removed_packages: + get_console().print("\n[warning]No packages prepared!\n") + sys.exit(0) + get_console().print("\n[success]Successfully prepared documentation for packages!\n\n") + get_console().print( + "\n[info]Please review the updated files, classify " "the changelog entries and commit the changes.\n" ) - sys.exit(result_command.returncode) @release_management.command( @@ -869,8 +978,8 @@ def publish_docs( "Provide the path of cloned airflow-site repo\n" ) - current_packages = convert_to_long_package_names( - package_filters=package_filter, packages_short_form=expand_all_provider_packages(doc_packages) + current_packages = find_matching_long_package_names( + short_packages=expand_all_provider_packages(doc_packages), filters=package_filter ) print(f"Publishing docs for {len(current_packages)} package(s)") for pkg in current_packages: @@ -1112,46 +1221,34 @@ def is_package_in_dist(dist_files: list[str], package: str) -> bool: ) -def get_prs_for_package(package_id: str) -> list[int]: - import yaml - +def get_prs_for_package(provider_id: str) -> list[int]: pr_matcher = re.compile(r".*\(#([0-9]*)\)``$") - changelog_path = ( - AIRFLOW_SOURCES_ROOT / "airflow" / "providers" / package_id.replace(".", os.sep) / "CHANGELOG.rst" - ) - # load yaml from file - provider_yaml_dict = yaml.safe_load( - ( - AIRFLOW_SOURCES_ROOT - / "airflow" - / "providers" - / package_id.replace(r".", os.sep) - / "provider.yaml" - ).read_text() - ) - current_release_version = provider_yaml_dict["versions"][0] prs = [] - with open(changelog_path) as changelog_file: - changelog_lines = changelog_file.readlines() - extract_prs = False - skip_line = False - for line in changelog_lines: - if skip_line: - # Skip first "....." header - skip_line = False - elif line.strip() == current_release_version: - extract_prs = True - skip_line = True - elif extract_prs: - if len(line) > 1 and all(c == "." 
for c in line.strip()): - # Header for next version reached - break - if line.startswith(".. Below changes are excluded from the changelog"): - # The reminder of PRs is not important skipping it - break - match_result = pr_matcher.match(line.strip()) - if match_result: - prs.append(int(match_result.group(1))) + provider_yaml_dict = get_provider_packages_metadata().get(provider_id) + if not provider_yaml_dict: + raise RuntimeError(f"The provider id {provider_id} does not have provider.yaml file") + current_release_version = provider_yaml_dict["versions"][0] + provider_details = get_provider_details(provider_id) + changelog_lines = provider_details.changelog_path.read_text().splitlines() + extract_prs = False + skip_line = False + for line in changelog_lines: + if skip_line: + # Skip first "....." header + skip_line = False + elif line.strip() == current_release_version: + extract_prs = True + skip_line = True + elif extract_prs: + if len(line) > 1 and all(c == "." for c in line.strip()): + # Header for next version reached + break + if line.startswith(".. 
Below changes are excluded from the changelog"): + # The reminder of PRs is not important skipping it + break + match_result = pr_matcher.match(line.strip()) + if match_result: + prs.append(int(match_result.group(1))) return prs @@ -1208,18 +1305,18 @@ class ProviderPRInfo(NamedTuple): if only_available_in_dist: files_in_dist = os.listdir(str(AIRFLOW_SOURCES_ROOT / "dist")) prepared_package_ids = [] - for package_id in provider_packages: - if not only_available_in_dist or is_package_in_dist(files_in_dist, package_id): - get_console().print(f"Extracting PRs for provider {package_id}") - prepared_package_ids.append(package_id) + for provider_id in provider_packages: + if not only_available_in_dist or is_package_in_dist(files_in_dist, provider_id): + get_console().print(f"Extracting PRs for provider {provider_id}") + prepared_package_ids.append(provider_id) else: get_console().print( - f"Skipping extracting PRs for provider {package_id} as it is missing in dist" + f"Skipping extracting PRs for provider {provider_id} as it is missing in dist" ) continue - prs = get_prs_for_package(package_id) - provider_prs[package_id] = [pr for pr in prs if pr not in excluded_prs] - all_prs.update(provider_prs[package_id]) + prs = get_prs_for_package(provider_id) + provider_prs[provider_id] = [pr for pr in prs if pr not in excluded_prs] + all_prs.update(provider_prs[provider_id]) g = Github(github_token) repo = g.get_repo("apache/airflow") pull_requests: dict[int, PullRequest.PullRequest | Issue.Issue] = {} @@ -1239,21 +1336,21 @@ class ProviderPRInfo(NamedTuple): get_console().print(f"[red]The PR #{pr_number} could not be found[/]") progress.advance(task) providers: dict[str, ProviderPRInfo] = {} - for package_id in prepared_package_ids: - pull_request_list = [pull_requests[pr] for pr in provider_prs[package_id] if pr in pull_requests] + for provider_id in prepared_package_ids: + pull_request_list = [pull_requests[pr] for pr in provider_prs[provider_id] if pr in pull_requests] 
provider_yaml_dict = yaml.safe_load( ( AIRFLOW_SOURCES_ROOT / "airflow" / "providers" - / package_id.replace(".", os.sep) + / provider_id.replace(".", os.sep) / "provider.yaml" ).read_text() ) if pull_request_list: - providers[package_id] = ProviderPRInfo( + providers[provider_id] = ProviderPRInfo( version=provider_yaml_dict["versions"][0], - provider_package_id=package_id, + provider_package_id=provider_id, pypi_package_name=provider_yaml_dict["package-name"], pr_list=pull_request_list, ) diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py index a9ff84f5d6a9a..ad658d228ce62 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py @@ -117,11 +117,12 @@ { "name": "Provider documentation preparation flags", "options": [ - "--debug", "--github-repository", + "--skip-git-fetch", "--base-branch", "--only-min-version-update", - "--regenerate-missing-docs", + "--reapply-templates-only", + "--non-interactive", ], } ], diff --git a/dev/breeze/src/airflow_breeze/commands/testing_commands.py b/dev/breeze/src/airflow_breeze/commands/testing_commands.py index efbfa3667c342..66ffb593c356e 100644 --- a/dev/breeze/src/airflow_breeze/commands/testing_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/testing_commands.py @@ -74,6 +74,7 @@ perform_environment_checks, remove_docker_networks, ) +from airflow_breeze.utils.packages import get_suspended_provider_folders from airflow_breeze.utils.parallel import ( GenericRegexpProgressMatcher, SummarizeAfter, @@ -87,7 +88,6 @@ run_docker_compose_tests, ) from airflow_breeze.utils.run_utils import get_filesystem_type, run_command -from airflow_breeze.utils.suspended_providers import get_suspended_providers_folders LOW_MEMORY_CONDITION = 8 * 1024 * 1024 * 1024 @@ -161,7 +161,7 @@ def _run_test( 
env_variables["TEST_TYPE"] = exec_shell_params.test_type env_variables["COLLECT_ONLY"] = str(exec_shell_params.collect_only).lower() env_variables["REMOVE_ARM_PACKAGES"] = str(exec_shell_params.remove_arm_packages).lower() - env_variables["SUSPENDED_PROVIDERS_FOLDERS"] = " ".join(get_suspended_providers_folders()).strip() + env_variables["SUSPENDED_PROVIDERS_FOLDERS"] = " ".join(get_suspended_provider_folders()).strip() if "[" in exec_shell_params.test_type and not exec_shell_params.test_type.startswith("Providers"): get_console(output=output).print( "[error]Only 'Providers' test type can specify actual tests with \\[\\][/]" diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index a4ae86a7396b6..1ea794071287d 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -361,6 +361,7 @@ def get_airflow_extras(): "trino", ] ALL_PROVIDER_YAML_FILES = Path(AIRFLOW_SOURCES_ROOT, "airflow", "providers").rglob("provider.yaml") +PROVIDER_RUNTIME_DATA_SCHEMA_PATH = AIRFLOW_SOURCES_ROOT / "airflow" / "provider_info.schema.json" with Path(AIRFLOW_SOURCES_ROOT, "generated", "provider_dependencies.json").open() as f: PROVIDER_DEPENDENCIES = json.load(f) diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py new file mode 100644 index 0000000000000..fc35eacc06788 --- /dev/null +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py @@ -0,0 +1,1152 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import difflib +import os +import random +import re +import subprocess +import sys +import tempfile +from copy import deepcopy +from enum import Enum +from pathlib import Path +from shutil import copyfile +from typing import Any, Iterable, NamedTuple + +import jinja2 +import semver +from rich.syntax import Syntax + +from airflow_breeze.global_constants import PROVIDER_DEPENDENCIES +from airflow_breeze.utils.black_utils import black_format +from airflow_breeze.utils.confirm import Answer, user_confirm +from airflow_breeze.utils.console import get_console +from airflow_breeze.utils.packages import ( + ProviderPackageDetails, + get_provider_details, + get_provider_jinja_context, + get_provider_packages_metadata, + get_provider_requirements, + get_removed_provider_ids, + get_source_package_path, +) +from airflow_breeze.utils.path_utils import ( + BREEZE_SOURCES_ROOT, +) +from airflow_breeze.utils.run_utils import run_command +from airflow_breeze.utils.shared_options import get_verbose + +HTTPS_REMOTE = "apache-https-for-providers" + +PR_PATTERN = re.compile(r".*\(#(\d+)\)") + +AUTOMATICALLY_GENERATED_MARKER = "AUTOMATICALLY GENERATED" +AUTOMATICALLY_GENERATED_CONTENT = ( + f".. THE REMAINDER OF THE FILE IS {AUTOMATICALLY_GENERATED_MARKER}. " + f"IT WILL BE OVERWRITTEN AT RELEASE TIME!" +) + +# Taken from pygrep hooks we are using in pre-commit +# https://github.com/pre-commit/pygrep-hooks/blob/main/.pre-commit-hooks.yaml +BACKTICKS_CHECK = re.compile(r"^(?! 
{4}).*(^| )`[^`]+`([^_]|$)", re.MULTILINE) + + +INITIAL_CHANGELOG_CONTENT = """ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. NOTE TO CONTRIBUTORS: + Please, only add notes to the Changelog just below the "Changelog" header when there + are some breaking changes and you want to add an explanation to the users on how they are supposed + to deal with them. The changelog is updated and maintained semi-automatically by release manager. + +``{{ package_name }}`` + +Changelog +--------- + +1.0.0 +..... + +Initial version of the provider. 
+""" + + +class Change(NamedTuple): + """Stores details about commits""" + + full_hash: str + short_hash: str + date: str + version: str + message: str + message_without_backticks: str + pr: str | None + + +class TypeOfChange(Enum): + DOCUMENTATION = "d" + BUGFIX = "b" + FEATURE = "f" + BREAKING_CHANGE = "x" + SKIP = "s" + + +class ClassifiedChanges: + """Stores lists of changes classified automatically""" + + def __init__(self): + self.fixes: list[Change] = [] + self.features: list[Change] = [] + self.breaking_changes: list[Change] = [] + self.other: list[Change] = [] + + +class PrepareReleaseDocsChangesOnlyException(Exception): + """Raised when package has only documentation changes.""" + + +class PrepareReleaseDocsNoChangesException(Exception): + """Raised when package has no changes.""" + + +class PrepareReleaseDocsErrorOccurredException(Exception): + """Raised when error occurred when preparing packages changes.""" + + +class PrepareReleaseDocsUserSkippedException(Exception): + """Raised when user skipped package.""" + + +class PrepareReleaseDocsUserQuitException(Exception): + """Raised when user decided to quit.""" + + +TYPE_OF_CHANGE_DESCRIPTION = { + TypeOfChange.DOCUMENTATION: "Documentation only changes - no version change needed, " + "only documentation needs to be updated", + TypeOfChange.BUGFIX: "Bugfix/Misc changes only - bump in PATCHLEVEL version needed", + TypeOfChange.FEATURE: "Feature changes - bump in MINOR version needed", + TypeOfChange.BREAKING_CHANGE: "Breaking changes - bump in MAJOR version needed", +} + + +def make_sure_remote_apache_exists_and_fetch(github_repository: str = "apache/airflow"): + """Make sure that apache remote exist in git. + + We need to take a log from the apache repository main branch - not locally because we might + not have the latest version. Also, the local repo might be shallow, so we need to + un-shallow it to see all the history. 
+ + This will: + * check if the remote exists and add if it does not + * check if the local repo is shallow, mark it to un-shallow in this case + * fetch from the remote including all tags and overriding local tags in case + they are set differently + + """ + try: + run_command(["git", "remote", "get-url", HTTPS_REMOTE], text=True, capture_output=True) + except subprocess.CalledProcessError as ex: + if ex.returncode == 128 or ex.returncode == 2: + run_command( + [ + "git", + "remote", + "add", + HTTPS_REMOTE, + f"https://github.com/{github_repository}.git", + ], + check=True, + ) + else: + get_console().print( + f"[error]Error {ex}[/]\n" f"[error]When checking if {HTTPS_REMOTE} is set.[/]\n\n" + ) + sys.exit(1) + get_console().print("[info]Fetching full history and tags from remote.") + get_console().print("[info]This might override your local tags!") + result = run_command( + ["git", "rev-parse", "--is-shallow-repository"], + check=True, + capture_output=True, + text=True, + ) + is_shallow_repo = result.stdout.strip() == "true" + fetch_command = ["git", "fetch", "--tags", "--force", HTTPS_REMOTE] + if is_shallow_repo: + fetch_command.append("--unshallow") + try: + run_command(fetch_command) + except subprocess.CalledProcessError as e: + get_console().print( + f"[error]Error {e}[/]\n" + f"[error]When fetching tags from remote. Your tags might not be refreshed.[/]\n\n" + f'[warning]Please refresh the tags manually via:[/]\n\n"' + f'{" ".join(fetch_command)}\n\n' + ) + sys.exit(1) + + +def _get_version_tag(version: str, provider_package_id: str, version_suffix: str = ""): + if version_suffix is None: + version_suffix = "" + return f"providers-{provider_package_id.replace('.','-')}/{version}{version_suffix}" + + +def _get_git_log_command(from_commit: str | None = None, to_commit: str | None = None) -> list[str]: + """Get git command to run for the current repo from the current folder. + + The current directory should always be the package folder. 
+ + :param from_commit: if present - base commit from which to start the log from + :param to_commit: if present - final commit which should be the start of the log + :return: git command to run + """ + git_cmd = [ + "git", + "log", + "--pretty=format:%H %h %cd %s", + "--date=short", + ] + if from_commit and to_commit: + git_cmd.append(f"{from_commit}...{to_commit}") + elif from_commit: + git_cmd.append(from_commit) + elif to_commit: + raise ValueError("It makes no sense to specify to_commit without from_commit.") + git_cmd.extend(["--", "."]) + return git_cmd + + +def _get_change_from_line(line: str, version: str) -> Change: + split_line = line.split(" ", maxsplit=3) + message = split_line[3] + pr = None + pr_match = PR_PATTERN.match(message) + if pr_match: + pr = pr_match.group(1) + return Change( + full_hash=split_line[0], + short_hash=split_line[1], + date=split_line[2], + version=version, + message=message, + message_without_backticks=message.replace("`", "'").replace("&39;", "'"), + pr=pr, + ) + + +def _convert_git_changes_to_table( + version: str, changes: str, base_url: str, markdown: bool = True +) -> tuple[str, list[Change]]: + """ + Converts list of changes from its string form to markdown/RST table and array of change information + + The changes are in the form of multiple lines where each line consists of: + FULL_COMMIT_HASH SHORT_COMMIT_HASH COMMIT_DATE COMMIT_SUBJECT + + The subject can contain spaces but one of the preceding values can, so we can make split + 3 times on spaces to break it up. 
+ :param version: Version from which the changes are + :param changes: list of changes in a form of multiple-line string + :param base_url: base url for the commit URL + :param markdown: if True, Markdown format is used else rst + :return: formatted table + list of changes (starting from the latest) + """ + from tabulate import tabulate + + lines = changes.splitlines() + headers = ["Commit", "Committed", "Subject"] + table_data = [] + changes_list: list[Change] = [] + for line in lines: + if line == "": + continue + change = _get_change_from_line(line, version) + table_data.append( + ( + f"[{change.short_hash}]({base_url}{change.full_hash})" + if markdown + else f"`{change.short_hash} <{base_url}{change.full_hash}>`_", + change.date, + f"`{change.message_without_backticks}`" + if markdown + else f"``{change.message_without_backticks}``", + ) + ) + changes_list.append(change) + header = "" + if not table_data: + return header, [] + table = tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") + if not markdown: + header += f"\n\n{version}\n" + "." * len(version) + "\n\n" + release_date = table_data[0][1] + header += f"Latest change: {release_date}\n\n" + return header + table, changes_list + + +def _print_changes_table(changes_table): + syntax = Syntax(changes_table, "rst", theme="ansi_dark") + get_console().print(syntax) + + +def _get_all_changes_for_package( + provider_package_id: str, + base_branch: str, + reapply_templates_only: bool, +) -> tuple[bool, list[list[Change]], str]: + """Retrieves all changes for the package. 
+ + :param provider_package_id: provider package id + :param base_branch: base branch to check changes in apache remote for changes + :param reapply_templates_only: whether to only reapply templates without bumping the version + :return tuple of: + bool (whether to proceed with update) + list of lists of changes for all past versions (might be empty) + the same list converted to string RST table + """ + provider_details = get_provider_details(provider_package_id) + current_version = provider_details.versions[0] + current_tag_no_suffix = _get_version_tag(current_version, provider_package_id) + if get_verbose(): + get_console().print(f"[info]Checking if tag '{current_tag_no_suffix}' exist.") + result = run_command( + ["git", "rev-parse", current_tag_no_suffix], + cwd=provider_details.source_provider_package_path, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + if not reapply_templates_only and result.returncode == 0: + if get_verbose(): + get_console().print(f"[info]The tag {current_tag_no_suffix} exists.") + # The tag already exists + result = run_command( + _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix), + cwd=provider_details.source_provider_package_path, + capture_output=True, + text=True, + check=True, + ) + changes = result.stdout.strip() + if changes: + provider_details = get_provider_details(provider_package_id) + doc_only_change_file = ( + provider_details.source_provider_package_path / ".latest-doc-only-change.txt" + ) + if doc_only_change_file.exists(): + last_doc_only_hash = doc_only_change_file.read_text().strip() + try: + result = run_command( + _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash), + cwd=provider_details.source_provider_package_path, + capture_output=True, + text=True, + check=True, + ) + changes_since_last_doc_only_check = result.stdout.strip() + if not changes_since_last_doc_only_check: + get_console().print( + "\n[warning]The provider has doc-only 
changes since the last release. Skipping[/]" + ) + raise PrepareReleaseDocsChangesOnlyException() + if len(changes.splitlines()) > len(changes_since_last_doc_only_check.splitlines()): + # if doc-only was released after previous release - use it as starting point + # but if before - stay with the releases from last tag. + changes = changes_since_last_doc_only_check + except subprocess.CalledProcessError: + # ignore when the commit mentioned as last doc-only change is obsolete + pass + get_console().print( + f"[warning]The provider {provider_package_id} has {len(changes.splitlines())} " + f"changes since last release[/]" + ) + get_console().print(f"\n[info]Provider: {provider_package_id}[/]\n") + changes_table, array_of_changes = _convert_git_changes_to_table( + f"NEXT VERSION AFTER + {provider_details.versions[0]}", + changes, + base_url="https://github.com/apache/airflow/commit/", + markdown=False, + ) + _print_changes_table(changes_table) + return False, [array_of_changes], changes_table + else: + get_console().print(f"[info]No changes for {provider_package_id}") + return False, [], "" + if len(provider_details.versions) == 1: + get_console().print( + f"[info]The provider '{provider_package_id}' has never " + f"been released but it is ready to release!\n" + ) + else: + get_console().print( + f"[info]New version of the '{provider_package_id}' " f"package is ready to be released!\n" + ) + next_version_tag = f"{HTTPS_REMOTE}/{base_branch}" + changes_table = "" + current_version = provider_details.versions[0] + list_of_list_of_changes: list[list[Change]] = [] + for version in provider_details.versions[1:]: + version_tag = _get_version_tag(version, provider_package_id) + result = run_command( + _get_git_log_command(next_version_tag, version_tag), + cwd=provider_details.source_provider_package_path, + capture_output=True, + text=True, + check=True, + ) + changes = result.stdout.strip() + changes_table_for_version, array_of_changes_for_version = 
_convert_git_changes_to_table( + current_version, changes, base_url="https://github.com/apache/airflow/commit/", markdown=False + ) + changes_table += changes_table_for_version + list_of_list_of_changes.append(array_of_changes_for_version) + next_version_tag = version_tag + current_version = version + result = run_command( + _get_git_log_command(next_version_tag), + cwd=provider_details.source_provider_package_path, + capture_output=True, + text=True, + check=True, + ) + changes = result.stdout.strip() + changes_table_for_version, array_of_changes_for_version = _convert_git_changes_to_table( + current_version, changes, base_url="https://github.com/apache/airflow/commit/", markdown=False + ) + changes_table += changes_table_for_version + return True, list_of_list_of_changes, changes_table + + +def _ask_the_user_for_the_type_of_changes(non_interactive: bool) -> TypeOfChange: + """Ask user to specify type of changes (case-insensitive). + + :return: Type of change. + """ + # have to do that while waiting for Python 3.11+ StrEnum [*TypeOfChange] :( + type_of_changes_array = [t.value for t in TypeOfChange] + if non_interactive: + # Simulate all possible non-terminal answers - this is useful for running on CI where we want to + # Test all possibilities. + return TypeOfChange(random.choice(type_of_changes_array)) + display_answers = "/".join(type_of_changes_array) + "/q" + while True: + get_console().print( + "[warning]Type of change (d)ocumentation, (b)ugfix, (f)eature, (x)breaking " + f"change, (s)kip, (q)uit [{display_answers}]?[/] ", + end="", + ) + try: + given_answer = input("").lower() + except KeyboardInterrupt: + raise PrepareReleaseDocsUserQuitException() + if given_answer == "q": + raise PrepareReleaseDocsUserQuitException() + if given_answer in type_of_changes_array: + return TypeOfChange(given_answer) + get_console().print( + f"[warning] Wrong answer given: '{given_answer}'. 
" f"Should be one of {display_answers}" + ) + + +def _mark_latest_changes_as_documentation_only( + provider_package_id: str, list_of_list_of_latest_changes: list[list[Change]] +): + latest_change = list_of_list_of_latest_changes[0][0] + provider_details = get_provider_details(provider_id=provider_package_id) + get_console().print( + f"[special]Marking last change: {latest_change.short_hash} and all above " + f"changes since the last release as doc-only changes!" + ) + (provider_details.source_provider_package_path / ".latest-doc-only-change.txt").write_text( + latest_change.full_hash + "\n" + ) + raise PrepareReleaseDocsChangesOnlyException() + + +def _update_version_in_provider_yaml( + provider_package_id: str, + type_of_change: TypeOfChange, +) -> tuple[bool, bool]: + """ + Updates provider version based on the type of change selected by the user + :param type_of_change: type of change selected + :param provider_package_id: provider package + :return: tuple of two bools: (with_breaking_change, maybe_with_new_features) + """ + provider_details = get_provider_details(provider_package_id) + version = provider_details.versions[0] + v = semver.VersionInfo.parse(version) + with_breaking_changes = False + maybe_with_new_features = False + if type_of_change == TypeOfChange.BREAKING_CHANGE: + v = v.bump_major() + with_breaking_changes = True + # we do not know, but breaking changes may also contain new features + maybe_with_new_features = True + elif type_of_change == TypeOfChange.FEATURE: + v = v.bump_minor() + maybe_with_new_features = True + elif type_of_change == TypeOfChange.BUGFIX: + v = v.bump_patch() + provider_yaml_path = get_source_package_path(provider_package_id) / "provider.yaml" + original_text = provider_yaml_path.read_text() + new_text = re.sub(r"versions:", f"versions:\n - {v}", original_text, 1) + provider_yaml_path.write_text(new_text) + # IMPORTANT!!! 
Whenever we update provider.yaml files, we MUST clear cache for + # get_provider_packages_metadata function, because otherwise anything next will not use it + get_provider_packages_metadata.cache_clear() + get_console().print(f"[special]Bumped version to {v}\n") + return with_breaking_changes, maybe_with_new_features + + +def _verify_changelog_exists(package: str) -> Path: + provider_details = get_provider_details(package) + changelog_path = Path(provider_details.source_provider_package_path) / "CHANGELOG.rst" + if not os.path.isfile(changelog_path): + get_console().print(f"\n[error]ERROR: Missing {changelog_path}[/]\n") + get_console().print("[info]Please add the file with initial content:") + get_console().print("----- START COPYING AFTER THIS LINE ------- ") + processed_changelog = jinja2.Template(INITIAL_CHANGELOG_CONTENT, autoescape=True).render( + package_name=provider_details.pypi_package_name, + ) + syntax = Syntax( + processed_changelog, + "rst", + theme="ansi_dark", + ) + get_console().print(syntax) + get_console().print("----- END COPYING BEFORE THIS LINE ------- ") + sys.exit(1) + return changelog_path + + +def _convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool = True) -> str: + """ + Converts PIP requirement list to a Markdown table. 
+ :param requirements: requirements list + :param markdown: if True, Markdown format is used else rst + :return: formatted table + """ + from tabulate import tabulate + + headers = ["PIP package", "Version required"] + table_data = [] + for dependency in requirements: + found = re.match(r"(^[^<=>~]*)([^<=>~]?.*)$", dependency) + if found: + package = found.group(1) + version_required = found.group(2) + if version_required != "": + version_required = f"`{version_required}`" if markdown else f"``{version_required}``" + table_data.append((f"`{package}`" if markdown else f"``{package}``", version_required)) + else: + table_data.append((dependency, "")) + return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") + + +def _convert_cross_package_dependencies_to_table( + cross_package_dependencies: list[str], + markdown: bool = True, +) -> str: + """ + Converts cross-package dependencies to a Markdown table + :param cross_package_dependencies: list of cross-package dependencies + :param markdown: if True, Markdown format is used else rst + :return: formatted table + """ + from tabulate import tabulate + + headers = ["Dependent package", "Extra"] + table_data = [] + prefix = "apache-airflow-providers-" + base_url = "https://airflow.apache.org/docs/" + for dependency in cross_package_dependencies: + pip_package_name = f"{prefix}{dependency.replace('.','-')}" + url_suffix = f"{dependency.replace('.','-')}" + if markdown: + url = f"[{pip_package_name}]({base_url}{url_suffix})" + else: + url = f"`{pip_package_name} <{base_url}{prefix}{url_suffix}>`_" + table_data.append((url, f"`{dependency}`" if markdown else f"``{dependency}``")) + return tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") + + +def _get_cross_provider_dependent_packages(provider_package_id: str) -> list[str]: + if provider_package_id in get_removed_provider_ids(): + return [] + return PROVIDER_DEPENDENCIES[provider_package_id]["cross-providers-deps"] + + 
+def _get_additional_package_info(provider_package_path: Path) -> str: + """Returns additional info for the package. + + :param provider_package_path: path for the package + :return: additional information for the path (empty string if missing) + """ + additional_info_file_path = provider_package_path / "ADDITIONAL_INFO.md" + if additional_info_file_path.is_file(): + additional_info = additional_info_file_path.read_text() + additional_info_lines = additional_info.splitlines(keepends=True) + result = "" + skip_comment = True + for line in additional_info_lines: + if line.startswith(" -->"): + skip_comment = False + elif not skip_comment: + result += line + return result + return "" + + +def render_template( + template_name: str, + context: dict[str, Any], + extension: str, + autoescape: bool = True, + keep_trailing_newline: bool = False, +) -> str: + """ + Renders template based on its name. Reads the template from _TEMPLATE.md.jinja2 in current dir. + :param template_name: name of the template to use + :param context: Jinja2 context + :param extension: Target file extension + :param autoescape: Whether to autoescape HTML + :param keep_trailing_newline: Whether to keep the newline in rendered output + :return: rendered template + """ + import jinja2 + + template_loader = jinja2.FileSystemLoader( + searchpath=BREEZE_SOURCES_ROOT / "src" / "airflow_breeze" / "templates" + ) + template_env = jinja2.Environment( + loader=template_loader, + undefined=jinja2.StrictUndefined, + autoescape=autoescape, + keep_trailing_newline=keep_trailing_newline, + ) + template = template_env.get_template(f"{template_name}_TEMPLATE{extension}.jinja2") + content: str = template.render(context) + return content + + +def replace_content(file_path: Path, old_text: str, new_text: str, provider_id: str): + if new_text != old_text: + _, temp_file_path = tempfile.mkstemp() + try: + if file_path.is_file(): + copyfile(file_path, temp_file_path) + file_path.write_text(new_text) + 
get_console().print(f"\n[info]Generated {file_path} file for the {provider_id} provider\n") + if old_text != "": + run_command(["diff", "--color=always", temp_file_path, file_path.as_posix()], check=False) + finally: + os.unlink(temp_file_path) + + +def _update_file( + context: dict[str, Any], + template_name: str, + extension: str, + file_name: str, + provider_package_id: str, + target_path: Path, + regenerate_missing_docs: bool, +) -> None: + target_file_path = target_path / file_name + if regenerate_missing_docs and target_file_path.exists(): + if get_verbose(): + get_console().print( + f"[warnings]The {target_file_path} exists - not regenerating it " + f"for the provider {provider_package_id}[/]" + ) + return + new_text = render_template( + template_name=template_name, context=context, extension=extension, keep_trailing_newline=True + ) + target_file_path = target_path / file_name + old_text = "" + if target_file_path.is_file(): + old_text = target_file_path.read_text() + replace_content(target_file_path, old_text, new_text, provider_package_id) + index_path = target_path / "index.rst" + if not index_path.exists(): + get_console().print(f"[error]ERROR! The index must exist for the provider docs: {index_path}") + raise PrepareReleaseDocsErrorOccurredException() + + expected_link_in_index = f"<{file_name.split('.')[0]}>" + if expected_link_in_index not in index_path.read_text(): + get_console().print( + f"\n[error]ERROR! 
The {index_path} must contain " + f"link to the generated documentation:[/]\n\n" + f"[warning]{expected_link_in_index}[/]\n\n" + f"[info]Please make sure to add it to {index_path}.\n" + ) + + get_console().print(f"[info]Checking for backticks correctly generated in: {target_file_path}") + match = BACKTICKS_CHECK.search(target_file_path.read_text()) + if match: + get_console().print( + f"\n[error]ERROR: Single backticks (`) found in {target_file_path}:[/]\n\n" + f"[warning]{match.group(0)}[/]\n\n" + f"[info]Please fix them by replacing with double backticks (``).[/]\n" + ) + raise PrepareReleaseDocsErrorOccurredException() + + # TODO: uncomment me. Linting revealed that our already generated provider docs have duplicate links + # in the generated files, we should fix those and uncomment linting as separate step - so that + # we do not hold current release for fixing the docs. + # console.print(f"Linting: {file_path}") + # errors = restructuredtext_lint.lint_file(file_path) + # real_errors = False + # if errors: + # for error in errors: + # # Skip known issue: linter with doc role similar to https://github.com/OCA/pylint-odoo/issues/38 + # if ( + # 'No role entry for "doc"' in error.message + # or 'Unknown interpreted text role "doc"' in error.message + # ): + # continue + # real_errors = True + # console.print(f"* [red] {error.message}") + # if real_errors: + # console.print(f"\n[red] Errors found in {file_path}") + # raise PrepareReleaseDocsErrorOccurredException() + + get_console().print(f"[success]Generated {target_file_path} for {provider_package_id} is OK[/]") + return + + +def _update_changelog_rst( + context: dict[str, Any], + provider_package_id: str, + target_path: Path, + regenerate_missing_docs: bool, +) -> None: + _update_file( + context=context, + template_name="PROVIDER_CHANGELOG", + extension=".rst", + file_name="changelog.rst", + provider_package_id=provider_package_id, + target_path=target_path, + regenerate_missing_docs=regenerate_missing_docs, + ) 
+ + +def _update_commits_rst( + context: dict[str, Any], + provider_package_id: str, + target_path: Path, + regenerate_missing_docs: bool, +) -> None: + _update_file( + context=context, + template_name="PROVIDER_COMMITS", + extension=".rst", + file_name="commits.rst", + provider_package_id=provider_package_id, + target_path=target_path, + regenerate_missing_docs=regenerate_missing_docs, + ) + + +def update_release_notes( + provider_package_id: str, + reapply_templates_only: bool, + base_branch: str, + regenerate_missing_docs: bool, + non_interactive: bool, +) -> tuple[bool, bool]: + """Updates generated files. + + This includes the readme, changes, and/or setup.cfg/setup.py/manifest.in/provider_info. + + :param provider_package_id: id of the package + :param reapply_templates_only: regenerate already released documentation only - without updating versions + :param base_branch: base branch to check changes in apache remote for changes + :param regenerate_missing_docs: whether to regenerate missing docs + :param non_interactive: run in non-interactive mode (useful for CI) + :return: tuple of two bools: (with_breaking_change, maybe_with_new_features) + """ + proceed, list_of_list_of_changes, changes_as_table = _get_all_changes_for_package( + provider_package_id=provider_package_id, + base_branch=base_branch, + reapply_templates_only=reapply_templates_only, + ) + with_breaking_changes = False + maybe_with_new_features = False + if not reapply_templates_only: + if proceed: + if non_interactive: + answer = Answer.YES + else: + answer = user_confirm(f"Provider {provider_package_id} marked for release. 
Proceed?") + if answer == Answer.NO: + get_console().print( + f"\n[warning]Skipping provider: {provider_package_id} " f"on user request![/]\n" + ) + raise PrepareReleaseDocsUserSkippedException() + elif answer == Answer.QUIT: + raise PrepareReleaseDocsUserQuitException() + elif not list_of_list_of_changes: + get_console().print( + f"\n[warning]Provider: {provider_package_id} - " + f"skipping documentation generation. No changes![/]\n" + ) + raise PrepareReleaseDocsNoChangesException() + else: + type_of_change = _ask_the_user_for_the_type_of_changes(non_interactive=non_interactive) + if type_of_change == TypeOfChange.SKIP: + raise PrepareReleaseDocsUserSkippedException() + get_console().print( + f"[info]Provider {provider_package_id} has been classified as:[/]\n\n" + f"[special]{TYPE_OF_CHANGE_DESCRIPTION[type_of_change]}" + ) + get_console().print() + if type_of_change == TypeOfChange.DOCUMENTATION: + _mark_latest_changes_as_documentation_only(provider_package_id, list_of_list_of_changes) + elif type_of_change in [TypeOfChange.BUGFIX, TypeOfChange.FEATURE, TypeOfChange.BREAKING_CHANGE]: + with_breaking_changes, maybe_with_new_features = _update_version_in_provider_yaml( + provider_package_id=provider_package_id, type_of_change=type_of_change + ) + proceed, list_of_list_of_changes, changes_as_table = _get_all_changes_for_package( + provider_package_id=provider_package_id, + base_branch=base_branch, + reapply_templates_only=reapply_templates_only, + ) + provider_details = get_provider_details(provider_package_id) + _verify_changelog_exists(provider_details.provider_id) + jinja_context = get_provider_documentation_jinja_context( + provider_id=provider_package_id, + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + jinja_context["DETAILED_CHANGES_RST"] = changes_as_table + jinja_context["DETAILED_CHANGES_PRESENT"] = bool(changes_as_table) + _update_changelog_rst( + jinja_context, + provider_package_id, + 
provider_details.documentation_provider_package_path, + regenerate_missing_docs, + ) + _update_commits_rst( + jinja_context, + provider_package_id, + provider_details.documentation_provider_package_path, + regenerate_missing_docs, + ) + return with_breaking_changes, maybe_with_new_features + + +def _find_insertion_index_for_version(content: list[str], version: str) -> tuple[int, bool]: + """Finds insertion index for the specified version from the .rst changelog content. + + :param content: changelog split into separate lines + :param version: version to look for + + :return: A 2-tuple. The first item indicates the insertion index, while the + second is a boolean indicating whether to append (False) or insert (True) + to the changelog. + """ + changelog_found = False + skip_next_line = False + index = 0 + for index, line in enumerate(content): + if not changelog_found and line.strip() == version: + changelog_found = True + skip_next_line = True + elif not skip_next_line and line and all(char == "." for char in line): + return index - 2, changelog_found + else: + skip_next_line = False + return index, changelog_found + + +def _get_changes_classified( + changes: list[Change], with_breaking_changes: bool, maybe_with_new_features: bool +) -> ClassifiedChanges: + """Pre-classifies changes based on commit message, it's wildly guessing now, + + The classification also includes the decision made by the release manager when classifying the release. + + However, if we switch to semantic commits, it could be automated. This list + is supposed to be manually reviewed and re-classified by release manager + anyway. 
+ + :param changes: list of changes + :return: list of changes classified semi-automatically to the fix/feature/breaking/other buckets + """ + classified_changes = ClassifiedChanges() + for change in changes: + if "fix" in change.message.lower(): + classified_changes.fixes.append(change) + elif "add" in change.message.lower() and maybe_with_new_features: + classified_changes.features.append(change) + elif "breaking" in change.message.lower() and with_breaking_changes: + classified_changes.breaking_changes.append(change) + else: + classified_changes.other.append(change) + return classified_changes + + +def _generate_new_changelog( + package_id: str, + provider_details: ProviderPackageDetails, + changes: list[list[Change]], + context: dict[str, Any], + with_breaking_changes: bool, + maybe_with_new_features: bool, +): + latest_version = provider_details.versions[0] + current_changelog = provider_details.changelog_path.read_text() + current_changelog_lines = current_changelog.splitlines() + insertion_index, append = _find_insertion_index_for_version(current_changelog_lines, latest_version) + new_context = deepcopy(context) + if append: + if not changes: + get_console().print( + f"[success]The provider {package_id} changelog for `{latest_version}` " + "has first release. Not updating the changelog.[/]" + ) + return + new_changes = [ + change for change in changes[0] if change.pr and "(#" + change.pr + ")" not in current_changelog + ] + if not new_changes: + get_console().print( + f"[success]The provider {package_id} changelog for `{latest_version}` " + "has no new changes. 
Not updating the changelog.[/]" + ) + return + new_context["new_changes"] = new_changes + generated_new_changelog = render_template( + template_name="UPDATE_CHANGELOG", context=new_context, extension=".rst" + ) + else: + if changes: + classified_changes = _get_changes_classified( + changes[0], + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + else: + # change log exist but without version 1.0.0 entry + classified_changes = None + + new_context.update( + { + "version": latest_version, + "version_header": "." * len(latest_version), + "classified_changes": classified_changes, + } + ) + generated_new_changelog = render_template( + template_name="CHANGELOG", context=new_context, extension=".rst" + ) + new_changelog_lines = current_changelog_lines[0:insertion_index] + new_changelog_lines.extend(generated_new_changelog.splitlines()) + new_changelog_lines.extend(current_changelog_lines[insertion_index:]) + diff = "\n".join(difflib.context_diff(current_changelog_lines, new_changelog_lines, n=5)) + syntax = Syntax(diff, "diff") + get_console().print(syntax) + if not append: + get_console().print( + f"[success]The provider {package_id} changelog for `{latest_version}` " + "version is missing. 
Generating fresh changelog.[/]" + ) + else: + get_console().print( + f"[success]Appending the provider {package_id} changelog for `{latest_version}` version.[/]" + ) + provider_details.changelog_path.write_text("\n".join(new_changelog_lines) + "\n") + + +def _update_index_rst( + context: dict[str, Any], + provider_package_id: str, + target_path: Path, +): + index_update = render_template( + template_name="PROVIDER_INDEX", context=context, extension=".rst", keep_trailing_newline=True + ) + index_file_path = target_path / "index.rst" + old_text = "" + if index_file_path.is_file(): + old_text = index_file_path.read_text() + new_text = deepcopy(old_text) + lines = old_text.splitlines(keepends=False) + for index, line in enumerate(lines): + if AUTOMATICALLY_GENERATED_MARKER in line: + new_text = "\n".join(lines[:index]) + new_text += "\n" + AUTOMATICALLY_GENERATED_CONTENT + "\n" + new_text += index_update + replace_content(index_file_path, old_text, new_text, provider_package_id) + + +def get_provider_documentation_jinja_context( + provider_id: str, with_breaking_changes: bool, maybe_with_new_features: bool +) -> dict[str, Any]: + provider_details = get_provider_details(provider_id) + current_release_version = provider_details.versions[0] + jinja_context = get_provider_jinja_context( + provider_id=provider_id, + current_release_version=current_release_version, + version_suffix="", + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + jinja_context["ADDITIONAL_INFO"] = ( + _get_additional_package_info(provider_package_path=provider_details.source_provider_package_path), + ) + jinja_context["CROSS_PROVIDERS_DEPENDENCIES"] = _get_cross_provider_dependent_packages(provider_id) + cross_providers_dependencies = _get_cross_provider_dependent_packages(provider_package_id=provider_id) + jinja_context["CROSS_PROVIDERS_DEPENDENCIES_TABLE"] = _convert_cross_package_dependencies_to_table( + cross_providers_dependencies + ) + 
jinja_context["CROSS_PROVIDERS_DEPENDENCIES_TABLE_RST"] = _convert_cross_package_dependencies_to_table( + cross_providers_dependencies, markdown=False + ) + jinja_context["PIP_REQUIREMENTS_TABLE"] = _convert_pip_requirements_to_table( + get_provider_requirements(provider_id) + ) + jinja_context["PIP_REQUIREMENTS_TABLE_RST"] = _convert_pip_requirements_to_table( + get_provider_requirements(provider_id), markdown=False + ) + return jinja_context + + +def update_changelog( + package_id: str, + base_branch: str, + reapply_templates_only: bool, + with_breaking_changes: bool, + maybe_with_new_features: bool, +): + """Internal update changelog method. + + :param package_id: package id + :param base_branch: base branch to check changes in apache remote for changes + :param reapply_templates_only: only reapply templates, no changelog generation + :param with_breaking_changes: whether there are any breaking changes + :param maybe_with_new_features: whether there are any new features + """ + provider_details = get_provider_details(package_id) + jinja_context = get_provider_documentation_jinja_context( + provider_id=package_id, + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + proceed, changes, _ = _get_all_changes_for_package( + provider_package_id=package_id, base_branch=base_branch, reapply_templates_only=reapply_templates_only + ) + if not proceed: + get_console().print( + f"[warning]The provider {package_id} is not being released. 
Skipping the package.[/]" + ) + raise PrepareReleaseDocsNoChangesException() + if reapply_templates_only: + get_console().print("[info]Only reapply templates, no changelog update[/]") + else: + _generate_new_changelog( + package_id=package_id, + provider_details=provider_details, + changes=changes, + context=jinja_context, + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + get_console().print(f"\n[info]Update index.rst for {package_id}\n") + _update_index_rst(jinja_context, package_id, provider_details.documentation_provider_package_path) + + +def _generate_init_py_file_for_provider( + context: dict[str, Any], + target_path: Path, +): + init_py_content = black_format( + render_template( + template_name="PROVIDER__INIT__PY", + context=context, + extension=".py", + keep_trailing_newline=True, + ) + ) + init_py_path = target_path / "__init__.py" + init_py_path.write_text(init_py_content) + + +def _replace_min_airflow_version_in_provider_yaml( + context: dict[str, Any], + target_path: Path, +): + provider_yaml_path = target_path / "provider.yaml" + provider_yaml_txt = provider_yaml_path.read_text() + provider_yaml_txt = re.sub( + r" {2}- apache-airflow>=.*", + f" - apache-airflow>={context['MIN_AIRFLOW_VERSION']}", + provider_yaml_txt, + ) + provider_yaml_path.write_text(provider_yaml_txt) + # IMPORTANT!!! 
Whenever we update provider.yaml files, we MUST clear cache for + # get_provider_packages_metadata function, because otherwise anything next will not use it + get_provider_packages_metadata.cache_clear() + + +def update_min_airflow_version( + provider_package_id: str, with_breaking_changes: bool, maybe_with_new_features: bool +): + """Updates min airflow version in provider yaml and __init__.py + + :param provider_package_id: provider package id + :param with_breaking_changes: whether there are any breaking changes + :param maybe_with_new_features: whether there are any new features + :return: + """ + provider_details = get_provider_details(provider_package_id) + jinja_context = get_provider_documentation_jinja_context( + provider_id=provider_package_id, + with_breaking_changes=with_breaking_changes, + maybe_with_new_features=maybe_with_new_features, + ) + _generate_init_py_file_for_provider( + context=jinja_context, + target_path=provider_details.source_provider_package_path, + ) + _replace_min_airflow_version_in_provider_yaml( + context=jinja_context, target_path=provider_details.source_provider_package_path + ) diff --git a/dev/provider_packages/CHANGELOG_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 similarity index 94% rename from dev/provider_packages/CHANGELOG_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 index b8c5cbe2653ff..40699b0208e23 100644 --- a/dev/provider_packages/CHANGELOG_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/CHANGELOG_TEMPLATE.rst.jinja2 @@ -26,17 +26,23 @@ {{ version }} {{ version_header }} +{%- if WITH_BREAKING_CHANGES %} + Breaking changes ~~~~~~~~~~~~~~~~ {% for breaking_change in classified_changes.breaking_changes %} * ``{{ breaking_change.message_without_backticks | safe }}`` {%- endfor %} +{%- endif %} + +{%- if MAYBE_WITH_NEW_FEATURES %} Features ~~~~~~~~ {% for feature in classified_changes.features %} * ``{{ 
feature.message_without_backticks | safe }}`` {%- endfor %} +{%- endif %} Bug Fixes ~~~~~~~~~ diff --git a/dev/provider_packages/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 similarity index 99% rename from dev/provider_packages/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 index 086599b381e25..43e4aec48b827 100644 --- a/dev/provider_packages/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2 @@ -21,7 +21,7 @@ IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/provider_packages` DIRECTORY --#} +#} .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information diff --git a/dev/provider_packages/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 similarity index 98% rename from dev/provider_packages/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 index fdf0d3ff1e204..fd5e69ee972b3 100644 --- a/dev/provider_packages/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_COMMITS_TEMPLATE.rst.jinja2 @@ -22,7 +22,7 @@ IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE `PROVIDER_INDEX_TEMPLATE.rst.jinja2` IN the `dev/provider_packages` DIRECTORY --#} +#} .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
See the NOTICE file distributed with this work for additional information @@ -47,7 +47,7 @@ Package {{ PACKAGE_PIP_NAME }} {{ PROVIDER_DESCRIPTION | safe }} -This is detailed commit list of changes for versions provider package: ``{{PROVIDER_PACKAGE_ID}}``. +This is detailed commit list of changes for versions provider package: ``{{PROVIDER_ID}}``. For high-level changelog, see :doc:`package information including changelog `. {%- if DETAILED_CHANGES_PRESENT %} diff --git a/dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 similarity index 97% rename from dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 index 4e7943ced2cb8..cb2bf14ad00de 100644 --- a/dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_INDEX_TEMPLATE.rst.jinja2 @@ -42,7 +42,7 @@ Release: {{ RELEASE }}{{ VERSION_SUFFIX }} Provider package ---------------- -This is a provider package for ``{{PROVIDER_PACKAGE_ID}}`` provider. All classes for this provider package +This is a provider package for ``{{PROVIDER_ID}}`` provider. All classes for this provider package are in ``{{FULL_PACKAGE_NAME}}`` python package. {%- if PROVIDER_REMOVED %} diff --git a/dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 new file mode 100644 index 0000000000000..3c77f8017e00d --- /dev/null +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER_README_TEMPLATE.rst.jinja2 @@ -0,0 +1,109 @@ +{# + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE + OVERWRITTEN WHEN PREPARING PACKAGES. + + IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + +#} + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Package ``{{ PACKAGE_PIP_NAME }}`` + +Release: ``{{ RELEASE }}{{ VERSION_SUFFIX }}`` + + +{{ PROVIDER_DESCRIPTION | safe }} + +Provider package +---------------- + +This is a provider package for ``{{PROVIDER_ID}}`` provider. All classes for this provider package +are in ``{{FULL_PACKAGE_NAME}}`` python package. 
+ +You can find package information and changelog for the provider +in the `documentation `_. + +{%- if PROVIDER_REMOVED %} + + .. warning:: + + This provider is not maintained anymore by the community. It has been removed and is not going to be + updated anymore. The removal was done according to the process described in + `Removing community providers `_ + + Feel free to contact Airflow Development Mailing List if you have any questions. + +{%- endif %} + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install {{PACKAGE_PIP_NAME}}`` + +The package supports the following python versions: {{ ",".join(SUPPORTED_PYTHON_VERSIONS) }} + +{%- if PIP_REQUIREMENTS %} + +Requirements +------------ + +{{ PIP_REQUIREMENTS_TABLE_RST | safe }} + +{%- endif %} +{%- if CROSS_PROVIDERS_DEPENDENCIES %} + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install {{ PACKAGE_PIP_NAME }}[{{ CROSS_PROVIDERS_DEPENDENCIES[0] }}] + + +{{ CROSS_PROVIDERS_DEPENDENCIES_TABLE_RST | safe }} + +{%- endif %} + +The changelog for the provider package can be found in the +`changelog `_. 
diff --git a/dev/provider_packages/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 b/dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 similarity index 94% rename from dev/provider_packages/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 rename to dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 index c057d63e3421b..fa408207987c5 100644 --- a/dev/provider_packages/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/PROVIDER__INIT__PY_TEMPLATE.py.jinja2 @@ -58,5 +58,5 @@ if packaging.version.parse(packaging.version.parse(airflow_version).base_version "{{ MIN_AIRFLOW_VERSION }}" ): raise RuntimeError( - f"The package `{{ PACKAGE_PIP_NAME }}:{__version__}` requires Apache Airflow {{ MIN_AIRFLOW_VERSION }}+" # NOQA: E501 + f"The package `{{ PACKAGE_PIP_NAME }}:{__version__}` needs Apache Airflow {{ MIN_AIRFLOW_VERSION }}+" ) diff --git a/dev/provider_packages/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 b/dev/breeze/src/airflow_breeze/templates/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 similarity index 100% rename from dev/provider_packages/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 rename to dev/breeze/src/airflow_breeze/templates/UPDATE_CHANGELOG_TEMPLATE.rst.jinja2 diff --git a/dev/breeze/src/airflow_breeze/utils/add_back_references.py b/dev/breeze/src/airflow_breeze/utils/add_back_references.py index 108c3599059ab..ba18679ecaf6d 100644 --- a/dev/breeze/src/airflow_breeze/utils/add_back_references.py +++ b/dev/breeze/src/airflow_breeze/utils/add_back_references.py @@ -128,25 +128,25 @@ def generate_back_references(link: str, base_path: Path): create_back_reference_html(relative_path, dest_file_path) -def start_generating_back_references(airflow_site_directory: Path, short_provider_package_ids: list[str]): +def start_generating_back_references(airflow_site_directory: Path, short_provider_ids: list[str]): docs_archive_path = airflow_site_directory / "docs-archive" airflow_docs_path = docs_archive_path / 
"apache-airflow" helm_docs_path = docs_archive_path / "helm-chart" - if "apache-airflow" in short_provider_package_ids: + if "apache-airflow" in short_provider_ids: generate_back_references(airflow_redirects_link, airflow_docs_path) - short_provider_package_ids.remove("apache-airflow") - if "helm-chart" in short_provider_package_ids: + short_provider_ids.remove("apache-airflow") + if "helm-chart" in short_provider_ids: generate_back_references(helm_redirects_link, helm_docs_path) - short_provider_package_ids.remove("helm-chart") - if "docker-stack" in short_provider_package_ids: + short_provider_ids.remove("helm-chart") + if "docker-stack" in short_provider_ids: get_console().print("[info]Skipping docker-stack package. No back-reference needed.") - short_provider_package_ids.remove("docker-stack") - if "apache-airflow-providers" in short_provider_package_ids: + short_provider_ids.remove("docker-stack") + if "apache-airflow-providers" in short_provider_ids: get_console().print("[info]Skipping apache-airflow-providers package. No back-reference needed.") - short_provider_package_ids.remove("apache-airflow-providers") - if short_provider_package_ids: + short_provider_ids.remove("apache-airflow-providers") + if short_provider_ids: all_providers = [ - f"apache-airflow-providers-{package.replace('.','-')}" for package in short_provider_package_ids + f"apache-airflow-providers-{package.replace('.','-')}" for package in short_provider_ids ] for p in all_providers: get_console().print(f"Processing airflow provider: {p}") diff --git a/dev/breeze/src/airflow_breeze/utils/black_utils.py b/dev/breeze/src/airflow_breeze/utils/black_utils.py new file mode 100644 index 0000000000000..23891b8206c94 --- /dev/null +++ b/dev/breeze/src/airflow_breeze/utils/black_utils.py @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import os +from functools import lru_cache + +from black import Mode, TargetVersion, format_str, parse_pyproject_toml + +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT + + +@lru_cache(maxsize=None) +def _black_mode() -> Mode: + config = parse_pyproject_toml(os.path.join(AIRFLOW_SOURCES_ROOT, "pyproject.toml")) + target_versions = {TargetVersion[val.upper()] for val in config.get("target_version", ())} + return Mode( + target_versions=target_versions, + line_length=config.get("line_length", Mode.line_length), + ) + + +def black_format(content) -> str: + return format_str(content, mode=_black_mode()) diff --git a/dev/breeze/src/airflow_breeze/utils/console.py b/dev/breeze/src/airflow_breeze/utils/console.py index c1a14c86f5240..d579e58176f9b 100644 --- a/dev/breeze/src/airflow_breeze/utils/console.py +++ b/dev/breeze/src/airflow_breeze/utils/console.py @@ -43,6 +43,7 @@ def get_theme() -> Theme: "info": "bold", "warning": "italic", "error": "italic underline", + "special": "bold italic underline", } ) except ImportError: @@ -56,6 +57,7 @@ def get_theme() -> Theme: "info": "bright_blue", "warning": "bright_yellow", "error": "red", + "special": "magenta", } ) @@ -65,6 +67,7 @@ class MessageType(Enum): INFO = "info" WARNING = "warning" 
ERROR = "error" + SPECIAL = "special" def message_type_from_return_code(return_code: int) -> MessageType: diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py b/dev/breeze/src/airflow_breeze/utils/packages.py index d3b6b81e840eb..3f6d6dba06f9a 100644 --- a/dev/breeze/src/airflow_breeze/utils/packages.py +++ b/dev/breeze/src/airflow_breeze/utils/packages.py @@ -19,18 +19,181 @@ import fnmatch import json -from typing import Iterable +import os +from enum import Enum +from functools import lru_cache +from pathlib import Path +from typing import Any, Iterable, NamedTuple -from airflow_breeze.global_constants import REGULAR_DOC_PACKAGES -from airflow_breeze.utils.path_utils import PROVIDER_DEPENDENCIES_JSON_FILE_PATH -from airflow_breeze.utils.suspended_providers import get_removed_provider_ids +from airflow_breeze.global_constants import ( + ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS, + PROVIDER_DEPENDENCIES, + PROVIDER_RUNTIME_DATA_SCHEMA_PATH, + REGULAR_DOC_PACKAGES, +) +from airflow_breeze.utils.console import get_console +from airflow_breeze.utils.path_utils import ( + AIRFLOW_PROVIDERS_ROOT, + DOCS_ROOT, + PROVIDER_DEPENDENCIES_JSON_FILE_PATH, +) +from airflow_breeze.utils.publish_docs_helpers import ( + _filepath_to_module, + _filepath_to_system_tests, + _load_schema, + get_provider_yaml_paths, +) +from airflow_breeze.utils.versions import strip_leading_zeros_from_version + +MIN_AIRFLOW_VERSION = "2.5.0" LONG_PROVIDERS_PREFIX = "apache-airflow-providers-" +# TODO: use single source of truth for those +# for now we need to keep them in sync with the ones in setup.py +PREINSTALLED_PROVIDERS = [ + # Until we cut off the 2.8.0 branch and bump current airflow version to 2.9.0, we should + # keep common.io commented out in order to be able to generate PyPI constraints, because + # the version from PyPI has a requirement of apache-airflow>=2.8.0 + # "common.io", + "common.sql", + "ftp", + "http", + "imap", + "sqlite", +] + + +class EntityType(Enum): + Operators = 
"Operators" + Transfers = "Transfers" + Sensors = "Sensors" + Hooks = "Hooks" + Secrets = "Secrets" + + +class PluginInfo(NamedTuple): + name: str + package_name: str + class_name: str + + +class ProviderPackageDetails(NamedTuple): + provider_id: str + full_package_name: str + pypi_package_name: str + source_provider_package_path: Path + documentation_provider_package_path: Path + changelog_path: Path + provider_description: str + dependencies: list[str] + versions: list[str] + excluded_python_versions: list[str] + plugins: list[PluginInfo] + removed: bool + + +@lru_cache +def get_provider_packages_metadata() -> dict[str, dict[str, Any]]: + """ + Load all data from providers files + + :return: A dictionary mapping short provider package names to the contents of their provider.yaml files. + """ + import jsonschema + import yaml + + schema = _load_schema() + result: dict[str, dict[str, Any]] = {} + for provider_yaml_path in get_provider_yaml_paths(): + with open(provider_yaml_path) as yaml_file: + provider = yaml.safe_load(yaml_file) + try: + jsonschema.validate(provider, schema=schema) + except jsonschema.ValidationError: + raise Exception(f"Unable to parse: {provider_yaml_path}.") + provider_yaml_dir = os.path.dirname(provider_yaml_path) + provider["python-module"] = _filepath_to_module(provider_yaml_dir) + provider["package-dir"] = provider_yaml_dir + provider["system-tests-dir"] = _filepath_to_system_tests(provider_yaml_dir) + result[get_short_package_name(provider["package-name"])] = provider + return result + + +def validate_provider_info_with_runtime_schema(provider_info: dict[str, Any]) -> None: + """Validates provider info against the runtime schema. + + This way we check if the provider info in the packages is future-compatible. + The Runtime Schema should only change when there is a major version change. 
+ + :param provider_info: provider info to validate + """ + import jsonschema + + schema = json.loads(PROVIDER_RUNTIME_DATA_SCHEMA_PATH.read_text()) + try: + jsonschema.validate(provider_info, schema=schema) + except jsonschema.ValidationError as ex: + get_console().print("[red]Provider info not validated against runtime schema[/]") + raise Exception( + "Error when validating schema. The schema must be compatible with " + "airflow/provider_info.schema.json.", + ex, + ) + + +def get_provider_info_dict(provider_id: str) -> dict[str, Any]: + """Retrieves provider info from the provider yaml file. + :param provider_id: package id to retrieve provider.yaml from + :return: provider_info dictionary + """ + provider_yaml_dict = get_provider_packages_metadata().get(provider_id) + if provider_yaml_dict: + validate_provider_info_with_runtime_schema(provider_yaml_dict) + return provider_yaml_dict or {} + + +@lru_cache +def get_suspended_provider_ids() -> list[str]: + return [ + provider_id + for provider_id, provider_metadata in get_provider_packages_metadata().items() + if provider_metadata.get("suspended", False) + ] + + +@lru_cache +def get_suspended_provider_folders() -> list[str]: + return [provider_id.replace(".", "/") for provider_id in get_suspended_provider_ids()] + + +@lru_cache +def get_removed_provider_ids() -> list[str]: + return [ + provider_id + for provider_id, provider_metadata in get_provider_packages_metadata().items() + if provider_metadata.get("removed", False) + ] + + +def get_provider_requirements(provider_id: str) -> list[str]: + package_metadata = get_provider_packages_metadata().get(provider_id) + return package_metadata["dependencies"] if package_metadata else [] + + +@lru_cache def get_available_packages( include_non_provider_doc_packages: bool = False, include_all_providers: bool = False ) -> list[str]: + """ + Return provider ids for all packages that are available currently (not suspended). 
+ + :param include_non_provider_doc_packages: whether the non-provider doc packages should be included + (packages like apache-airflow, helm-chart, docker-stack) + :param include_all_providers: whether "all-providers" should be included in the list. + + """ provider_ids: list[str] = list(json.loads(PROVIDER_DEPENDENCIES_JSON_FILE_PATH.read_text()).keys()) available_packages = [] if include_non_provider_doc_packages: @@ -42,6 +205,7 @@ def get_available_packages( def expand_all_provider_packages(short_doc_packages: tuple[str, ...]) -> tuple[str, ...]: + """In case there are "all-providers" in the list, expand the list with all providers.""" if "all-providers" in short_doc_packages: packages = [package for package in short_doc_packages if package != "all-providers"] packages.extend(get_available_packages()) @@ -52,31 +216,58 @@ def expand_all_provider_packages(short_doc_packages: tuple[str, ...]) -> tuple[s def get_long_package_names(short_form_providers: Iterable[str]) -> tuple[str, ...]: providers: list[str] = [] for short_form_provider in short_form_providers: - if short_form_provider in REGULAR_DOC_PACKAGES: - providers.append(short_form_provider) - continue - short_form_provider.split(".") - parts = "-".join(short_form_provider.split(".")) - providers.append(LONG_PROVIDERS_PREFIX + parts) + long_package_name = get_long_package_name(short_form_provider) + providers.append(long_package_name) + return tuple(providers) + + +def get_long_package_name(short_form_provider: str) -> str: + if short_form_provider in REGULAR_DOC_PACKAGES: + long_package_name = short_form_provider + else: + long_package_name = LONG_PROVIDERS_PREFIX + "-".join(short_form_provider.split(".")) + return long_package_name + + +def get_short_package_names(long_form_providers: Iterable[str]) -> tuple[str, ...]: + providers: list[str] = [] + for long_form_provider in long_form_providers: + providers.append(get_short_package_name(long_form_provider)) return tuple(providers) -def 
convert_to_long_package_names( - package_filters: tuple[str, ...], packages_short_form: tuple[str, ...] +def get_short_package_name(long_form_provider: str) -> str: + if long_form_provider in REGULAR_DOC_PACKAGES: + return long_form_provider + else: + if not long_form_provider.startswith(LONG_PROVIDERS_PREFIX): + raise ValueError( + f"Invalid provider name: {long_form_provider}. " f"Should start with {LONG_PROVIDERS_PREFIX}" + ) + return long_form_provider[len(LONG_PROVIDERS_PREFIX) :].replace("-", ".") + + +def find_matching_long_package_names( + short_packages: tuple[str, ...], + filters: tuple[str, ...] | None = None, ) -> tuple[str, ...]: - """Filters the package list against a set of filters. + """Finds matching long package names based on short package name and package filters specified. + + The sequence of specified packages / filters is kept (filters first, packages next). In case there + are filters that do not match any of the packages error is raised. - A packet is returned if it matches at least one filter. The function keeps the order of the packages. 
+ :param short_packages: short forms of package names + :param filters: package filters specified """ available_doc_packages = list( get_long_package_names(get_available_packages(include_non_provider_doc_packages=True)) ) - if not package_filters and not packages_short_form: - available_doc_packages.extend(package_filters) + if not filters and not short_packages: + available_doc_packages.extend(filters or ()) return tuple(set(available_doc_packages)) - processed_package_filters = list(package_filters) - processed_package_filters.extend(get_long_package_names(packages_short_form)) + processed_package_filters = list(filters or ()) + processed_package_filters.extend(get_long_package_names(short_packages)) removed_packages: list[str] = [ f"apache-airflow-providers-{provider.replace('.','-')}" for provider in get_removed_provider_ids() @@ -99,3 +290,201 @@ def convert_to_long_package_names( if any(fnmatch.fnmatch(p, f) for f in processed_package_filters) ] ) + + +def get_source_package_path(provider_id: str) -> Path: + return AIRFLOW_PROVIDERS_ROOT.joinpath(*provider_id.split(".")) + + +def get_documentation_package_path(provider_id: str) -> Path: + return DOCS_ROOT / f"apache-airflow-providers-{provider_id.replace('.', '-')}" + + +def get_pip_package_name(provider_id: str) -> str: + """ + Returns PIP package name for the package id. + + :param provider_id: id of the package + :return: the name of pip package + """ + return "apache-airflow-providers-" + provider_id.replace(".", "-") + + +def get_wheel_package_name(provider_id: str) -> str: + """ + Returns Wheel package name prefix for the package id. + + :param provider_id: id of the package + :return: the name of wheel package prefix + """ + return "apache_airflow_providers_" + provider_id.replace(".", "_") + + +def get_install_requirements(provider_id: str, version_suffix: str) -> str: + """ + Returns install requirements for the package. 
+ + :param provider_id: id of the provider package + :param version_suffix: optional version suffix for packages + + :return: install requirements of the package + """ + + def apply_version_suffix(install_clause: str) -> str: + if install_clause.startswith("apache-airflow") and ">=" in install_clause and version_suffix != "": + # This is workaround for `pip` way of handling `--pre` installation switch. It apparently does + # not modify the meaning of `install_requires` to include also pre-releases, so we need to + # modify our internal provider and airflow package version references to include all pre-releases + # including all development releases. When you specify dependency as >= X.Y.Z, and you + # have packages X.Y.Zdev0 or X.Y.Zrc1 in a local file, such package is not considered + # as fulfilling the requirement even if `--pre` switch is used. + return install_clause + ".dev0" + return install_clause + + if provider_id in get_removed_provider_ids(): + dependencies = get_provider_requirements(provider_id) + else: + dependencies = PROVIDER_DEPENDENCIES.get(provider_id)["deps"] + install_requires = [apply_version_suffix(clause) for clause in dependencies] + return "".join(f"\n {ir}" for ir in install_requires) + + +def get_package_extras(provider_id: str) -> dict[str, list[str]]: + """ + Finds extras for the package specified. 
+ + :param provider_id: id of the package + """ + if provider_id == "providers": + return {} + if provider_id in get_removed_provider_ids(): + return {} + extras_dict: dict[str, list[str]] = { + module: [get_pip_package_name(module)] + for module in PROVIDER_DEPENDENCIES.get(provider_id)["cross-providers-deps"] + } + provider_yaml_dict = get_provider_packages_metadata().get(provider_id) + additional_extras = provider_yaml_dict.get("additional-extras") if provider_yaml_dict else None + if additional_extras: + for entry in additional_extras: + name = entry["name"] + dependencies = entry["dependencies"] + if name in extras_dict: + # remove non-versioned dependencies if versioned ones are coming + existing_dependencies = set(extras_dict[name]) + for new_dependency in dependencies: + for dependency in existing_dependencies: + # remove extra if exists as non-versioned one + if new_dependency.startswith(dependency): + extras_dict[name].remove(dependency) + break + extras_dict[name].append(new_dependency) + else: + extras_dict[name] = dependencies + return extras_dict + + +def get_provider_details(provider_id: str) -> ProviderPackageDetails: + provider_info = get_provider_packages_metadata().get(provider_id) + if not provider_info: + raise RuntimeError(f"The provider {provider_id} has no provider.yaml defined.") + plugins: list[PluginInfo] = [] + if "plugins" in provider_info: + for plugin in provider_info["plugins"]: + package_name, class_name = plugin["plugin-class"].rsplit(".", maxsplit=1) + plugins.append( + PluginInfo( + name=plugin["name"], + package_name=package_name, + class_name=class_name, + ) + ) + return ProviderPackageDetails( + provider_id=provider_id, + full_package_name=f"airflow.providers.{provider_id}", + pypi_package_name=f"apache-airflow-providers-{provider_id.replace('.', '-')}", + source_provider_package_path=get_source_package_path(provider_id), + documentation_provider_package_path=get_documentation_package_path(provider_id), + 
changelog_path=get_source_package_path(provider_id) / "CHANGELOG.rst", + provider_description=provider_info["description"], + dependencies=provider_info["dependencies"], + versions=provider_info["versions"], + excluded_python_versions=provider_info.get("excluded-python-versions") or [], + plugins=plugins, + removed=provider_info.get("removed", False), + ) + + +def get_min_airflow_version(provider_id: str) -> str: + from packaging.version import Version as PackagingVersion + + provider_details = get_provider_details(provider_id=provider_id) + min_airflow_version = MIN_AIRFLOW_VERSION + for dependency in provider_details.dependencies: + if dependency.startswith("apache-airflow>="): + current_min_airflow_version = dependency.split(">=")[1] + if PackagingVersion(current_min_airflow_version) > PackagingVersion(MIN_AIRFLOW_VERSION): + min_airflow_version = current_min_airflow_version + return min_airflow_version + + +def get_python_requires(provider_id: str) -> str: + python_requires = "~=3.8" + provider_details = get_provider_details(provider_id=provider_id) + for p in provider_details.excluded_python_versions: + python_requires += f", !={p}" + return python_requires + + +def get_provider_jinja_context( + provider_id: str, + current_release_version: str, + version_suffix: str, + with_breaking_changes: bool, + maybe_with_new_features: bool, +): + provider_details = get_provider_details(provider_id=provider_id) + release_version_no_leading_zeros = strip_leading_zeros_from_version(current_release_version) + changelog = provider_details.changelog_path.read_text() + supported_python_versions = [ + p for p in ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS if p not in provider_details.excluded_python_versions + ] + context: dict[str, Any] = { + "WITH_BREAKING_CHANGES": with_breaking_changes, + "MAYBE_WITH_NEW_FEATURES": maybe_with_new_features, + "ENTITY_TYPES": list(EntityType), + "README_FILE": "README.rst", + "PROVIDER_ID": provider_details.provider_id, + "PACKAGE_PIP_NAME": 
get_pip_package_name(provider_details.provider_id), + "PACKAGE_WHEEL_NAME": get_wheel_package_name(provider_details.provider_id), + "FULL_PACKAGE_NAME": provider_details.full_package_name, + "PROVIDER_PATH": provider_details.full_package_name.replace(".", "/"), + "RELEASE": current_release_version, + "RELEASE_NO_LEADING_ZEROS": release_version_no_leading_zeros, + "VERSION_SUFFIX": version_suffix or "", + "PIP_REQUIREMENTS": get_provider_requirements(provider_details.provider_id), + "PROVIDER_TYPE": "Provider", + "PROVIDERS_FOLDER": "providers", + "PROVIDER_DESCRIPTION": provider_details.provider_description, + "INSTALL_REQUIREMENTS": get_install_requirements( + provider_id=provider_details.provider_id, version_suffix=version_suffix + ), + "SETUP_REQUIREMENTS": """ + setuptools + wheel +""", + "EXTRAS_REQUIREMENTS": get_package_extras(provider_id=provider_details.provider_id), + "CHANGELOG_RELATIVE_PATH": os.path.relpath( + provider_details.source_provider_package_path, + provider_details.documentation_provider_package_path, + ), + "CHANGELOG": changelog, + "SUPPORTED_PYTHON_VERSIONS": supported_python_versions, + "PYTHON_REQUIRES": get_python_requires(provider_id), + "PLUGINS": provider_details.plugins, + "MIN_AIRFLOW_VERSION": get_min_airflow_version(provider_id), + "PREINSTALLED_PROVIDER": provider_details.provider_id in PREINSTALLED_PROVIDERS, + "PROVIDER_REMOVED": provider_details.removed, + "PROVIDER_INFO": get_provider_info_dict(provider_id), + } + return context diff --git a/dev/breeze/src/airflow_breeze/utils/parallel.py b/dev/breeze/src/airflow_breeze/utils/parallel.py index ea4ef06030ad0..ad7e197d1c5dd 100644 --- a/dev/breeze/src/airflow_breeze/utils/parallel.py +++ b/dev/breeze/src/airflow_breeze/utils/parallel.py @@ -228,7 +228,7 @@ def get_single_tuple_array(title: str, t: NamedTuple) -> Table: for key, value in t._asdict().items(): table.add_column(header=key, header_style="info") row.append(get_printable_value(key, value)) - table.add_row(*row, 
style="magenta") + table.add_row(*row, style="special") return table @@ -245,7 +245,7 @@ def get_multi_tuple_array(title: str, tuples: list[tuple[NamedTuple, ...]]) -> T for named_tuple in t: for key, value in named_tuple._asdict().items(): row.append(get_printable_value(key, value)) - table.add_row(*row, style="magenta") + table.add_row(*row, style="special") return table diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py b/dev/breeze/src/airflow_breeze/utils/path_utils.py index 1abdddac97dd0..65a32286ced5e 100644 --- a/dev/breeze/src/airflow_breeze/utils/path_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py @@ -268,6 +268,7 @@ def find_airflow_sources_root_to_operate_on() -> Path: TESTS_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "tests" / "providers" SYSTEM_TESTS_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "tests" / "system" / "providers" AIRFLOW_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "airflow" / "providers" +DOCS_ROOT = AIRFLOW_SOURCES_ROOT / "docs" BUILD_CACHE_DIR = AIRFLOW_SOURCES_ROOT / ".build" GENERATED_DIR = AIRFLOW_SOURCES_ROOT / "generated" CONSTRAINTS_CACHE_DIR = BUILD_CACHE_DIR / "constraints" diff --git a/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py b/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py index b8e64d66f3615..c3b380b07b61c 100644 --- a/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py +++ b/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py @@ -28,7 +28,8 @@ from airflow_breeze.utils.console import Output, get_console from airflow_breeze.utils.docs_errors import DocBuildError, parse_sphinx_warnings from airflow_breeze.utils.helm_chart_utils import chart_version -from airflow_breeze.utils.publish_docs_helpers import load_package_data, pretty_format_path +from airflow_breeze.utils.packages import get_provider_packages_metadata, get_short_package_name +from airflow_breeze.utils.publish_docs_helpers import pretty_format_path from airflow_breeze.utils.spelling_checks import 
SpellingError, parse_spelling_warnings PROCESS_TIMEOUT = 15 * 60 @@ -95,8 +96,7 @@ def _current_version(self): if self.package_name == "apache-airflow": return get_airflow_version() if self.package_name.startswith("apache-airflow-providers-"): - all_providers_yaml = load_package_data(include_suspended=True) - provider = next(p for p in all_providers_yaml if p["package-name"] == self.package_name) + provider = get_provider_packages_metadata().get(get_short_package_name(self.package_name)) return provider["versions"][0] if self.package_name == "helm-chart": return chart_version() diff --git a/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py b/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py index 80116471f7739..8c5d63748cb74 100644 --- a/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py +++ b/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py @@ -23,8 +23,6 @@ from pathlib import Path from typing import Any -import yaml - CONSOLE_WIDTH = 180 ROOT_DIR = Path(__file__).parents[5].resolve() @@ -56,33 +54,6 @@ def get_provider_yaml_paths(): return sorted(glob(f"{ROOT_DIR}/airflow/providers/**/provider.yaml", recursive=True)) -def load_package_data(include_suspended: bool = False) -> list[dict[str, Any]]: - """ - Load all data from providers files - - :return: A list containing the contents of all provider.yaml files. 
- """ - import jsonschema - - schema = _load_schema() - result = [] - for provider_yaml_path in get_provider_yaml_paths(): - with open(provider_yaml_path) as yaml_file: - provider = yaml.safe_load(yaml_file) - try: - jsonschema.validate(provider, schema=schema) - except jsonschema.ValidationError: - raise Exception(f"Unable to parse: {provider_yaml_path}.") - if provider["suspended"] and not include_suspended: - continue - provider_yaml_dir = os.path.dirname(provider_yaml_path) - provider["python-module"] = _filepath_to_module(provider_yaml_dir) - provider["package-dir"] = provider_yaml_dir - provider["system-tests-dir"] = _filepath_to_system_tests(provider_yaml_dir) - result.append(provider) - return result - - def pretty_format_path(path: str, start: str) -> str: """Formats path nicely.""" relpath = os.path.relpath(path, start) diff --git a/dev/breeze/src/airflow_breeze/utils/run_tests.py b/dev/breeze/src/airflow_breeze/utils/run_tests.py index 042c578fd95af..d2ce641fe4188 100644 --- a/dev/breeze/src/airflow_breeze/utils/run_tests.py +++ b/dev/breeze/src/airflow_breeze/utils/run_tests.py @@ -23,9 +23,9 @@ from subprocess import DEVNULL from airflow_breeze.utils.console import Output, get_console +from airflow_breeze.utils.packages import get_suspended_provider_folders from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT from airflow_breeze.utils.run_utils import run_command -from airflow_breeze.utils.suspended_providers import get_suspended_providers_folders def verify_an_image( @@ -103,7 +103,7 @@ def test_paths(test_type: str, backend: str, helm_test_package: str | None) -> t def get_suspended_provider_args() -> list[str]: pytest_args = [] - suspended_folders = get_suspended_providers_folders() + suspended_folders = get_suspended_provider_folders() for providers in suspended_folders: pytest_args.extend( [ diff --git a/dev/breeze/src/airflow_breeze/utils/run_utils.py b/dev/breeze/src/airflow_breeze/utils/run_utils.py index 
d78e402a48e0b..7b1bcd073eef5 100644 --- a/dev/breeze/src/airflow_breeze/utils/run_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/run_utils.py @@ -172,6 +172,8 @@ def shorten_command(_index: int, _argument: str) -> str: get_console(output=output).print( "[error]========================= STDERR end ==============================[/]" ) + if check: + raise return ex diff --git a/dev/breeze/src/airflow_breeze/utils/suspended_providers.py b/dev/breeze/src/airflow_breeze/utils/suspended_providers.py deleted file mode 100644 index be918e356572a..0000000000000 --- a/dev/breeze/src/airflow_breeze/utils/suspended_providers.py +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -import sys - -from airflow_breeze.utils.console import get_console -from airflow_breeze.utils.path_utils import AIRFLOW_PROVIDERS_ROOT, AIRFLOW_SOURCES_ROOT - - -def get_suspended_providers_folders() -> list[str]: - """ - Returns a list of suspended providers folders that should be - skipped when running tests (without any prefix - for example apache/beam, yandex, google etc.). 
- """ - import yaml - - suspended_providers = [] - for provider_path in AIRFLOW_PROVIDERS_ROOT.rglob("provider.yaml"): - provider_yaml = yaml.safe_load(provider_path.read_text()) - if provider_yaml.get("suspended"): - suspended_providers.append( - provider_path.parent.relative_to(AIRFLOW_SOURCES_ROOT) - .as_posix() - .replace("airflow/providers/", "") - ) - return suspended_providers - - -def get_removed_provider_ids() -> list[str]: - """ - Yields the ids of suspended providers. - """ - import yaml - - removed_provider_ids = [] - for provider_path in AIRFLOW_PROVIDERS_ROOT.rglob("provider.yaml"): - provider_yaml = yaml.safe_load(provider_path.read_text()) - package_name = provider_yaml.get("package-name") - if provider_yaml.get("removed", False): - if not provider_yaml.get("suspended"): - get_console().print( - f"[error]The provider {package_name} is marked for removal in provider.yaml, but " - f"not suspended. Please suspend the provider first before removing it.\n" - ) - sys.exit(1) - removed_provider_ids.append(package_name[len("apache-airflow-providers-") :].replace("-", ".")) - return removed_provider_ids diff --git a/dev/breeze/src/airflow_breeze/utils/versions.py b/dev/breeze/src/airflow_breeze/utils/versions.py new file mode 100644 index 0000000000000..abb8d601f2907 --- /dev/null +++ b/dev/breeze/src/airflow_breeze/utils/versions.py @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + + +def strip_leading_zeros_from_version(version: str) -> str: + """ + Strips leading zeros from version number. + + This converts 1974.04.03 to 1974.4.3 as the format with leading month and day zeros is not accepted + by PIP versioning. + + :param version: version number in CALVER format (potentially with leading 0s in date and month) + :return: string with leading 0s after dot replaced. + """ + return ".".join(str(int(i)) for i in version.split(".")) diff --git a/dev/breeze/tests/test_packages.py b/dev/breeze/tests/test_packages.py index f520c8328afdd..d34892fde4912 100644 --- a/dev/breeze/tests/test_packages.py +++ b/dev/breeze/tests/test_packages.py @@ -16,8 +16,26 @@ # under the License. 
from __future__ import annotations +import pytest + from airflow_breeze.global_constants import REGULAR_DOC_PACKAGES -from airflow_breeze.utils.packages import get_available_packages +from airflow_breeze.utils.packages import ( + expand_all_provider_packages, + find_matching_long_package_names, + get_available_packages, + get_documentation_package_path, + get_install_requirements, + get_long_package_name, + get_package_extras, + get_provider_details, + get_provider_requirements, + get_removed_provider_ids, + get_short_package_name, + get_source_package_path, + get_suspended_provider_folders, + get_suspended_provider_ids, +) +from airflow_breeze.utils.path_utils import AIRFLOW_PROVIDERS_ROOT, AIRFLOW_SOURCES_ROOT, DOCS_ROOT def test_get_available_packages(): @@ -25,6 +43,16 @@ def test_get_available_packages(): assert all(package not in REGULAR_DOC_PACKAGES for package in get_available_packages()) +def test_expand_all_provider_packages(): + assert len(expand_all_provider_packages(("all-providers",))) > 70 + + +def test_expand_all_provider_packages_deduplicate_with_other_packages(): + assert len(expand_all_provider_packages(("all-providers",))) == len( + expand_all_provider_packages(("all-providers", "amazon", "google")) + ) + + def test_get_available_packages_include_non_provider_doc_packages(): all_packages_including_regular_docs = get_available_packages(include_non_provider_doc_packages=True) for package in REGULAR_DOC_PACKAGES: @@ -41,3 +69,132 @@ def test_get_available_packages_include_non_provider_doc_packages_and_all_provid assert package in all_packages_including_regular_docs assert "all-providers" in all_packages_including_regular_docs + + +def test_get_short_package_name(): + assert get_short_package_name("apache-airflow") == "apache-airflow" + assert get_short_package_name("docker-stack") == "docker-stack" + assert get_short_package_name("apache-airflow-providers-amazon") == "amazon" + assert get_short_package_name("apache-airflow-providers-apache-hdfs") 
== "apache.hdfs" + + +def test_error_on_get_short_package_name(): + with pytest.raises(ValueError, match="Invalid provider name"): + get_short_package_name("wrong-provider-name") + + +def test_get_long_package_name(): + assert get_long_package_name("apache-airflow") == "apache-airflow" + assert get_long_package_name("docker-stack") == "docker-stack" + assert get_long_package_name("amazon") == "apache-airflow-providers-amazon" + assert get_long_package_name("apache.hdfs") == "apache-airflow-providers-apache-hdfs" + + +def test_get_provider_requirements(): + # update me when asana dependencies change + assert get_provider_requirements("asana") == ["apache-airflow>=2.5.0", "asana>=0.10,<4.0.0"] + + +def test_get_removed_providers(): + # Modify it every time we schedule provider for removal or remove it + assert ["qubole"] == get_removed_provider_ids() + + +def test_get_suspended_provider_ids(): + # Modify it every time we suspend/resume provider + assert ["qubole"] == get_suspended_provider_ids() + + +def test_get_suspended_provider_folders(): + # Modify it every time we suspend/resume provider + assert ["qubole"] == get_suspended_provider_folders() + + +@pytest.mark.parametrize( + "short_packages, filters, long_packages", + [ + (("amazon",), (), ("apache-airflow-providers-amazon",)), + (("apache.hdfs",), (), ("apache-airflow-providers-apache-hdfs",)), + (("amazon",), (), ("apache-airflow-providers-amazon",)), + ( + ("apache.hdfs",), + ("apache-airflow-providers-amazon",), + ("apache-airflow-providers-amazon", "apache-airflow-providers-apache-hdfs"), + ), + ( + ("apache.hdfs",), + ("apache-airflow-providers-ama*",), + ("apache-airflow-providers-amazon", "apache-airflow-providers-apache-hdfs"), + ), + ], +) +def test_find_matching_long_package_name( + short_packages: tuple[str, ...], filters: tuple[str, ...], long_packages: tuple[str, ...] 
+): + assert find_matching_long_package_names(short_packages=short_packages, filters=filters) == long_packages + + +def test_find_matching_long_package_name_bad_filter(): + with pytest.raises(SystemExit, match=r"Some filters did not find any package: \['bad-filter-\*"): + find_matching_long_package_names(short_packages=(), filters=("bad-filter-*",)) + + +def test_get_source_package_path(): + assert get_source_package_path("apache.hdfs") == AIRFLOW_PROVIDERS_ROOT / "apache" / "hdfs" + + +def test_get_documentation_package_path(): + assert get_documentation_package_path("apache.hdfs") == DOCS_ROOT / "apache-airflow-providers-apache-hdfs" + + +def test_get_install_requirements(): + assert ( + get_install_requirements("asana", "").strip() + == """ + apache-airflow>=2.5.0 + asana>=0.10,<4.0.0 +""".strip() + ) + + +def test_get_package_extras(): + assert get_package_extras("google") == { + "amazon": ["apache-airflow-providers-amazon>=2.6.0"], + "apache.beam": ["apache-airflow-providers-apache-beam", "apache-beam[gcp]"], + "apache.cassandra": ["apache-airflow-providers-apache-cassandra"], + "cncf.kubernetes": ["apache-airflow-providers-cncf-kubernetes>=7.2.0"], + "common.sql": ["apache-airflow-providers-common-sql"], + "facebook": ["apache-airflow-providers-facebook>=2.2.0"], + "leveldb": ["plyvel"], + "microsoft.azure": ["apache-airflow-providers-microsoft-azure"], + "microsoft.mssql": ["apache-airflow-providers-microsoft-mssql"], + "mysql": ["apache-airflow-providers-mysql"], + "openlineage": ["apache-airflow-providers-openlineage"], + "oracle": ["apache-airflow-providers-oracle>=3.1.0"], + "postgres": ["apache-airflow-providers-postgres"], + "presto": ["apache-airflow-providers-presto"], + "salesforce": ["apache-airflow-providers-salesforce"], + "sftp": ["apache-airflow-providers-sftp"], + "ssh": ["apache-airflow-providers-ssh"], + "trino": ["apache-airflow-providers-trino"], + } + + +def test_get_provider_details(): + provider_details = get_provider_details("asana") + 
assert provider_details.provider_id == "asana" + assert provider_details.full_package_name == "airflow.providers.asana" + assert provider_details.pypi_package_name == "apache-airflow-providers-asana" + assert ( + provider_details.source_provider_package_path + == AIRFLOW_SOURCES_ROOT / "airflow" / "providers" / "asana" + ) + assert ( + provider_details.documentation_provider_package_path == DOCS_ROOT / "apache-airflow-providers-asana" + ) + assert "Asana" in provider_details.provider_description + assert len(provider_details.versions) > 11 + assert provider_details.excluded_python_versions == [] + assert provider_details.plugins == [] + assert provider_details.changelog_path == provider_details.source_provider_package_path / "CHANGELOG.rst" + assert not provider_details.removed diff --git a/dev/breeze/tests/test_provider_documentation.py b/dev/breeze/tests/test_provider_documentation.py new file mode 100644 index 0000000000000..cf1a1e3024bd1 --- /dev/null +++ b/dev/breeze/tests/test_provider_documentation.py @@ -0,0 +1,308 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from typing import Iterable + +import pytest + +from airflow_breeze.prepare_providers.provider_documentation import ( + Change, + _convert_git_changes_to_table, + _convert_pip_requirements_to_table, + _find_insertion_index_for_version, + _get_change_from_line, + _get_changes_classified, + _get_git_log_command, + _get_version_tag, + _verify_changelog_exists, +) +from airflow_breeze.utils.packages import get_pip_package_name, get_wheel_package_name +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT + +CHANGELOG_CONTENT = """ +Changelog +--------- + +5.0.0 +..... + +Breaking changes +~~~~~~~~~~~~~~~~ + +The ``offset`` parameter has been deprecated from ``list_jobs`` in favor of faster pagination with ``page_token`` similarly to `Databricks API `_. + +* ``Remove offset-based pagination from 'list_jobs' function in 'DatabricksHook' (#34926)`` + +4.7.0 +..... + +Features +~~~~~~~~ + +* ``Add operator to create jobs in Databricks (#35156)`` + +.. Below changes are excluded from the changelog. Move them to + appropriate section above if needed. Do not delete the lines(!): + * ``Prepare docs 3rd wave of Providers October 2023 (#35187)`` + * ``Pre-upgrade 'ruff==0.0.292' changes in providers (#35053)`` + * ``D401 Support - Providers: DaskExecutor to Github (Inclusive) (#34935)`` + +4.6.0 +..... + +.. note:: + This release of provider is only available for Airflow 2.5+ as explained in the + `Apache Airflow providers support policy `_. 
+ +""" + + +def test_find_insertion_index_append_to_found_changelog(): + index, append = _find_insertion_index_for_version(CHANGELOG_CONTENT.splitlines(), "5.0.0") + assert append + assert index == 13 + + +def test_find_insertion_index_insert_new_changelog(): + index, append = _find_insertion_index_for_version(CHANGELOG_CONTENT.splitlines(), "5.0.1") + assert not append + assert index == 3 + + +@pytest.mark.parametrize( + "version, provider_id, suffix, tag", + [ + ("1.0.1", "asana", "", "providers-asana/1.0.1"), + ("1.0.1", "asana", "rc1", "providers-asana/1.0.1rc1"), + ("1.0.1", "apache.hdfs", "beta1", "providers-apache-hdfs/1.0.1beta1"), + ], +) +def test_get_version_tag(version: str, provider_id: str, suffix: str, tag: str): + assert _get_version_tag(version, provider_id, suffix) == tag + + +@pytest.mark.parametrize( + "from_commit, to_commit, git_command", + [ + (None, None, ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "--", "."]), + ( + "from_tag", + None, + ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag", "--", "."], + ), + ( + "from_tag", + "to_tag", + ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag...to_tag", "--", "."], + ), + ], +) +def test_get_git_log_command(from_commit: str | None, to_commit: str | None, git_command: list[str]): + assert _get_git_log_command(from_commit, to_commit) == git_command + + +def test_get_git_log_command_wrong(): + with pytest.raises(ValueError, match=r"to_commit without from_commit"): + _get_git_log_command(None, "to_commit") + + +@pytest.mark.parametrize( + "provider_id, pip_package_name", + [ + ("asana", "apache-airflow-providers-asana"), + ("apache.hdfs", "apache-airflow-providers-apache-hdfs"), + ], +) +def test_get_pip_package_name(provider_id: str, pip_package_name: str): + assert get_pip_package_name(provider_id) == pip_package_name + + +@pytest.mark.parametrize( + "provider_id, wheel_package_name", + [ + ("asana", "apache_airflow_providers_asana"), 
+ ("apache.hdfs", "apache_airflow_providers_apache_hdfs"), + ], +) +def test_get_wheel_package_name(provider_id: str, wheel_package_name: str): + assert get_wheel_package_name(provider_id) == wheel_package_name + + +@pytest.mark.parametrize( + "line, version, change", + [ + ( + "LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` no pr", + "1.0.1", + Change( + full_hash="LONG_HASH_123144", + short_hash="SHORT_HASH", + date="2023-01-01", + version="1.0.1", + message="Description `with` no pr", + message_without_backticks="Description 'with' no pr", + pr=None, + ), + ), + ( + "LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12345)", + "1.0.1", + Change( + full_hash="LONG_HASH_123144", + short_hash="SHORT_HASH", + date="2023-01-01", + version="1.0.1", + message="Description `with` pr (#12345)", + message_without_backticks="Description 'with' pr (#12345)", + pr="12345", + ), + ), + ], +) +def test_get_change_from_line(line: str, version: str, change: Change): + assert _get_change_from_line(line, version) == change + + +@pytest.mark.parametrize( + "input, output, markdown, changes_len", + [ + ( + """ +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` no pr +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12345) + +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12346) + +""", + """ +1.0.1 +..... 
+ +Latest change: 2023-01-01 + +============================================ =========== ================================== +Commit Committed Subject +============================================ =========== ================================== +`SHORT_HASH `_ 2023-01-01 ``Description 'with' no pr`` +`SHORT_HASH `_ 2023-01-01 ``Description 'with' pr (#12345)`` +`SHORT_HASH `_ 2023-01-01 ``Description 'with' pr (#12346)`` +============================================ =========== ==================================""", + False, + 3, + ), + ( + """ +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` no pr +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12345) + +LONG_HASH_123144 SHORT_HASH 2023-01-01 Description `with` pr (#12346) + +""", + """ +| Commit | Committed | Subject | +|:-------------------------------------------|:------------|:---------------------------------| +| [SHORT_HASH](https://url/LONG_HASH_123144) | 2023-01-01 | `Description 'with' no pr` | +| [SHORT_HASH](https://url/LONG_HASH_123144) | 2023-01-01 | `Description 'with' pr (#12345)` | +| [SHORT_HASH](https://url/LONG_HASH_123144) | 2023-01-01 | `Description 'with' pr (#12346)` | +""", + True, + 3, + ), + ], +) +def test_convert_git_changes_to_table(input: str, output: str, markdown: bool, changes_len): + table, list_of_changes = _convert_git_changes_to_table( + version="1.0.1", changes=input, base_url="https://url/", markdown=markdown + ) + assert table.strip() == output.strip() + assert len(list_of_changes) == changes_len + assert list_of_changes[0].pr is None + assert list_of_changes[1].pr == "12345" + assert list_of_changes[2].pr == "12346" + + +@pytest.mark.parametrize( + "requirements, markdown, table", + [ + ( + ["apache-airflow>2.5.0"], + False, + """ +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>2.5.0`` +================== ================== +""", + ), + ( + ["apache-airflow>2.5.0"], + 
True, + """ +| PIP package | Version required | +|:-----------------|:-------------------| +| `apache-airflow` | `>2.5.0` | +""", + ), + ], +) +def test_convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool, table: str): + print(_convert_pip_requirements_to_table(requirements, markdown)) + assert _convert_pip_requirements_to_table(requirements, markdown).strip() == table.strip() + + +def test_verify_changelog_exists(): + assert ( + _verify_changelog_exists("asana") + == AIRFLOW_SOURCES_ROOT / "airflow" / "providers" / "asana" / "CHANGELOG.rst" + ) + + +@pytest.mark.parametrize( + "descriptions, with_breaking_changes, maybe_with_new_features," + "breaking_count, feature_count, bugfix_count, other_count", + [ + (["Added feature x"], True, True, 0, 1, 0, 0), + (["Added feature x"], False, False, 0, 0, 0, 1), + (["Breaking change in"], True, True, 1, 0, 0, 0), + (["Breaking change in", "Added feature y"], True, True, 1, 1, 0, 0), + (["Fix change in", "Breaking feature y"], False, True, 0, 0, 1, 1), + (["Fix change in", "Breaking feature y"], False, True, 0, 0, 1, 1), + ], +) +def test_classify_changes_automatically( + descriptions: list[str], + with_breaking_changes: bool, + maybe_with_new_features: bool, + breaking_count: int, + feature_count: int, + bugfix_count: int, + other_count: int, +): + """Test simple automated classification of the changes based on their single-line description.""" + changes = [ + _get_change_from_line(f"LONG SHORT 2023-12-01 {description}", version="0.1.0") + for description in descriptions + ] + classified_changes = _get_changes_classified( + changes, with_breaking_changes=with_breaking_changes, maybe_with_new_features=maybe_with_new_features + ) + assert len(classified_changes.breaking_changes) == breaking_count + assert len(classified_changes.features) == feature_count + assert len(classified_changes.fixes) == bugfix_count + assert len(classified_changes.other) == other_count diff --git 
a/dev/breeze/tests/test_versions.py b/dev/breeze/tests/test_versions.py new file mode 100644 index 0000000000000..8ecf1118fe11f --- /dev/null +++ b/dev/breeze/tests/test_versions.py @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pytest + +from airflow_breeze.utils.versions import strip_leading_zeros_from_version + + +@pytest.mark.parametrize( + "version,stripped_version", [("3.4.0", "3.4.0"), ("13.04.05", "13.4.5"), ("0003.00004.000005", "3.4.5")] +) +def test_strip_leading_versions(version: str, stripped_version): + assert stripped_version == strip_leading_zeros_from_version(version) diff --git a/dev/provider_packages/prepare_provider_packages.py b/dev/provider_packages/prepare_provider_packages.py index 05789e65e9a76..f018c30c21a73 100755 --- a/dev/provider_packages/prepare_provider_packages.py +++ b/dev/provider_packages/prepare_provider_packages.py @@ -20,12 +20,10 @@ """Setup.py for the Provider packages of Airflow project.""" from __future__ import annotations -import difflib import glob import json import logging import os -import random import re import shutil import subprocess @@ -34,7 +32,6 @@ import textwrap from collections import namedtuple from contextlib 
import contextmanager -from copy import deepcopy from datetime import datetime, timedelta from enum import Enum from functools import lru_cache @@ -200,19 +197,7 @@ def cli(): is_flag=True, help="Print verbose information about performed steps", ) -option_force = click.option( - "--force", - is_flag=True, - help="Forces regeneration of already generated documentation", -) -option_base_branch = click.option( - "--base-branch", - type=str, - default="main", -) argument_package_id = click.argument("package_id") -argument_changelog_files = click.argument("changelog_files", nargs=-1) -argument_package_ids = click.argument("package_ids", nargs=-1) @contextmanager @@ -290,7 +275,7 @@ def get_pip_package_name(provider_package_id: str) -> str: def get_wheel_package_name(provider_package_id: str) -> str: """ - Returns PIP package name for the package id. + Returns Wheel package name for the package id. :param provider_package_id: id of the package :return: the name of pip package @@ -298,33 +283,6 @@ def get_wheel_package_name(provider_package_id: str) -> str: return "apache_airflow_providers_" + provider_package_id.replace(".", "_") -def get_long_description(provider_package_id: str) -> str: - """ - Gets long description of the package. 
- - :param provider_package_id: package id - :return: content of the description: README file - """ - package_folder = get_target_providers_package_folder(provider_package_id) - readme_file = os.path.join(package_folder, "README.md") - if not os.path.exists(readme_file): - return "" - with open(readme_file, encoding="utf-8") as file: - readme_contents = file.read() - copying = True - long_description = "" - for line in readme_contents.splitlines(keepends=True): - if line.startswith("**Table of contents**"): - copying = False - continue - header_line = "## Provider package" - if line.startswith(header_line): - copying = True - if copying: - long_description += line - return long_description - - def get_install_requirements(provider_package_id: str, version_suffix: str) -> str: """ Returns install requirements for the package. @@ -464,56 +422,6 @@ def get_change_from_line(line: str, version: str): ) -def convert_git_changes_to_table( - version: str, changes: str, base_url: str, markdown: bool = True -) -> tuple[str, list[Change]]: - """ - Converts list of changes from its string form to markdown/RST table and array of change information - - The changes are in the form of multiple lines where each line consists of: - FULL_COMMIT_HASH SHORT_COMMIT_HASH COMMIT_DATE COMMIT_SUBJECT - - The subject can contain spaces but one of the preceding values can, so we can make split - 3 times on spaces to break it up. 
- :param version: Version from which the changes are - :param changes: list of changes in a form of multiple-line string - :param base_url: base url for the commit URL - :param markdown: if True, Markdown format is used else rst - :return: formatted table + list of changes (starting from the latest) - """ - from tabulate import tabulate - - lines = changes.splitlines() - headers = ["Commit", "Committed", "Subject"] - table_data = [] - changes_list: list[Change] = [] - for line in lines: - if line == "": - continue - change = get_change_from_line(line, version) - table_data.append( - ( - f"[{change.short_hash}]({base_url}{change.full_hash})" - if markdown - else f"`{change.short_hash} <{base_url}{change.full_hash}>`_", - change.date, - f"`{change.message_without_backticks}`" - if markdown - else f"``{change.message_without_backticks}``", - ) - ) - changes_list.append(change) - header = "" - if not table_data: - return header, [] - table = tabulate(table_data, headers=headers, tablefmt="pipe" if markdown else "rst") - if not markdown: - header += f"\n\n{version}\n" + "." * len(version) + "\n\n" - release_date = table_data[0][1] - header += f"Latest change: {release_date}\n\n" - return header + table, changes_list - - def convert_pip_requirements_to_table(requirements: Iterable[str], markdown: bool = True) -> str: """ Converts PIP requirement list to a Markdown table. @@ -709,112 +617,6 @@ def make_current_directory_safe(verbose: bool): subprocess.check_call(safe_dir_add_command) -def make_sure_remote_apache_exists_and_fetch(git_update: bool, verbose: bool): - """Make sure that apache remote exist in git. - - We need to take a log from the apache repository - not locally. Also, the - local repo might be shallow, so we need to un-shallow it. 
- - This will: - * mark current directory as safe for ownership (it is run in the container) - * check if the remote exists and add if it does not - * check if the local repo is shallow, mark it to un-shallow in this case - * fetch from the remote including all tags and overriding local tags in case - they are set differently - - :param git_update: If the git remote already exists, should we try to update it - :param verbose: print verbose messages while fetching - """ - - make_current_directory_safe(verbose) - try: - check_remote_command = ["git", "remote", "get-url", HTTPS_REMOTE] - if verbose: - console.print(f"Running command: '{' '.join(check_remote_command)}'") - subprocess.check_call( - check_remote_command, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - - # Remote already exists, don't update it again! - if not git_update: - return - except subprocess.CalledProcessError as ex: - if ex.returncode == 128 or ex.returncode == 2: - remote_add_command = [ - "git", - "remote", - "add", - HTTPS_REMOTE, - "https://github.com/apache/airflow.git", - ] - if verbose: - console.print(f"Running command: '{' '.join(remote_add_command)}'") - try: - subprocess.check_call( - remote_add_command, - ) - except subprocess.CalledProcessError as ex: - console.print("[red]Error: when adding remote:[/]", ex) - sys.exit(128) - else: - raise - if verbose: - console.print("Fetching full history and tags from remote. ") - console.print("This might override your local tags!") - is_shallow_repo = ( - subprocess.check_output(["git", "rev-parse", "--is-shallow-repository"], stderr=subprocess.DEVNULL) - == "true" - ) - fetch_command = ["git", "fetch", "--tags", "--force", HTTPS_REMOTE] - if is_shallow_repo: - if verbose: - console.print( - "This will also un-shallow the repository, " - "making all history available and increasing storage!" 
- ) - fetch_command.append("--unshallow") - if verbose: - console.print(f"Running command: '{' '.join(fetch_command)}'") - try: - subprocess.check_call(fetch_command) - except subprocess.CalledProcessError as e: - console.print( - "[yellow]Error when fetching tags from remote. Your tags might not be refreshed. " - f'Please refresh the tags manually via {" ".join(fetch_command)}\n' - ) - console.print(f"[yellow]The error was: {e}") - - -def get_git_log_command( - verbose: bool, from_commit: str | None = None, to_commit: str | None = None -) -> list[str]: - """Get git command to run for the current repo from the current folder. - - The current directory should always be the package folder. - - :param verbose: whether to print verbose info while getting the command - :param from_commit: if present - base commit from which to start the log from - :param to_commit: if present - final commit which should be the start of the log - :return: git command to run - """ - git_cmd = [ - "git", - "log", - "--pretty=format:%H %h %cd %s", - "--date=short", - ] - if from_commit and to_commit: - git_cmd.append(f"{from_commit}...{to_commit}") - elif from_commit: - git_cmd.append(from_commit) - git_cmd.extend(["--", "."]) - if verbose: - console.print(f"Command to run: '{' '.join(git_cmd)}'") - return git_cmd - - def get_git_tag_check_command(tag: str) -> list[str]: """Get git command to check if tag exits. @@ -856,33 +658,6 @@ def get_generated_package_path(provider_package_id: str) -> str: return provider_package_path -def get_additional_package_info(provider_package_path: str) -> str: - """Returns additional info for the package. 
- - :param provider_package_path: path for the package - :return: additional information for the path (empty string if missing) - """ - additional_info_file_path = os.path.join(provider_package_path, "ADDITIONAL_INFO.md") - if os.path.isfile(additional_info_file_path): - with open(additional_info_file_path) as additional_info_file: - additional_info = additional_info_file.read() - - additional_info_lines = additional_info.splitlines(keepends=True) - result = "" - skip_comment = True - for line in additional_info_lines: - if line.startswith(" -->"): - skip_comment = False - elif not skip_comment: - result += line - return result - return "" - - -def get_package_pip_name(provider_package_id: str): - return f"apache-airflow-providers-{provider_package_id.replace('.', '-')}" - - def validate_provider_info_with_runtime_schema(provider_info: dict[str, Any]) -> None: """Validates provider info against the runtime schema. @@ -941,127 +716,6 @@ def get_version_tag(version: str, provider_package_id: str, version_suffix: str return f"providers-{provider_package_id.replace('.','-')}/{version}{version_suffix}" -def print_changes_table(changes_table): - syntax = Syntax(changes_table, "rst", theme="ansi_dark") - console.print(syntax) - - -def get_all_changes_for_package( - provider_package_id: str, - verbose: bool, - base_branch: str, - force: bool, -) -> tuple[bool, list[list[Change]] | Change | None, str]: - """Retrieves all changes for the package. 
- - :param provider_package_id: provider package id - :param verbose: whether to print verbose messages - :param base_branch: base branch to check changes in apache remote for changes - :param force: whether to force the check even if the tag exists - """ - provider_details = get_provider_details(provider_package_id) - current_version = provider_details.versions[0] - current_tag_no_suffix = get_version_tag(current_version, provider_package_id) - if verbose: - console.print(f"Checking if tag '{current_tag_no_suffix}' exist.") - if not force and not subprocess.call( - get_git_tag_check_command(current_tag_no_suffix), - cwd=provider_details.source_provider_package_path, - stderr=subprocess.DEVNULL, - ): - if verbose: - console.print(f"The tag {current_tag_no_suffix} exists.") - # The tag already exists - changes = subprocess.check_output( - get_git_log_command(verbose, f"{HEAD_OF_HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix), - cwd=provider_details.source_provider_package_path, - text=True, - ) - if changes: - provider_details = get_provider_details(provider_package_id) - doc_only_change_file = os.path.join( - provider_details.source_provider_package_path, ".latest-doc-only-change.txt" - ) - if os.path.exists(doc_only_change_file): - with open(doc_only_change_file) as f: - last_doc_only_hash = f.read().strip() - try: - changes_since_last_doc_only_check = subprocess.check_output( - get_git_log_command( - verbose, f"{HEAD_OF_HTTPS_REMOTE}/{base_branch}", last_doc_only_hash - ), - cwd=provider_details.source_provider_package_path, - text=True, - ) - if not changes_since_last_doc_only_check: - console.print() - console.print( - "[yellow]The provider has doc-only changes since the last release. Skipping[/]" - ) - # Returns 66 in case of doc-only changes - sys.exit(66) - if len(changes) > len(changes_since_last_doc_only_check): - # if doc-only was released after previous release - use it as starting point - # but if before - stay with the releases from last tag. 
- changes = changes_since_last_doc_only_check - except subprocess.CalledProcessError: - # ignore when the commit mentioned as last doc-only change is obsolete - pass - - console.print(f"[yellow]The provider {provider_package_id} has changes since last release[/]") - console.print() - console.print(f"[bright_blue]Provider: {provider_package_id}[/]\n") - changes_table, array_of_changes = convert_git_changes_to_table( - f"NEXT VERSION AFTER + {provider_details.versions[0]}", - changes, - base_url="https://github.com/apache/airflow/commit/", - markdown=False, - ) - print_changes_table(changes_table) - return False, array_of_changes[0], changes_table - else: - console.print(f"No changes for {provider_package_id}") - return False, None, "" - if verbose: - console.print("The tag does not exist. ") - if len(provider_details.versions) == 1: - console.print( - f"The provider '{provider_package_id}' has never been released but it is ready to release!\n" - ) - else: - console.print(f"New version of the '{provider_package_id}' package is ready to be released!\n") - next_version_tag = f"{HEAD_OF_HTTPS_REMOTE}/{base_branch}" - changes_table = "" - current_version = provider_details.versions[0] - list_of_list_of_changes: list[list[Change]] = [] - for version in provider_details.versions[1:]: - version_tag = get_version_tag(version, provider_package_id) - changes = subprocess.check_output( - get_git_log_command(verbose, next_version_tag, version_tag), - cwd=provider_details.source_provider_package_path, - text=True, - ) - changes_table_for_version, array_of_changes_for_version = convert_git_changes_to_table( - current_version, changes, base_url="https://github.com/apache/airflow/commit/", markdown=False - ) - changes_table += changes_table_for_version - list_of_list_of_changes.append(array_of_changes_for_version) - next_version_tag = version_tag - current_version = version - changes = subprocess.check_output( - get_git_log_command(verbose, next_version_tag), - 
cwd=provider_details.source_provider_package_path, - text=True, - ) - changes_table_for_version, array_of_changes_for_version = convert_git_changes_to_table( - current_version, changes, base_url="https://github.com/apache/airflow/commit/", markdown=False - ) - changes_table += changes_table_for_version - if verbose: - print_changes_table(changes_table) - return True, list_of_list_of_changes or None, changes_table - - def get_provider_details(provider_package_id: str) -> ProviderPackageDetails: provider_info = get_provider_info_from_provider_yaml(provider_package_id) plugins: list[PluginInfo] = [] @@ -1112,9 +766,6 @@ def get_provider_jinja_context( pip_requirements_table_rst = convert_pip_requirements_to_table( get_provider_requirements(provider_details.provider_package_id), markdown=False ) - cross_providers_dependencies_table = convert_cross_package_dependencies_to_table( - cross_providers_dependencies - ) cross_providers_dependencies_table_rst = convert_cross_package_dependencies_to_table( cross_providers_dependencies, markdown=False ) @@ -1143,9 +794,6 @@ def get_provider_jinja_context( "RELEASE": current_release_version, "RELEASE_NO_LEADING_ZEROS": release_version_no_leading_zeros, "VERSION_SUFFIX": version_suffix or "", - "ADDITIONAL_INFO": get_additional_package_info( - provider_package_path=provider_details.source_provider_package_path - ), "CROSS_PROVIDERS_DEPENDENCIES": cross_providers_dependencies, "PIP_REQUIREMENTS": get_provider_requirements(provider_details.provider_package_id), "PROVIDER_TYPE": "Provider", @@ -1156,7 +804,6 @@ def get_provider_jinja_context( ), "SETUP_REQUIREMENTS": get_setup_requirements(), "EXTRAS_REQUIREMENTS": get_package_extras(provider_package_id=provider_details.provider_package_id), - "CROSS_PROVIDERS_DEPENDENCIES_TABLE": cross_providers_dependencies_table, "CROSS_PROVIDERS_DEPENDENCIES_TABLE_RST": cross_providers_dependencies_table_rst, "PIP_REQUIREMENTS_TABLE": pip_requirements_table, "PIP_REQUIREMENTS_TABLE_RST": 
pip_requirements_table_rst, @@ -1185,205 +832,6 @@ def prepare_readme_file(context): readme_file.write(readme_content) -def confirm(message: str, answer: str | None = None) -> bool: - """Ask user to confirm (case-insensitive). - - :param message: message to display - :param answer: force answer if set - :return: True if the answer is any form of y/yes. Exits with 65 exit code if - any form of q/quit is chosen. - """ - given_answer = answer.lower() if answer is not None else "" - while given_answer not in ["y", "n", "q", "yes", "no", "quit"]: - console.print(f"[yellow]{message}[y/n/q]?[/] ", end="") - try: - given_answer = input("").lower() - except KeyboardInterrupt: - given_answer = "q" - if given_answer.lower() in ["q", "quit"]: - # Returns 65 in case user decided to quit - sys.exit(65) - return given_answer in ["y", "yes"] - - -class TypeOfChange(Enum): - DOCUMENTATION = "d" - BUGFIX = "b" - FEATURE = "f" - BREAKING_CHANGE = "x" - SKIP = "s" - - -def get_type_of_changes(answer: str | None) -> TypeOfChange: - """Ask user to specify type of changes (case-insensitive). - - :return: Type of change. 
- """ - given_answer = "" - if answer and answer.lower() in ["yes", "y"]: - # Simulate all possible non-terminal answers - return random.choice( - [ - TypeOfChange.DOCUMENTATION, - TypeOfChange.BUGFIX, - TypeOfChange.FEATURE, - TypeOfChange.BREAKING_CHANGE, - TypeOfChange.SKIP, - ] - ) - while given_answer not in [*[t.value for t in TypeOfChange], "q"]: - console.print( - "[yellow]Type of change (d)ocumentation, (b)ugfix, (f)eature, (x)breaking " - "change, (s)kip, (q)uit [d/b/f/x/s/q]?[/] ", - end="", - ) - try: - given_answer = input("").lower() - except KeyboardInterrupt: - given_answer = "q" - if given_answer == "q": - # Returns 65 in case user decided to quit - sys.exit(65) - return TypeOfChange(given_answer) - - -def mark_latest_changes_as_documentation_only(provider_package_id: str, latest_change: Change): - provider_details = get_provider_details(provider_package_id=provider_package_id) - console.print( - f"Marking last change: {latest_change.short_hash} and all above changes since the last release " - "as doc-only changes!" 
- ) - with open( - os.path.join(provider_details.source_provider_package_path, ".latest-doc-only-change.txt"), "w" - ) as f: - f.write(latest_change.full_hash + "\n") - # exit code 66 marks doc-only change marked - sys.exit(66) - - -def add_new_version(type_of_change: TypeOfChange, provider_package_id: str): - provider_details = get_provider_details(provider_package_id) - version = provider_details.versions[0] - v = semver.VersionInfo.parse(version) - if type_of_change == TypeOfChange.BREAKING_CHANGE: - v = v.bump_major() - elif type_of_change == TypeOfChange.FEATURE: - v = v.bump_minor() - elif type_of_change == TypeOfChange.BUGFIX: - v = v.bump_patch() - provider_yaml_path = Path(get_source_package_path(provider_package_id)) / "provider.yaml" - original_text = provider_yaml_path.read_text() - new_text = re.sub(r"versions:", f"versions:\n - {v}", original_text, 1) - provider_yaml_path.write_text(new_text) - console.print() - console.print(f"[bright_blue]Bumped version to {v}") - - -def update_release_notes( - provider_package_id: str, - version_suffix: str, - force: bool, - verbose: bool, - answer: str | None, - base_branch: str, - regenerate_missing_docs: bool, -) -> bool: - """Updates generated files. - - This includes the readme, changes, and/or setup.cfg/setup.py/manifest.in/provider_info). - - :param provider_package_id: id of the package - :param version_suffix: version suffix corresponding to the version in the code - :param force: regenerate already released documentation - :param verbose: whether to print verbose messages - :param answer: force answer to question if set. 
- :param base_branch: base branch to check changes in apache remote for changes - :param regenerate_missing_docs: whether to regenerate missing docs - :returns False if the package should be skipped, True if everything generated properly - """ - verify_provider_package(provider_package_id) - proceed, latest_change, changes = get_all_changes_for_package( - provider_package_id, verbose, base_branch, force - ) - if not force: - if proceed: - if not confirm("Provider marked for release. Proceed", answer=answer): - return False - elif not latest_change: - console.print() - console.print( - f"[yellow]Provider: {provider_package_id} - skipping documentation generation. No changes![/]" - ) - console.print() - return False - else: - type_of_change = get_type_of_changes(answer=answer) - if type_of_change == TypeOfChange.DOCUMENTATION: - if isinstance(latest_change, Change): - mark_latest_changes_as_documentation_only(provider_package_id, latest_change) - else: - raise ValueError( - "Expected only one change to be present to mark changes " - f"in provider {provider_package_id} as docs-only. " - f"Received {len(latest_change)}." 
- ) - elif type_of_change == TypeOfChange.SKIP: - return False - elif type_of_change in [TypeOfChange.BUGFIX, TypeOfChange.FEATURE, TypeOfChange.BREAKING_CHANGE]: - add_new_version(type_of_change, provider_package_id) - proceed, latest_change, changes = get_all_changes_for_package( - provider_package_id, verbose, base_branch, force - ) - provider_details = get_provider_details(provider_package_id) - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - jinja_context = get_provider_jinja_context( - provider_info=provider_info, - provider_details=provider_details, - current_release_version=provider_details.versions[0], - version_suffix=version_suffix, - ) - jinja_context["DETAILED_CHANGES_RST"] = changes - jinja_context["DETAILED_CHANGES_PRESENT"] = bool(changes) - errors = False - if not update_changelog_rst( - jinja_context, - provider_package_id, - provider_details.documentation_provider_package_path, - regenerate_missing_docs, - ): - errors = True - if not force: - if not update_commits_rst( - jinja_context, - provider_package_id, - provider_details.documentation_provider_package_path, - regenerate_missing_docs, - ): - errors = True - if errors: - console.print("[red]There were errors when generating documentation[/]") - sys.exit(1) - return True - - -def update_min_airflow_version(provider_package_id: str, version_suffix: str): - provider_details = get_provider_details(provider_package_id) - provider_info = get_provider_info_from_provider_yaml(provider_package_id) - jinja_context = get_provider_jinja_context( - provider_info=provider_info, - provider_details=provider_details, - current_release_version=provider_details.versions[0], - version_suffix=version_suffix, - ) - generate_init_py_file_for_provider( - context=jinja_context, - target_path=provider_details.source_provider_package_path, - ) - replace_min_airflow_version_in_provider_yaml( - context=jinja_context, target_path=provider_details.source_provider_package_path - ) - - def 
update_setup_files( provider_package_id: str, version_suffix: str, @@ -1439,29 +887,6 @@ def replace_content(file_path, old_text, new_text, provider_package_id): ) -def update_index_rst( - context: dict[str, Any], - provider_package_id: str, - target_path: Path, -): - index_update = render_template( - template_name="PROVIDER_INDEX", context=context, extension=".rst", keep_trailing_newline=True - ) - index_file_path = os.path.join(target_path, "index.rst") - old_text = "" - if os.path.isfile(index_file_path): - with open(index_file_path) as readme_file_read: - old_text = readme_file_read.read() - new_text = deepcopy(old_text) - lines = old_text.splitlines(keepends=False) - for index, line in enumerate(lines): - if AUTOMATICALLY_GENERATED_MARKER in line: - new_text = "\n".join(lines[:index]) - new_text += "\n" + AUTOMATICALLY_GENERATED_CONTENT + "\n" - new_text += index_update - replace_content(index_file_path, old_text, new_text, provider_package_id) - - # Taken from pygrep hooks we are using in pre-commit # https://github.com/pre-commit/pygrep-hooks/blob/main/.pre-commit-hooks.yaml BACKTICKS_CHECK = re.compile(r"^(?! 
).*(^| )`[^`]+`([^_]|$)", re.MULTILINE) @@ -1537,71 +962,6 @@ def _update_file( return True -def update_changelog_rst( - context: dict[str, Any], - provider_package_id: str, - target_path: Path, - regenerate_missing_docs: bool, -) -> bool: - return _update_file( - context=context, - template_name="PROVIDER_CHANGELOG", - extension=".rst", - file_name="changelog.rst", - provider_package_id=provider_package_id, - target_path=target_path, - regenerate_missing_docs=regenerate_missing_docs, - ) - - -def update_commits_rst( - context: dict[str, Any], - provider_package_id: str, - target_path: Path, - regenerate_missing_docs: bool, -) -> bool: - return _update_file( - context=context, - template_name="PROVIDER_COMMITS", - extension=".rst", - file_name="commits.rst", - provider_package_id=provider_package_id, - target_path=target_path, - regenerate_missing_docs=regenerate_missing_docs, - ) - - -def replace_min_airflow_version_in_provider_yaml( - context: dict[str, Any], - target_path: str, -): - provider_yaml_path = os.path.join(target_path, "provider.yaml") - with open(provider_yaml_path) as provider_yaml_file: - provider_yaml_txt = provider_yaml_file.read() - provider_yaml_txt = re.sub( - r" - apache-airflow>=.*", f" - apache-airflow>={context['MIN_AIRFLOW_VERSION']}", provider_yaml_txt - ) - with open(provider_yaml_path, "w") as provider_yaml_file: - provider_yaml_file.write(provider_yaml_txt) - - -def generate_init_py_file_for_provider( - context: dict[str, Any], - target_path: str, -): - init_py_content = black_format( - render_template( - template_name="PROVIDER__INIT__PY", - context=context, - extension=".py", - keep_trailing_newline=True, - ) - ) - init_py_path = os.path.join(target_path, "__init__.py") - with open(init_py_path, "w") as init_py_file: - init_py_file.write(init_py_content) - - @lru_cache(maxsize=None) def black_mode() -> Mode: config = parse_pyproject_toml(os.path.join(AIRFLOW_SOURCES_ROOT_PATH, "pyproject.toml")) @@ -1755,55 +1115,6 @@ def 
list_providers_packages(): console.print(provider) -@cli.command() -@option_version_suffix -@option_git_update -@argument_package_id -@option_force -@option_verbose -@option_base_branch -@click.option( - "-a", - "--answer", - type=click.Choice(["y", "n", "q", "yes", "no", "quit"]), - help="Force answer to questions.", - envvar="ANSWER", -) -def update_package_documentation( - version_suffix: str, - git_update: bool, - answer: str | None, - package_id: str, - force: bool, - verbose: bool, - base_branch: str, -): - """Updates package documentation. - - See `list-providers-packages` subcommand for the possible PACKAGE_ID values. - """ - provider_package_id = package_id - verify_provider_package(provider_package_id) - with with_group(f"Update release notes for package '{provider_package_id}' "): - console.print("Updating documentation for the latest release version.") - make_sure_remote_apache_exists_and_fetch(git_update, verbose) - only_min_version_upgrade = os.environ.get("ONLY_MIN_VERSION_UPDATE", "false").lower() == "true" - regenerate_missing_docs = os.environ.get("REGENERATE_MISSING_DOCS", "false").lower() == "true" - if not only_min_version_upgrade: - if not update_release_notes( - provider_package_id, - version_suffix, - force=force or regenerate_missing_docs, - verbose=verbose, - answer=answer, - base_branch=base_branch, - regenerate_missing_docs=regenerate_missing_docs, - ): - # Returns 64 in case of skipped package - sys.exit(64) - update_min_airflow_version(provider_package_id=provider_package_id, version_suffix=version_suffix) - - def tag_exists_for_version(provider_package_id: str, current_tag: str, verbose: bool): provider_details = get_provider_details(provider_package_id) if verbose: @@ -1850,7 +1161,6 @@ def generate_setup_files( def get_current_tag(provider_package_id: str, suffix: str, git_update: bool, verbose: bool): verify_provider_package(provider_package_id) - make_sure_remote_apache_exists_and_fetch(git_update, verbose) provider_info = 
get_provider_info_from_provider_yaml(provider_package_id) versions: list[str] = provider_info["versions"] current_version = versions[0] @@ -1950,201 +1260,6 @@ def build_provider_packages( shutil.rmtree(tmp_dist_dir, ignore_errors=True) -def find_insertion_index_for_version(content: list[str], version: str) -> tuple[int, bool]: - """Finds insertion index for the specified version from the .rst changelog content. - - :param content: changelog split into separate lines - :param version: version to look for - - :return: A 2-tuple. The first item indicates the insertion index, while the - second is a boolean indicating whether to append (False) or insert (True) - to the changelog. - """ - changelog_found = False - skip_next_line = False - index = 0 - for index, line in enumerate(content): - if not changelog_found and line.strip() == version: - changelog_found = True - skip_next_line = True - elif not skip_next_line and line and all(char == "." for char in line): - return index - 2, changelog_found - else: - skip_next_line = False - return index, changelog_found - - -class ClassifiedChanges(NamedTuple): - """Stores lists of changes classified automatically""" - - fixes: list[Change] = [] - features: list[Change] = [] - breaking_changes: list[Change] = [] - other: list[Change] = [] - - -def get_changes_classified(changes: list[Change]) -> ClassifiedChanges: - """Pre-classifies changes based on commit message, it's wildly guessing now, - - However, if we switch to semantic commits, it could be automated. This list - is supposed to be manually reviewed and re-classified by release manager - anyway. 
- - :param changes: list of changes - :return: list of changes classified semi-automatically to the fix/feature/breaking/other buckets - """ - classified_changes = ClassifiedChanges() - for change in changes: - if "fix" in change.message.lower(): - classified_changes.fixes.append(change) - elif "add" in change.message.lower(): - classified_changes.features.append(change) - elif "breaking" in change.message.lower(): - classified_changes.breaking_changes.append(change) - else: - classified_changes.other.append(change) - return classified_changes - - -@cli.command() -@argument_package_id -@option_base_branch -@option_verbose -def update_changelog(package_id: str, base_branch: str, verbose: bool): - """Updates changelog for the provider.""" - if _update_changelog(package_id, base_branch, verbose, True): - sys.exit(64) - - -def _update_changelog(package_id: str, base_branch: str, verbose: bool, force: bool) -> bool: - """Internal update changelog method. - - :param package_id: package id - :param base_branch: base branch to check changes in apache remote for changes - :param verbose: verbose flag - :return: true if package is skipped - """ - with with_group("Updates changelog for last release"): - verify_provider_package(package_id) - provider_details = get_provider_details(package_id) - provider_info = get_provider_info_from_provider_yaml(package_id) - current_release_version = provider_details.versions[0] - jinja_context = get_provider_jinja_context( - provider_info=provider_info, - provider_details=provider_details, - current_release_version=current_release_version, - version_suffix="", - ) - changelog_path = os.path.join(provider_details.source_provider_package_path, "CHANGELOG.rst") - proceed, changes, _ = get_all_changes_for_package(package_id, verbose, base_branch, force) - if not proceed: - if force: - console.print( - f"[info]The provider {package_id} is not being release but we regenerate docs for it " - f"(except commits).[/]" - ) - else: - console.print( - 
f"[yellow]The provider {package_id} is not being released. Skipping the package.[/]" - ) - return True - if os.environ.get("REGENERATE_MISSING_DOCS", "false").lower() == "true": - console.print("[info]REGENERATE_MISSING_DOCS is set to true, skipping changelog update[/]") - else: - generate_new_changelog(package_id, provider_details, changelog_path, changes) - console.print() - console.print(f"Update index.rst for {package_id}") - console.print() - update_index_rst(jinja_context, package_id, provider_details.documentation_provider_package_path) - return False - - -def generate_new_changelog(package_id, provider_details, changelog_path, changes): - latest_version = provider_details.versions[0] - with open(changelog_path) as changelog: - current_changelog = changelog.read() - current_changelog_lines = current_changelog.splitlines() - insertion_index, append = find_insertion_index_for_version(current_changelog_lines, latest_version) - if append: - if not changes: - console.print( - f"[green]The provider {package_id} changelog for `{latest_version}` " - "has first release. Not updating the changelog.[/]" - ) - return - new_changes = [ - change for change in changes[0] if change.pr and "(#" + change.pr + ")" not in current_changelog - ] - if not new_changes: - console.print( - f"[green]The provider {package_id} changelog for `{latest_version}` " - "has no new changes. Not updating the changelog.[/]" - ) - return - context = {"new_changes": new_changes} - generated_new_changelog = render_template( - template_name="UPDATE_CHANGELOG", context=context, extension=".rst" - ) - else: - if changes: - classified_changes = get_changes_classified(changes[0]) - else: - # change log exist but without version 1.0.0 entry - classified_changes = None - - context = { - "version": latest_version, - "version_header": "." 
* len(latest_version), - "classified_changes": classified_changes, - } - generated_new_changelog = render_template( - template_name="CHANGELOG", context=context, extension=".rst" - ) - new_changelog_lines = current_changelog_lines[0:insertion_index] - new_changelog_lines.extend(generated_new_changelog.splitlines()) - new_changelog_lines.extend(current_changelog_lines[insertion_index:]) - diff = "\n".join(difflib.context_diff(current_changelog_lines, new_changelog_lines, n=5)) - syntax = Syntax(diff, "diff") - console.print(syntax) - if not append: - console.print( - f"[green]The provider {package_id} changelog for `{latest_version}` " - "version is missing. Generating fresh changelog.[/]" - ) - else: - console.print( - f"[green]Appending the provider {package_id} changelog for `{latest_version}` version.[/]" - ) - with open(changelog_path, "w") as changelog: - changelog.write("\n".join(new_changelog_lines)) - changelog.write("\n") - - -def get_package_from_changelog(changelog_path: str): - folder = Path(changelog_path).parent - package = "" - separator = "" - while not os.path.basename(folder) == "providers": - package = os.path.basename(folder) + separator + package - separator = "." 
- folder = Path(folder).parent - return package - - -@cli.command() -@argument_changelog_files -@option_git_update -@option_base_branch -@option_verbose -def update_changelogs(changelog_files: list[str], git_update: bool, base_branch: str, verbose: bool): - """Updates changelogs for multiple packages.""" - if git_update: - make_sure_remote_apache_exists_and_fetch(git_update, verbose) - for changelog_file in changelog_files: - package_id = get_package_from_changelog(changelog_file) - _update_changelog(package_id=package_id, base_branch=base_branch, verbose=verbose, force=True) - - if __name__ == "__main__": # The cli exit code is: # * 0 in case of success diff --git a/images/breeze/output_release-management_prepare-provider-documentation.svg b/images/breeze/output_release-management_prepare-provider-documentation.svg index b468885881cee..7643439f991b3 100644 --- a/images/breeze/output_release-management_prepare-provider-documentation.svg +++ b/images/breeze/output_release-management_prepare-provider-documentation.svg @@ -1,4 +1,4 @@ - +