Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
DEFAULT_PYTHON_MAJOR_MINOR_VERSION,
DOCKER_DEFAULT_PLATFORM,
MOUNT_SELECTED,
get_available_documentation_packages,
get_available_documentation_provider_packages,
)
from airflow_breeze.params.build_ci_params import BuildCiParams
from airflow_breeze.params.doc_build_params import DocBuildParams
Expand Down Expand Up @@ -331,7 +331,7 @@ def start_airflow(
@click.option(
"--package-filter",
help="List of packages to consider.",
type=NotVerifiedBetterChoice(get_available_documentation_packages()),
type=NotVerifiedBetterChoice(get_available_documentation_provider_packages()),
multiple=True,
)
@click.option(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
MOUNT_ALL,
MOUNT_SELECTED,
MULTI_PLATFORM,
get_available_documentation_packages,
get_available_documentation_provider_packages,
)
from airflow_breeze.params.shell_params import ShellParams
from airflow_breeze.utils.add_back_references import (
Expand Down Expand Up @@ -783,7 +783,7 @@ def alias_image(image_from: str, image_to: str):
@click.option(
"--package-filter",
help="List of packages to consider.",
type=NotVerifiedBetterChoice(get_available_documentation_packages()),
type=NotVerifiedBetterChoice(get_available_documentation_provider_packages()),
multiple=True,
)
@option_verbose
Expand Down Expand Up @@ -821,38 +821,52 @@ def publish_docs(
"-a",
"--airflow-site-directory",
envvar="AIRFLOW_SITE_DIRECTORY",
type=click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True),
help="Local directory path of cloned airflow-site repo.",
required=True,
)
@click.option(
"-g",
"--gen-type",
help="Type of back references to generate, supports: [airflow | providers | helm]",
type=str,
required=True,
show_default=True,
help="Type of back references to generate. Forced to providers if providers specified as arguments.",
type=BetterChoice(
[e.name for e in GenerationType],
),
default=GenerationType.airflow.name,
)
@argument_packages
@option_verbose
@option_dry_run
def add_back_references(
    airflow_site_directory: str,
    gen_type: str,
    packages: list[str],
):
    """Adds back references for documentation generated by build-docs and publish-docs"""
    # NOTE: the docstring above is kept as a single line on purpose - Click shows it as the
    # command help text, so rewording it changes the CLI output.
    #
    # airflow_site_directory: path of the cloned airflow-site repository.
    # gen_type: name of a GenerationType member selecting which docs get back references.
    # packages: provider packages given as arguments; when any are present the generation
    #           type is forced to "providers".
    site_path = Path(airflow_site_directory)
    if not site_path.is_dir():
        get_console().print(
            "\n[error]location pointed by airflow_site_dir is not valid. "
            "Provide the path of cloned airflow-site repo\n"
        )
        sys.exit(1)

    if packages and gen_type != GenerationType.providers.name:
        # The message is passed as a plain string: wrapping it in a list made the console
        # print the list's repr instead of the styled warning. The member's ``.name`` is used
        # so the text shows "providers" rather than "GenerationType.providers".
        get_console().print(
            f"[warning]Forcing gen type to "
            f"{GenerationType.providers.name} as some provider_packages are selected."
        )
        gen_type = GenerationType.providers.name

    # Looking up an unknown name raises KeyError, so the error report has to live in the
    # except branch - a membership test after the lookup can never be reached.
    try:
        gen = GenerationType[gen_type]
    except KeyError:
        get_console().print(
            "\n[error]invalid type of doc generation required. Pass one of [airflow | providers | helm]\n"
        )
        sys.exit(1)

    start_generating_back_references(gen, site_path, packages)


@release_management.command(
Expand Down
2 changes: 1 addition & 1 deletion dev/breeze/src/airflow_breeze/global_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def all_helm_test_packages() -> list[str]:
ALL_HISTORICAL_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"]


def get_available_documentation_packages(short_version=False) -> list[str]:
def get_available_documentation_provider_packages(short_version=False) -> list[str]:
provider_names: list[str] = list(json.loads(PROVIDER_DEPENDENCIES_JSON_FILE_PATH.read_text()).keys())
doc_provider_names = [provider_name.replace(".", "-") for provider_name in provider_names]
available_packages = [f"apache-airflow-providers-{doc_provider}" for doc_provider in doc_provider_names]
Expand Down
71 changes: 41 additions & 30 deletions dev/breeze/src/airflow_breeze/utils/add_back_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@

import enum
import os
import re
import tempfile
from pathlib import Path
from urllib.error import URLError
from urllib.request import urlopen

from rich import print

from airflow_breeze.global_constants import get_available_documentation_provider_packages

# Raw GitHub URL of the redirects.txt file for the "apache-airflow" docs on the main branch.
airflow_redirects_link = (
    "https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt"
)
Expand Down Expand Up @@ -89,24 +92,31 @@ def get_github_redirects_url(provider_name: str):
return f"https://raw.githubusercontent.com/apache/airflow/main/docs/{provider_name}/redirects.txt"


def get_provider_docs_path(docs_archive_path, provider_name: str):
    """Return ``docs_archive_path`` and ``provider_name`` joined by a single forward slash."""
    path_parts = (docs_archive_path, provider_name)
    return "/".join(path_parts)
def crete_redirect_html_if_not_exist(path: Path, content: str):
    """Write ``content`` to ``path`` unless something already exists there.

    Missing parent directories are created before writing. Both outcomes (created or
    skipped) are reported with ``print``.
    """
    if path.exists():
        # Never overwrite a redirect that is already in place.
        print(f"Skipping file:{path}, redirects already exist")
        return

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content)
    print(f"[green]Created back reference redirect: {path}")


def create_back_reference_html(back_ref_url: str, target_path: Path):
    """Create the redirect page(s) pointing at ``back_ref_url``.

    If ``target_path`` contains a ``/stable/`` component, the redirect is written into every
    sibling directory of ``stable`` whose name starts with an ``X.Y.Z`` version number, using
    the part of the path that follows ``/stable/``. Otherwise the redirect is written to
    ``target_path`` itself. Writing is delegated to ``crete_redirect_html_if_not_exist``.

    :param back_ref_url: URL the generated redirect page should send the reader to.
    :param target_path: location of the redirect page to create.
    """
    content = get_redirect_content(back_ref_url)

    # Dots are escaped so only a literal "." separates the numeric parts; an unescaped "."
    # would also accept names such as "12345" or "1a2b3" as versions.
    version_match = re.compile(r"[0-9]+\.[0-9]+\.[0-9]+")
    target_path_as_posix = target_path.as_posix()
    # partition() cuts at the first "/stable/" only, so a path that happens to contain the
    # marker more than once no longer fails on tuple unpacking.
    prefix, separator, postfix = target_path_as_posix.partition("/stable/")
    if separator:
        base_folder = Path(prefix)
        for folder in base_folder.iterdir():
            if folder.is_dir() and version_match.match(folder.name):
                crete_redirect_html_if_not_exist(folder / postfix, content)
    else:
        crete_redirect_html_if_not_exist(target_path, content)


def generate_back_references(link: str, base_path: str):
def generate_back_references(link: str, base_path: Path):
is_downloaded, file_name = download_file(link)
if not is_downloaded:
old_to_new: list[tuple[str, str]] = []
Expand All @@ -118,46 +128,47 @@ def generate_back_references(link: str, base_path: str):
old_to_new.append(("security.html", "security/security-model.html"))

versions = [f.path.split("/")[-1] for f in os.scandir(base_path) if f.is_dir()]

for version in versions:
print(f"Processing {base_path}, version: {version}")
versioned_provider_path = base_path + "/" + version
versioned_provider_path = base_path / version

for old, new in old_to_new:
# only if old file exists, add the back reference
if os.path.exists(versioned_provider_path + "/" + old):
if os.path.exists(versioned_provider_path / old):
split_new_path = new.split("/")
file_name = new.split("/")[-1]
dest_dir = versioned_provider_path + "/" + "/".join(split_new_path[: len(split_new_path) - 1])
dest_dir = versioned_provider_path.joinpath(*split_new_path[: len(split_new_path) - 1])

# finds relative path of old file with respect to new and handles case of different file
# names also
relative_path = os.path.relpath(old, new)
# remove one directory level because file path was used above
relative_path = relative_path.replace("../", "", 1)

os.makedirs(dest_dir, exist_ok=True)
dest_file_path = dest_dir + "/" + file_name
dest_file_path = dest_dir / file_name
create_back_reference_html(relative_path, dest_file_path)


def start_generating_back_references(
    gen_type: GenerationType, airflow_site_directory: Path, short_provider_package_ids: list[str]
):
    """Generate back references for the documentation set selected by ``gen_type``.

    All documentation is looked up under ``<airflow_site_directory>/docs-archive``:

    * ``GenerationType.airflow`` processes the ``apache-airflow`` folder,
    * ``GenerationType.helm`` processes the ``helm-chart`` folder,
    * ``GenerationType.providers`` processes one folder per provider package - the packages
      named by ``short_provider_package_ids`` when that list is non-empty, otherwise every
      package returned by ``get_available_documentation_provider_packages()``.

    Any other ``gen_type`` value results in no work being done.
    """
    # Either packages or gen_type should be provided
    archive_root = airflow_site_directory / "docs-archive"

    if gen_type == GenerationType.airflow:
        generate_back_references(airflow_redirects_link, archive_root / "apache-airflow")
        return

    if gen_type == GenerationType.helm:
        generate_back_references(helm_redirects_link, archive_root / "helm-chart")
        return

    if gen_type != GenerationType.providers:
        return

    if short_provider_package_ids:
        # Short ids use dots (e.g. "a.b"); documentation folders use dashes.
        provider_packages = []
        for short_id in short_provider_package_ids:
            provider_packages.append("apache-airflow-providers-" + short_id.replace(".", "-"))
    else:
        provider_packages = get_available_documentation_provider_packages()

    for provider_package in provider_packages:
        print(f"Processing airflow provider: {provider_package}")
        redirects_url = get_github_redirects_url(provider_package)
        generate_back_references(redirects_url, archive_root / provider_package)
4 changes: 2 additions & 2 deletions dev/breeze/src/airflow_breeze/utils/common_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
SINGLE_PLATFORMS,
START_AIRFLOW_ALLOWED_EXECUTORS,
START_AIRFLOW_DEFAULT_ALLOWED_EXECUTORS,
get_available_documentation_packages,
get_available_documentation_provider_packages,
)
from airflow_breeze.utils.custom_param_types import (
AnswerChoice,
Expand Down Expand Up @@ -448,7 +448,7 @@ def _set_default_from_parent(ctx: click.core.Context, option: click.core.Option,
"packages",
nargs=-1,
required=False,
type=BetterChoice(get_available_documentation_packages(short_version=True)),
type=BetterChoice(get_available_documentation_provider_packages(short_version=True)),
)
option_airflow_constraints_reference = click.option(
"--airflow-constraints-reference",
Expand Down
6 changes: 3 additions & 3 deletions dev/breeze/tests/test_global_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
# under the License.
from __future__ import annotations

from airflow_breeze.global_constants import get_available_documentation_packages
from airflow_breeze.global_constants import get_available_documentation_provider_packages

# Name prefixes accepted by the ``startswith`` check in the test below.
AVAILABLE_PACKAGES_STARTING_LIST = ("apache-airflow", "helm-chart", "docker-stack")


def test_get_available_packages():
    """Check that more than 70 packages are reported and each name has an accepted prefix."""
    reported_packages = get_available_documentation_provider_packages()
    assert len(reported_packages) > 70
    for package_name in reported_packages:
        assert package_name.startswith(AVAILABLE_PACKAGES_STARTING_LIST)
4 changes: 2 additions & 2 deletions images/breeze/output-commands-hash.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ prod-image:build:85bb45de92e86a55474238c3f04def92
prod-image:pull:76f1f27e6119928412abecf153fce4bb
prod-image:verify:bd2b78738a7c388dbad6076c41a9f906
prod-image:7b3369e182724fc155b3399c95d0fd73
release-management:add-back-references:8dab6a30076a55f2d31c6d22a94e0ccb
release-management:add-back-references:0d4eb5ed82e5381bc630b343ba605a72
release-management:create-minor-branch:a3834afc4aa5d1e98002c9e9e7a9931d
release-management:generate-constraints:b8fcaf8f0acd35ed5dbd48659bdb6485
release-management:generate-issue-content-providers:6b0d954cb6dbdec0da0a7988feec58f0
Expand All @@ -49,7 +49,7 @@ release-management:release-prod-images:cfbfe8b19fee91fd90718f98ef2fd078
release-management:start-rc-process:b27bd524dd3c89f50a747b60a7e892c1
release-management:start-release:419f48f6a4ff4457cb9de7ff496aebbe
release-management:verify-provider-packages:96dce5644aad6b37080acf77b3d8de3a
release-management:b6ee5d92b636083c7b127821afb71ea3
release-management:dc4897917210deefb4338f2038f8cb33
sbom:generate-provider-requirements:9abe53200ea5f40e0bf7c27f6087f27f
sbom:update-sbom-information:0ce56884e5f842e3e80d6619df1ccc64
sbom:935d041028e847d3faf763a95b51063e
Expand Down
Loading