From 95b02c68fd19f04a83a95f166d64457e4e47b109 Mon Sep 17 00:00:00 2001
From: Jarek Potiuk
Date: Sat, 22 Jul 2023 09:55:35 +0200
Subject: [PATCH 1/2] Introduce decorator to load providers configuration

A number of commands in Airflow rely on providers configuration being
loaded. This is a fast operation: it does not import any provider classes,
it only discovers entrypoints, runs them and parses their yaml
configuration, so it takes well under a second.

We cannot do it once in settings/config, because settings/config must be
pre-initialized without providers in order to bootstrap airflow. Therefore
we need to run it individually in each command that can be run with the
"airflow" entrypoint.

A decorator seems best suited to the job:

* easy to apply and hard to forget when you create another command and
  look at the existing ones
* nicely wraps the local ProvidersManager import

The "version" and "providers lazy-loaded" commands are exceptions, because
they are NOT supposed to initialize providers configuration.
---
 airflow/__main__.py | 11 ++--
 airflow/cli/cli_config.py | 6 +-
 airflow/cli/commands/celery_command.py | 5 ++
 airflow/cli/commands/config_command.py | 6 +-
 airflow/cli/commands/connection_command.py | 8 +++
 airflow/cli/commands/dag_command.py | 23 +++++++-
 airflow/cli/commands/dag_processor_command.py | 2 +
 airflow/cli/commands/db_command.py | 11 ++++
 airflow/cli/commands/info_command.py | 2 +
 airflow/cli/commands/internal_api_command.py | 2 +
 airflow/cli/commands/jobs_command.py | 2 +
 airflow/cli/commands/kerberos_command.py | 2 +
 airflow/cli/commands/kubernetes_command.py | 3 +
 airflow/cli/commands/plugins_command.py | 2 +
 airflow/cli/commands/pool_command.py | 7 +++
 airflow/cli/commands/provider_command.py | 17 +++++-
 airflow/cli/commands/role_command.py | 7 +++
 .../cli/commands/rotate_fernet_key_command.py | 2 +
 airflow/cli/commands/scheduler_command.py | 2 +
 airflow/cli/commands/standalone_command.py | 2 +
 airflow/cli/commands/sync_perm_command.py | 2 +
 airflow/cli/commands/task_command.py | 7 +++
 airflow/cli/commands/triggerer_command.py | 2 +
 airflow/cli/commands/user_command.py | 7 +++
 airflow/cli/commands/variable_command.py | 7 +++
 airflow/cli/commands/webserver_command.py | 2 +
 .../celery/executors/celery_executor_utils.py | 22 +++++++-
 .../executors/celery_kubernetes_executor.py | 7 +--
 .../celery/executors/default_celery.py | 24 +++++---
 .../utils/providers_configuration_loader.py | 55 +++++++++++++++++++
 tests/cli/test_cli_parser.py | 4 +-
 .../executors/test_celery_executor.py | 16 +++++-
 .../celery/executors/test_celery_executor.py | 8 ++-
 33 files changed, 250 insertions(+), 35 deletions(-)
 create mode 100644 airflow/utils/providers_configuration_loader.py

diff --git a/airflow/__main__.py b/airflow/__main__.py
index 893c6bde099de..c15f75b4b917e 100644
--- a/airflow/__main__.py
+++ b/airflow/__main__.py
@@ -46,12 +46,13 @@ def main():
     argcomplete.autocomplete(parser)
     args = parser.parse_args()
-    # Here we ensure that the default configuration is written if needed before running any command
-    # that might need it.
This used to be done during configuration initialization but having it - # in main ensures that it is not done during tests and other ways airflow imports are used - from airflow.configuration import write_default_airflow_configuration_if_needed + if args.subcommand not in ["lazy_loaded", "version"]: + # Here we ensure that the default configuration is written if needed before running any command + # that might need it. This used to be done during configuration initialization but having it + # in main ensures that it is not done during tests and other ways airflow imports are used + from airflow.configuration import write_default_airflow_configuration_if_needed - write_default_airflow_configuration_if_needed() + write_default_airflow_configuration_if_needed() args.func(args) diff --git a/airflow/cli/cli_config.py b/airflow/cli/cli_config.py index a57a8dae1dece..30e71302832c5 100644 --- a/airflow/cli/cli_config.py +++ b/airflow/cli/cli_config.py @@ -1872,9 +1872,9 @@ class GroupCommand(NamedTuple): args=(ARG_OUTPUT, ARG_VERBOSE), ), ActionCommand( - name="status", - help="Get information about provider initialization status", - func=lazy_load_command("airflow.cli.commands.provider_command.status"), + name="lazy-loaded", + help="Checks that provider configuration is lazy loaded", + func=lazy_load_command("airflow.cli.commands.provider_command.lazy_loaded"), args=(ARG_VERBOSE,), ), ) diff --git a/airflow/cli/commands/celery_command.py b/airflow/cli/commands/celery_command.py index 7adbea36e6852..af7dcbc6077a8 100644 --- a/airflow/cli/commands/celery_command.py +++ b/airflow/cli/commands/celery_command.py @@ -36,12 +36,14 @@ from airflow.configuration import conf from airflow.utils import cli as cli_utils from airflow.utils.cli import setup_locations, setup_logging +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.serve_logs import serve_logs WORKER_PROCESS_NAME = "worker" @cli_utils.action_cli +@providers_configuration_loaded def flower(args): """Starts Flower, Celery monitoring tool.""" # This needs to be imported locally to not trigger Providers Manager initialization @@ -103,6 +105,7 @@ def _serve_logs(skip_serve_logs: bool = False): @after_setup_logger.connect() +@providers_configuration_loaded def logger_setup_handler(logger, **kwargs): """ Reconfigure the logger. 
@@ -132,6 +135,7 @@ def filter(self, record): @cli_utils.action_cli +@providers_configuration_loaded def worker(args): """Starts Airflow Celery worker.""" # This needs to be imported locally to not trigger Providers Manager initialization @@ -239,6 +243,7 @@ def worker(args): @cli_utils.action_cli +@providers_configuration_loaded def stop_worker(args): """Sends SIGTERM to Celery worker.""" # Read PID from file diff --git a/airflow/cli/commands/config_command.py b/airflow/cli/commands/config_command.py index 86130dab102c7..4855c7dd9ff42 100644 --- a/airflow/cli/commands/config_command.py +++ b/airflow/cli/commands/config_command.py @@ -25,8 +25,10 @@ from airflow.configuration import conf from airflow.utils.cli import should_use_colors from airflow.utils.code_utils import get_terminal_formatter +from airflow.utils.providers_configuration_loader import providers_configuration_loaded +@providers_configuration_loaded def show_config(args): """Show current application configuration.""" with io.StringIO() as output: @@ -47,6 +49,7 @@ def show_config(args): print(code) +@providers_configuration_loaded def get_value(args): """Get one value from configuration.""" # while this will make get_value quite a bit slower we must initialize configuration @@ -54,9 +57,6 @@ def get_value(args): # providers are initialized. Theoretically Providers might add new sections and options # but also override defaults for existing options, so without loading all providers we # cannot be sure what is the final value of the option. - from airflow.providers_manager import ProvidersManager - - ProvidersManager().initialize_providers_configuration() if not conf.has_option(args.section, args.option): raise SystemExit(f"The option [{args.section}/{args.option}] is not found in config.") diff --git a/airflow/cli/commands/connection_command.py b/airflow/cli/commands/connection_command.py index e7b83e342e298..5db123f004a80 100644 --- a/airflow/cli/commands/connection_command.py +++ b/airflow/cli/commands/connection_command.py @@ -39,6 +39,7 @@ from airflow.secrets.local_filesystem import load_connections_dict from airflow.utils import cli as cli_utils, helpers, yaml from airflow.utils.cli import suppress_logs_and_warning +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.session import create_session @@ -61,6 +62,7 @@ def _connection_mapper(conn: Connection) -> dict[str, Any]: @suppress_logs_and_warning +@providers_configuration_loaded def connections_get(args): """Get a connection.""" try: @@ -75,6 +77,7 @@ def connections_get(args): @suppress_logs_and_warning +@providers_configuration_loaded def connections_list(args): """Lists all connections at the command line.""" with create_session() as session: @@ -150,6 +153,7 @@ def _get_connection_types() -> list[str]: return _connection_types +@providers_configuration_loaded def connections_export(args): """Exports all connections to a file.""" file_formats = [".yaml", ".json", ".env"] @@ -200,6 +204,7 @@ def connections_export(args): @cli_utils.action_cli +@providers_configuration_loaded def connections_add(args): """Adds new connection.""" has_uri = bool(args.conn_uri) @@ -291,6 +296,7 @@ def connections_add(args): @cli_utils.action_cli +@providers_configuration_loaded def connections_delete(args): """Deletes connection from DB.""" with create_session() as session: @@ -306,6 +312,7 @@ def connections_delete(args): @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def connections_import(args): """Imports 
connections from a file.""" if os.path.exists(args.file): @@ -343,6 +350,7 @@ def _import_helper(file_path: str, overwrite: bool) -> None: @suppress_logs_and_warning +@providers_configuration_loaded def connections_test(args) -> None: """Test an Airflow connection.""" console = AirflowConsole() diff --git a/airflow/cli/commands/dag_command.py b/airflow/cli/commands/dag_command.py index 66decad78b167..f965e24b5ef77 100644 --- a/airflow/cli/commands/dag_command.py +++ b/airflow/cli/commands/dag_command.py @@ -45,6 +45,7 @@ from airflow.utils import cli as cli_utils, timezone from airflow.utils.cli import get_dag, get_dags, process_subdir, sigint_handler, suppress_logs_and_warning from airflow.utils.dot_renderer import render_dag, render_dag_dependencies +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.session import NEW_SESSION, create_session, provide_session from airflow.utils.state import DagRunState @@ -120,6 +121,7 @@ def _run_dag_backfill(dags: list[DAG], args) -> None: @cli_utils.action_cli +@providers_configuration_loaded def dag_backfill(args, dag: list[DAG] | DAG | None = None) -> None: """Creates backfill job or dry run for a DAG or list of DAGs using regex.""" logging.basicConfig(level=settings.LOGGING_LEVEL, format=settings.SIMPLE_LOG_FORMAT) @@ -150,6 +152,7 @@ def dag_backfill(args, dag: list[DAG] | DAG | None = None) -> None: @cli_utils.action_cli +@providers_configuration_loaded def dag_trigger(args) -> None: """Creates a dag run for the specified dag.""" api_client = get_current_api_client() @@ -170,6 +173,7 @@ def dag_trigger(args) -> None: @cli_utils.action_cli +@providers_configuration_loaded def dag_delete(args) -> None: """Deletes all DB records related to the specified dag.""" api_client = get_current_api_client() @@ -188,17 +192,20 @@ def dag_delete(args) -> None: @cli_utils.action_cli +@providers_configuration_loaded def dag_pause(args) -> None: """Pauses a DAG.""" set_is_paused(True, args) @cli_utils.action_cli +@providers_configuration_loaded def dag_unpause(args) -> None: """Unpauses a DAG.""" set_is_paused(False, args) +@providers_configuration_loaded def set_is_paused(is_paused: bool, args) -> None: """Sets is_paused for DAG by a given dag_id.""" dag = DagModel.get_dagmodel(args.dag_id) @@ -211,6 +218,7 @@ def set_is_paused(is_paused: bool, args) -> None: print(f"Dag: {args.dag_id}, paused: {is_paused}") +@providers_configuration_loaded def dag_dependencies_show(args) -> None: """Displays DAG dependencies, save to file or show as imgcat image.""" dot = render_dag_dependencies(SerializedDagModel.get_dag_dependencies()) @@ -230,6 +238,7 @@ def dag_dependencies_show(args) -> None: print(dot.source) +@providers_configuration_loaded def dag_show(args) -> None: """Displays DAG or saves it's graphic representation to the file.""" dag = get_dag(args.subdir, args.dag_id) @@ -273,6 +282,7 @@ def _save_dot_to_file(dot: Dot, filename: str) -> None: @cli_utils.action_cli +@providers_configuration_loaded @provide_session def dag_state(args, session: Session = NEW_SESSION) -> None: """ @@ -296,6 +306,7 @@ def dag_state(args, session: Session = NEW_SESSION) -> None: @cli_utils.action_cli +@providers_configuration_loaded def dag_next_execution(args) -> None: """ Returns the next execution datetime of a DAG at the command line. 
@@ -335,6 +346,7 @@ def print_execution_interval(interval: DataInterval | None): @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def dag_list_dags(args) -> None: """Displays dags with or without stats at the command line.""" dagbag = DagBag(process_subdir(args.subdir)) @@ -360,6 +372,7 @@ def dag_list_dags(args) -> None: @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded @provide_session def dag_details(args, session=NEW_SESSION): """Get DAG details given a DAG id.""" @@ -381,6 +394,7 @@ def dag_details(args, session=NEW_SESSION): @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def dag_list_import_errors(args) -> None: """Displays dags with import errors on the command line.""" dagbag = DagBag(process_subdir(args.subdir)) @@ -395,6 +409,7 @@ def dag_list_import_errors(args) -> None: @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def dag_report(args) -> None: """Displays dagbag stats at the command line.""" dagbag = DagBag(process_subdir(args.subdir)) @@ -413,6 +428,7 @@ def dag_report(args) -> None: @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded @provide_session def dag_list_jobs(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> None: """Lists latest n jobs.""" @@ -443,6 +459,7 @@ def dag_list_jobs(args, dag: DAG | None = None, session: Session = NEW_SESSION) @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded @provide_session def dag_list_dag_runs(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> None: """Lists dag runs for a given DAG.""" @@ -479,8 +496,9 @@ def dag_list_dag_runs(args, dag: DAG | None = None, session: Session = NEW_SESSI ) -@provide_session @cli_utils.action_cli +@providers_configuration_loaded +@provide_session def dag_test(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> None: """Execute one single DagRun for a given DAG and execution date.""" run_conf = None @@ -513,8 +531,9 @@ def dag_test(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> No print(dot_graph.source) -@provide_session @cli_utils.action_cli +@providers_configuration_loaded +@provide_session def dag_reserialize(args, session: Session = NEW_SESSION) -> None: """Serialize a DAG instance.""" session.execute(delete(SerializedDagModel).execution_options(synchronize_session=False)) diff --git a/airflow/cli/commands/dag_processor_command.py b/airflow/cli/commands/dag_processor_command.py index 70bebf285b5e1..757bd778cca5d 100644 --- a/airflow/cli/commands/dag_processor_command.py +++ b/airflow/cli/commands/dag_processor_command.py @@ -31,6 +31,7 @@ from airflow.jobs.job import Job, run_job from airflow.utils import cli as cli_utils from airflow.utils.cli import setup_locations, setup_logging +from airflow.utils.providers_configuration_loader import providers_configuration_loaded log = logging.getLogger(__name__) @@ -53,6 +54,7 @@ def _create_dag_processor_job_runner(args: Any) -> DagProcessorJobRunner: @cli_utils.action_cli +@providers_configuration_loaded def dag_processor(args): """Starts Airflow Dag Processor Job.""" if not conf.getboolean("scheduler", "standalone_dag_processor"): diff --git a/airflow/cli/commands/db_command.py b/airflow/cli/commands/db_command.py index 64d54cc22ea71..e703f6271e873 100644 --- a/airflow/cli/commands/db_command.py +++ b/airflow/cli/commands/db_command.py @@ -31,10 +31,12 @@ from airflow.utils.db import REVISION_HEADS_MAP 
from airflow.utils.db_cleanup import config_dict, drop_archived_tables, export_archived_records, run_cleanup from airflow.utils.process_utils import execute_interactive +from airflow.utils.providers_configuration_loader import providers_configuration_loaded log = logging.getLogger(__name__) +@providers_configuration_loaded def initdb(args): """Initializes the metadata database.""" print("DB: " + repr(settings.engine.url)) @@ -42,6 +44,7 @@ def initdb(args): print("Initialization done") +@providers_configuration_loaded def resetdb(args): """Resets the metadata database.""" print("DB: " + repr(settings.engine.url)) @@ -51,6 +54,7 @@ def resetdb(args): @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def upgradedb(args): """Upgrades the metadata database.""" print("DB: " + repr(settings.engine.url)) @@ -96,6 +100,7 @@ def upgradedb(args): @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def downgrade(args): """Downgrades the metadata database.""" if args.to_revision and args.to_version: @@ -142,12 +147,14 @@ def downgrade(args): raise SystemExit("Cancelled") +@providers_configuration_loaded def check_migrations(args): """Function to wait for all airflow migrations to complete. Used for launching airflow in k8s.""" db.check_migrations(timeout=args.migration_wait_timeout) @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def shell(args): """Run a shell that allows to access metadata database.""" url = settings.engine.url @@ -191,6 +198,7 @@ def shell(args): @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def check(args): """Runs a check command that checks if db is available.""" retries: int = args.retry @@ -215,6 +223,7 @@ def _warn_remaining_retries(retrystate: RetryCallState): @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def cleanup_tables(args): """Purges old records in metadata database.""" run_cleanup( @@ -228,6 +237,7 @@ def cleanup_tables(args): @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def export_archived(args): """Exports archived records from metadata database.""" export_archived_records( @@ -240,6 +250,7 @@ def export_archived(args): @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def drop_archived(args): """Drops archived tables from metadata database.""" drop_archived_tables( diff --git a/airflow/cli/commands/info_command.py b/airflow/cli/commands/info_command.py index 7261dfc484156..2e60d80b271d8 100644 --- a/airflow/cli/commands/info_command.py +++ b/airflow/cli/commands/info_command.py @@ -35,6 +35,7 @@ from airflow.typing_compat import Protocol from airflow.utils.cli import suppress_logs_and_warning from airflow.utils.platform import getuser +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.version import version as airflow_version log = logging.getLogger(__name__) @@ -378,6 +379,7 @@ def _send_report_to_fileio(info): @suppress_logs_and_warning +@providers_configuration_loaded def show_info(args): """Show information related to Airflow, system and other.""" # Enforce anonymization, when file_io upload is tuned on. 
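The per-command changes in this patch all follow the same stacking pattern. A minimal sketch of that pattern (the command name below is hypothetical; the decorator ordering follows the guidance in the docstring of the new decorator introduced later in this patch):

    from airflow.utils import cli as cli_utils
    from airflow.utils.cli import suppress_logs_and_warning
    from airflow.utils.providers_configuration_loader import providers_configuration_loaded
    from airflow.utils.session import NEW_SESSION, provide_session


    @cli_utils.action_cli
    @suppress_logs_and_warning
    @providers_configuration_loaded
    @provide_session
    def example_list_command(args, session=NEW_SESSION):
        """Hypothetical CLI command: providers configuration is loaded before this body runs."""
        # The decorator only discovers provider entrypoints and parses their config yaml;
        # it does not import provider classes, so the extra cost stays sub-second.
        ...
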
diff --git a/airflow/cli/commands/internal_api_command.py b/airflow/cli/commands/internal_api_command.py index 3d8f205bd1e7c..72fe57c206a0e 100644 --- a/airflow/cli/commands/internal_api_command.py +++ b/airflow/cli/commands/internal_api_command.py @@ -47,6 +47,7 @@ from airflow.utils import cli as cli_utils from airflow.utils.cli import setup_locations, setup_logging from airflow.utils.process_utils import check_if_pidfile_process_is_running +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.www.extensions.init_dagbag import init_dagbag from airflow.www.extensions.init_jinja_globals import init_jinja_globals from airflow.www.extensions.init_manifest_files import configure_manifest_files @@ -58,6 +59,7 @@ @cli_utils.action_cli +@providers_configuration_loaded def internal_api(args): """Starts Airflow Internal API.""" print(settings.HEADER) diff --git a/airflow/cli/commands/jobs_command.py b/airflow/cli/commands/jobs_command.py index bcdd6df475d6d..b6509ea6424e9 100644 --- a/airflow/cli/commands/jobs_command.py +++ b/airflow/cli/commands/jobs_command.py @@ -21,10 +21,12 @@ from airflow.jobs.job import Job from airflow.utils.net import get_hostname +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import JobState +@providers_configuration_loaded @provide_session def check(args, session: Session = NEW_SESSION) -> None: """Checks if job(s) are still alive.""" diff --git a/airflow/cli/commands/kerberos_command.py b/airflow/cli/commands/kerberos_command.py index 4bbe3f6df919d..4dd63d52ebdd5 100644 --- a/airflow/cli/commands/kerberos_command.py +++ b/airflow/cli/commands/kerberos_command.py @@ -24,9 +24,11 @@ from airflow.security import kerberos as krb from airflow.utils import cli as cli_utils from airflow.utils.cli import setup_locations +from airflow.utils.providers_configuration_loader import providers_configuration_loaded @cli_utils.action_cli +@providers_configuration_loaded def kerberos(args): """Start a kerberos ticket renewer.""" print(settings.HEADER) diff --git a/airflow/cli/commands/kubernetes_command.py b/airflow/cli/commands/kubernetes_command.py index c367d4be87b6d..1555f7be9258d 100644 --- a/airflow/cli/commands/kubernetes_command.py +++ b/airflow/cli/commands/kubernetes_command.py @@ -33,9 +33,11 @@ from airflow.models import DagRun, TaskInstance from airflow.utils import cli as cli_utils, yaml from airflow.utils.cli import get_dag +from airflow.utils.providers_configuration_loader import providers_configuration_loaded @cli_utils.action_cli +@providers_configuration_loaded def generate_pod_yaml(args): """Generates yaml files for each task in the DAG. 
Used for testing output of KubernetesExecutor.""" execution_date = args.execution_date @@ -71,6 +73,7 @@ def generate_pod_yaml(args): @cli_utils.action_cli +@providers_configuration_loaded def cleanup_pods(args): """Clean up k8s pods in evicted/failed/succeeded/pending states.""" namespace = args.namespace diff --git a/airflow/cli/commands/plugins_command.py b/airflow/cli/commands/plugins_command.py index 50ee583099110..29dd75674afe0 100644 --- a/airflow/cli/commands/plugins_command.py +++ b/airflow/cli/commands/plugins_command.py @@ -23,6 +23,7 @@ from airflow.cli.simple_table import AirflowConsole from airflow.plugins_manager import PluginsDirectorySource, get_plugin_info from airflow.utils.cli import suppress_logs_and_warning +from airflow.utils.providers_configuration_loader import providers_configuration_loaded def _get_name(class_like_object) -> str: @@ -39,6 +40,7 @@ def _join_plugins_names(value: list[Any] | Any) -> str: @suppress_logs_and_warning +@providers_configuration_loaded def dump_plugins(args): """Dump plugins information.""" plugins_info: list[dict[str, str]] = get_plugin_info() diff --git a/airflow/cli/commands/pool_command.py b/airflow/cli/commands/pool_command.py index aa56ba8fea7de..8d9e206f1bb2d 100644 --- a/airflow/cli/commands/pool_command.py +++ b/airflow/cli/commands/pool_command.py @@ -27,6 +27,7 @@ from airflow.exceptions import PoolNotFound from airflow.utils import cli as cli_utils from airflow.utils.cli import suppress_logs_and_warning +from airflow.utils.providers_configuration_loader import providers_configuration_loaded def _show_pools(pools, output): @@ -42,6 +43,7 @@ def _show_pools(pools, output): @suppress_logs_and_warning +@providers_configuration_loaded def pool_list(args): """Displays info of all the pools.""" api_client = get_current_api_client() @@ -50,6 +52,7 @@ def pool_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def pool_get(args): """Displays pool info by a given name.""" api_client = get_current_api_client() @@ -62,6 +65,7 @@ def pool_get(args): @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def pool_set(args): """Creates new pool with a given name and slots.""" api_client = get_current_api_client() @@ -71,6 +75,7 @@ def pool_set(args): @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def pool_delete(args): """Deletes pool by a given name.""" api_client = get_current_api_client() @@ -83,6 +88,7 @@ def pool_delete(args): @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def pool_import(args): """Imports pools from the file.""" if not os.path.exists(args.file): @@ -93,6 +99,7 @@ def pool_import(args): print(f"Uploaded {len(pools)} pool(s)") +@providers_configuration_loaded def pool_export(args): """Exports all the pools to the file.""" pools = pool_export_helper(args.file) diff --git a/airflow/cli/commands/provider_command.py b/airflow/cli/commands/provider_command.py index adcb1d18fa14b..a55032d9f210e 100644 --- a/airflow/cli/commands/provider_command.py +++ b/airflow/cli/commands/provider_command.py @@ -24,6 +24,7 @@ from airflow.cli.simple_table import AirflowConsole from airflow.providers_manager import ProvidersManager from airflow.utils.cli import suppress_logs_and_warning +from airflow.utils.providers_configuration_loader import providers_configuration_loaded ERROR_IMPORTING_HOOK = "Error when importing hook!" 
@@ -33,6 +34,7 @@ def _remove_rst_syntax(value: str) -> str: @suppress_logs_and_warning +@providers_configuration_loaded def provider_get(args): """Get a provider info.""" providers = ProvidersManager().providers @@ -54,6 +56,7 @@ def provider_get(args): @suppress_logs_and_warning +@providers_configuration_loaded def providers_list(args): """Lists all providers at the command line.""" AirflowConsole().print_as( @@ -68,6 +71,7 @@ def providers_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def hooks_list(args): """Lists all hooks at the command line.""" AirflowConsole().print_as( @@ -84,6 +88,7 @@ def hooks_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def triggers_list(args): AirflowConsole().print_as( data=ProvidersManager().trigger, @@ -97,6 +102,7 @@ def triggers_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def connection_form_widget_list(args): """Lists all custom connection form fields at the command line.""" AirflowConsole().print_as( @@ -112,6 +118,7 @@ def connection_form_widget_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def connection_field_behaviours(args): """Lists field behaviours.""" AirflowConsole().print_as( @@ -124,6 +131,7 @@ def connection_field_behaviours(args): @suppress_logs_and_warning +@providers_configuration_loaded def extra_links_list(args): """Lists all extra links at the command line.""" AirflowConsole().print_as( @@ -136,6 +144,7 @@ def extra_links_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def logging_list(args): """Lists all log task handlers at the command line.""" AirflowConsole().print_as( @@ -148,6 +157,7 @@ def logging_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def secrets_backends_list(args): """Lists all secrets backends at the command line.""" AirflowConsole().print_as( @@ -160,6 +170,7 @@ def secrets_backends_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def auth_backend_list(args): """Lists all API auth backend modules at the command line.""" AirflowConsole().print_as( @@ -172,6 +183,7 @@ def auth_backend_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def executors_list(args): """Lists all executors at the command line.""" AirflowConsole().print_as( @@ -184,6 +196,7 @@ def executors_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def config_list(args): """Lists all configurations at the command line.""" AirflowConsole().print_as( @@ -196,8 +209,8 @@ def config_list(args): @suppress_logs_and_warning -def status(args): - """Informs if providers manager has been initialized. +def lazy_loaded(args): + """Informs if providers manager has been initialized too early. If provider is initialized, shows the stack trace and exit with error code 1. 
""" diff --git a/airflow/cli/commands/role_command.py b/airflow/cli/commands/role_command.py index 91fa267429414..db11d69dd2f83 100644 --- a/airflow/cli/commands/role_command.py +++ b/airflow/cli/commands/role_command.py @@ -26,11 +26,13 @@ from airflow.cli.simple_table import AirflowConsole from airflow.utils import cli as cli_utils from airflow.utils.cli import suppress_logs_and_warning +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.www.fab_security.sqla.models import Action, Permission, Resource, Role from airflow.www.security import EXISTING_ROLES @suppress_logs_and_warning +@providers_configuration_loaded def roles_list(args): """Lists all existing roles.""" from airflow.utils.cli_app_builder import get_application_builder @@ -58,6 +60,7 @@ def roles_list(args): @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def roles_create(args): """Creates new empty role in DB.""" from airflow.utils.cli_app_builder import get_application_builder @@ -70,6 +73,7 @@ def roles_create(args): @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def roles_delete(args): """Deletes role in DB.""" from airflow.utils.cli_app_builder import get_application_builder @@ -138,6 +142,7 @@ def __roles_add_or_remove_permissions(args): @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def roles_add_perms(args): """Adds permissions to role in DB.""" __roles_add_or_remove_permissions(args) @@ -145,12 +150,14 @@ def roles_add_perms(args): @cli_utils.action_cli @suppress_logs_and_warning +@providers_configuration_loaded def roles_del_perms(args): """Deletes permissions from role in DB.""" __roles_add_or_remove_permissions(args) @suppress_logs_and_warning +@providers_configuration_loaded def roles_export(args): """ Exports all the roles from the database to a file. 
diff --git a/airflow/cli/commands/rotate_fernet_key_command.py b/airflow/cli/commands/rotate_fernet_key_command.py index e9973978e0a1b..1a47a29e3149a 100644 --- a/airflow/cli/commands/rotate_fernet_key_command.py +++ b/airflow/cli/commands/rotate_fernet_key_command.py @@ -21,10 +21,12 @@ from airflow.models import Connection, Variable from airflow.utils import cli as cli_utils +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.session import create_session @cli_utils.action_cli +@providers_configuration_loaded def rotate_fernet_key(args): """Rotates all encrypted connection credentials and variables.""" with create_session() as session: diff --git a/airflow/cli/commands/scheduler_command.py b/airflow/cli/commands/scheduler_command.py index 22f9a758088ad..808645282cacf 100644 --- a/airflow/cli/commands/scheduler_command.py +++ b/airflow/cli/commands/scheduler_command.py @@ -32,6 +32,7 @@ from airflow.jobs.scheduler_job_runner import SchedulerJobRunner from airflow.utils import cli as cli_utils from airflow.utils.cli import process_subdir, setup_locations, setup_logging, sigint_handler, sigquit_handler +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.scheduler_health import serve_health_check @@ -43,6 +44,7 @@ def _run_scheduler_job(job_runner: SchedulerJobRunner, *, skip_serve_logs: bool) @cli_utils.action_cli +@providers_configuration_loaded def scheduler(args): """Starts Airflow Scheduler.""" print(settings.HEADER) diff --git a/airflow/cli/commands/standalone_command.py b/airflow/cli/commands/standalone_command.py index 9bdd49a82a68c..68abfdd0002b3 100644 --- a/airflow/cli/commands/standalone_command.py +++ b/airflow/cli/commands/standalone_command.py @@ -35,6 +35,7 @@ from airflow.jobs.scheduler_job_runner import SchedulerJobRunner from airflow.jobs.triggerer_job_runner import TriggererJobRunner from airflow.utils import db +from airflow.utils.providers_configuration_loader import providers_configuration_loaded class StandaloneCommand: @@ -56,6 +57,7 @@ def __init__(self): self.ready_time = None self.ready_delay = 3 + @providers_configuration_loaded def run(self): """Main run loop.""" self.print_output("standalone", "Starting Airflow Standalone") diff --git a/airflow/cli/commands/sync_perm_command.py b/airflow/cli/commands/sync_perm_command.py index 7ae340696788c..ab458b2d93b41 100644 --- a/airflow/cli/commands/sync_perm_command.py +++ b/airflow/cli/commands/sync_perm_command.py @@ -19,9 +19,11 @@ from __future__ import annotations from airflow.utils import cli as cli_utils +from airflow.utils.providers_configuration_loader import providers_configuration_loaded @cli_utils.action_cli +@providers_configuration_loaded def sync_perm(args): """Updates permissions for existing roles and DAGs.""" from airflow.utils.cli_app_builder import get_application_builder diff --git a/airflow/cli/commands/task_command.py b/airflow/cli/commands/task_command.py index 796d3a8cf1204..da47e8dd8b375 100644 --- a/airflow/cli/commands/task_command.py +++ b/airflow/cli/commands/task_command.py @@ -65,6 +65,7 @@ from airflow.utils.log.logging_mixin import StreamLogWriter from airflow.utils.log.secrets_masker import RedactedIO from airflow.utils.net import get_hostname +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.session import NEW_SESSION, create_session, provide_session from airflow.utils.state import DagRunState @@ -438,6 +439,7 @@ def 
task_run(args, dag: DAG | None = None) -> TaskReturnCode | None: @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def task_failed_deps(args) -> None: """ Get task instance dependencies that were not met. @@ -468,6 +470,7 @@ def task_failed_deps(args) -> None: @cli_utils.action_cli(check_db=False) @suppress_logs_and_warning +@providers_configuration_loaded def task_state(args) -> None: """ Returns the state of a TaskInstance at the command line. @@ -483,6 +486,7 @@ def task_state(args) -> None: @cli_utils.action_cli(check_db=False) @suppress_logs_and_warning +@providers_configuration_loaded def task_list(args, dag: DAG | None = None) -> None: """Lists the tasks within a DAG at the command line.""" dag = dag or get_dag(args.subdir, args.dag_id) @@ -530,6 +534,7 @@ def _guess_debugger() -> _SupportedDebugger: @cli_utils.action_cli(check_db=False) @suppress_logs_and_warning +@providers_configuration_loaded @provide_session def task_states_for_dag_run(args, session: Session = NEW_SESSION) -> None: """Get the status of all task instances in a DagRun.""" @@ -631,6 +636,7 @@ def task_test(args, dag: DAG | None = None) -> None: @cli_utils.action_cli(check_db=False) @suppress_logs_and_warning +@providers_configuration_loaded def task_render(args, dag: DAG | None = None) -> None: """Renders and displays templated fields for a given task.""" if not dag: @@ -653,6 +659,7 @@ def task_render(args, dag: DAG | None = None) -> None: @cli_utils.action_cli(check_db=False) +@providers_configuration_loaded def task_clear(args) -> None: """Clears all task instances or only those matched by regex for a DAG(s).""" logging.basicConfig(level=settings.LOGGING_LEVEL, format=settings.SIMPLE_LOG_FORMAT) diff --git a/airflow/cli/commands/triggerer_command.py b/airflow/cli/commands/triggerer_command.py index aa06d641c7601..c7d0827bd8e58 100644 --- a/airflow/cli/commands/triggerer_command.py +++ b/airflow/cli/commands/triggerer_command.py @@ -32,6 +32,7 @@ from airflow.jobs.triggerer_job_runner import TriggererJobRunner from airflow.utils import cli as cli_utils from airflow.utils.cli import setup_locations, setup_logging, sigint_handler, sigquit_handler +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.serve_logs import serve_logs @@ -51,6 +52,7 @@ def _serve_logs(skip_serve_logs: bool = False) -> Generator[None, None, None]: @cli_utils.action_cli +@providers_configuration_loaded def triggerer(args): """Starts Airflow Triggerer.""" settings.MASK_SECRETS_IN_LOGS = True diff --git a/airflow/cli/commands/user_command.py b/airflow/cli/commands/user_command.py index a5a5be9787565..1553d27a01566 100644 --- a/airflow/cli/commands/user_command.py +++ b/airflow/cli/commands/user_command.py @@ -32,6 +32,7 @@ from airflow.cli.simple_table import AirflowConsole from airflow.utils import cli as cli_utils from airflow.utils.cli import suppress_logs_and_warning +from airflow.utils.providers_configuration_loader import providers_configuration_loaded class UserSchema(Schema): @@ -46,6 +47,7 @@ class UserSchema(Schema): @suppress_logs_and_warning +@providers_configuration_loaded def users_list(args): """Lists users at the command line.""" from airflow.utils.cli_app_builder import get_application_builder @@ -60,6 +62,7 @@ def users_list(args): @cli_utils.action_cli(check_db=True) +@providers_configuration_loaded def users_create(args): """Creates new user in the DB.""" from airflow.utils.cli_app_builder import get_application_builder @@ -108,6 +111,7 @@ def 
_find_user(args): @cli_utils.action_cli +@providers_configuration_loaded def users_delete(args): """Deletes user from DB.""" user = _find_user(args) @@ -125,6 +129,7 @@ def users_delete(args): @cli_utils.action_cli +@providers_configuration_loaded def users_manage_role(args, remove=False): """Deletes or appends user roles.""" user = _find_user(args) @@ -153,6 +158,7 @@ def users_manage_role(args, remove=False): print(f'User "{user.username}" added to role "{args.role}"') +@providers_configuration_loaded def users_export(args): """Exports all users to the json file.""" from airflow.utils.cli_app_builder import get_application_builder @@ -182,6 +188,7 @@ def remove_underscores(s): @cli_utils.action_cli +@providers_configuration_loaded def users_import(args): """Imports users from the json file.""" json_file = getattr(args, "import") diff --git a/airflow/cli/commands/variable_command.py b/airflow/cli/commands/variable_command.py index 32f6b0c1987c4..34b46530d5620 100644 --- a/airflow/cli/commands/variable_command.py +++ b/airflow/cli/commands/variable_command.py @@ -28,10 +28,12 @@ from airflow.models import Variable from airflow.utils import cli as cli_utils from airflow.utils.cli import suppress_logs_and_warning +from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.session import create_session @suppress_logs_and_warning +@providers_configuration_loaded def variables_list(args): """Displays all the variables.""" with create_session() as session: @@ -40,6 +42,7 @@ def variables_list(args): @suppress_logs_and_warning +@providers_configuration_loaded def variables_get(args): """Displays variable by a given name.""" try: @@ -54,6 +57,7 @@ def variables_get(args): @cli_utils.action_cli +@providers_configuration_loaded def variables_set(args): """Creates new variable with a given name and value.""" Variable.set(args.key, args.value, serialize_json=args.json) @@ -61,6 +65,7 @@ def variables_set(args): @cli_utils.action_cli +@providers_configuration_loaded def variables_delete(args): """Deletes variable by a given name.""" Variable.delete(args.key) @@ -68,6 +73,7 @@ def variables_delete(args): @cli_utils.action_cli +@providers_configuration_loaded def variables_import(args): """Imports variables from a given file.""" if os.path.exists(args.file): @@ -76,6 +82,7 @@ def variables_import(args): raise SystemExit("Missing variables file.") +@providers_configuration_loaded def variables_export(args): """Exports all the variables to the file.""" _variable_export_helper(args.file) diff --git a/airflow/cli/commands/webserver_command.py b/airflow/cli/commands/webserver_command.py index f5d44e505869a..5399f8ba64045 100644 --- a/airflow/cli/commands/webserver_command.py +++ b/airflow/cli/commands/webserver_command.py @@ -42,6 +42,7 @@ from airflow.utils.hashlib_wrapper import md5 from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.process_utils import check_if_pidfile_process_is_running +from airflow.utils.providers_configuration_loader import providers_configuration_loaded log = logging.getLogger(__name__) @@ -320,6 +321,7 @@ def _check_workers(self) -> None: @cli_utils.action_cli +@providers_configuration_loaded def webserver(args): """Starts Airflow Webserver.""" print(settings.HEADER) diff --git a/airflow/providers/celery/executors/celery_executor_utils.py b/airflow/providers/celery/executors/celery_executor_utils.py index 1c02dbb57ba61..e970174a658c9 100644 --- a/airflow/providers/celery/executors/celery_executor_utils.py +++ 
b/airflow/providers/celery/executors/celery_executor_utils.py @@ -57,18 +57,36 @@ TaskInstanceInCelery = Tuple[TaskInstanceKey, CommandType, Optional[str], Task] +# IMPORTANT NOTE! Celery Executor has initialization done dynamically and it performs initialization when +# it is imported, so we need fallbacks here in order to be able to import the class directly without +# having configuration initialized before. Do not remove those fallbacks! +# +# This is not strictly needed for production: +# +# * for Airflow 2.6 and before the defaults will come from the core defaults +# * for Airflow 2.7+ the defaults will be loaded via ProvidersManager +# +# But it helps in our tests to import the executor class and validate if the celery code can be imported +# in the current and older versions of Airflow. + OPERATION_TIMEOUT = conf.getfloat("celery", "operation_timeout", fallback=1.0) # Make it constant for unit test. CELERY_FETCH_ERR_MSG_HEADER = "Error fetching Celery task state" if conf.has_option("celery", "celery_config_options"): - celery_configuration = conf.getimport("celery", "celery_config_options") + celery_configuration = conf.getimport( + "celery", + "celery_config_options", + fallback="airflow.providers.celery.executors.default_celery.DEFAULT_CELERY_CONFIG", + ) else: celery_configuration = DEFAULT_CELERY_CONFIG -celery_app_name = conf.get("celery", "CELERY_APP_NAME") +celery_app_name = conf.get( + "celery", "CELERY_APP_NAME", fallback="airflow.providers.celery.executors.celery_executor" +) if celery_app_name == "airflow.executors.celery_executor": warnings.warn( "The celery.CELERY_APP_NAME configuration uses deprecated package name: " diff --git a/airflow/providers/celery/executors/celery_kubernetes_executor.py b/airflow/providers/celery/executors/celery_kubernetes_executor.py index 72c037b684ea4..d79b3cd1db67e 100644 --- a/airflow/providers/celery/executors/celery_kubernetes_executor.py +++ b/airflow/providers/celery/executors/celery_kubernetes_executor.py @@ -26,6 +26,7 @@ from airflow.executors.kubernetes_executor import KubernetesExecutor from airflow.providers.celery.executors.celery_executor import CeleryExecutor from airflow.utils.log.logging_mixin import LoggingMixin +from airflow.utils.providers_configuration_loader import providers_configuration_loaded if TYPE_CHECKING: from airflow.executors.base_executor import CommandType, EventBufferValueType, QueuedTaskInstanceType @@ -57,12 +58,8 @@ class CeleryKubernetesExecutor(LoggingMixin): callback_sink: BaseCallbackSink | None = None @cached_property + @providers_configuration_loaded def kubernetes_queue(self) -> str: - # lazily retrieve the value of kubernetes_queue from the configuration - # because it might need providers - from airflow.providers_manager import ProvidersManager - - ProvidersManager().initialize_providers_configuration() return conf.get("celery_kubernetes_executor", "kubernetes_queue") def __init__(self, celery_executor: CeleryExecutor, kubernetes_executor: KubernetesExecutor): diff --git a/airflow/providers/celery/executors/default_celery.py b/airflow/providers/celery/executors/default_celery.py index f5c23cbb81a3b..6af4c1cd201c0 100644 --- a/airflow/providers/celery/executors/default_celery.py +++ b/airflow/providers/celery/executors/default_celery.py @@ -25,7 +25,6 @@ from airflow.configuration import conf from airflow.exceptions import AirflowConfigException, AirflowException -from airflow.providers_manager import ProvidersManager def _broker_supports_visibility_timeout(url): @@ -34,8 +33,19 @@ def 
_broker_supports_visibility_timeout(url): log = logging.getLogger(__name__) -ProvidersManager().initialize_providers_configuration() -broker_url = conf.get("celery", "BROKER_URL") +# IMPORTANT NOTE! Celery Executor has initialization done dynamically and it performs initialization when +# it is imported, so we need fallbacks here in order to be able to import the class directly without +# having configuration initialized before. Do not remove those fallbacks! +# +# This is not strictly needed for production: +# +# * for Airflow 2.6 and before the defaults will come from the core defaults +# * for Airflow 2.7+ the defaults will be loaded via ProvidersManager +# +# But it helps in our tests to import the executor class and validate if the celery code can be imported +# in the current and older versions of Airflow. + +broker_url = conf.get("celery", "BROKER_URL", fallback="redis://redis:6379/0") broker_transport_options = conf.getsection("celery_broker_transport_options") or {} if "visibility_timeout" not in broker_transport_options: @@ -61,19 +71,19 @@ def _broker_supports_visibility_timeout(url): DEFAULT_CELERY_CONFIG = { "accept_content": ["json"], "event_serializer": "json", - "worker_prefetch_multiplier": conf.getint("celery", "worker_prefetch_multiplier"), + "worker_prefetch_multiplier": conf.getint("celery", "worker_prefetch_multiplier", fallback=1), "task_acks_late": True, "task_default_queue": conf.get("operators", "DEFAULT_QUEUE"), "task_default_exchange": conf.get("operators", "DEFAULT_QUEUE"), - "task_track_started": conf.getboolean("celery", "task_track_started"), + "task_track_started": conf.getboolean("celery", "task_track_started", fallback=True), "broker_url": broker_url, "broker_transport_options": broker_transport_options_for_celery, "result_backend": result_backend, "database_engine_options": conf.getjson( "celery", "result_backend_sqlalchemy_engine_options", fallback={} ), - "worker_concurrency": conf.getint("celery", "WORKER_CONCURRENCY"), - "worker_enable_remote_control": conf.getboolean("celery", "worker_enable_remote_control"), + "worker_concurrency": conf.getint("celery", "WORKER_CONCURRENCY", fallback=16), + "worker_enable_remote_control": conf.getboolean("celery", "worker_enable_remote_control", fallback=True), } diff --git a/airflow/utils/providers_configuration_loader.py b/airflow/utils/providers_configuration_loader.py new file mode 100644 index 0000000000000..df7f9d31755f1 --- /dev/null +++ b/airflow/utils/providers_configuration_loader.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations
+
+from functools import wraps
+from typing import Callable, TypeVar, cast
+
+T = TypeVar("T", bound=Callable)
+
+
+def providers_configuration_loaded(func: T) -> T:
+    """
+    Decorator that makes sure that providers configuration is loaded before actually calling
+    the decorated function.
+
+    ProvidersManager initialization of configuration is relatively inexpensive - it walks through
+    all providers' entrypoints, retrieves the provider_info and loads the config yaml parts of it.
+    Unlike initialization of hooks and operators, it does not import any of the providers' code, so it
+    can be run quickly by all commands that need to access providers configuration. We cannot even
+    import ProvidersManager while importing any of the commands, so we need to locally import it here.
+
+    We cannot initialize the configuration in settings/conf because of the way conf/settings are used
+    internally - they are loaded while importing airflow, and we need to access airflow version and conf
+    in the ProvidersManager initialization, so instead we opt for decorating all the methods that need
+    it with this decorator.
+
+    The decorator should be placed below @suppress_logs_and_warning but above @provide_session in order
+    to avoid spoiling the output of formatted options with warnings or infos, and to be prepared that
+    session creation might need some configuration defaults from the providers configuration.
+
+    :param func: function to make sure that providers configuration is loaded before actually calling it
+    """
+
+    @wraps(func)
+    def wrapped_function(*args, **kwargs):
+        from airflow.providers_manager import ProvidersManager
+
+        ProvidersManager().initialize_providers_configuration()
+        return func(*args, **kwargs)
+
+    return cast(T, wrapped_function)
diff --git a/tests/cli/test_cli_parser.py b/tests/cli/test_cli_parser.py
index e770e19416eb4..6089961de94be 100644
--- a/tests/cli/test_cli_parser.py
+++ b/tests/cli/test_cli_parser.py
@@ -324,7 +324,7 @@ def test_cli_parsing_does_not_initialize_providers_manager(self):
         CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
         CONFIG_FILE.touch(exist_ok=True)
         result = subprocess.run(
-            [sys.executable, "-m", "airflow", "providers", "status"],
+            [sys.executable, "-m", "airflow", "providers", "lazy-loaded"],
             env={"PYTHONPATH": os.pathsep.join(sys.path)},
             check=False,
             text=True,
@@ -339,7 +339,7 @@ def test_airflow_config_contains_providers(self):
         """
         CONFIG_FILE.unlink(missing_ok=True)
         result = subprocess.run(
-            [sys.executable, "-m", "airflow", "version"],
+            [sys.executable, "-m", "airflow", "config", "list"],
             env={"PYTHONPATH": os.pathsep.join(sys.path)},
             check=False,
             text=True,
diff --git a/tests/integration/executors/test_celery_executor.py b/tests/integration/executors/test_celery_executor.py
index c4c3e5d6f04e6..9e8d365c5ce80 100644
--- a/tests/integration/executors/test_celery_executor.py
+++ b/tests/integration/executors/test_celery_executor.py
@@ -39,7 +39,6 @@
 from airflow.models.dag import DAG
 from airflow.models.taskinstance import SimpleTaskInstance, TaskInstance
 from airflow.operators.bash import BashOperator
-from airflow.providers.celery.executors import celery_executor, celery_executor_utils
 from airflow.utils.state import State
 from tests.test_utils import db

@@ -61,6 +60,8 @@ def task_id(self):

 @contextlib.contextmanager
 def _prepare_app(broker_url=None, execute=None):
+    from airflow.providers.celery.executors import celery_executor_utils
+
     broker_url = broker_url or conf.get("celery", "BROKER_URL")
execute = execute or celery_executor_utils.execute_command.__wrapped__ @@ -106,6 +107,8 @@ def teardown_method(self) -> None: @pytest.mark.flaky(reruns=3) @pytest.mark.parametrize("broker_url", _prepare_test_bodies()) def test_celery_integration(self, broker_url): + from airflow.providers.celery.executors import celery_executor, celery_executor_utils + success_command = ["airflow", "tasks", "run", "true", "some_parameter"] fail_command = ["airflow", "version"] @@ -163,6 +166,8 @@ def fake_execute_command(command): assert executor.queued_tasks == {} def test_error_sending_task(self): + from airflow.providers.celery.executors import celery_executor + def fake_execute_command(): pass @@ -189,6 +194,7 @@ def fake_execute_command(): def test_retry_on_error_sending_task(self, caplog): """Test that Airflow retries publishing tasks to Celery Broker at least 3 times""" + from airflow.providers.celery.executors import celery_executor, celery_executor_utils with _prepare_app(), caplog.at_level(logging.INFO), mock.patch.object( # Mock `with timeout()` to _instantly_ fail. @@ -268,6 +274,8 @@ class TestBulkStateFetcher: return_value=[json.dumps({"status": "SUCCESS", "task_id": "123"})], ) def test_should_support_kv_backend(self, mock_mget, caplog): + from airflow.providers.celery.executors import celery_executor, celery_executor_utils + caplog.set_level(logging.DEBUG, logger=self.bulk_state_fetcher_logger) with _prepare_app(): mock_backend = BaseKeyValueStoreBackend(app=celery_executor.app) @@ -293,6 +301,8 @@ def test_should_support_kv_backend(self, mock_mget, caplog): @mock.patch("celery.backends.database.DatabaseBackend.ResultSession") def test_should_support_db_backend(self, mock_session, caplog): + from airflow.providers.celery.executors import celery_executor, celery_executor_utils + caplog.set_level(logging.DEBUG, logger=self.bulk_state_fetcher_logger) with _prepare_app(): mock_backend = DatabaseBackend(app=celery_executor.app, url="sqlite3://") @@ -318,6 +328,8 @@ def test_should_support_db_backend(self, mock_session, caplog): @mock.patch("celery.backends.database.DatabaseBackend.ResultSession") def test_should_retry_db_backend(self, mock_session, caplog): + from airflow.providers.celery.executors import celery_executor, celery_executor_utils + caplog.set_level(logging.DEBUG, logger=self.bulk_state_fetcher_logger) from sqlalchemy.exc import DatabaseError @@ -352,6 +364,8 @@ def test_should_retry_db_backend(self, mock_session, caplog): ] def test_should_support_base_backend(self, caplog): + from airflow.providers.celery.executors import celery_executor_utils + caplog.set_level(logging.DEBUG, logger=self.bulk_state_fetcher_logger) with _prepare_app(): mock_backend = mock.MagicMock(autospec=BaseBackend) diff --git a/tests/providers/celery/executors/test_celery_executor.py b/tests/providers/celery/executors/test_celery_executor.py index fd00d6a083575..c8ddee8c4ee87 100644 --- a/tests/providers/celery/executors/test_celery_executor.py +++ b/tests/providers/celery/executors/test_celery_executor.py @@ -37,7 +37,7 @@ from airflow.models.baseoperator import BaseOperator from airflow.models.dag import DAG from airflow.models.taskinstance import TaskInstance, TaskInstanceKey -from airflow.providers.celery.executors import celery_executor, celery_executor_utils +from airflow.providers.celery.executors import celery_executor, celery_executor_utils, default_celery from airflow.providers.celery.executors.celery_executor import CeleryExecutor from airflow.utils import timezone from airflow.utils.state import 
State @@ -65,6 +65,7 @@ def task_id(self): @contextlib.contextmanager def _prepare_app(broker_url=None, execute=None): broker_url = broker_url or conf.get("celery", "BROKER_URL") + execute = execute or celery_executor_utils.execute_command.__wrapped__ test_config = dict(celery_executor_utils.celery_configuration) @@ -185,6 +186,7 @@ def test_try_adopt_task_instances_none(self): key1 = TaskInstance(task=task_1, run_id=None) tis = [key1] + executor = celery_executor.CeleryExecutor() assert executor.try_adopt_task_instances(tis) == tis @@ -208,6 +210,7 @@ def test_try_adopt_task_instances(self): ti2.state = State.QUEUED tis = [ti1, ti2] + executor = celery_executor.CeleryExecutor() assert executor.running == set() assert executor.tasks == {} @@ -243,6 +246,7 @@ def test_cleanup_stuck_queued_tasks(self, mock_fail): tis = [ti] with _prepare_app() as app: app.control.revoke = mock.MagicMock() + executor = celery_executor.CeleryExecutor() executor.job_id = 1 executor.running = {ti.key} @@ -258,8 +262,6 @@ def test_cleanup_stuck_queued_tasks(self, mock_fail): def test_result_backend_sqlalchemy_engine_options(self, mock_celery): import importlib - from airflow.providers.celery.executors import celery_executor_utils, default_celery - # reload celery conf to apply the new config importlib.reload(default_celery) # reload celery_executor_utils to recreate the celery app with new config From 064c90cd453a6817ddeb0150f3bd730735be1225 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sat, 22 Jul 2023 13:44:07 +0200 Subject: [PATCH 2/2] Extract Dask executor to daskexecutor provider --- CONTRIBUTING.rst | 18 +- Dockerfile | 2 +- IMAGES.rst | 2 +- INSTALL | 18 +- airflow/config_templates/config.yml | 33 ---- airflow/executors/__init__.py | 3 + airflow/executors/executor_loader.py | 2 +- .../CREATING_COMMUNITY_PROVIDERS.rst | 74 ++++--- airflow/providers/daskexecutor/CHANGELOG.rst | 33 ++++ airflow/providers/daskexecutor/__init__.py | 41 ++++ .../daskexecutor/executors/__init__.py | 16 ++ .../daskexecutor}/executors/dask_executor.py | 0 airflow/providers/daskexecutor/provider.yaml | 79 ++++++++ .../src/airflow_breeze/global_constants.py | 2 +- .../changelog.rst | 19 ++ .../commits.rst | 27 +++ .../configurations-ref.rst | 18 ++ .../index.rst | 86 ++++++++ .../installing-providers-from-sources.rst | 18 ++ .../security.rst | 38 ++++ .../core-concepts/executor/dask.rst | 4 +- docs/apache-airflow/extra-packages-ref.rst | 10 +- docs/docker-stack/build-arg-ref.rst | 2 +- docs/spelling_wordlist.txt | 1 + generated/provider_dependencies.json | 10 + images/breeze/output-commands-hash.txt | 20 +- images/breeze/output_build-docs.svg | 128 ++++++------ images/breeze/output_prod-image.svg | 24 +-- images/breeze/output_prod-image_build.svg | 184 +++++++++--------- images/breeze/output_release-management.svg | 171 ++++++++++++++++ ...ement_generate-issue-content-providers.svg | 72 +++---- ...agement_prepare-provider-documentation.svg | 66 +++---- ...e-management_prepare-provider-packages.svg | 40 ++-- ...output_release-management_publish-docs.svg | 110 +++++------ images/breeze/output_sbom.svg | 20 +- ...ut_sbom_generate-provider-requirements.svg | 54 ++--- scripts/ci/installed_providers.txt | 1 + .../ci/pre_commit/pre_commit_insert_extras.py | 2 +- setup.py | 14 +- tests/providers/daskexecutor/__init__.py | 16 ++ .../daskexecutor}/test_dask_executor.py | 4 +- 41 files changed, 1025 insertions(+), 457 deletions(-) create mode 100644 airflow/providers/daskexecutor/CHANGELOG.rst create mode 100644 
airflow/providers/daskexecutor/__init__.py create mode 100644 airflow/providers/daskexecutor/executors/__init__.py rename airflow/{ => providers/daskexecutor}/executors/dask_executor.py (100%) create mode 100644 airflow/providers/daskexecutor/provider.yaml create mode 100644 docs/apache-airflow-providers-daskexecutor/changelog.rst create mode 100644 docs/apache-airflow-providers-daskexecutor/commits.rst create mode 100644 docs/apache-airflow-providers-daskexecutor/configurations-ref.rst create mode 100644 docs/apache-airflow-providers-daskexecutor/index.rst create mode 100644 docs/apache-airflow-providers-daskexecutor/installing-providers-from-sources.rst create mode 100644 docs/apache-airflow-providers-daskexecutor/security.rst create mode 100644 images/breeze/output_release-management.svg create mode 100644 tests/providers/daskexecutor/__init__.py rename tests/{executors => providers/daskexecutor}/test_dask_executor.py (98%) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 06b9391eb9341..ef358cbdc107c 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -671,15 +671,15 @@ aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kafka, apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.sqoop, apache.webhdfs, apprise, arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, -cloudant, cncf.kubernetes, common.sql, crypto, dask, databricks, datadog, dbt.cloud, deprecated_api, -devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, -elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, -grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, -leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, -neo4j, odbc, openfaas, openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, -pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, -sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, -tabular, telegram, trino, vertica, virtualenv, webhdfs, winrm, zendesk +cloudant, cncf.kubernetes, common.sql, crypto, dask, daskexecutor, databricks, datadog, dbt.cloud, +deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, +druid, elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, +google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, +ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, +mysql, neo4j, odbc, openfaas, openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, +password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, +segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, +tableau, tabular, telegram, trino, vertica, virtualenv, webhdfs, winrm, zendesk .. END EXTRAS HERE Provider packages diff --git a/Dockerfile b/Dockerfile index 14f02ec90377e..ab9a37326cfe9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,7 +35,7 @@ # much smaller. 
# # Use the same builder frontend version for everyone -ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,dask,docker,elasticsearch,ftp,google,google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" +ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,daskexecutor,docker,elasticsearch,ftp,google,google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" ARG ADDITIONAL_AIRFLOW_EXTRAS="" ARG ADDITIONAL_PYTHON_DEPS="" diff --git a/IMAGES.rst b/IMAGES.rst index 2fa95b4a9c0f3..4657468125809 100644 --- a/IMAGES.rst +++ b/IMAGES.rst @@ -135,7 +135,7 @@ This will build the image using command similar to: .. code-block:: bash pip install \ - apache-airflow[async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,microsoft.azure,mysql,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv]==2.0.0 \ + apache-airflow[async,amazon,celery,cncf.kubernetes,docker,daskexecutor,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,microsoft.azure,mysql,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv]==2.0.0 \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.0.0/constraints-3.8.txt" .. note:: diff --git a/INSTALL b/INSTALL index c5bfc7fe55d03..b30f0dbd5f10f 100644 --- a/INSTALL +++ b/INSTALL @@ -98,15 +98,15 @@ aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kafka, apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.sqoop, apache.webhdfs, apprise, arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, -cloudant, cncf.kubernetes, common.sql, crypto, dask, databricks, datadog, dbt.cloud, deprecated_api, -devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, -elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, -grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, -leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, -neo4j, odbc, openfaas, openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, -pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, -sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, -tabular, telegram, trino, vertica, virtualenv, webhdfs, winrm, zendesk +cloudant, cncf.kubernetes, common.sql, crypto, dask, daskexecutor, databricks, datadog, dbt.cloud, +deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, +druid, elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, +google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, +ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, +mysql, neo4j, odbc, openfaas, openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, +password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, +segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, +tableau, tabular, telegram, trino, vertica, virtualenv, 
webhdfs, winrm, zendesk # END EXTRAS HERE # For installing Airflow in development environments - see CONTRIBUTING.rst diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 4cef07dee1de7..d3f2156bd86e8 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -2010,39 +2010,6 @@ local_kubernetes_executor: type: string example: ~ default: "kubernetes" -dask: - description: | - This section only applies if you are using the DaskExecutor in - [core] section above - options: - cluster_address: - description: | - The IP address and port of the Dask cluster's scheduler. - version_added: ~ - type: string - example: ~ - default: "127.0.0.1:8786" - tls_ca: - description: | - Path to a CA certificate file encoded in PEM format to access a secured Dask scheduler. - version_added: ~ - type: string - example: ~ - default: "" - tls_cert: - description: | - Path to a certificate file for the client, encoded in PEM format. - version_added: ~ - type: string - example: ~ - default: "" - tls_key: - description: | - Path to a key file for the client, encoded in PEM format. - version_added: ~ - type: string - example: ~ - default: "" scheduler: description: ~ options: diff --git a/airflow/executors/__init__.py b/airflow/executors/__init__.py index 2c342d45f16d9..ceeec523397b5 100644 --- a/airflow/executors/__init__.py +++ b/airflow/executors/__init__.py @@ -28,6 +28,9 @@ "CeleryKubernetesExecutor": "airflow.providers.celery.executors." "celery_kubernetes_executor.CeleryKubernetesExecutor", }, + "dask_executor": { + "DaskExecutor": "airflow.providers.daskexecutor.executors.dask_executor.DaskExecutor", + }, } add_deprecated_classes(__deprecated_classes, __name__) diff --git a/airflow/executors/executor_loader.py b/airflow/executors/executor_loader.py index 816e172140aee..3553a71183609 100644 --- a/airflow/executors/executor_loader.py +++ b/airflow/executors/executor_loader.py @@ -63,7 +63,7 @@ class ExecutorLoader: CELERY_EXECUTOR: "airflow.providers.celery.executors.celery_executor.CeleryExecutor", CELERY_KUBERNETES_EXECUTOR: "airflow.providers.celery." "executors.celery_kubernetes_executor.CeleryKubernetesExecutor", - DASK_EXECUTOR: "airflow.executors.dask_executor.DaskExecutor", + DASK_EXECUTOR: "airflow.providers.daskexecutor.executors.dask_executor.DaskExecutor", KUBERNETES_EXECUTOR: "airflow.executors.kubernetes_executor.KubernetesExecutor", DEBUG_EXECUTOR: "airflow.executors.debug_executor.DebugExecutor", } diff --git a/airflow/providers/CREATING_COMMUNITY_PROVIDERS.rst b/airflow/providers/CREATING_COMMUNITY_PROVIDERS.rst index be249b53bb046..d4e14e138cf66 100644 --- a/airflow/providers/CREATING_COMMUNITY_PROVIDERS.rst +++ b/airflow/providers/CREATING_COMMUNITY_PROVIDERS.rst @@ -55,6 +55,9 @@ the provider may need. Understand that not all providers will need all the compo If you still have doubts about building your provider, we recommend that you read the initial provider guide and open a issue on GitHub so the community can help you. +The folders are optional: example_dags, hooks, links, logs, notifications, operators, secrets, sensors, transfers, +triggers, waiters (and the list changes continuously). + .. code-block:: bash airflow/ @@ -63,20 +66,27 @@ open a issue on GitHub so the community can help you. 
│ ├── example_dags/ │ │ ├── __init__.py │ │ └── example_.py + │ ├── executors/ + │ │ ├── __init__.py + │ │ └── .py │ ├── hooks/ │ │ ├── __init__.py │ │ └── .py │ ├── operators/ │ │ ├── __init__.py │ │ └── .py - │ ├── sensors/ + .... + │ ├── transfers/ │ │ ├── __init__.py │ │ └── .py - │ └── transfers/ + │ └── triggers/ │ ├── __init__.py │ └── .py └── tests/providers// ├── __init__.py + ├── executors/ + │ ├── __init__.py + │ └── test_.py ├── hooks/ │ ├── __init__.py │ └── test_.py @@ -84,10 +94,11 @@ open a issue on GitHub so the community can help you. │ ├── __init__.py │ ├── test_.py │ └── test__system.py - ├── sensors/ + ... + ├── transfers/ │ ├── __init__.py │ └── test_.py - └── transfers/ + └── triggers/ ├── __init__.py └── test_.py @@ -113,26 +124,27 @@ Some steps for documentation occurs automatically by ``pre-commit`` see `Install .. code-block:: bash - airflow/ - ├── INSTALL - ├── CONTRIBUTING.rst - ├── setup.py - ├── docs/ - │ ├── spelling_wordlist.txt - │ ├── apache-airflow/ - │ │ └── extra-packages-ref.rst - │ ├── integration-logos// - │ │ └── .png - │ └── apache-airflow-providers-/ - │ ├── index.rst - │ ├── commits.rst - │ ├── connections.rst - │ └── operators/ - │ └── .rst - └── providers/ - └── / - ├── provider.yaml - └── CHANGELOG.rst + ├── INSTALL + ├── CONTRIBUTING.rst + ├── setup.py + ├── airflow/ + │ └── providers/ + │ └── / + │ ├── provider.yaml + │ └── CHANGELOG.rst + │ + └── docs/ + ├── spelling_wordlist.txt + ├── apache-airflow/ + │ └── extra-packages-ref.rst + ├── integration-logos// + │ └── .png + └── apache-airflow-providers-/ + ├── index.rst + ├── commits.rst + ├── connections.rst + └── operators/ + └── .rst Files automatically updated by pre-commit: @@ -199,9 +211,19 @@ In the ``docs/apache-airflow-providers-/operators/.r :end-before: [END howto_operator_] -In the ``docs/apache-airflow-providers-new_provider/index.rst``: +Copy from another, similar provider the docs: ``docs/apache-airflow-providers-new_provider/*.rst``: + +At least those docs should be present + +* security.rst +* changelog.rst +* commits.rst +* index.rst +* installing-providers-from-sources.rst +* configurations-ref.rst - if your provider has ``config`` element in provider.yaml with configuration options + specific for your provider -- add all information of the purpose of your provider. It is recommended to check with another provider to help you complete this document as best as possible. +Make sure to update/add all information that are specific for the new provider. In the ``airflow/providers//provider.yaml`` add information of your provider: diff --git a/airflow/providers/daskexecutor/CHANGELOG.rst b/airflow/providers/daskexecutor/CHANGELOG.rst new file mode 100644 index 0000000000000..264edd3a39665 --- /dev/null +++ b/airflow/providers/daskexecutor/CHANGELOG.rst @@ -0,0 +1,33 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. + + +.. NOTE TO CONTRIBUTORS: + Please, only add notes to the Changelog just below the "Changelog" header when there are some breaking changes + and you want to add an explanation to the users on how they are supposed to deal with them. + The changelog is updated and maintained semi-automatically by release manager. + +``apache-airflow-providers-daskexecutor`` + + +Changelog +--------- + +1.0.0 +..... + +Initial version of the provider. diff --git a/airflow/providers/daskexecutor/__init__.py b/airflow/providers/daskexecutor/__init__.py new file mode 100644 index 0000000000000..6548d7b15d61d --- /dev/null +++ b/airflow/providers/daskexecutor/__init__.py @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE +# OVERWRITTEN WHEN PREPARING DOCUMENTATION FOR THE PACKAGES. +# +# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE +# `PROVIDER__INIT__PY_TEMPLATE.py.jinja2` IN the `dev/provider_packages` DIRECTORY +# +from __future__ import annotations + +import packaging.version + +__all__ = ["__version__"] + +__version__ = "1.0.0" + +try: + from airflow import __version__ as airflow_version +except ImportError: + from airflow.version import version as airflow_version + +if packaging.version.parse(airflow_version) < packaging.version.parse("2.4.0"): + raise RuntimeError( + f"The package `apache-airflow-providers-daskexecutor:{__version__}` requires Apache Airflow 2.4.0+" # NOQA: E501 + ) diff --git a/airflow/providers/daskexecutor/executors/__init__.py b/airflow/providers/daskexecutor/executors/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/daskexecutor/executors/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
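The rename that follows moves the executor module into the provider package, while the ``dask_executor`` entry added to ``airflow/executors/__init__.py`` above keeps the old import path importable. A minimal sketch of the intended compatibility behaviour, assuming ``add_deprecated_classes`` handles this entry the same way as the celery entries already registered there (legacy path still resolves, but with a deprecation warning):

.. code-block:: python

    # Sketch only: the legacy import path is preserved by the deprecation shim,
    # while the provider package is now the canonical location of the executor.
    from airflow.providers.daskexecutor.executors.dask_executor import DaskExecutor

    # Deprecated path - expected to resolve to the same class and warn.
    from airflow.executors.dask_executor import DaskExecutor as LegacyDaskExecutor

    assert LegacyDaskExecutor is DaskExecutor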
diff --git a/airflow/executors/dask_executor.py b/airflow/providers/daskexecutor/executors/dask_executor.py similarity index 100% rename from airflow/executors/dask_executor.py rename to airflow/providers/daskexecutor/executors/dask_executor.py diff --git a/airflow/providers/daskexecutor/provider.yaml b/airflow/providers/daskexecutor/provider.yaml new file mode 100644 index 0000000000000..3c6466d460403 --- /dev/null +++ b/airflow/providers/daskexecutor/provider.yaml @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +--- +package-name: apache-airflow-providers-daskexecutor +name: Dask Executor +description: | + `Dask `__ + +suspended: false +versions: + - 1.0.0 + +dependencies: + - apache-airflow>=2.4.0 + # Dask support is limited, we need Dask team to upgrade support for dask if we were to continue + # Supporting it in the future + - cloudpickle>=1.4.1 + # Dask and distributed in version 2023.5.0 break our test + # See https://github.com/dask/dask/issues/10279 + - dask>=2.9.0,!=2022.10.1,!=2023.5.0 + - distributed>=2.11.1,!=2023.5.0 + +integrations: + - integration-name: Dask + external-doc-url: https://www.dask.org/ + logo: /integration-logos/dask/dask.png + tags: [service] + +executors: + - airflow.providers.daskexecutor.executors.dask_executor.DaskExecutor + +config: + dask: + description: | + This section only applies if you are using DaskExecutor. + options: + cluster_address: + description: | + The IP address and port of the Dask cluster's scheduler. + version_added: ~ + type: string + example: ~ + default: "127.0.0.1:8786" + tls_ca: + description: | + Path to a CA certificate file encoded in PEM format to access a secured Dask scheduler. + version_added: ~ + type: string + example: ~ + default: "" + tls_cert: + description: | + Path to a certificate file for the client, encoded in PEM format. + version_added: ~ + type: string + example: ~ + default: "" + tls_key: + description: | + Path to a key file for the client, encoded in PEM format. + version_added: ~ + type: string + example: ~ + default: "" diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 18ff5d7828f60..d4fcd4cb2584c 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -341,7 +341,7 @@ def get_airflow_extras(): "async", "celery", "cncf.kubernetes", - "dask", + "daskexecutor", "docker", "elasticsearch", "ftp", diff --git a/docs/apache-airflow-providers-daskexecutor/changelog.rst b/docs/apache-airflow-providers-daskexecutor/changelog.rst new file mode 100644 index 0000000000000..a2bf898b3f6a3 --- /dev/null +++ b/docs/apache-airflow-providers-daskexecutor/changelog.rst @@ -0,0 +1,19 @@ + + .. 
Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: ../../airflow/providers/daskexecutor/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-daskexecutor/commits.rst b/docs/apache-airflow-providers-daskexecutor/commits.rst new file mode 100644 index 0000000000000..1b8a85877d0cf --- /dev/null +++ b/docs/apache-airflow-providers-daskexecutor/commits.rst @@ -0,0 +1,27 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Package apache-airflow-providers-daskexecutor +------------------------------------------------------ + +`Dask `__ + + +This is detailed commit list of changes for versions provider package: ``daskexecutor``. +For high-level changelog, see :doc:`package information including changelog `. diff --git a/docs/apache-airflow-providers-daskexecutor/configurations-ref.rst b/docs/apache-airflow-providers-daskexecutor/configurations-ref.rst new file mode 100644 index 0000000000000..5885c9d91b6e8 --- /dev/null +++ b/docs/apache-airflow-providers-daskexecutor/configurations-ref.rst @@ -0,0 +1,18 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. 
include:: ../exts/includes/providers-configurations-ref.rst diff --git a/docs/apache-airflow-providers-daskexecutor/index.rst b/docs/apache-airflow-providers-daskexecutor/index.rst new file mode 100644 index 0000000000000..be612a6956320 --- /dev/null +++ b/docs/apache-airflow-providers-daskexecutor/index.rst @@ -0,0 +1,86 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +``apache-airflow-providers-daskexecutor`` +========================================= + + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Basics + + Home + Changelog + Security + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: References + + Configuration + Python API <_api/airflow/providers/daskexecutor/index> + PyPI Repository + Installing from sources + +.. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! + + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Commits + + Detailed list of commits + + +Package apache-airflow-providers-daskexecutor +------------------------------------------------------ + +`Dask `__ + + +Release: 1.0.0 + +Provider package +---------------- + +This is a provider package for ``daskexecutor`` provider. All classes for this provider package +are in ``airflow.providers.daskexecutor`` python package. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below) +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-daskexecutor`` + +Requirements +------------ + +The minimum Apache Airflow version supported by this provider package is ``2.4.0``. + +================== ================================== +PIP package Version required +================== ================================== +``apache-airflow`` ``>=2.4.0`` +``cloudpickle`` ``>=1.4.1`` +``dask`` ``>=2.9.0,!=2022.10.1,!=2023.5.0`` +``distributed`` ``>=2.11.1,!=2023.5.0`` +================== ================================== diff --git a/docs/apache-airflow-providers-daskexecutor/installing-providers-from-sources.rst b/docs/apache-airflow-providers-daskexecutor/installing-providers-from-sources.rst new file mode 100644 index 0000000000000..b4e730f4ff21a --- /dev/null +++ b/docs/apache-airflow-providers-daskexecutor/installing-providers-from-sources.rst @@ -0,0 +1,18 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: ../exts/includes/installing-providers-from-sources.rst diff --git a/docs/apache-airflow-providers-daskexecutor/security.rst b/docs/apache-airflow-providers-daskexecutor/security.rst new file mode 100644 index 0000000000000..66c6f79a4ecfc --- /dev/null +++ b/docs/apache-airflow-providers-daskexecutor/security.rst @@ -0,0 +1,38 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Releasing security patches +-------------------------- + +Airflow providers are released independently from Airflow itself and the information about vulnerabilities +is published separately. You can upgrade providers independently from Airflow itself, following the +instructions found in :doc:`apache-airflow:installation/installing-from-pypi`. + +When we release Provider version, the development is always done from the ``main`` branch where we prepare +the next version. The provider uses strict `SemVer `_ versioning policy. Depending on +the scope of the change, Provider will get ''MAJOR'' version upgrade when there are +breaking changes, ``MINOR`` version upgrade when there are new features or ``PATCHLEVEL`` version upgrade +when there are only bug fixes (including security bugfixes) - and this is the only version that receives +security fixes by default, so you should upgrade to latest version of the provider if you want to receive +all released security fixes. + +The only exception to that rule is when we have a critical security fix and good reason to provide an +out-of-band release for the provider, in which case stakeholders in the provider might decide to cherry-pick +and prepare a branch for an older version of the provider following the +`mixed governance model `_ +and requires interested parties to cherry-pick and test the fixes. diff --git a/docs/apache-airflow/core-concepts/executor/dask.rst b/docs/apache-airflow/core-concepts/executor/dask.rst index 7551ffaf8b38e..117d396962976 100644 --- a/docs/apache-airflow/core-concepts/executor/dask.rst +++ b/docs/apache-airflow/core-concepts/executor/dask.rst @@ -21,7 +21,7 @@ Dask Executor ============= -:class:`airflow.executors.dask_executor.DaskExecutor` allows you to run Airflow tasks in a Dask Distributed cluster. +:class:`airflow.providers.daskexecutor.executors.dask_executor.DaskExecutor` allows you to run Airflow tasks in a Dask Distributed cluster. Dask clusters can be run on a single machine or on remote networks. For complete details, consult the `Distributed documentation `_. 
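The ``[dask]`` options removed from ``airflow/config_templates/config.yml`` now live in the ``config`` section of the provider's ``provider.yaml`` shown earlier, and the section name stays ``dask``, so existing ``airflow.cfg`` files keep working. A minimal sketch of reading those options, assuming the daskexecutor provider is installed and its configuration defaults have been loaded (the fallbacks mirror the defaults declared in provider.yaml):

.. code-block:: python

    from airflow.configuration import conf

    # The provider-supplied [dask] section resolves through the normal config API.
    cluster_address = conf.get("dask", "cluster_address", fallback="127.0.0.1:8786")
    tls_ca = conf.get("dask", "tls_ca", fallback="")
    tls_cert = conf.get("dask", "tls_cert", fallback="")
    tls_key = conf.get("dask", "tls_key", fallback="")
    print(f"DaskExecutor will connect to {cluster_address}")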
@@ -42,7 +42,7 @@ Next start at least one Worker on any machine that can connect to the host: dask-worker $DASK_HOST:$DASK_PORT -Edit your ``airflow.cfg`` to set your executor to :class:`airflow.executors.dask_executor.DaskExecutor` and provide +Edit your ``airflow.cfg`` to set your executor to :class:`airflow.providers.daskexecutor.executors.dask_executor.DaskExecutor` and provide the Dask Scheduler address in the ``[dask]`` section. For more information on setting the configuration, see :doc:`../../howto/set-config`. diff --git a/docs/apache-airflow/extra-packages-ref.rst b/docs/apache-airflow/extra-packages-ref.rst index 027fbb657e415..ef6c4bf1b2124 100644 --- a/docs/apache-airflow/extra-packages-ref.rst +++ b/docs/apache-airflow/extra-packages-ref.rst @@ -50,7 +50,7 @@ python dependencies for the provided package. +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+--------------+ | cncf.kubernetes | ``pip install 'apache-airflow[cncf.kubernetes]'`` | Kubernetes Executor (also installs the Kubernetes provider package) | | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+--------------+ -| dask | ``pip install 'apache-airflow[dask]'`` | DaskExecutor | | +| daskexecutor | ``pip install 'apache-airflow[daskexecutor]'`` | DaskExecutor ((also installs the Daskexecutor provider package) | | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+--------------+ | deprecated_api | ``pip install 'apache-airflow[deprecated_api]'`` | Deprecated, experimental API that is replaced with the new REST API | | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+--------------+ @@ -349,10 +349,10 @@ Those are the extras that are needed to generated documentation for Airflow. Thi +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -Deprecated 1.10 extras ----------------------- +Deprecated extras +----------------- -These are the extras that have been deprecated in 2.0 and will be removed in Airflow 3.0.0. They were +These are the extras that have been used before and deprecated in 2.0 and will be removed in Airflow 3.0.0. They were all replaced by new extras, which have naming consistent with the names of provider packages. The ``crypto`` extra is not needed any more, because all crypto dependencies are part of airflow package, @@ -371,6 +371,8 @@ so there is no replacement for ``crypto`` extra. 
+---------------------+-----------------------------+ | crypto | | +---------------------+-----------------------------+ +| dask | daskexecutor | ++---------------------+-----------------------------+ | druid | apache.druid | +---------------------+-----------------------------+ | gcp | google | diff --git a/docs/docker-stack/build-arg-ref.rst b/docs/docker-stack/build-arg-ref.rst index 8544beff6dad4..aa59a927014d3 100644 --- a/docs/docker-stack/build-arg-ref.rst +++ b/docs/docker-stack/build-arg-ref.rst @@ -85,7 +85,7 @@ List of default extras in the production Dockerfile: * async * celery * cncf.kubernetes -* dask +* daskexecutor * docker * elasticsearch * ftp diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 8effe2cc92ca4..6bfca3ef37d59 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -344,6 +344,7 @@ DagRunState DAGs Dask dask +daskexecutor dat Databricks databricks diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 03d821eff15e9..091a45cf36ee2 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -277,6 +277,16 @@ ], "excluded-python-versions": [] }, + "daskexecutor": { + "deps": [ + "apache-airflow>=2.4.0", + "cloudpickle>=1.4.1", + "dask>=2.9.0,!=2022.10.1,!=2023.5.0", + "distributed>=2.11.1,!=2023.5.0" + ], + "cross-providers-deps": [], + "excluded-python-versions": [] + }, "databricks": { "deps": [ "aiohttp>=3.6.3, <4", diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index a678af7996c67..463c27f7e7ada 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -2,7 +2,7 @@ # Please do not solve it but run `breeze setup regenerate-command-images`. # This command should fix the conflict and regenerate help images that you have conflict with. 
main:344261ca3aa7ff31e098b1d88280566a -build-docs:927336a331afdb9f7b6797c119d9cc16 +build-docs:1baeefeaba160aef5355ffbb09212cf5 ci:fix-ownership:3e5a73533cc96045e72cb258783cfc96 ci:free-space:49af17b032039c05c41a7a8283f365cc ci:get-workflow-info:8246038093359b9c3c110043419473e2 @@ -31,28 +31,28 @@ k8s:status:1b1b6fb6ccc7a211a3eb44568da71659 k8s:tests:2319c6f5ba0681ff7627e3166a1479ef k8s:upload-k8s-image:a9ac79e2f5e3d6b01fa45e764885913f k8s:db08df55a8200bff5858a3b7549bff47 -prod-image:build:58ab53c2f5e799bbe05d6fac26d001bf +prod-image:build:1a321a1243555ab4e2cc95f44e85c92e prod-image:pull:76f1f27e6119928412abecf153fce4bb prod-image:verify:bd2b78738a7c388dbad6076c41a9f906 -prod-image:a434391e2b5201aaeb5c06601485f7bc +prod-image:ad55d9dce93cbfcc1c5ad8640a007340 release-management:add-back-references:8dab6a30076a55f2d31c6d22a94e0ccb release-management:create-minor-branch:a3834afc4aa5d1e98002c9e9e7a9931d release-management:generate-constraints:b8fcaf8f0acd35ed5dbd48659bdb6485 -release-management:generate-issue-content-providers:9cea234261547208ac812464d3e4a598 +release-management:generate-issue-content-providers:6b0d954cb6dbdec0da0a7988feec58f0 release-management:generate-providers-metadata:d4e8e5cfaa024e3963af02d7a873048d release-management:install-provider-packages:a89493be1ae961c13469b5a25a605069 release-management:prepare-airflow-package:85d01c57e5b5ee0fb9e5f9d9706ed3b5 -release-management:prepare-provider-documentation:bc0b95c6a0f308273ab25f728afdb039 -release-management:prepare-provider-packages:3e19a787c71d5515a2bc05539da0022f -release-management:publish-docs:4a7d78a846f7e58c6af4a55cbdf7104b +release-management:prepare-provider-documentation:b8defe8b94bf790d88d055c3e4c0ffba +release-management:prepare-provider-packages:4599a06e636c6fe8fda41b5a751c1429 +release-management:publish-docs:00eae3f9acc2134c14c9a94d789423ce release-management:release-prod-images:4d85a23a2175bf8894de5aedbdd85614 release-management:start-rc-process:b27bd524dd3c89f50a747b60a7e892c1 release-management:start-release:419f48f6a4ff4457cb9de7ff496aebbe release-management:verify-provider-packages:96dce5644aad6b37080acf77b3d8de3a -release-management:e12492cf058a0c32fc6daecde65a1aef -sbom:generate-provider-requirements:1e7609a0ebafb839d30ec90e4254a628 +release-management:f15d70b8cb924cf33e6d7e2a1f32adf8 +sbom:generate-provider-requirements:9abe53200ea5f40e0bf7c27f6087f27f sbom:update-sbom-information:0ce56884e5f842e3e80d6619df1ccc64 -sbom:2ac6b8fc1e84428d5588270eb0d84040 +sbom:935d041028e847d3faf763a95b51063e setup:autocomplete:fffcd49e102e09ccd69b3841a9e3ea8e setup:check-all-params-in-groups:76b3b1d3726ca2a446bab0668b1c50f5 setup:config:38ebaaf93ed42bc7b2a3000eeea2631d diff --git a/images/breeze/output_build-docs.svg b/images/breeze/output_build-docs.svg index f0475ed9c40a9..75e92ebb2ccdf 100644 --- a/images/breeze/output_build-docs.svg +++ b/images/breeze/output_build-docs.svg @@ -1,4 +1,4 @@ - + - + @@ -219,9 +219,12 @@ + + + - Command: build-docs + Command: build-docs @@ -232,64 +235,65 @@ -Usage: breeze build-docs [OPTIONS] +Usage: breeze build-docs [OPTIONS] -╭─ Doc flags ──────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---docs-only-dOnly build documentation. ---spellcheck-only-sOnly run spell checking. ---clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx artifacts     -before the build - useful for a clean build.                                                ---one-pass-onlyBuilds documentation in one pass only. 
[The remaining hunks re-render the generated breeze help screenshots: images/breeze/output_build-docs.svg now lists apache-airflow-providers-daskexecutor among the --package-filter choices, while images/breeze/output_prod-image.svg and images/breeze/output_prod-image_build.svg pick up the new default --airflow-extras value with daskexecutor in place of dask.]
+(TEXT)                                                                           +--airflow-constraints-modeMode of constraints for PROD image building.                            +(constraints | constraints-no-providers | constraints-source-providers) +[default: constraints]                                                  +--airflow-constraints-referenceConstraint reference to use when building the image.(TEXT) +--python-imageIf specified this is the base python image used to build the image. Should be    +something like: python:VERSION-slim-bullseye.                                    +(TEXT)                                                                           +--additional-extrasAdditional extra package while installing Airflow in the image.(TEXT) +--additional-pip-install-flagsAdditional flags added to `pip install` commands (except reinstalling `pip`      +itself).                                                                         +(TEXT)                                                                           +--additional-python-depsAdditional python dependencies to use when building the images.(TEXT) +--additional-runtime-apt-depsAdditional apt runtime dependencies to use when building the images.(TEXT) +--additional-runtime-apt-envAdditional environment variables set when adding runtime dependencies.(TEXT) +--additional-runtime-apt-commandAdditional command executed before runtime apt deps are installed.(TEXT) +--additional-dev-apt-depsAdditional apt dev dependencies to use when building the images.(TEXT) +--additional-dev-apt-envAdditional environment variables set when adding dev dependencies.(TEXT) +--additional-dev-apt-commandAdditional command executed before dev apt deps are installed.(TEXT) +--runtime-apt-depsApt runtime dependencies to use when building the images.(TEXT) +--runtime-apt-commandCommand executed before runtime apt deps are installed.(TEXT) +--dev-apt-depsApt dev dependencies to use when building the images.(TEXT) +--dev-apt-commandCommand executed before dev apt deps are installed.(TEXT) +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Customization options (for specific customization needs) ───────────────────────────────────────────────────────────╮ +--install-packages-from-contextInstall wheels from local docker-context-files when building image.        +Implies --disable-airflow-repo-cache.                                      +--cleanup-contextClean up docker context files before running build (cannot be used         +together with --install-packages-from-context).                            +--disable-mysql-client-installationDo not install MySQL client. +--disable-mssql-client-installationDo not install MsSQl client. +--disable-postgres-client-installationDo not install Postgres client. +--disable-airflow-repo-cacheDisable cache from Airflow repository during building. +--install-airflow-referenceInstall Airflow using GitHub tag or branch.(TEXT) +--installation-methodInstall Airflow from: sources or PyPI.(. | apache-airflow) +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Preparing cache and push (for maintainers and CI) ──────────────────────────────────────────────────────────────────╮ +--platformPlatform for Airflow image.(linux/amd64 | linux/arm64 | linux/amd64,linux/arm64) +--pushPush image after building it. 
+--empty-imagePrepare empty image tagged with the same name as the Airflow image. +--prepare-buildx-cachePrepares build cache (this is done as separate per-platform steps instead of building the  +image).                                                                                    +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Github authentication ──────────────────────────────────────────────────────────────────────────────────────────────╮ +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--github-tokenThe token used to authenticate to GitHub.(TEXT) +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management.svg b/images/breeze/output_release-management.svg new file mode 100644 index 0000000000000..c25e978e3c16a --- /dev/null +++ b/images/breeze/output_release-management.svg @@ -0,0 +1,171 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Command: release-management + + + + + + + + + + +Usage: breeze release-management [OPTIONS] COMMAND [ARGS]... + +Tools that release managers can use to prepare and manage Airflow releases + +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Airflow release commands ───────────────────────────────────────────────────────────────────────────────────────────╮ +prepare-airflow-package      Prepare sdist/whl package of Airflow.                                                 +create-minor-branch          Create a new version branch and update the default branches in main                   +start-rc-process             Start RC process                                                                      +start-release                Start Airflow release process                                                         +release-prod-images          Release production images to DockerHub (needs DockerHub permissions).                 +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Providers release commands ─────────────────────────────────────────────────────────────────────────────────────────╮ +prepare-provider-documentation      Prepare CHANGELOG, README and COMMITS information for providers.               +prepare-provider-packages           Prepare sdist/whl packages of Airflow Providers.                               +install-provider-packages           Installs provider packages that can be found in dist.                          +verify-provider-packages            Verifies if all provider code is following expectations for providers.         
+generate-providers-metadata         Generates metadata for providers.                                              +generate-issue-content-providers    Generates content for issue to test the release.                               +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Other release commands ─────────────────────────────────────────────────────────────────────────────────────────────╮ +publish-docs            Command to publish generated documentation to airflow-site                                 +generate-constraints    Generates pinned constraint files with all extras from setup.py in parallel.               +add-back-references     Command to add back references for documentation to make it backward compatible            +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + + + + diff --git a/images/breeze/output_release-management_generate-issue-content-providers.svg b/images/breeze/output_release-management_generate-issue-content-providers.svg index 12eabb09189c4..04fd3c8155f25 100644 --- a/images/breeze/output_release-management_generate-issue-content-providers.svg +++ b/images/breeze/output_release-management_generate-issue-content-providers.svg @@ -1,4 +1,4 @@ - + - + @@ -150,9 +150,12 @@ + + + - Command: release-management generate-issue-content-providers + Command: release-management generate-issue-content-providers @@ -163,41 +166,42 @@ -Usage: breeze release-management generate-issue-content-providers [OPTIONS] [airbyte | alibaba | amazon | apache.beam +Usage: breeze release-management generate-issue-content-providers [OPTIONS] [airbyte | alibaba | amazon | apache.beam                                                                   | apache.cassandra | apache.drill | apache.druid |                                                                   apache.flink | apache.hdfs | apache.hive |                                                                   apache.impala | apache.kafka | apache.kylin |                                                                   apache.livy | apache.pig | apache.pinot |                                                                   apache.spark | apache.sqoop | apprise | arangodb |                                                                   asana | atlassian.jira | celery | cloudant | -                                                                  cncf.kubernetes | common.sql | databricks | datadog -                                                                  | dbt.cloud | dingding | discord | docker | -                                                                  elasticsearch | exasol | facebook | ftp | github | -                                                                  google | grpc | hashicorp | http | imap | influxdb | -                                                                  jdbc | jenkins | microsoft.azure | microsoft.mssql | -                                                                  microsoft.psrp | microsoft.winrm | mongo | mysql | -                                                                  neo4j | odbc | openfaas | openlineage | opsgenie | -                                                                  oracle | pagerduty | papermill | plexus | postgres | -                                                                  presto | qubole | redis | salesforce | samba | -                                                      
            segment | sendgrid | sftp | singularity | slack | -                                                                  smtp | snowflake | sqlite | ssh | tableau | tabular -                                                                  | telegram | trino | vertica | zendesk]... - -Generates content for issue to test the release. - -╭─ Generate issue content flags ───────────────────────────────────────────────────────────────────────────────────────╮ ---github-tokenGitHub token used to authenticate. You can set omit it if you have GITHUB_TOKEN env      -variable set. Can be generated with:                                                     -https://github.com/settings/tokens/new?description=Read%20sssues&scopes=repo:status      -(TEXT)                                                                                   ---suffixSuffix to add to the version prepared(TEXT) ---only-available-in-distOnly consider package ids with packages prepared in the dist folder ---excluded-pr-listComa-separated list of PRs to exclude from the issue.(TEXT) ---disable-progressDisable progress bar -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +                                                                  cncf.kubernetes | common.sql | daskexecutor | +                                                                  databricks | datadog | dbt.cloud | dingding | +                                                                  discord | docker | elasticsearch | exasol | facebook +                                                                  | ftp | github | google | grpc | hashicorp | http | +                                                                  imap | influxdb | jdbc | jenkins | microsoft.azure | +                                                                  microsoft.mssql | microsoft.psrp | microsoft.winrm | +                                                                  mongo | mysql | neo4j | odbc | openfaas | +                                                                  openlineage | opsgenie | oracle | pagerduty | +                                                                  papermill | plexus | postgres | presto | qubole | +                                                                  redis | salesforce | samba | segment | sendgrid | +                                                                  sftp | singularity | slack | smtp | snowflake | +                                                                  sqlite | ssh | tableau | tabular | telegram | trino +                                                                  | vertica | zendesk]... + +Generates content for issue to test the release. + +╭─ Generate issue content flags ───────────────────────────────────────────────────────────────────────────────────────╮ +--github-tokenGitHub token used to authenticate. You can set omit it if you have GITHUB_TOKEN env      +variable set. 
Can be generated with:                                                     +https://github.com/settings/tokens/new?description=Read%20sssues&scopes=repo:status      +(TEXT)                                                                                   +--suffixSuffix to add to the version prepared(TEXT) +--only-available-in-distOnly consider package ids with packages prepared in the dist folder +--excluded-pr-listComa-separated list of PRs to exclude from the issue.(TEXT) +--disable-progressDisable progress bar +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_prepare-provider-documentation.svg b/images/breeze/output_release-management_prepare-provider-documentation.svg index 69b5e0589ba80..68c55935ffd9b 100644 --- a/images/breeze/output_release-management_prepare-provider-documentation.svg +++ b/images/breeze/output_release-management_prepare-provider-documentation.svg @@ -35,8 +35,8 @@ .breeze-release-management-prepare-provider-documentation-r1 { fill: #c5c8c6;font-weight: bold } .breeze-release-management-prepare-provider-documentation-r2 { fill: #c5c8c6 } .breeze-release-management-prepare-provider-documentation-r3 { fill: #d0b344;font-weight: bold } -.breeze-release-management-prepare-provider-documentation-r4 { fill: #68a0b3;font-weight: bold } -.breeze-release-management-prepare-provider-documentation-r5 { fill: #868887 } +.breeze-release-management-prepare-provider-documentation-r4 { fill: #868887 } +.breeze-release-management-prepare-provider-documentation-r5 { fill: #68a0b3;font-weight: bold } .breeze-release-management-prepare-provider-documentation-r6 { fill: #98a84b;font-weight: bold } .breeze-release-management-prepare-provider-documentation-r7 { fill: #8d7b39 } @@ -175,45 +175,45 @@ -Usage: breeze release-management prepare-provider-documentation [OPTIONS] [airbyte | alibaba | amazon | apache.beam | +Usage: breeze release-management prepare-provider-documentation [OPTIONS] [airbyte | alibaba | amazon | apache.beam |                                                                 apache.cassandra | apache.drill | apache.druid |                                                                 apache.flink | apache.hdfs | apache.hive |                                                                 apache.impala | apache.kafka | apache.kylin |                                                                 apache.livy | apache.pig | apache.pinot | apache.spark                                                                 | apache.sqoop | apprise | arangodb | asana |                                                                 atlassian.jira | celery | cloudant | cncf.kubernetes | -                                                                common.sql | databricks | datadog | dbt.cloud | -                                                                dingding | discord | docker | elasticsearch | exasol | -                                                                facebook | ftp | github | google | grpc | hashicorp | -                                                                http | imap | influxdb | jdbc | jenkins | -                                          
                      microsoft.azure | microsoft.mssql | microsoft.psrp | -                                                                microsoft.winrm | mongo | mysql | neo4j | odbc | -                                                                openfaas | openlineage | opsgenie | oracle | pagerduty -                                                                | papermill | plexus | postgres | presto | qubole | -                                                                redis | salesforce | samba | segment | sendgrid | sftp -                                                                | singularity | slack | smtp | snowflake | sqlite | -                                                                ssh | tableau | tabular | telegram | trino | vertica | -                                                                zendesk]... +                                                                common.sql | daskexecutor | databricks | datadog | +                                                                dbt.cloud | dingding | discord | docker | +                                                                elasticsearch | exasol | facebook | ftp | github | +                                                                google | grpc | hashicorp | http | imap | influxdb | +                                                                jdbc | jenkins | microsoft.azure | microsoft.mssql | +                                                                microsoft.psrp | microsoft.winrm | mongo | mysql | +                                                                neo4j | odbc | openfaas | openlineage | opsgenie | +                                                                oracle | pagerduty | papermill | plexus | postgres | +                                                                presto | qubole | redis | salesforce | samba | segment +                                                                | sendgrid | sftp | singularity | slack | smtp | +                                                                snowflake | sqlite | ssh | tableau | tabular | +                                                                telegram | trino | vertica | zendesk]... -Prepare CHANGELOGREADME and COMMITS information for providers. +Prepare CHANGELOG, README and COMMITS information for providers. -╭─ Provider documentation preparation flags ───────────────────────────────────────────────────────────────────────────╮ ---debugDrop user in shell instead of running the command. Useful for debugging. ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] ---base-branchBase branch to use as diff for documentation generation (used for releasing from old  -branch)                                                                               -(TEXT)                                                                                ---only-min-version-updateOnly update minimum version in __init__.py files and regenerate corresponding         -documentation                                                                         ---regenerate-missing-docsOnly regenerate missing documentation, do not bump version. Useful if templates were  -added and you need to regenerate documentation.                                       
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---answer-aForce answer to questions.(y | n | q | yes | no | quit) ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider documentation preparation flags ───────────────────────────────────────────────────────────────────────────╮ +--debugDrop user in shell instead of running the command. Useful for debugging. +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--base-branchBase branch to use as diff for documentation generation (used for releasing from old  +branch)                                                                               +(TEXT)                                                                                +--only-min-version-updateOnly update minimum version in __init__.py files and regenerate corresponding         +documentation                                                                         +--regenerate-missing-docsOnly regenerate missing documentation, do not bump version. Useful if templates were  +added and you need to regenerate documentation.                                       +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--answer-aForce answer to questions.(y | n | q | yes | no | quit) +--help-hShow this message and exit. 
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_prepare-provider-packages.svg b/images/breeze/output_release-management_prepare-provider-packages.svg index e205331cb588e..5a42796427fe7 100644 --- a/images/breeze/output_release-management_prepare-provider-packages.svg +++ b/images/breeze/output_release-management_prepare-provider-packages.svg @@ -35,8 +35,8 @@ .breeze-release-management-prepare-provider-packages-r1 { fill: #c5c8c6;font-weight: bold } .breeze-release-management-prepare-provider-packages-r2 { fill: #c5c8c6 } .breeze-release-management-prepare-provider-packages-r3 { fill: #d0b344;font-weight: bold } -.breeze-release-management-prepare-provider-packages-r4 { fill: #68a0b3;font-weight: bold } -.breeze-release-management-prepare-provider-packages-r5 { fill: #868887 } +.breeze-release-management-prepare-provider-packages-r4 { fill: #868887 } +.breeze-release-management-prepare-provider-packages-r5 { fill: #68a0b3;font-weight: bold } .breeze-release-management-prepare-provider-packages-r6 { fill: #8d7b39 } .breeze-release-management-prepare-provider-packages-r7 { fill: #98a84b;font-weight: bold } @@ -154,17 +154,17 @@ -Usage: breeze release-management prepare-provider-packages [OPTIONS] [airbyte | alibaba | amazon | apache.beam | +Usage: breeze release-management prepare-provider-packages [OPTIONS] [airbyte | alibaba | amazon | apache.beam |                                                            apache.cassandra | apache.drill | apache.druid |                                                            apache.flink | apache.hdfs | apache.hive | apache.impala |                                                            apache.kafka | apache.kylin | apache.livy | apache.pig |                                                            apache.pinot | apache.spark | apache.sqoop | apprise |                                                            arangodb | asana | atlassian.jira | celery | cloudant | -                                                           cncf.kubernetes | common.sql | databricks | datadog | -                                                           dbt.cloud | dingding | discord | docker | elasticsearch | -                                                           exasol | facebook | ftp | github | google | grpc | -                                                           hashicorp | http | imap | influxdb | jdbc | jenkins | -                                                           microsoft.azure | microsoft.mssql | microsoft.psrp | +                                                           cncf.kubernetes | common.sql | daskexecutor | databricks | +                                                           datadog | dbt.cloud | dingding | discord | docker | +                                                           elasticsearch | exasol | facebook | ftp | github | google | +                                                           grpc | hashicorp | http | imap | influxdb | jdbc | jenkins +                                                           | microsoft.azure | microsoft.mssql | microsoft.psrp |                                                            microsoft.winrm | mongo | mysql | neo4j | odbc | openfaas |                                                            openlineage | opsgenie | oracle | pagerduty | papermill |                                                            plexus | postgres | presto | 
qubole | redis | salesforce | @@ -174,18 +174,18 @@ Prepare sdist/whl packages of Airflow Providers. -╭─ Package flags ──────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---package-formatFormat of packages.(wheel | sdist | both)[default: wheel] ---version-suffix-for-pypiVersion suffix used for PyPI packages (alpha, beta, rc1, etc.).(TEXT) ---package-list-fileRead list of packages from text file (one package per line).(FILENAME) ---debugDrop user in shell instead of running the command. Useful for debugging. ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Package flags ──────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--package-formatFormat of packages.(wheel | sdist | both)[default: wheel] +--version-suffix-for-pypiVersion suffix used for PyPI packages (alpha, beta, rc1, etc.).(TEXT) +--package-list-fileRead list of packages from text file (one package per line).(FILENAME) +--debugDrop user in shell instead of running the command. Useful for debugging. +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--help-hShow this message and exit. 
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_publish-docs.svg b/images/breeze/output_release-management_publish-docs.svg index f534f04e953ce..a118eae74f03d 100644 --- a/images/breeze/output_release-management_publish-docs.svg +++ b/images/breeze/output_release-management_publish-docs.svg @@ -35,8 +35,8 @@ .breeze-release-management-publish-docs-r1 { fill: #c5c8c6;font-weight: bold } .breeze-release-management-publish-docs-r2 { fill: #c5c8c6 } .breeze-release-management-publish-docs-r3 { fill: #d0b344;font-weight: bold } -.breeze-release-management-publish-docs-r4 { fill: #68a0b3;font-weight: bold } -.breeze-release-management-publish-docs-r5 { fill: #868887 } +.breeze-release-management-publish-docs-r4 { fill: #868887 } +.breeze-release-management-publish-docs-r5 { fill: #68a0b3;font-weight: bold } .breeze-release-management-publish-docs-r6 { fill: #98a84b;font-weight: bold } .breeze-release-management-publish-docs-r7 { fill: #8d7b39 } .breeze-release-management-publish-docs-r8 { fill: #cc555a } @@ -228,62 +228,62 @@ -Usage: breeze release-management publish-docs [OPTIONS] +Usage: breeze release-management publish-docs [OPTIONS] Command to publish generated documentation to airflow-site -╭─ Publish Docs ───────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---override-versioned-sOverrides versioned directories. ---package-filterList of packages to consider.                                                       -(apache-airflow | apache-airflow-providers-airbyte |                                -apache-airflow-providers-alibaba | apache-airflow-providers-amazon |                -apache-airflow-providers-apache-beam | apache-airflow-providers-apache-cassandra |  -apache-airflow-providers-apache-drill | apache-airflow-providers-apache-druid |     -apache-airflow-providers-apache-flink | apache-airflow-providers-apache-hdfs |      -apache-airflow-providers-apache-hive | apache-airflow-providers-apache-impala |     -apache-airflow-providers-apache-kafka | apache-airflow-providers-apache-kylin |     -apache-airflow-providers-apache-livy | apache-airflow-providers-apache-pig |        -apache-airflow-providers-apache-pinot | apache-airflow-providers-apache-spark |     -apache-airflow-providers-apache-sqoop | apache-airflow-providers-apprise |          -apache-airflow-providers-arangodb | apache-airflow-providers-asana |                -apache-airflow-providers-atlassian-jira | apache-airflow-providers-celery |         -apache-airflow-providers-cloudant | apache-airflow-providers-cncf-kubernetes |      -apache-airflow-providers-common-sql | apache-airflow-providers-databricks |         -apache-airflow-providers-datadog | apache-airflow-providers-dbt-cloud |             -apache-airflow-providers-dingding | apache-airflow-providers-discord |              -apache-airflow-providers-docker | apache-airflow-providers-elasticsearch |          -apache-airflow-providers-exasol | apache-airflow-providers-facebook |               -apache-airflow-providers-ftp | apache-airflow-providers-github |                    -apache-airflow-providers-google | apache-airflow-providers-grpc |                   -apache-airflow-providers-hashicorp | apache-airflow-providers-http |                -apache-airflow-providers-imap | apache-airflow-providers-influxdb |                 -apache-airflow-providers-jdbc | apache-airflow-providers-jenkins |   
               -apache-airflow-providers-microsoft-azure | apache-airflow-providers-microsoft-mssql -| apache-airflow-providers-microsoft-psrp |                                         -apache-airflow-providers-microsoft-winrm | apache-airflow-providers-mongo |         -apache-airflow-providers-mysql | apache-airflow-providers-neo4j |                   -apache-airflow-providers-odbc | apache-airflow-providers-openfaas |                 -apache-airflow-providers-openlineage | apache-airflow-providers-opsgenie |          -apache-airflow-providers-oracle | apache-airflow-providers-pagerduty |              -apache-airflow-providers-papermill | apache-airflow-providers-plexus |              -apache-airflow-providers-postgres | apache-airflow-providers-presto |               -apache-airflow-providers-qubole | apache-airflow-providers-redis |                  -apache-airflow-providers-salesforce | apache-airflow-providers-samba |              -apache-airflow-providers-segment | apache-airflow-providers-sendgrid |              -apache-airflow-providers-sftp | apache-airflow-providers-singularity |              -apache-airflow-providers-slack | apache-airflow-providers-smtp |                    -apache-airflow-providers-snowflake | apache-airflow-providers-sqlite |              -apache-airflow-providers-ssh | apache-airflow-providers-tableau |                   -apache-airflow-providers-tabular | apache-airflow-providers-telegram |              -apache-airflow-providers-trino | apache-airflow-providers-vertica |                 -apache-airflow-providers-zendesk | docker-stack | helm-chart)                       -*--airflow-site-directory-aLocal directory path of cloned airflow-site repo.(TEXT)[required] -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Publish Docs ───────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--override-versioned-sOverrides versioned directories. +--package-filterList of packages to consider.                                                       
+(apache-airflow | apache-airflow-providers-airbyte |                                +apache-airflow-providers-alibaba | apache-airflow-providers-amazon |                +apache-airflow-providers-apache-beam | apache-airflow-providers-apache-cassandra |  +apache-airflow-providers-apache-drill | apache-airflow-providers-apache-druid |     +apache-airflow-providers-apache-flink | apache-airflow-providers-apache-hdfs |      +apache-airflow-providers-apache-hive | apache-airflow-providers-apache-impala |     +apache-airflow-providers-apache-kafka | apache-airflow-providers-apache-kylin |     +apache-airflow-providers-apache-livy | apache-airflow-providers-apache-pig |        +apache-airflow-providers-apache-pinot | apache-airflow-providers-apache-spark |     +apache-airflow-providers-apache-sqoop | apache-airflow-providers-apprise |          +apache-airflow-providers-arangodb | apache-airflow-providers-asana |                +apache-airflow-providers-atlassian-jira | apache-airflow-providers-celery |         +apache-airflow-providers-cloudant | apache-airflow-providers-cncf-kubernetes |      +apache-airflow-providers-common-sql | apache-airflow-providers-daskexecutor |       +apache-airflow-providers-databricks | apache-airflow-providers-datadog |            +apache-airflow-providers-dbt-cloud | apache-airflow-providers-dingding |            +apache-airflow-providers-discord | apache-airflow-providers-docker |                +apache-airflow-providers-elasticsearch | apache-airflow-providers-exasol |          +apache-airflow-providers-facebook | apache-airflow-providers-ftp |                  +apache-airflow-providers-github | apache-airflow-providers-google |                 +apache-airflow-providers-grpc | apache-airflow-providers-hashicorp |                +apache-airflow-providers-http | apache-airflow-providers-imap |                     +apache-airflow-providers-influxdb | apache-airflow-providers-jdbc |                 +apache-airflow-providers-jenkins | apache-airflow-providers-microsoft-azure |       +apache-airflow-providers-microsoft-mssql | apache-airflow-providers-microsoft-psrp  +| apache-airflow-providers-microsoft-winrm | apache-airflow-providers-mongo |       +apache-airflow-providers-mysql | apache-airflow-providers-neo4j |                   +apache-airflow-providers-odbc | apache-airflow-providers-openfaas |                 +apache-airflow-providers-openlineage | apache-airflow-providers-opsgenie |          +apache-airflow-providers-oracle | apache-airflow-providers-pagerduty |              +apache-airflow-providers-papermill | apache-airflow-providers-plexus |              +apache-airflow-providers-postgres | apache-airflow-providers-presto |               +apache-airflow-providers-qubole | apache-airflow-providers-redis |                  +apache-airflow-providers-salesforce | apache-airflow-providers-samba |              +apache-airflow-providers-segment | apache-airflow-providers-sendgrid |              +apache-airflow-providers-sftp | apache-airflow-providers-singularity |              +apache-airflow-providers-slack | apache-airflow-providers-smtp |                    +apache-airflow-providers-snowflake | apache-airflow-providers-sqlite |              +apache-airflow-providers-ssh | apache-airflow-providers-tableau |                   +apache-airflow-providers-tabular | apache-airflow-providers-telegram |              +apache-airflow-providers-trino | apache-airflow-providers-vertica |                 +apache-airflow-providers-zendesk | docker-stack | helm-chart)        
               +*--airflow-site-directory-aLocal directory path of cloned airflow-site repo.(TEXT)[required] +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_sbom.svg b/images/breeze/output_sbom.svg index 205f0fa242690..0f42e678c1799 100644 --- a/images/breeze/output_sbom.svg +++ b/images/breeze/output_sbom.svg @@ -35,8 +35,8 @@ .breeze-sbom-r1 { fill: #c5c8c6;font-weight: bold } .breeze-sbom-r2 { fill: #c5c8c6 } .breeze-sbom-r3 { fill: #d0b344;font-weight: bold } -.breeze-sbom-r4 { fill: #68a0b3;font-weight: bold } -.breeze-sbom-r5 { fill: #868887 } +.breeze-sbom-r4 { fill: #868887 } +.breeze-sbom-r5 { fill: #68a0b3;font-weight: bold } .breeze-sbom-r6 { fill: #98a84b;font-weight: bold } @@ -90,17 +90,17 @@ -Usage: breeze sbom [OPTIONSCOMMAND [ARGS]... +Usage: breeze sbom [OPTIONS] COMMAND [ARGS]... Tools that release managers can use to prepare sbom information -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ SBOM commands ──────────────────────────────────────────────────────────────────────────────────────────────────────╮ -update-sbom-information                     Update SBOM information in airflow-site project.                       -generate-provider-requirements              Generate requirements for selected provider.                           -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ SBOM commands ──────────────────────────────────────────────────────────────────────────────────────────────────────╮ +update-sbom-information                     Update SBOM information in airflow-site project.                       +generate-provider-requirements              Generate requirements for selected provider.                           
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_sbom_generate-provider-requirements.svg b/images/breeze/output_sbom_generate-provider-requirements.svg index a803d93f6d92e..9dc8ae664aa2a 100644 --- a/images/breeze/output_sbom_generate-provider-requirements.svg +++ b/images/breeze/output_sbom_generate-provider-requirements.svg @@ -35,8 +35,8 @@ .breeze-sbom-generate-provider-requirements-r1 { fill: #c5c8c6;font-weight: bold } .breeze-sbom-generate-provider-requirements-r2 { fill: #c5c8c6 } .breeze-sbom-generate-provider-requirements-r3 { fill: #d0b344;font-weight: bold } -.breeze-sbom-generate-provider-requirements-r4 { fill: #68a0b3;font-weight: bold } -.breeze-sbom-generate-provider-requirements-r5 { fill: #868887 } +.breeze-sbom-generate-provider-requirements-r4 { fill: #868887 } +.breeze-sbom-generate-provider-requirements-r5 { fill: #68a0b3;font-weight: bold } .breeze-sbom-generate-provider-requirements-r6 { fill: #8d7b39 } .breeze-sbom-generate-provider-requirements-r7 { fill: #cc555a } .breeze-sbom-generate-provider-requirements-r8 { fill: #8a4346 } @@ -144,34 +144,34 @@ -Usage: breeze sbom generate-provider-requirements [OPTIONS] +Usage: breeze sbom generate-provider-requirements [OPTIONS] Generate requirements for selected provider. -╭─ Generate provider requirements flags ───────────────────────────────────────────────────────────────────────────────╮ ---airflow-versionAirflow version to use to generate the requirements(TEXT) ---pythonPython version to generate the requirements for(3.6 | 3.7 | 3.8 | 3.9 | 3.10 | 3.11) -*--provider-idProvider to generate the requirements for                                                   -(airbyte | alibaba | amazon | apache.beam | apache.cassandra | apache.drill | apache.druid  -| apache.flink | apache.hdfs | apache.hive | apache.impala | apache.kafka | apache.kylin |  -apache.livy | apache.pig | apache.pinot | apache.spark | apache.sqoop | apprise | arangodb  -| asana | atlassian.jira | celery | cloudant | cncf.kubernetes | common.sql | databricks |  -datadog | dbt.cloud | dingding | discord | docker | elasticsearch | exasol | facebook | ftp -| github | google | grpc | hashicorp | http | imap | influxdb | jdbc | jenkins |            -microsoft.azure | microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql |      -neo4j | odbc | openfaas | openlineage | opsgenie | oracle | pagerduty | papermill | plexus  -| postgres | presto | qubole | redis | salesforce | samba | segment | sendgrid | sftp |     -singularity | slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram |      -trino | vertica | zendesk)                                                                  -[required]                                                                                  ---provider-versionProvider version to generate the requirements for(TEXT) -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---answer-aForce answer to questions.(y | n | q | yes | no | quit) ---help-hShow this message and exit. 
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Generate provider requirements flags ───────────────────────────────────────────────────────────────────────────────╮ +--airflow-versionAirflow version to use to generate the requirements(TEXT) +--pythonPython version to generate the requirements for(3.6 | 3.7 | 3.8 | 3.9 | 3.10 | 3.11) +*--provider-idProvider to generate the requirements for                                                   +(airbyte | alibaba | amazon | apache.beam | apache.cassandra | apache.drill | apache.druid  +| apache.flink | apache.hdfs | apache.hive | apache.impala | apache.kafka | apache.kylin |  +apache.livy | apache.pig | apache.pinot | apache.spark | apache.sqoop | apprise | arangodb  +| asana | atlassian.jira | celery | cloudant | cncf.kubernetes | common.sql | daskexecutor  +| databricks | datadog | dbt.cloud | dingding | discord | docker | elasticsearch | exasol | +facebook | ftp | github | google | grpc | hashicorp | http | imap | influxdb | jdbc |       +jenkins | microsoft.azure | microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo |    +mysql | neo4j | odbc | openfaas | openlineage | opsgenie | oracle | pagerduty | papermill | +plexus | postgres | presto | qubole | redis | salesforce | samba | segment | sendgrid |     +sftp | singularity | slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram +| trino | vertica | zendesk)                                                                +[required]                                                                                  +--provider-versionProvider version to generate the requirements for(TEXT) +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--answer-aForce answer to questions.(y | n | q | yes | no | quit) +--help-hShow this message and exit. 
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/scripts/ci/installed_providers.txt b/scripts/ci/installed_providers.txt index 3d511abfcc961..9cdcf765eb06a 100644 --- a/scripts/ci/installed_providers.txt +++ b/scripts/ci/installed_providers.txt @@ -2,6 +2,7 @@ amazon celery cncf.kubernetes common.sql +daskexecutor docker elasticsearch ftp diff --git a/scripts/ci/pre_commit/pre_commit_insert_extras.py b/scripts/ci/pre_commit/pre_commit_insert_extras.py index 2750e9dbaed80..2ec53c1962c7f 100755 --- a/scripts/ci/pre_commit/pre_commit_insert_extras.py +++ b/scripts/ci/pre_commit/pre_commit_insert_extras.py @@ -45,7 +45,7 @@ CONSTANTS_FOOTER = "# END EXTRAS HERE" DEFAULT_EXTRAS = ( - "amazon,async,celery,cncf.kubernetes,dask,docker,elasticsearch,ftp,google," + "amazon,async,celery,cncf.kubernetes,daskexecutor,docker,elasticsearch,ftp,google," "google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,pandas," "postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv" ) diff --git a/setup.py b/setup.py index 44998d8747ce9..ffd70f811d163 100644 --- a/setup.py +++ b/setup.py @@ -251,15 +251,6 @@ def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_ve # Cgroupspy 0.2.2 added Python 3.10 compatibility "cgroupspy>=0.2.2", ] -dask = [ - # Dask support is limited, we need Dask team to upgrade support for dask if we were to continue - # Supporting it in the future - "cloudpickle>=1.4.1", - # Dask and distributed in version 2023.5.0 break our tests for Python > 3.7 - # See https://github.com/dask/dask/issues/10279 - "dask>=2.9.0,!=2022.10.1,!=2023.5.0", - "distributed>=2.11.1,!=2023.5.0", -] deprecated_api = [ "requests>=2.26.0", ] @@ -484,7 +475,7 @@ def get_unique_dependency_list(req_list_iterable: Iterable[list[str]]): "celery": celery, "cgroups": cgroups, "cncf.kubernetes": kubernetes, - "dask": dask, + "dask": [], "deprecated_api": deprecated_api, "github_enterprise": flask_appbuilder_oauth, "google_auth": flask_appbuilder_oauth, @@ -538,6 +529,7 @@ def add_additional_extras() -> None: "azure": "microsoft.azure", "cassandra": "apache.cassandra", "crypto": "", # this is legacy extra - all dependencies are already "install-requires" + "dask": "daskexecutor", "druid": "apache.druid", "gcp": "google", "gcp_api": "google", @@ -844,7 +836,7 @@ def replace_extra_dependencies_with_provider_packages(extra: str, providers: lis :param extra: Name of the extra to add providers to :param providers: list of provider ids """ - if extra in ["cncf.kubernetes", "kubernetes", "celery"]: + if extra in ["cncf.kubernetes", "kubernetes", "celery", "daskexecutor", "dask"]: EXTRAS_DEPENDENCIES[extra].extend( [get_provider_package_name_from_package_id(package_name) for package_name in providers] ) diff --git a/tests/providers/daskexecutor/__init__.py b/tests/providers/daskexecutor/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/providers/daskexecutor/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/executors/test_dask_executor.py b/tests/providers/daskexecutor/test_dask_executor.py similarity index 98% rename from tests/executors/test_dask_executor.py rename to tests/providers/daskexecutor/test_dask_executor.py index f64f2a7dfdc72..da7fd7c11ba01 100644 --- a/tests/executors/test_dask_executor.py +++ b/tests/providers/daskexecutor/test_dask_executor.py @@ -24,10 +24,10 @@ from distributed import LocalCluster from airflow.exceptions import AirflowException -from airflow.executors.dask_executor import DaskExecutor from airflow.jobs.backfill_job_runner import BackfillJobRunner from airflow.jobs.job import Job, run_job from airflow.models import DagBag +from airflow.providers.daskexecutor.executors.dask_executor import DaskExecutor from airflow.utils import timezone from tests.test_utils.config import conf_vars @@ -155,7 +155,7 @@ def test_tls(self): # and tasks to have completed. executor.client.close() - @mock.patch("airflow.executors.dask_executor.DaskExecutor.sync") + @mock.patch("airflow.providers.daskexecutor.executors.dask_executor.DaskExecutor.sync") @mock.patch("airflow.executors.base_executor.BaseExecutor.trigger_tasks") @mock.patch("airflow.executors.base_executor.Stats.gauge") def test_gauge_executor_metrics(self, mock_stats_gauge, mock_trigger_tasks, mock_sync):
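Note: taken together, the setup.py and test hunks above move the Dask executor out of Airflow core and into the new apache-airflow-providers-daskexecutor package, with the legacy "dask" extra kept only as an alias for "daskexecutor". A minimal usage sketch after the move, assuming the provider package keeps the previous DaskExecutor behaviour; the install commands and the configuration snippet are illustrative and not part of this patch:

    # Assumes the provider is installed, e.g.:
    #   pip install apache-airflow-providers-daskexecutor
    # or via the aliased extra:
    #   pip install "apache-airflow[daskexecutor]"
    # Import path after the move, mirroring the updated test import above.
    from airflow.providers.daskexecutor.executors.dask_executor import DaskExecutor

    # Illustrative only: the executor would normally be selected in airflow.cfg,
    # e.g. [core] executor = airflow.providers.daskexecutor.executors.dask_executor.DaskExecutor
    executor = DaskExecutor()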