diff --git a/.github/actions/install-pre-commit/action.yml b/.github/actions/install-pre-commit/action.yml index 5e9ed3f2a4eff..cb053ac7e7757 100644 --- a/.github/actions/install-pre-commit/action.yml +++ b/.github/actions/install-pre-commit/action.yml @@ -24,7 +24,7 @@ inputs: default: "3.9" uv-version: description: 'uv version to use' - default: "0.5.17" # Keep this comment to allow automatic replacement of uv version + default: "0.5.24" # Keep this comment to allow automatic replacement of uv version pre-commit-version: description: 'pre-commit version to use' default: "3.5.0" # Keep this comment to allow automatic replacement of pre-commit version diff --git a/.github/workflows/ci-image-checks.yml b/.github/workflows/ci-image-checks.yml index 06edff1101f97..b49fb3995bd03 100644 --- a/.github/workflows/ci-image-checks.yml +++ b/.github/workflows/ci-image-checks.yml @@ -142,6 +142,7 @@ jobs: if: inputs.canary-run == 'true' - name: "Prepare .tar file from pre-commit cache" run: | + mkdir -p ~/.cache/uv # until we are Python 3.9+ we do not have .uv in pre-commits tar -C ~ -czf /tmp/cache-pre-commit.tar.gz .cache/pre-commit .cache/uv shell: bash if: inputs.canary-run == 'true' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 859213e663129..1e2d4c5de212f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,6 +30,7 @@ on: # yamllint disable-line rule:truthy - v[0-9]+-[0-9]+-test - v[0-9]+-[0-9]+-stable - providers-[a-z]+-?[a-z]*/v[0-9]+-[0-9]+ + types: [opened, reopened, synchronize, ready_for_review] workflow_dispatch: permissions: # All other permissions are set to none by default diff --git a/.github/workflows/push-image-cache.yml b/.github/workflows/push-image-cache.yml index 86ec3b2a85a86..7698fc88e5388 100644 --- a/.github/workflows/push-image-cache.yml +++ b/.github/workflows/push-image-cache.yml @@ -80,8 +80,6 @@ on: # yamllint disable-line rule:truthy description: "Disable airflow repo cache read from main." required: true type: string -permissions: - contents: read jobs: push-ci-image-cache: name: "Push CI ${{ inputs.cache-type }}:${{ matrix.python }} image cache " @@ -90,6 +88,9 @@ jobs: # instead of an array of strings. # yamllint disable-line rule:line-length runs-on: ${{ (inputs.platform == 'linux/amd64') && fromJSON(inputs.runs-on-as-json-public) || fromJSON(inputs.runs-on-as-json-self-hosted) }} + permissions: + contents: read + packages: write strategy: fail-fast: false matrix: @@ -163,6 +164,9 @@ jobs: # instead of an array of strings. 
# yamllint disable-line rule:line-length runs-on: ${{ (inputs.platform == 'linux/amd64') && fromJSON(inputs.runs-on-as-json-public) || fromJSON(inputs.runs-on-as-json-self-hosted) }} + permissions: + contents: read + packages: write strategy: fail-fast: false matrix: diff --git a/Dockerfile b/Dockerfile index a96ddc40db759..de4a18d6a9453 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,9 +53,9 @@ ARG PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" # You can swap comments between those two args to test pip from the main version # When you attempt to test if the version of `pip` from specified branch works for our builds # Also use `force pip` label on your PR to swap all places we use `uv` to `pip` -ARG AIRFLOW_PIP_VERSION=24.3.1 +ARG AIRFLOW_PIP_VERSION=25.0 # ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main" -ARG AIRFLOW_UV_VERSION=0.5.17 +ARG AIRFLOW_UV_VERSION=0.5.24 ARG AIRFLOW_USE_UV="false" ARG UV_HTTP_TIMEOUT="300" ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" diff --git a/Dockerfile.ci b/Dockerfile.ci index ca526f53f8a87..dc3276f948de3 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -1249,9 +1249,9 @@ COPY --from=scripts common.sh install_packaging_tools.sh install_additional_depe # You can swap comments between those two args to test pip from the main version # When you attempt to test if the version of `pip` from specified branch works for our builds # Also use `force pip` label on your PR to swap all places we use `uv` to `pip` -ARG AIRFLOW_PIP_VERSION=24.3.1 +ARG AIRFLOW_PIP_VERSION=25.0 # ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main" -ARG AIRFLOW_UV_VERSION=0.5.17 +ARG AIRFLOW_UV_VERSION=0.5.24 # TODO(potiuk): automate with upgrade check (possibly) ARG AIRFLOW_PRE_COMMIT_VERSION="3.5.0" diff --git a/README.md b/README.md index 8da91a71f9ad8..eb672d54bb444 100644 --- a/README.md +++ b/README.md @@ -288,7 +288,7 @@ Apache Airflow version life cycle: | Version | Current Patch/Minor | State | First Release | Limited Support | EOL/Terminated | |-----------|-----------------------|-----------|-----------------|-------------------|------------------| -| 2 | 2.10.4 | Supported | Dec 17, 2020 | TBD | TBD | +| 2 | 2.10.5 | Supported | Dec 17, 2020 | TBD | TBD | | 1.10 | 1.10.15 | EOL | Aug 27, 2018 | Dec 17, 2020 | June 17, 2021 | | 1.9 | 1.9.0 | EOL | Jan 03, 2018 | Aug 27, 2018 | Aug 27, 2018 | | 1.8 | 1.8.2 | EOL | Mar 19, 2017 | Jan 03, 2018 | Jan 03, 2018 | @@ -534,6 +534,3 @@ The CI infrastructure for Apache Airflow has been sponsored by: astronomer.io AWS OpenSource - - -Tracking Pixel diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index cb7572626b434..b922b54f61b9b 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -21,6 +21,59 @@ .. towncrier release notes start +Airflow 2.10.5 (2025-02-06) +--------------------------- + +Significant Changes +^^^^^^^^^^^^^^^^^^^ + +Ensure teardown tasks are executed when DAG run is set to failed (#45530) +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Previously when a DAG run was manually set to "failed" or to "success" state the terminal state was set to all tasks. +But this was a gap for cases when setup- and teardown tasks were defined: If teardown was used to clean-up infrastructure +or other resources, they were also skipped and thus resources could stay allocated. + +As of now when setup tasks had been executed before and the DAG is manually set to "failed" or "success" then teardown +tasks are executed. 
Teardown tasks are skipped if the setup was also skipped. + +As a side effect this means if the DAG contains teardown tasks, then the manual marking of DAG as "failed" or "success" +will need to keep the DAG in running state to ensure that teardown tasks will be scheduled. They would not be scheduled +if the DAG is directly set to "failed" or "success". + + +Bug Fixes +""""""""" + +- Prevent using ``trigger_rule=TriggerRule.ALWAYS`` in a task-generated mapping within bare tasks (#44751) +- Fix ShortCircuitOperator mapped tasks (#44912) +- Fix premature evaluation of tasks with certain trigger rules (e.g. ``ONE_DONE``) in a mapped task group (#44937) +- Fix task_id validation in BaseOperator (#44938) (#44938) +- Allow fetching XCom with forward slash from the API and escape it in the UI (#45134) +- Fix ``FileTaskHandler`` only read from default executor (#46000) +- Fix empty task instance for log (#45702) (#45703) +- Remove ``skip_if`` and ``run_if`` decorators before TaskFlow virtualenv tasks are run (#41832) (#45680) +- Fix request body for json requests in event log (#45546) (#45560) +- Ensure teardown tasks are executed when DAG run is set to failed (#45530) (#45581) +- Do not update DR on TI update after task execution (#45348) +- Fix object and array DAG params that have a None default (#45313) (#45315) +- Fix endless sensor rescheduling (#45224) (#45250) +- Evaluate None in SQLAlchemy's extended JSON type decorator (#45119) (#45120) +- Allow dynamic tasks to be filtered by ``rendered_map_index`` (#45109) (#45122) +- Handle relative paths when sanitizing URLs (#41995) (#45080) +- Set Autocomplete Off on Login Form (#44929) (#44940) +- Add Webserver parameters ``max_form_parts``, ``max_form_memory_size`` (#46243) (#45749) +- Fixed accessing thread local variable in BaseOperators ``execute`` safeguard mechanism (#44646) (#46280) +- Add map_index parameter to extra links API (#46337) + + +Miscellaneous +""""""""""""" + +- Add traceback log output when SIGTERMs was sent (#44880) (#45077) +- Removed the ability for Operators to specify their own "scheduling deps" (#45713) (#45742) +- Deprecate ``conf`` from Task Context (#44993) + Airflow 2.10.4 (2024-12-09) --------------------------- @@ -223,6 +276,11 @@ Airflow 2.10.0 (2024-08-15) Significant Changes ^^^^^^^^^^^^^^^^^^^ +Scarf based telemetry: Airflow now collect telemetry data (#39510) +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +Airflow integrates Scarf to collect basic usage data during operation. Deployments can opt-out of data collection by +setting the ``[usage_data_collection]enabled`` option to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable. + Datasets no longer trigger inactive DAGs (#38891) """"""""""""""""""""""""""""""""""""""""""""""""" @@ -271,12 +329,6 @@ Previously known as hybrid executors, this new feature allows Airflow to use mul to use a specific executor that suits its needs best. A single DAG can contain tasks all using different executors. Please see the Airflow documentation for more details. Note: This feature is still experimental. See `documentation on Executor `_ for a more detailed description. -Scarf based telemetry: Does Airflow collect any telemetry data? (#39510) -"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -Airflow integrates Scarf to collect basic usage data during operation. 
Deployments can opt-out of data collection by setting the ``[usage_data_collection]enabled`` option to False, or the SCARF_ANALYTICS=false environment variable. -See `FAQ on this `_ for more information. - - New Features """""""""""" - AIP-61 Hybrid Execution (`AIP-61 `_) diff --git a/airflow/__init__.py b/airflow/__init__.py index 818bec887bf71..d3fa9d50f6b48 100644 --- a/airflow/__init__.py +++ b/airflow/__init__.py @@ -17,7 +17,7 @@ # under the License. from __future__ import annotations -__version__ = "2.10.4" +__version__ = "2.10.5" import os import sys diff --git a/airflow/api_connexion/endpoints/extra_link_endpoint.py b/airflow/api_connexion/endpoints/extra_link_endpoint.py index ddf4b670285c8..87f83fb77c93f 100644 --- a/airflow/api_connexion/endpoints/extra_link_endpoint.py +++ b/airflow/api_connexion/endpoints/extra_link_endpoint.py @@ -42,6 +42,7 @@ def get_extra_links( dag_id: str, dag_run_id: str, task_id: str, + map_index: int = -1, session: Session = NEW_SESSION, ) -> APIResponse: """Get extra links for task instance.""" @@ -62,6 +63,7 @@ def get_extra_links( TaskInstance.dag_id == dag_id, TaskInstance.run_id == dag_run_id, TaskInstance.task_id == task_id, + TaskInstance.map_index == map_index, ) ) diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml index 180f854c02f3f..b150ff66eb83b 100644 --- a/airflow/api_connexion/openapi/v1.yaml +++ b/airflow/api_connexion/openapi/v1.yaml @@ -231,7 +231,7 @@ info: This means that the server encountered an unexpected condition that prevented it from fulfilling the request. - version: "2.10.4" + version: "2.10.5" license: name: Apache 2.0 url: http://www.apache.org/licenses/LICENSE-2.0.html @@ -2062,6 +2062,7 @@ paths: - $ref: "#/components/parameters/DAGID" - $ref: "#/components/parameters/DAGRunID" - $ref: "#/components/parameters/TaskID" + - $ref: "#/components/parameters/FilterMapIndex" get: summary: List extra links diff --git a/airflow/api_connexion/schemas/task_schema.py b/airflow/api_connexion/schemas/task_schema.py index 03bf4b59ef2e2..086e9ae3a5524 100644 --- a/airflow/api_connexion/schemas/task_schema.py +++ b/airflow/api_connexion/schemas/task_schema.py @@ -49,14 +49,14 @@ class TaskSchema(Schema): ) depends_on_past = fields.Boolean(dump_only=True) wait_for_downstream = fields.Boolean(dump_only=True) - retries = fields.Number(dump_only=True) + retries = fields.Number(dump_only=True) # type: ignore[var-annotated] queue = fields.String(dump_only=True) pool = fields.String(dump_only=True) - pool_slots = fields.Number(dump_only=True) + pool_slots = fields.Number(dump_only=True) # type: ignore[var-annotated] execution_timeout = fields.Nested(TimeDeltaSchema, dump_only=True) retry_delay = fields.Nested(TimeDeltaSchema, dump_only=True) retry_exponential_backoff = fields.Boolean(dump_only=True) - priority_weight = fields.Number(dump_only=True) + priority_weight = fields.Number(dump_only=True) # type: ignore[var-annotated] weight_rule = WeightRuleField(dump_only=True) ui_color = ColorField(dump_only=True) ui_fgcolor = ColorField(dump_only=True) diff --git a/airflow/cli/commands/scheduler_command.py b/airflow/cli/commands/scheduler_command.py index 96cfe1e2852f5..37fd399d2e03a 100644 --- a/airflow/cli/commands/scheduler_command.py +++ b/airflow/cli/commands/scheduler_command.py @@ -33,7 +33,6 @@ from airflow.utils.cli import process_subdir from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.scheduler_health import serve_health_check -from 
airflow.utils.usage_data_collection import usage_data_collection log = logging.getLogger(__name__) @@ -54,8 +53,6 @@ def scheduler(args: Namespace): """Start Airflow Scheduler.""" print(settings.HEADER) - usage_data_collection() - run_command_with_daemon_option( args=args, process_name="scheduler", diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 613c5e3394a40..04b4e27843f22 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -2120,6 +2120,22 @@ webserver: type: boolean example: ~ default: "False" + max_form_memory_size: + description: | + The maximum size in bytes any non-file form field may be in a multipart/form-data body. + If this limit is exceeded, a 413 RequestEntityTooLarge error is raised by webserver. + version_added: 2.10.5 + type: integer + example: ~ + default: "500000" + max_form_parts: + description: | + The maximum number of fields that may be present in a multipart/form-data body. + If this limit is exceeded, a 413 RequestEntityTooLarge error is raised by webserver. + version_added: 2.10.5 + type: integer + example: ~ + default: "1000" email: description: | Configuration email backend and whether to @@ -2735,25 +2751,3 @@ sensors: type: float example: ~ default: "604800" -usage_data_collection: - description: | - Airflow integrates `Scarf `__ to collect basic platform and usage data - during operation. This data assists Airflow maintainers in better understanding how Airflow is used. - Insights gained from this telemetry are critical for prioritizing patches, minor releases, and - security fixes. Additionally, this information supports key decisions related to the development road map. - Check the FAQ doc for more information on what data is collected. - - Deployments can opt-out of analytics by setting the ``enabled`` option - to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable. - Individual users can easily opt-out of analytics in various ways documented in the - `Scarf Do Not Track docs `__. - - options: - enabled: - description: | - Enable or disable usage data collection and sending. 
- version_added: 2.10.0 - type: boolean - example: ~ - default: "True" - see_also: ":ref:`Usage data collection FAQ `" diff --git a/airflow/executors/executor_loader.py b/airflow/executors/executor_loader.py index 3c08887906114..7ad42a2fb1bc2 100644 --- a/airflow/executors/executor_loader.py +++ b/airflow/executors/executor_loader.py @@ -201,6 +201,10 @@ def init_executors(cls) -> list[BaseExecutor]: @classmethod def lookup_executor_name_by_str(cls, executor_name_str: str) -> ExecutorName: # lookup the executor by alias first, if not check if we're given a module path + if not _classname_to_executors or not _module_to_executors or not _alias_to_executors: + # if we haven't loaded the executors yet, such as directly calling load_executor + cls._get_executor_names() + if executor_name := _alias_to_executors.get(executor_name_str): return executor_name elif executor_name := _module_to_executors.get(executor_name_str): diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 65900276271a6..1b1b22c7be454 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -410,6 +410,8 @@ def wrapper(self, *args, **kwargs): sentinel = kwargs.pop(sentinel_key, None) if sentinel: + if not getattr(cls._sentinel, "callers", None): + cls._sentinel.callers = {} cls._sentinel.callers[sentinel_key] = sentinel else: sentinel = cls._sentinel.callers.pop(f"{func.__qualname__.split('.')[0]}__sentinel", None) diff --git a/airflow/plugins_manager.py b/airflow/plugins_manager.py index 8ccdef2c6390c..bb90d80ec5bcd 100644 --- a/airflow/plugins_manager.py +++ b/airflow/plugins_manager.py @@ -27,6 +27,7 @@ import os import sys import types +import warnings from pathlib import Path from typing import TYPE_CHECKING, Any, Iterable @@ -431,6 +432,17 @@ def initialize_ti_deps_plugins(): registered_ti_dep_classes = {} for plugin in plugins: + if not plugin.ti_deps: + continue + + from airflow.exceptions import RemovedInAirflow3Warning + + warnings.warn( + "Using custom `ti_deps` on operators has been removed in Airflow 3.0", + RemovedInAirflow3Warning, + stacklevel=1, + ) + registered_ti_dep_classes.update( {qualname(ti_dep.__class__): ti_dep.__class__ for ti_dep in plugin.ti_deps} ) diff --git a/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst index 48980f2153cd0..3d3e95c28b172 100644 --- a/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst +++ b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst @@ -454,7 +454,7 @@ If you have pre-commit installed, pre-commit will be run automatically on commit manually after commit, you can run it via ``breeze static-checks --last-commit`` some of the tests might fail because suspension of the provider might cause changes in the dependencies, so if you see errors about missing dependencies imports, non-usable classes etc., you will need to build the CI image locally -via ``breeze build-image --python 3.8 --upgrade-to-newer-dependencies`` after the first pre-commit run +via ``breeze build-image --python 3.9 --upgrade-to-newer-dependencies`` after the first pre-commit run and then run the static checks again. 
If you want to be absolutely sure to run all static checks you can always do this via diff --git a/airflow/providers/amazon/aws/transfers/sql_to_s3.py b/airflow/providers/amazon/aws/transfers/sql_to_s3.py index 65e40797a59b1..19bc7f016b186 100644 --- a/airflow/providers/amazon/aws/transfers/sql_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/sql_to_s3.py @@ -223,12 +223,9 @@ def _partition_dataframe(self, df: pd.DataFrame) -> Iterable[tuple[str, pd.DataF for group_label in (grouped_df := df.groupby(**self.groupby_kwargs)).groups: yield ( cast(str, group_label), - cast( - "pd.DataFrame", - grouped_df.get_group(group_label) - .drop(random_column_name, axis=1, errors="ignore") - .reset_index(drop=True), - ), + grouped_df.get_group(group_label) + .drop(random_column_name, axis=1, errors="ignore") + .reset_index(drop=True), ) def _get_hook(self) -> DbApiHook: diff --git a/airflow/reproducible_build.yaml b/airflow/reproducible_build.yaml index 9f7c6d5a10069..de660bb380adc 100644 --- a/airflow/reproducible_build.yaml +++ b/airflow/reproducible_build.yaml @@ -1,2 +1,2 @@ -release-notes-hash: 7be47e2ddbbe1bfbd0d3f572d2b7800a -source-date-epoch: 1736532824 +release-notes-hash: 8e5657e541a0bf44f777a4ec3ee442e3 +source-date-epoch: 1738582969 diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index a1f40e67c6972..ef875a92ab6ae 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -87,9 +87,9 @@ def deserialize(classname: str, version: int, data: object) -> Any: try: from zoneinfo import ZoneInfo except ImportError: - from backports.zoneinfo import ZoneInfo + from backports.zoneinfo import ZoneInfo # type: ignore[no-redef] - return ZoneInfo(data) + return ZoneInfo(data) # type: ignore[arg-type] return parse_timezone(data) diff --git a/airflow/settings.py b/airflow/settings.py index 7e9626d788f50..85d56d4be8573 100644 --- a/airflow/settings.py +++ b/airflow/settings.py @@ -800,13 +800,6 @@ def initialize(): atexit.register(dispose_orm) -def is_usage_data_collection_enabled() -> bool: - """Check if data collection is enabled.""" - return conf.getboolean("usage_data_collection", "enabled", fallback=True) and ( - os.getenv("SCARF_ANALYTICS", "").strip().lower() != "false" - ) - - # Const stuff KILOBYTE = 1024 diff --git a/airflow/utils/decorators.py b/airflow/utils/decorators.py index 77a5eddaf0888..e6981256ebbd4 100644 --- a/airflow/utils/decorators.py +++ b/airflow/utils/decorators.py @@ -81,7 +81,7 @@ def _remove_task_decorator(py_source, decorator_name): after_decorator = after_decorator[1:] return before_decorator + after_decorator - decorators = ["@setup", "@teardown", task_decorator_name] + decorators = ["@setup", "@teardown", "@task.skip_if", "@task.run_if", task_decorator_name] for decorator in decorators: python_source = _remove_task_decorator(python_source, decorator) return python_source diff --git a/airflow/utils/log/file_task_handler.py b/airflow/utils/log/file_task_handler.py index 9eb55c707f180..2df73ef4ffada 100644 --- a/airflow/utils/log/file_task_handler.py +++ b/airflow/utils/log/file_task_handler.py @@ -46,6 +46,7 @@ if TYPE_CHECKING: from pendulum import DateTime + from airflow.executors.base_executor import BaseExecutor from airflow.models import DagRun from airflow.models.taskinstance import TaskInstance from airflow.models.taskinstancekey import TaskInstanceKey @@ -185,6 +186,8 @@ class FileTaskHandler(logging.Handler): inherits_from_empty_operator_log_message = 
( "Operator inherits from empty operator and thus does not have logs" ) + executor_instances: dict[str, BaseExecutor] = {} + DEFAULT_EXECUTOR_KEY = "_default_executor" def __init__( self, @@ -340,11 +343,26 @@ def _render_filename(self, ti: TaskInstance | TaskInstancePydantic, try_number: def _read_grouped_logs(self): return False - @cached_property - def _executor_get_task_log(self) -> Callable[[TaskInstance, int], tuple[list[str], list[str]]]: - """This cached property avoids loading executor repeatedly.""" - executor = ExecutorLoader.get_default_executor() - return executor.get_task_log + def _get_executor_get_task_log( + self, ti: TaskInstance + ) -> Callable[[TaskInstance, int], tuple[list[str], list[str]]]: + """ + Get the get_task_log method from executor of current task instance. + + Since there might be multiple executors, so we need to get the executor of current task instance instead of getting from default executor. + :param ti: task instance object + :return: get_task_log method of the executor + """ + executor_name = ti.executor or self.DEFAULT_EXECUTOR_KEY + executor = self.executor_instances.get(executor_name) + if executor is not None: + return executor.get_task_log + + if executor_name == self.DEFAULT_EXECUTOR_KEY: + self.executor_instances[executor_name] = ExecutorLoader.get_default_executor() + else: + self.executor_instances[executor_name] = ExecutorLoader.load_executor(executor_name) + return self.executor_instances[executor_name].get_task_log def _read( self, @@ -386,7 +404,8 @@ def _read( messages_list.extend(remote_messages) has_k8s_exec_pod = False if ti.state == TaskInstanceState.RUNNING: - response = self._executor_get_task_log(ti, try_number) + executor_get_task_log = self._get_executor_get_task_log(ti) + response = executor_get_task_log(ti, try_number) if response: executor_messages, executor_logs = response if executor_messages: diff --git a/airflow/utils/usage_data_collection.py b/airflow/utils/usage_data_collection.py deleted file mode 100644 index 3bdfb180fa912..0000000000000 --- a/airflow/utils/usage_data_collection.py +++ /dev/null @@ -1,123 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -This module is for management of Airflow's usage data collection. - -This module is not part of the public interface and is subject to change at any time. 
- -:meta private: -""" - -from __future__ import annotations - -import os -import platform -from urllib.parse import urlencode - -import httpx -from packaging.version import parse - -from airflow import __version__ as airflow_version, settings -from airflow.configuration import conf - - -def usage_data_collection(): - if not settings.is_usage_data_collection_enabled(): - return - - # Exclude pre-releases and dev versions - if _version_is_prerelease(airflow_version): - return - - # Exclude CI environments - if _is_ci_environ(): - return - - scarf_domain = "https://apacheairflow.gateway.scarf.sh/scheduler" - - try: - platform_sys, arch = get_platform_info() - - params = { - "version": airflow_version, - "python_version": get_python_version(), - "platform": platform_sys, - "arch": arch, - "database": get_database_name(), - "db_version": get_database_version(), - "executor": get_executor(), - } - - query_string = urlencode(params) - scarf_url = f"{scarf_domain}?{query_string}" - - httpx.get(scarf_url, timeout=5.0) - except Exception: - pass - - -def _version_is_prerelease(version: str) -> bool: - return parse(version).is_prerelease - - -def _is_ci_environ() -> bool: - """Return True if running in any known CI environment.""" - if os.getenv("CI") == "true": - # Generic CI variable set by many CI systems (GH Actions, Travis, GitLab, CircleCI, Jenkins, Heroku) - return True - - # Other CI variables set by specific CI systems - ci_env_vars = { - "CIRCLECI", # CircleCI - "CODEBUILD_BUILD_ID", # AWS CodeBuild - "GITHUB_ACTIONS", # GitHub Actions - "GITLAB_CI", # GitLab CI - "JENKINS_URL", # Jenkins - "TF_BUILD", # Azure Pipelines - "TRAVIS", # Travis CI - } - - return any(var in os.environ for var in ci_env_vars) - - -def get_platform_info() -> tuple[str, str]: - return platform.system(), platform.machine() - - -def get_database_version() -> str: - if settings.engine is None: - return "None" - - version_info = settings.engine.dialect.server_version_info - # Example: (1, 2, 3) -> "1.2" (cut only major+minor w/o patch) - return ".".join(map(str, version_info[0:2])) if version_info else "None" - - -def get_database_name() -> str: - if settings.engine is None: - return "None" - return settings.engine.dialect.name - - -def get_executor() -> str: - return conf.get("core", "EXECUTOR") - - -def get_python_version() -> str: - # Cut only major+minor from the python version string (e.g. 3.10.12 --> 3.10) - return ".".join(platform.python_version().split(".")[0:2]) diff --git a/airflow/www/app.py b/airflow/www/app.py index e093e66cfd881..23d79b0186138 100644 --- a/airflow/www/app.py +++ b/airflow/www/app.py @@ -78,6 +78,8 @@ def create_app(config=None, testing=False): flask_app.config["PERMANENT_SESSION_LIFETIME"] = timedelta(minutes=settings.get_session_lifetime_config()) flask_app.config["MAX_CONTENT_LENGTH"] = conf.getfloat("webserver", "allowed_payload_size") * 1024 * 1024 + flask_app.config["MAX_FORM_PARTS"] = conf.getint("webserver", "max_form_parts") + flask_app.config["MAX_FORM_MEMORY_SIZE"] = conf.getint("webserver", "max_form_memory_size") webserver_config = conf.get_mandatory_value("webserver", "config_file") # Enable customizations in webserver_config.py to be applied via Flask.current_app. 
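The two new ``[webserver]`` options introduced above (``max_form_parts`` and ``max_form_memory_size``) are applied in two places: ``airflow/www/app.py`` stores them in the Flask config, and ``airflow/www/extensions/init_views.py`` (in the next hunk) additionally patches the Werkzeug ``Request`` class for API requests, since, as its docstring notes, Flask releases before 3.1.0 cannot pick these limits up from the app config. The following is a minimal standalone sketch of that pre-3.1 workaround pattern, not Airflow's own code; the values simply mirror the new ``config.yml`` defaults.

```python
from flask import Flask, request
from werkzeug import Request

app = Flask(__name__)

# On Flask < 3.1 these limits are plain class attributes on the Werkzeug
# Request class, so patching them once applies to every request the app
# parses (the same trick init_data_form_parameters() below relies on).
Request.max_form_parts = 1000            # mirrors the new [webserver] max_form_parts default
Request.max_form_memory_size = 500_000   # mirrors the new [webserver] max_form_memory_size default


@app.post("/echo")
def echo():
    # Accessing request.form parses the multipart body; exceeding either
    # limit makes Werkzeug abort the request with RequestEntityTooLarge.
    return {"field_count": len(request.form)}
```

With these limits in place, a multipart POST carrying more than 1000 form fields, or any single non-file field larger than 500000 bytes, is rejected with ``413 Request Entity Too Large``, which is the behaviour the new options let deployments tune.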
diff --git a/airflow/www/decorators.py b/airflow/www/decorators.py index 3eae5f6239184..b7dc45515e2ef 100644 --- a/airflow/www/decorators.py +++ b/airflow/www/decorators.py @@ -95,7 +95,7 @@ def wrapper(*args, **kwargs): user_display = get_auth_manager().get_user_display_name() isAPIRequest = request.blueprint == "/api/v1" - hasJsonBody = request.headers.get("content-type") == "application/json" and request.json + hasJsonBody = "application/json" in request.headers.get("content-type", "") and request.json fields_skip_logging = { "csrf_token", diff --git a/airflow/www/extensions/init_views.py b/airflow/www/extensions/init_views.py index cc4e1141be707..16ccea91ac137 100644 --- a/airflow/www/extensions/init_views.py +++ b/airflow/www/extensions/init_views.py @@ -26,6 +26,7 @@ from connexion.decorators.validation import RequestBodyValidator from connexion.exceptions import BadRequestProblem from flask import request +from werkzeug import Request from airflow.api_connexion.exceptions import common_error_handler from airflow.configuration import conf @@ -194,6 +195,21 @@ def set_cors_headers_on_response(response): return response +def init_data_form_parameters(): + """ + Initialize custom values for data form parameters. + + This is a workaround for Flask versions prior to 3.1.0. + In order to allow users customizing form data parameters, we need these two fields to be configurable. + Starting from Flask 3.1.0 these two parameters can be configured through Flask config, but unfortunately, + current version of flask supported in Airflow is way older. That's why this workaround was introduced. + See https://flask.palletsprojects.com/en/stable/api/#flask.Request.max_form_memory_size + # TODO: remove it when Flask upgraded to version 3.1.0 or higher. + """ + Request.max_form_parts = conf.getint("webserver", "max_form_parts") + Request.max_form_memory_size = conf.getint("webserver", "max_form_memory_size") + + class _LazyResolution: """ OpenAPI endpoint that lazily resolves the function on first use. @@ -286,6 +302,7 @@ def init_api_connexion(app: Flask) -> None: validate_responses=True, validator_map={"body": _CustomErrorRequestBodyValidator}, ).blueprint + api_bp.before_app_request(init_data_form_parameters) api_bp.after_request(set_cors_headers_on_response) app.register_blueprint(api_bp) diff --git a/airflow/www/static/js/types/api-generated.ts b/airflow/www/static/js/types/api-generated.ts index 2da17d2981d03..8cc92e140038f 100644 --- a/airflow/www/static/js/types/api-generated.ts +++ b/airflow/www/static/js/types/api-generated.ts @@ -638,6 +638,10 @@ export interface paths { /** The task ID. */ task_id: components["parameters"]["TaskID"]; }; + query: { + /** Filter on map index for mapped task. */ + map_index?: components["parameters"]["FilterMapIndex"]; + }; }; }; "/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}/logs/{task_try_number}": { @@ -4741,6 +4745,10 @@ export interface operations { /** The task ID. */ task_id: components["parameters"]["TaskID"]; }; + query: { + /** Filter on map index for mapped task. */ + map_index?: components["parameters"]["FilterMapIndex"]; + }; }; responses: { /** Success. 
*/ @@ -5990,7 +5998,8 @@ export type GetXcomEntryVariables = CamelCasedPropertiesDeep< operations["get_xcom_entry"]["parameters"]["query"] >; export type GetExtraLinksVariables = CamelCasedPropertiesDeep< - operations["get_extra_links"]["parameters"]["path"] + operations["get_extra_links"]["parameters"]["path"] & + operations["get_extra_links"]["parameters"]["query"] >; export type GetLogVariables = CamelCasedPropertiesDeep< operations["get_log"]["parameters"]["path"] & diff --git a/airflow/www/views.py b/airflow/www/views.py index fdaee7a79f668..284733857e853 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -1771,7 +1771,7 @@ def log(self, session: Session = NEW_SESSION): title="Log by attempts", dag_id=dag_id, task_id=task_id, - task_display_name=ti.task_display_name, + task_display_name=ti.task_display_name if ti else "", execution_date=execution_date, map_index=map_index, form=form, diff --git a/constraints/README.md b/constraints/README.md index 791450d1bd7c9..485ddaba44ff4 100644 --- a/constraints/README.md +++ b/constraints/README.md @@ -34,7 +34,7 @@ Typical workflow in this case is: * build the image using this command ```bash -breeze ci-image build --python 3.8 --airflow-constraints-location constraints/constraints-3.8txt +breeze ci-image build --python 3.9 --airflow-constraints-location constraints/constraints-3.8txt ``` You can continue iterating and updating the constraint file (and rebuilding the image) diff --git a/contributing-docs/03_contributors_quick_start.rst b/contributing-docs/03_contributors_quick_start.rst index 9c7bdbe985178..dc52368577cf9 100644 --- a/contributing-docs/03_contributors_quick_start.rst +++ b/contributing-docs/03_contributors_quick_start.rst @@ -256,7 +256,7 @@ Setting up Breeze .. code-block:: bash - breeze --python 3.8 --backend postgres + breeze --python 3.9 --backend postgres .. note:: If you encounter an error like "docker.credentials.errors.InitializationError: @@ -313,7 +313,7 @@ Using Breeze ------------ 1. Starting breeze environment using ``breeze start-airflow`` starts Breeze environment with last configuration run( - In this case python and backend will be picked up from last execution ``breeze --python 3.8 --backend postgres``) + In this case python and backend will be picked up from last execution ``breeze --python 3.9 --backend postgres``) It also automatically starts webserver, backend and scheduler. It drops you in tmux with scheduler in bottom left and webserver in bottom right. Use ``[Ctrl + B] and Arrow keys`` to navigate. @@ -363,7 +363,7 @@ Using Breeze .. code-block:: bash - breeze --python 3.8 --backend postgres + breeze --python 3.9 --backend postgres 2. Open tmux @@ -657,7 +657,7 @@ All Tests are inside ./tests directory. .. code-block:: bash - breeze --backend postgres --postgres-version 15 --python 3.8 --db-reset testing tests --test-type All + breeze --backend postgres --postgres-version 15 --python 3.9 --db-reset testing tests --test-type All - Running specific type of test @@ -667,7 +667,7 @@ All Tests are inside ./tests directory. .. code-block:: bash - breeze --backend postgres --postgres-version 15 --python 3.8 --db-reset testing tests --test-type Core + breeze --backend postgres --postgres-version 15 --python 3.9 --db-reset testing tests --test-type Core - Running Integration test for specific test type @@ -676,7 +676,7 @@ All Tests are inside ./tests directory. .. 
code-block:: bash - breeze --backend postgres --postgres-version 15 --python 3.8 --db-reset testing tests --test-type All --integration mongo + breeze --backend postgres --postgres-version 15 --python 3.9 --db-reset testing tests --test-type All --integration mongo - For more information on Testing visit : |09_testing.rst| diff --git a/contributing-docs/testing/docker_compose_tests.rst b/contributing-docs/testing/docker_compose_tests.rst index 94864b4137de8..8ecdc071ff1f4 100644 --- a/contributing-docs/testing/docker_compose_tests.rst +++ b/contributing-docs/testing/docker_compose_tests.rst @@ -48,7 +48,7 @@ Running complete test with breeze: .. code-block:: bash - breeze prod-image build --python 3.8 + breeze prod-image build --python 3.9 breeze testing docker-compose-tests In case the test fails, it will dump the logs from the running containers to the console and it @@ -66,7 +66,7 @@ The test can be also run manually with ``pytest docker_tests/test_docker_compose command, provided that you have a local airflow venv with ``dev`` extra set and the ``DOCKER_IMAGE`` environment variable is set to the image you want to test. The variable defaults to ``ghcr.io/apache/airflow/main/prod/python3.8:latest`` which is built by default -when you run ``breeze prod-image build --python 3.8``. also the switches ``--skip-docker-compose-deletion`` +when you run ``breeze prod-image build --python 3.9``. also the switches ``--skip-docker-compose-deletion`` and ``--wait-for-containers-timeout`` can only be passed via environment variables. If you want to debug the deployment using ``docker compose`` commands after ``SKIP_DOCKER_COMPOSE_DELETION`` diff --git a/contributing-docs/testing/k8s_tests.rst b/contributing-docs/testing/k8s_tests.rst index a4a6f67da0e2c..79ebba89bdcde 100644 --- a/contributing-docs/testing/k8s_tests.rst +++ b/contributing-docs/testing/k8s_tests.rst @@ -270,7 +270,7 @@ Should result in KinD creating the K8S cluster. Connecting to localhost:18150. Num try: 1 Error when connecting to localhost:18150 : ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - Airflow webserver is not available at port 18150. Run `breeze k8s deploy-airflow --python 3.8 --kubernetes-version v1.24.2` to (re)deploy airflow + Airflow webserver is not available at port 18150. Run `breeze k8s deploy-airflow --python 3.9 --kubernetes-version v1.24.2` to (re)deploy airflow KinD cluster airflow-python-3.8-v1.24.2 created! @@ -352,7 +352,7 @@ Should show the status of current KinD cluster. Connecting to localhost:18150. Num try: 1 Error when connecting to localhost:18150 : ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - Airflow webserver is not available at port 18150. Run `breeze k8s deploy-airflow --python 3.8 --kubernetes-version v1.24.2` to (re)deploy airflow + Airflow webserver is not available at port 18150. Run `breeze k8s deploy-airflow --python 3.9 --kubernetes-version v1.24.2` to (re)deploy airflow Cluster healthy: airflow-python-3.8-v1.24.2 diff --git a/contributing-docs/testing/unit_tests.rst b/contributing-docs/testing/unit_tests.rst index e40b5c82d3b0b..6b4f21f81a5e0 100644 --- a/contributing-docs/testing/unit_tests.rst +++ b/contributing-docs/testing/unit_tests.rst @@ -209,7 +209,7 @@ rerun in Breeze as you will (``-n auto`` will parallelize tests using ``pytest-x .. 
code-block:: bash - breeze shell --backend none --python 3.8 + breeze shell --backend none --python 3.9 > pytest tests --skip-db-tests -n auto @@ -286,7 +286,7 @@ either by package/module/test or by test type - whatever ``pytest`` supports. .. code-block:: bash - breeze shell --backend postgres --python 3.8 + breeze shell --backend postgres --python 3.9 > pytest tests --run-db-tests-only As explained before, you cannot run DB tests in parallel using ``pytest-xdist`` plugin, but ``breeze`` has @@ -296,7 +296,7 @@ you use ``breeze testing db-tests`` command): .. code-block:: bash - breeze testing tests --run-db-tests-only --backend postgres --python 3.8 --run-in-parallel + breeze testing tests --run-db-tests-only --backend postgres --python 3.9 --run-in-parallel Examples of marking test as DB test ................................... @@ -1122,7 +1122,7 @@ directly to the container. .. code-block:: bash - breeze ci-image build --python 3.8 + breeze ci-image build --python 3.9 2. Enter breeze environment by selecting the appropriate airflow version and choosing ``providers-and-tests`` option for ``--mount-sources`` flag. @@ -1224,7 +1224,7 @@ Herr id how to reproduce it. .. code-block:: bash - breeze ci-image build --python 3.8 + breeze ci-image build --python 3.9 2. Build providers from latest sources: diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md index 2d63edf30119f..d9cb0f0d8867b 100644 --- a/dev/README_RELEASE_AIRFLOW.md +++ b/dev/README_RELEASE_AIRFLOW.md @@ -687,7 +687,7 @@ There is also an easy way of installation with Breeze if you have the latest sou Running the following command will use tmux inside breeze, create `admin` user and run Webserver & Scheduler: ```shell script -breeze start-airflow --use-airflow-version 2.7.0rc1 --python 3.8 --backend postgres +breeze start-airflow --use-airflow-version 2.7.0rc1 --python 3.9 --backend postgres ``` You can also choose different executors and extras to install when you are installing airflow this way. 
For @@ -695,7 +695,7 @@ example in order to run Airflow with CeleryExecutor and install celery, google a Airflow 2.7.0, you need to have celery provider installed to run Airflow with CeleryExecutor) you can run: ```shell script -breeze start-airflow --use-airflow-version 2.7.0rc1 --python 3.8 --backend postgres \ +breeze start-airflow --use-airflow-version 2.7.0rc1 --python 3.9 --backend postgres \ --executor CeleryExecutor --airflow-extras "celery,google,amazon" ``` diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md index 25aa8062c7722..d546076af82d1 100644 --- a/dev/README_RELEASE_PROVIDER_PACKAGES.md +++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md @@ -1016,7 +1016,7 @@ pip install apache-airflow-providers-==rc ### Installing with Breeze ```shell -breeze start-airflow --use-airflow-version 2.2.4 --python 3.8 --backend postgres \ +breeze start-airflow --use-airflow-version 2.2.4 --python 3.9 --backend postgres \ --load-example-dags --load-default-connections ``` diff --git a/dev/breeze/doc/ci/02_images.md b/dev/breeze/doc/ci/02_images.md index df8446f5a8606..ff7ebedc72ae8 100644 --- a/dev/breeze/doc/ci/02_images.md +++ b/dev/breeze/doc/ci/02_images.md @@ -442,8 +442,8 @@ can be used for CI images: | `DEV_APT_DEPS` | | Dev APT dependencies installed in the first part of the image (default empty means default dependencies are used) | | `ADDITIONAL_DEV_APT_DEPS` | | Additional apt dev dependencies installed in the first part of the image | | `ADDITIONAL_DEV_APT_ENV` | | Additional env variables defined when installing dev deps | -| `AIRFLOW_PIP_VERSION` | `24.3.1` | `pip` version used. | -| `AIRFLOW_UV_VERSION` | `0.5.17` | `uv` version used. | +| `AIRFLOW_PIP_VERSION` | `25.0` | `pip` version used. | +| `AIRFLOW_UV_VERSION` | `0.5.24` | `uv` version used. | | `AIRFLOW_PRE_COMMIT_VERSION` | `3.5.0` | `pre-commit` version used. | | `AIRFLOW_USE_UV` | `true` | Whether to use UV for installation. | | `PIP_PROGRESS_BAR` | `on` | Progress bar for PIP installation | diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 93220b2572f14..efeb839d3bdfb 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -233,8 +233,8 @@ class VersionedFile(NamedTuple): file_name: str -AIRFLOW_PIP_VERSION = "24.3.1" -AIRFLOW_UV_VERSION = "0.5.17" +AIRFLOW_PIP_VERSION = "25.0" +AIRFLOW_UV_VERSION = "0.5.24" AIRFLOW_USE_UV = False # TODO: automate these as well WHEEL_VERSION = "0.44.0" diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index d374a30ededb9..6f48f6458833c 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -46,7 +46,7 @@ # Checked before putting in build cache ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS = ["3.8", "3.9", "3.10", "3.11", "3.12"] -DEFAULT_PYTHON_MAJOR_MINOR_VERSION = ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS[0] +DEFAULT_PYTHON_MAJOR_MINOR_VERSION = "3.9" ALLOWED_ARCHITECTURES = [Architecture.X86_64, Architecture.ARM] # Database Backends used when starting Breeze. The "none" value means that the configuration is invalid. # No database will be started - access to a database will fail. 
@@ -188,8 +188,8 @@ ALLOWED_INSTALL_MYSQL_CLIENT_TYPES = ["mariadb", "mysql"] -PIP_VERSION = "24.3.1" -UV_VERSION = "0.5.17" +PIP_VERSION = "25.0" +UV_VERSION = "0.5.24" DEFAULT_UV_HTTP_TIMEOUT = 300 DEFAULT_WSL2_HTTP_TIMEOUT = 900 diff --git a/dev/breeze/src/airflow_breeze/params/common_build_params.py b/dev/breeze/src/airflow_breeze/params/common_build_params.py index ed32eea4f2ca2..b206d595b2353 100644 --- a/dev/breeze/src/airflow_breeze/params/common_build_params.py +++ b/dev/breeze/src/airflow_breeze/params/common_build_params.py @@ -67,7 +67,7 @@ class CommonBuildParams: prepare_buildx_cache: bool = False python_image: str | None = None push: bool = False - python: str = "3.8" + python: str = "3.9" uv_http_timeout: int = DEFAULT_UV_HTTP_TIMEOUT dry_run: bool = False version_suffix_for_pypi: str | None = None diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index bfa97dcb1ee50..61e237ba3937e 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -113,10 +113,10 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ("INTHEWILD.md",), { "selected-providers-list-as-string": None, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "ci-image-build": "false", "needs-helm-tests": "false", "run-tests": "false", @@ -147,10 +147,10 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): pytest.param( ("tests/api/file.py",), { - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "false", "needs-helm-tests": "false", @@ -174,10 +174,10 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ("airflow/operators/file.py",), { "selected-providers-list-as-string": None, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "false", "needs-helm-tests": "false", @@ -201,10 +201,10 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ("airflow/serialization/python.py",), { "selected-providers-list-as-string": None, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "false", "needs-helm-tests": "false", @@ -228,10 +228,10 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ("docs/file.rst",), { "selected-providers-list-as-string": None, - "all-python-versions": "['3.8']", - 
"all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "false", "needs-helm-tests": "false", @@ -259,10 +259,10 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ), { "selected-providers-list-as-string": None, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "true", "needs-helm-tests": "true", @@ -340,10 +340,10 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ("tests/utils/test_cli_util.py",), { "selected-providers-list-as-string": ALL_PROVIDERS_AFFECTED, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "true", "needs-helm-tests": "true", @@ -366,10 +366,10 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ("tests_common/__init__.py",), { "selected-providers-list-as-string": ALL_PROVIDERS_AFFECTED, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "true", "needs-helm-tests": "true", @@ -394,10 +394,10 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ("airflow/ui/src/index.tsx",), { "selected-providers-list-as-string": None, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "ci-image-build": "false", "prod-image-build": "false", "needs-helm-tests": "false", @@ -595,16 +595,16 @@ def test_full_test_needed_when_scripts_changes(files: tuple[str, ...], expected_ "main", { "selected-providers-list-as-string": ALL_PROVIDERS_AFFECTED, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "all-versions": "false", "mysql-versions": "['8.0']", "postgres-versions": "['13']", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "kubernetes-versions": "['v1.28.15']", "kubernetes-versions-list-as-string": "v1.28.15", - "kubernetes-combos-list-as-string": "3.8-v1.28.15", + "kubernetes-combos-list-as-string": "3.9-v1.28.15", "ci-image-build": "true", "prod-image-build": "true", "run-tests": "true", @@ -631,16 +631,16 @@ def 
test_full_test_needed_when_scripts_changes(files: tuple[str, ...], expected_ "main", { "selected-providers-list-as-string": ALL_PROVIDERS_AFFECTED, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "all-versions": "false", "mysql-versions": "['8.0']", "postgres-versions": "['13']", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "kubernetes-versions": "['v1.28.15']", "kubernetes-versions-list-as-string": "v1.28.15", - "kubernetes-combos-list-as-string": "3.8-v1.28.15", + "kubernetes-combos-list-as-string": "3.9-v1.28.15", "ci-image-build": "true", "prod-image-build": "true", "run-tests": "true", @@ -707,14 +707,14 @@ def test_full_test_needed_when_scripts_changes(files: tuple[str, ...], expected_ "main", { "selected-providers-list-as-string": ALL_PROVIDERS_AFFECTED, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "all-versions": "false", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "kubernetes-versions": "['v1.28.15']", "kubernetes-versions-list-as-string": "v1.28.15", - "kubernetes-combos-list-as-string": "3.8-v1.28.15", + "kubernetes-combos-list-as-string": "3.9-v1.28.15", "ci-image-build": "true", "prod-image-build": "true", "run-tests": "true", @@ -741,14 +741,14 @@ def test_full_test_needed_when_scripts_changes(files: tuple[str, ...], expected_ "main", { "selected-providers-list-as-string": ALL_PROVIDERS_AFFECTED, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "all-versions": "false", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "kubernetes-versions": "['v1.28.15']", "kubernetes-versions-list-as-string": "v1.28.15", - "kubernetes-combos-list-as-string": "3.8-v1.28.15", + "kubernetes-combos-list-as-string": "3.9-v1.28.15", "ci-image-build": "true", "prod-image-build": "true", "run-tests": "true", @@ -775,10 +775,10 @@ def test_full_test_needed_when_scripts_changes(files: tuple[str, ...], expected_ ("full tests needed",), "v2-7-stable", { - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", - "python-versions": "['3.8']", - "python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", + "python-versions": "['3.9']", + "python-versions-list-as-string": "3.9", "all-versions": "false", "ci-image-build": "true", "prod-image-build": "true", @@ -824,8 +824,8 @@ def test_expected_output_full_tests_needed( ("INTHEWILD.md",), { "selected-providers-list-as-string": None, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "ci-image-build": "false", "needs-helm-tests": "false", "run-tests": "false", @@ -847,8 +847,8 @@ def test_expected_output_full_tests_needed( "tests/providers/google/file.py", ), { - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + 
"all-python-versions-list-as-string": "3.9", "needs-helm-tests": "false", "ci-image-build": "true", "prod-image-build": "true", @@ -874,8 +874,8 @@ def test_expected_output_full_tests_needed( "tests/providers/google/file.py", ), { - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "true", "needs-helm-tests": "false", @@ -899,8 +899,8 @@ def test_expected_output_full_tests_needed( "tests/providers/google/file.py", ), { - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "false", "needs-helm-tests": "false", @@ -1029,8 +1029,8 @@ def test_expected_output_push( ("INTHEWILD.md",), { "selected-providers-list-as-string": None, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "ci-image-build": "false", "needs-helm-tests": "false", "run-tests": "false", @@ -1051,8 +1051,8 @@ def test_expected_output_push( ("tests/system/any_file.py",), { "selected-providers-list-as-string": None, - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "false", "needs-helm-tests": "false", @@ -1073,8 +1073,8 @@ def test_expected_output_push( pytest.param( ("airflow/models/test.py",), { - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "false", "needs-helm-tests": "false", @@ -1096,8 +1096,8 @@ def test_expected_output_push( pytest.param( ("airflow/file.py",), { - "all-python-versions": "['3.8']", - "all-python-versions-list-as-string": "3.8", + "all-python-versions": "['3.9']", + "all-python-versions-list-as-string": "3.9", "ci-image-build": "true", "prod-image-build": "false", "needs-helm-tests": "false", diff --git a/docker_tests/docker_utils.py b/docker_tests/docker_utils.py index 1c9aea8a420b3..a5230d6be2193 100644 --- a/docker_tests/docker_utils.py +++ b/docker_tests/docker_utils.py @@ -87,11 +87,11 @@ def display_dependency_conflict_message(): CI image: - breeze ci-image build --upgrade-to-newer-dependencies --python 3.8 + breeze ci-image build --upgrade-to-newer-dependencies --python 3.9 Production image: - breeze ci-image build --production-image --upgrade-to-newer-dependencies --python 3.8 + breeze ci-image build --production-image --upgrade-to-newer-dependencies --python 3.9 * You will see error messages there telling which requirements are conflicting and which packages caused the conflict. Add the limitation that caused the conflict to EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst index 6021ba514ad1f..7cef74c5f4436 100644 --- a/docs/apache-airflow/faq.rst +++ b/docs/apache-airflow/faq.rst @@ -522,26 +522,3 @@ This means ``explicit_defaults_for_timestamp`` is disabled in your mysql server #. Set ``explicit_defaults_for_timestamp = 1`` under the ``mysqld`` section in your ``my.cnf`` file. #. Restart the Mysql server. - -Does Airflow collect any telemetry data? 
----------------------------------------- - -.. _usage-data-collection: - -Airflow integrates `Scarf `__ to collect basic usage data during operation. -This data assists Airflow maintainers in better understanding how Airflow is used. -Insights gained from this data are helpful for prioritizing patches, minor releases, and -security fixes. Additionally, this information supports key decisions related to the development road map. - -Deployments can opt-out of data collection by setting the :ref:`[usage_data_collection] enabled ` -option to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable. -Individual users can easily opt-out of analytics in various ways documented in the -`Scarf Do Not Track docs `__. - -The telemetry data collected is limited to the following: - -- Airflow version -- Python version -- Operating system & machine architecture -- Executor -- Metadata DB type & its version diff --git a/docs/apache-airflow/installation/installing-from-pypi.rst b/docs/apache-airflow/installation/installing-from-pypi.rst index 8c689da5e1f7f..a62d87d90543c 100644 --- a/docs/apache-airflow/installation/installing-from-pypi.rst +++ b/docs/apache-airflow/installation/installing-from-pypi.rst @@ -330,12 +330,6 @@ dependencies compatible with just airflow core at the moment Airflow was release # For example: https://raw.githubusercontent.com/apache/airflow/constraints-|version|/constraints-no-providers-3.8.txt pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}" - -.. note:: - - Airflow uses `Scarf `__ to collect basic usage data during operation. - Check the :ref:`Usage data collection FAQ ` for more information about the data collected and how to opt-out. - Troubleshooting ''''''''''''''' diff --git a/docs/apache-airflow/installation/supported-versions.rst b/docs/apache-airflow/installation/supported-versions.rst index c8fc9c8293ee1..dacd2da7bde09 100644 --- a/docs/apache-airflow/installation/supported-versions.rst +++ b/docs/apache-airflow/installation/supported-versions.rst @@ -29,7 +29,7 @@ Apache Airflow® version life cycle: ========= ===================== ========= =============== ================= ================ Version Current Patch/Minor State First Release Limited Support EOL/Terminated ========= ===================== ========= =============== ================= ================ -2 2.10.4 Supported Dec 17, 2020 TBD TBD +2 2.10.5 Supported Dec 17, 2020 TBD TBD 1.10 1.10.15 EOL Aug 27, 2018 Dec 17, 2020 June 17, 2021 1.9 1.9.0 EOL Jan 03, 2018 Aug 27, 2018 Aug 27, 2018 1.8 1.8.2 EOL Mar 19, 2017 Jan 03, 2018 Jan 03, 2018 diff --git a/docs/docker-stack/README.md b/docs/docker-stack/README.md index 9369660e2ccc2..5db0badc469ed 100644 --- a/docs/docker-stack/README.md +++ b/docs/docker-stack/README.md @@ -31,12 +31,12 @@ Every time a new version of Airflow is released, the images are prepared in the [apache/airflow DockerHub](https://hub.docker.com/r/apache/airflow) for all the supported Python versions. 
-You can find the following images there (Assuming Airflow version `2.10.4`): +You can find the following images there (Assuming Airflow version `2.10.5`): * `apache/airflow:latest` - the latest released Airflow image with default Python version (3.8 currently) * `apache/airflow:latest-pythonX.Y` - the latest released Airflow image with specific Python version -* `apache/airflow:2.10.4` - the versioned Airflow image with default Python version (3.8 currently) -* `apache/airflow:2.10.4-pythonX.Y` - the versioned Airflow image with specific Python version +* `apache/airflow:2.10.5` - the versioned Airflow image with default Python version (3.8 currently) +* `apache/airflow:2.10.5-pythonX.Y` - the versioned Airflow image with specific Python version Those are "reference" regular images. They contain the most common set of extras, dependencies and providers that are often used by the users and they are good to "try-things-out" when you want to just take Airflow for a spin, @@ -47,8 +47,8 @@ via [Building the image](https://airflow.apache.org/docs/docker-stack/build.html * `apache/airflow:slim-latest` - the latest released Airflow image with default Python version (3.8 currently) * `apache/airflow:slim-latest-pythonX.Y` - the latest released Airflow image with specific Python version -* `apache/airflow:slim-2.10.4` - the versioned Airflow image with default Python version (3.8 currently) -* `apache/airflow:slim-2.10.4-pythonX.Y` - the versioned Airflow image with specific Python version +* `apache/airflow:slim-2.10.5` - the versioned Airflow image with default Python version (3.8 currently) +* `apache/airflow:slim-2.10.5-pythonX.Y` - the versioned Airflow image with specific Python version The Apache Airflow image provided as convenience package is optimized for size, and it provides just a bare minimal set of the extras and dependencies installed and in most cases diff --git a/docs/docker-stack/docker-examples/extending/add-airflow-configuration/Dockerfile b/docs/docker-stack/docker-examples/extending/add-airflow-configuration/Dockerfile index 5fb16b7ced047..ebb3c9e13fe64 100644 --- a/docs/docker-stack/docker-examples/extending/add-airflow-configuration/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-airflow-configuration/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 ENV AIRFLOW__CORE__LOAD_EXAMPLES=True ENV AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=my_conn_string # [END Dockerfile] diff --git a/docs/docker-stack/docker-examples/extending/add-apt-packages/Dockerfile b/docs/docker-stack/docker-examples/extending/add-apt-packages/Dockerfile index 72346ed959730..221b3dcdf6dd8 100644 --- a/docs/docker-stack/docker-examples/extending/add-apt-packages/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-apt-packages/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. 
It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 USER root RUN apt-get update \ && apt-get install -y --no-install-recommends \ diff --git a/docs/docker-stack/docker-examples/extending/add-build-essential-extend/Dockerfile b/docs/docker-stack/docker-examples/extending/add-build-essential-extend/Dockerfile index 2bb166deb0ba0..399569c296198 100644 --- a/docs/docker-stack/docker-examples/extending/add-build-essential-extend/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-build-essential-extend/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 USER root RUN apt-get update \ && apt-get install -y --no-install-recommends \ diff --git a/docs/docker-stack/docker-examples/extending/add-providers/Dockerfile b/docs/docker-stack/docker-examples/extending/add-providers/Dockerfile index 46584a972c3ef..50bc0e745e7f3 100644 --- a/docs/docker-stack/docker-examples/extending/add-providers/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-providers/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 USER root RUN apt-get update \ && apt-get install -y --no-install-recommends \ diff --git a/docs/docker-stack/docker-examples/extending/add-pypi-packages-constraints/Dockerfile b/docs/docker-stack/docker-examples/extending/add-pypi-packages-constraints/Dockerfile index d0a73412945d0..845b1107b9440 100644 --- a/docs/docker-stack/docker-examples/extending/add-pypi-packages-constraints/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-pypi-packages-constraints/Dockerfile @@ -15,6 +15,6 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 RUN pip install --no-cache-dir "apache-airflow==${AIRFLOW_VERSION}" lxml --constraint "${HOME}/constraints.txt" # [END Dockerfile] diff --git a/docs/docker-stack/docker-examples/extending/add-pypi-packages-uv/Dockerfile b/docs/docker-stack/docker-examples/extending/add-pypi-packages-uv/Dockerfile index 06082308dc483..c6f87ff024bce 100644 --- a/docs/docker-stack/docker-examples/extending/add-pypi-packages-uv/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-pypi-packages-uv/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 # The `uv` tools is Rust packaging tool that is much faster than `pip` and other installer # Support for uv as installation tool is experimental diff --git a/docs/docker-stack/docker-examples/extending/add-pypi-packages/Dockerfile b/docs/docker-stack/docker-examples/extending/add-pypi-packages/Dockerfile index fe19bacf174cc..d6ca1b31faf51 100644 --- a/docs/docker-stack/docker-examples/extending/add-pypi-packages/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-pypi-packages/Dockerfile @@ -15,6 +15,6 @@ # This is an example Dockerfile. 
It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 RUN pip install --no-cache-dir "apache-airflow==${AIRFLOW_VERSION}" lxml # [END Dockerfile] diff --git a/docs/docker-stack/docker-examples/extending/add-requirement-packages/Dockerfile b/docs/docker-stack/docker-examples/extending/add-requirement-packages/Dockerfile index ecbbf1984bbce..aebc7f49b23ba 100644 --- a/docs/docker-stack/docker-examples/extending/add-requirement-packages/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-requirement-packages/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 COPY requirements.txt / RUN pip install --no-cache-dir "apache-airflow==${AIRFLOW_VERSION}" -r /requirements.txt # [END Dockerfile] diff --git a/docs/docker-stack/docker-examples/extending/custom-providers/Dockerfile b/docs/docker-stack/docker-examples/extending/custom-providers/Dockerfile index 363debd15c43e..4112a4b5a89e4 100644 --- a/docs/docker-stack/docker-examples/extending/custom-providers/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/custom-providers/Dockerfile @@ -15,6 +15,6 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 RUN pip install "apache-airflow==${AIRFLOW_VERSION}" --no-cache-dir apache-airflow-providers-docker==2.5.1 # [END Dockerfile] diff --git a/docs/docker-stack/docker-examples/extending/embedding-dags/Dockerfile b/docs/docker-stack/docker-examples/extending/embedding-dags/Dockerfile index 59f395d1728df..1a4672f84f968 100644 --- a/docs/docker-stack/docker-examples/extending/embedding-dags/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/embedding-dags/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 COPY --chown=airflow:root test_dag.py /opt/airflow/dags diff --git a/docs/docker-stack/docker-examples/extending/writable-directory/Dockerfile b/docs/docker-stack/docker-examples/extending/writable-directory/Dockerfile index 7e3cee6464585..a72f2bed5bd9b 100644 --- a/docs/docker-stack/docker-examples/extending/writable-directory/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/writable-directory/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.10.4 +FROM apache/airflow:2.10.5 RUN umask 0002; \ mkdir -p ~/writeable-directory # [END Dockerfile] diff --git a/docs/docker-stack/entrypoint.rst b/docs/docker-stack/entrypoint.rst index 5c0d0d0a432a6..0da779d8bec85 100644 --- a/docs/docker-stack/entrypoint.rst +++ b/docs/docker-stack/entrypoint.rst @@ -132,7 +132,7 @@ if you specify extra arguments. For example: .. code-block:: bash - docker run -it apache/airflow:2.10.4-python3.8 bash -c "ls -la" + docker run -it apache/airflow:2.10.5-python3.8 bash -c "ls -la" total 16 drwxr-xr-x 4 airflow root 4096 Jun 5 18:12 . drwxr-xr-x 1 root root 4096 Jun 5 18:12 .. @@ -144,7 +144,7 @@ you pass extra parameters. For example: .. 
code-block:: bash - > docker run -it apache/airflow:2.10.4-python3.8 python -c "print('test')" + > docker run -it apache/airflow:2.10.5-python3.8 python -c "print('test')" test If first argument equals to "airflow" - the rest of the arguments is treated as an airflow command @@ -152,13 +152,13 @@ to execute. Example: .. code-block:: bash - docker run -it apache/airflow:2.10.4-python3.8 airflow webserver + docker run -it apache/airflow:2.10.5-python3.8 airflow webserver If there are any other arguments - they are simply passed to the "airflow" command .. code-block:: bash - > docker run -it apache/airflow:2.10.4-python3.8 help + > docker run -it apache/airflow:2.10.5-python3.8 help usage: airflow [-h] GROUP_OR_COMMAND ... positional arguments: @@ -363,7 +363,7 @@ database and creating an ``admin/admin`` Admin user with the following command: --env "_AIRFLOW_DB_MIGRATE=true" \ --env "_AIRFLOW_WWW_USER_CREATE=true" \ --env "_AIRFLOW_WWW_USER_PASSWORD=admin" \ - apache/airflow:2.10.4-python3.8 webserver + apache/airflow:2.10.5-python3.8 webserver .. code-block:: bash @@ -372,7 +372,7 @@ database and creating an ``admin/admin`` Admin user with the following command: --env "_AIRFLOW_DB_MIGRATE=true" \ --env "_AIRFLOW_WWW_USER_CREATE=true" \ --env "_AIRFLOW_WWW_USER_PASSWORD_CMD=echo admin" \ - apache/airflow:2.10.4-python3.8 webserver + apache/airflow:2.10.5-python3.8 webserver The commands above perform initialization of the SQLite database, create admin user with admin password and Admin role. They also forward local port ``8080`` to the webserver port and finally start the webserver. @@ -412,6 +412,6 @@ Example: --env "_AIRFLOW_DB_MIGRATE=true" \ --env "_AIRFLOW_WWW_USER_CREATE=true" \ --env "_AIRFLOW_WWW_USER_PASSWORD_CMD=echo admin" \ - apache/airflow:2.10.4-python3.8 webserver + apache/airflow:2.10.5-python3.8 webserver This method is only available starting from Docker image of Airflow 2.1.1 and above. 
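For context on the ``embedding-dags`` example updated above: its Dockerfile copies a ``test_dag.py`` into ``/opt/airflow/dags``. A minimal sketch of what such an embedded DAG file could look like (illustrative only; the DAG id and task name are hypothetical, not the repository's actual ``test_dag.py``):

    # Hypothetical DAG file shipped into the image via
    # COPY --chown=airflow:root test_dag.py /opt/airflow/dags
    from __future__ import annotations

    import pendulum

    from airflow import DAG
    from airflow.operators.empty import EmptyOperator

    with DAG(
        dag_id="embedded_example",
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        schedule=None,
        catchup=False,
    ):
        # A single no-op task is enough to confirm the scheduler picks up DAGs baked into the image.
        EmptyOperator(task_id="noop")
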
diff --git a/docs/exts/airflow_intersphinx.py b/docs/exts/airflow_intersphinx.py index b0fecdec9b7b2..ccfd6662be3b2 100644 --- a/docs/exts/airflow_intersphinx.py +++ b/docs/exts/airflow_intersphinx.py @@ -126,7 +126,7 @@ def fetch_inventories(intersphinx_mapping) -> dict[str, Any]: cache: dict[Any, Any] = {} with concurrent.futures.ThreadPoolExecutor() as pool: for name, (uri, invs) in intersphinx_mapping.values(): - pool.submit(fetch_inventory_group, name, uri, invs, cache, _MockApp(), now) + pool.submit(fetch_inventory_group, name, uri, invs, cache, _MockApp(), now) # type: ignore[arg-type] inv_dict = {} for uri, (name, now, invdata) in cache.items(): diff --git a/docs/sphinx_design/static/custom.css b/docs/sphinx_design/static/custom.css index b1cf49f37d486..70356c06a97ca 100644 --- a/docs/sphinx_design/static/custom.css +++ b/docs/sphinx_design/static/custom.css @@ -31,3 +31,38 @@ --sd-color-tabs-underline-hover: #68d1ff; --sd-color-tabs-underline: transparent; } + +div.admonition.warning { + background: #e8cccc; + font-weight: bolder; +} + +.rst-content .warning .admonition-title { + background: #cc341d; +} + +/* Patches as of moving to Sphinx 7 to get layout to previous state */ +/* Needs to be cleaned in a follow-up to source this from the origin style in */ +/* https://github.com/apache/airflow-site/blob/main/landing-pages/site/assets/scss/_rst-content.scss */ +.base-layout { + padding-top: 123px !important; +} + +section { + padding-top: 0rem !important; + padding-bottom: 0rem !important; +} + +section ol li p:last-child, section ul li p:last-child { + margin-bottom: 0 !important; +} + +a.headerlink { + content: "" !important; + font-size: 75% !important; +} + +a.headerlink::after { + content: " [link]" !important; /* Theme image not existing */ + visibility: visible !important; +} diff --git a/hatch_build.py b/hatch_build.py index 8cb883f002690..13fa2a44330f3 100644 --- a/hatch_build.py +++ b/hatch_build.py @@ -156,37 +156,28 @@ DOC_EXTRAS: dict[str, list[str]] = { "doc": [ - "astroid>=2.12.3,<3.0", - "checksumdir>=1.2.0", - # click 8.1.4 and 8.1.5 generate mypy errors due to typing issue in the upstream package: - # https://github.com/pallets/click/issues/2558 - "click>=8.0,!=8.1.4,!=8.1.5", - # Docutils 0.17.0 converts generated
<div class="section"> into <section> and breaks our doc formatting - # By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle - # <section>
tags for sections - "docutils<0.17,>=0.16", - # The new theme 0.1.0 is for Sphinx 7. Airflow 2. still uses old version of Sphinx - "sphinx-airflow-theme>=0.0.12,<0.1.0", - "sphinx-argparse>=0.4.0", - # sphinx-autoapi fails with astroid 3.0, see: https://github.com/readthedocs/sphinx-autoapi/issues/407 - # This was fixed in sphinx-autoapi 3.0, however it has requirement sphinx>=6.1, but we stuck on 5.x - "sphinx-autoapi>=2.1.1", - "sphinx-copybutton>=0.5.2", - "sphinx-design>=0.5.0", - "sphinx-jinja>=2.0.2", - "sphinx-rtd-theme>=2.0.0", - # Currently we are using sphinx 5 but we need to migrate to Sphinx 7 - "sphinx>=5.3.0,<6.0.0", - "sphinxcontrib-applehelp>=1.0.4", - "sphinxcontrib-devhelp>=1.0.2", - "sphinxcontrib-htmlhelp>=2.0.1", - "sphinxcontrib-httpdomain>=1.8.1", - "sphinxcontrib-jquery>=4.1", - "sphinxcontrib-jsmath>=1.0.1", - "sphinxcontrib-qthelp>=1.0.3", - "sphinxcontrib-redoc>=1.6.0", - "sphinxcontrib-serializinghtml==1.1.5", - "sphinxcontrib-spelling>=8.0.0", + "astroid>=3; python_version >= '3.9'", + "checksumdir>=1.2.0; python_version >= '3.9'", + "click>=8.1.8; python_version >= '3.9'", + "docutils>=0.21; python_version >= '3.9'", + "sphinx-airflow-theme>=0.1.0; python_version >= '3.9'", + "sphinx-argparse>=0.4.0; python_version >= '3.9'", + "sphinx-autoapi>=3; python_version >= '3.9'", + "sphinx-copybutton>=0.5.2; python_version >= '3.9'", + "sphinx-design>=0.5.0; python_version >= '3.9'", + "sphinx-jinja>=2.0.2; python_version >= '3.9'", + "sphinx-rtd-theme>=2.0.0; python_version >= '3.9'", + "sphinx>=7; python_version >= '3.9'", + "sphinxcontrib-applehelp>=1.0.4; python_version >= '3.9'", + "sphinxcontrib-devhelp>=1.0.2; python_version >= '3.9'", + "sphinxcontrib-htmlhelp>=2.0.1; python_version >= '3.9'", + "sphinxcontrib-httpdomain>=1.8.1; python_version >= '3.9'", + "sphinxcontrib-jquery>=4.1; python_version >= '3.9'", + "sphinxcontrib-jsmath>=1.0.1; python_version >= '3.9'", + "sphinxcontrib-qthelp>=1.0.3; python_version >= '3.9'", + "sphinxcontrib-redoc>=1.6.0; python_version >= '3.9'", + "sphinxcontrib-serializinghtml>=1.1.5; python_version >= '3.9'", + "sphinxcontrib-spelling>=8.0.0; python_version >= '3.9'", ], "doc-gen": [ "apache-airflow[doc]", diff --git a/newsfragments/44751.bugfix.rst b/newsfragments/44751.bugfix.rst deleted file mode 100644 index c85601d0fe13a..0000000000000 --- a/newsfragments/44751.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -``TriggerRule.ALWAYS`` cannot be utilized within a task-generated mapping, either in bare tasks (fixed in this PR) or mapped task groups (fixed in PR #44368). The issue with doing so, is that the task is immediately executed without waiting for the upstreams's mapping results, which certainly leads to failure of the task. This fix avoids it by raising an exception when it is detected during DAG parsing. diff --git a/newsfragments/44912.bugfix.rst b/newsfragments/44912.bugfix.rst deleted file mode 100644 index 6d19c5223f564..0000000000000 --- a/newsfragments/44912.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix short circuit operator in mapped tasks. The operator did not work until now due to a bug in ``NotPreviouslySkippedDep``. Please note that at time of merging, this fix has been applied only for Airflow version > 2.10.4 and < 3, and should be ported to v3 after merging PR #44925. 
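For illustration of the behaviour described in the deleted ``44751.bugfix.rst`` fragment above: with that fix in place, declaring ``TriggerRule.ALWAYS`` on a task mapped over another task's output is rejected at DAG-parse time, since such a task would otherwise start before its upstream mapping results exist. A minimal sketch of the now-rejected pattern, with hypothetical DAG and task names:

    # Hypothetical sketch of the pattern the 44751 fix rejects during DAG parsing:
    # a dynamically mapped task declared with TriggerRule.ALWAYS would run immediately,
    # without waiting for the upstream task that produces its mapping input.
    from __future__ import annotations

    import pendulum

    from airflow import DAG
    from airflow.decorators import task
    from airflow.utils.trigger_rule import TriggerRule

    with DAG(
        dag_id="always_in_mapped_task",
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        schedule=None,
    ):

        @task
        def make_values():
            return [1, 2, 3]

        @task(trigger_rule=TriggerRule.ALWAYS)  # rejected when expanded over task-generated output
        def consume(value):
            return value

        consume.expand(value=make_values())
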
diff --git a/newsfragments/44937.bugfix.rst b/newsfragments/44937.bugfix.rst deleted file mode 100644 index d50da4de82fc9..0000000000000 --- a/newsfragments/44937.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix pre-mature evaluation of tasks in mapped task group. The origins of the bug are in ``TriggerRuleDep``, when dealing with ``TriggerRule`` that is fastly triggered (i.e, ``ONE_FAILED``, ``ONE_SUCCESS`, or ``ONE_DONE``). Please note that at time of merging, this fix has been applied only for Airflow version > 2.10.4 and < 3, and should be ported to v3 after merging PR #40460. diff --git a/newsfragments/44938.bugfix.rst b/newsfragments/44938.bugfix.rst deleted file mode 100644 index 4e6746b223d1d..0000000000000 --- a/newsfragments/44938.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix task_id validation in BaseOperator (#44938) diff --git a/newsfragments/44968.misc.rst b/newsfragments/44968.misc.rst deleted file mode 100644 index 160ccd60855af..0000000000000 --- a/newsfragments/44968.misc.rst +++ /dev/null @@ -1 +0,0 @@ -The ``conf`` variable, which provided access to the full Airflow configuration (``airflow.cfg``), has been deprecated and will be removed in Airflow 3 from the Task (Jinja2) template context for security and simplicity. If you need specific configuration values in your tasks, retrieve them explicitly in your DAG or task code using the ``airflow.configuration.conf`` module. For users retrieving the webserver URL (e.g., to include log links in task or callbacks), one of the most common use-case, use the ``ti.log_url`` property available in the ``TaskInstance`` context instead. diff --git a/newsfragments/45134.bugfix.rst b/newsfragments/45134.bugfix.rst deleted file mode 100644 index 09aaae23a3487..0000000000000 --- a/newsfragments/45134.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -(v2 API & UI) Allow fetching XCom with forward slash from the API and escape it in the UI diff --git a/newsfragments/45530.significant.rst b/newsfragments/45530.significant.rst deleted file mode 100644 index 7e2ae8e8ac6a5..0000000000000 --- a/newsfragments/45530.significant.rst +++ /dev/null @@ -1,12 +0,0 @@ -Ensure teardown tasks are executed when DAG run is set to failed - -Previously when a DAG run was manually set to "failed" or to "success" state the terminal state was set to all tasks. -But this was a gap for cases when setup- and teardown tasks were defined: If teardown was used to clean-up infrastructure -or other resources, they were also skipped and thus resources could stay allocated. - -As of now when setup tasks had been executed before and the DAG is manually set to "failed" or "success" then teardown -tasks are executed. Teardown tasks are skipped if the setup was also skipped. - -As a side effect this means if the DAG contains teardown tasks, then the manual marking of DAG as "failed" or "success" -will need to keep the DAG in running state to ensure that teardown tasks will be scheduled. They would not be scheduled -if the DAG is diorectly set to "failed" or "success". 
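For illustration of the setup/teardown behaviour covered by the deleted ``45530.significant.rst`` fragment above, a minimal sketch of a DAG with a setup/teardown pair (task names are hypothetical): once ``create_resource`` has run, manually marking the run "failed" or "success" now still schedules ``delete_resource`` rather than skipping it.

    # Hypothetical setup/teardown DAG illustrating the 45530 change: the teardown task
    # is scheduled even when the DAG run is manually set to "failed" after the setup ran.
    from __future__ import annotations

    import pendulum

    from airflow import DAG
    from airflow.operators.empty import EmptyOperator

    with DAG(
        dag_id="setup_teardown_example",
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        schedule=None,
    ):
        create_resource = EmptyOperator(task_id="create_resource")
        use_resource = EmptyOperator(task_id="use_resource")
        delete_resource = EmptyOperator(task_id="delete_resource")

        # as_teardown(setups=...) marks create_resource/delete_resource as a setup/teardown pair.
        create_resource >> use_resource >> delete_resource.as_teardown(setups=create_resource)
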
diff --git a/scripts/ci/install_breeze.sh b/scripts/ci/install_breeze.sh index 093c8f6db9ce5..5259628a579d3 100755 --- a/scripts/ci/install_breeze.sh +++ b/scripts/ci/install_breeze.sh @@ -21,8 +21,8 @@ cd "$( dirname "${BASH_SOURCE[0]}" )/../../" PYTHON_ARG="" -PIP_VERSION="24.3.1" -UV_VERSION="0.5.17" +PIP_VERSION="25.0" +UV_VERSION="0.5.24" if [[ ${PYTHON_VERSION=} != "" ]]; then PYTHON_ARG="--python=$(which python"${PYTHON_VERSION}") " fi diff --git a/scripts/ci/pre_commit/common_precommit_utils.py b/scripts/ci/pre_commit/common_precommit_utils.py index f406c8116622d..1fe162421fdf0 100644 --- a/scripts/ci/pre_commit/common_precommit_utils.py +++ b/scripts/ci/pre_commit/common_precommit_utils.py @@ -30,7 +30,7 @@ AIRFLOW_SOURCES_ROOT_PATH = Path(__file__).parents[3].resolve() AIRFLOW_BREEZE_SOURCES_PATH = AIRFLOW_SOURCES_ROOT_PATH / "dev" / "breeze" -DEFAULT_PYTHON_MAJOR_MINOR_VERSION = "3.8" +DEFAULT_PYTHON_MAJOR_MINOR_VERSION = "3.9" console = Console(width=400, color_system="standard") @@ -223,12 +223,12 @@ def validate_cmd_result(cmd_result, include_ci_env_check=False): "\n[yellow]If you see strange stacktraces above, especially about missing imports " "run this command:[/]\n" ) - console.print("[magenta]breeze ci-image build --python 3.8 --upgrade-to-newer-dependencies[/]\n") + console.print("[magenta]breeze ci-image build --python 3.9 --upgrade-to-newer-dependencies[/]\n") elif cmd_result.returncode != 0: console.print( "[warning]\nIf you see strange stacktraces above, " - "run `breeze ci-image build --python 3.8` and try again." + "run `breeze ci-image build --python 3.9` and try again." ) sys.exit(cmd_result.returncode) diff --git a/scripts/ci/pre_commit/supported_versions.py b/scripts/ci/pre_commit/supported_versions.py index 8524f237dc993..a3a96abb957c0 100755 --- a/scripts/ci/pre_commit/supported_versions.py +++ b/scripts/ci/pre_commit/supported_versions.py @@ -27,7 +27,7 @@ HEADERS = ("Version", "Current Patch/Minor", "State", "First Release", "Limited Support", "EOL/Terminated") SUPPORTED_VERSIONS = ( - ("2", "2.10.4", "Supported", "Dec 17, 2020", "TBD", "TBD"), + ("2", "2.10.5", "Supported", "Dec 17, 2020", "TBD", "TBD"), ("1.10", "1.10.15", "EOL", "Aug 27, 2018", "Dec 17, 2020", "June 17, 2021"), ("1.9", "1.9.0", "EOL", "Jan 03, 2018", "Aug 27, 2018", "Aug 27, 2018"), ("1.8", "1.8.2", "EOL", "Mar 19, 2017", "Jan 03, 2018", "Jan 03, 2018"), diff --git a/tests/api_connexion/endpoints/test_dag_run_endpoint.py b/tests/api_connexion/endpoints/test_dag_run_endpoint.py index dc77648784ce5..7b63aca840f01 100644 --- a/tests/api_connexion/endpoints/test_dag_run_endpoint.py +++ b/tests/api_connexion/endpoints/test_dag_run_endpoint.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import annotations +import json import urllib from datetime import timedelta from unittest import mock @@ -25,6 +26,7 @@ from airflow.api_connexion.exceptions import EXCEPTIONS_LINK_MAP from airflow.datasets import Dataset +from airflow.models import Log from airflow.models.dag import DAG, DagModel from airflow.models.dagrun import DagRun from airflow.models.dataset import DatasetEvent, DatasetModel @@ -1729,6 +1731,60 @@ def test_should_respond_200(self, state, run_type, dag_maker, session): "note": None, } + @pytest.mark.parametrize("state", ["failed", "success", "queued"]) + @pytest.mark.parametrize("run_type", [state.value for state in DagRunType]) + def test_action_logging(self, state, run_type, dag_maker, session): + dag_id = "TEST_DAG_ID" + dag_run_id = "TEST_DAG_RUN_ID" + with dag_maker(dag_id) as dag: + task = EmptyOperator(task_id="task_id", dag=dag) + self.app.dag_bag.bag_dag(dag, root_dag=dag) + dr = dag_maker.create_dagrun(run_id=dag_run_id, run_type=run_type) + ti = dr.get_task_instance(task_id="task_id") + ti.task = task + ti.state = State.RUNNING + session.merge(ti) + session.commit() + + request_json = {"state": state} + + self.client.patch( + f"api/v1/dags/{dag_id}/dagRuns/{dag_run_id}", + json=request_json, + environ_overrides={"REMOTE_USER": "test"}, + ) + + log = ( + session.query(Log) + .filter( + Log.dag_id == dag_id, + Log.run_id == dag_run_id, + Log.event == "api.update_dag_run_state", + ) + .order_by(Log.id.desc()) + .first() + ) + assert log.extra == json.dumps(request_json) + + self.client.patch( + f"api/v1/dags/{dag_id}/dagRuns/{dag_run_id}", + json=request_json, + environ_overrides={"REMOTE_USER": "test"}, + headers={"content-type": "application/json; charset=utf-8"}, + ) + + log = ( + session.query(Log) + .filter( + Log.dag_id == dag_id, + Log.run_id == dag_run_id, + Log.event == "api.update_dag_run_state", + ) + .order_by(Log.id.desc()) + .first() + ) + assert log.extra == json.dumps(request_json) + def test_schema_validation_error_raises(self, dag_maker, session): dag_id = "TEST_DAG_ID" dag_run_id = "TEST_DAG_RUN_ID" diff --git a/tests/api_connexion/endpoints/test_extra_link_endpoint.py b/tests/api_connexion/endpoints/test_extra_link_endpoint.py index 76b2a09603609..cafdc8979ecd2 100644 --- a/tests/api_connexion/endpoints/test_extra_link_endpoint.py +++ b/tests/api_connexion/endpoints/test_extra_link_endpoint.py @@ -27,6 +27,7 @@ from airflow.models.xcom import XCom from airflow.plugins_manager import AirflowPlugin from airflow.security import permissions +from airflow.serialization.serialized_objects import SerializedBaseOperator from airflow.timetables.base import DataInterval from airflow.utils import timezone from airflow.utils.state import DagRunState @@ -62,7 +63,7 @@ def configured_app(minimal_app_for_api): delete_user(app, username="test_no_permissions") # type: ignore -class TestGetExtraLinks: +class BaseGetExtraLinks: @pytest.fixture(autouse=True) def setup_attrs(self, configured_app, session) -> None: self.default_time = timezone.datetime(2020, 1, 1) @@ -72,7 +73,7 @@ def setup_attrs(self, configured_app, session) -> None: self.app = configured_app - self.dag = self._create_dag() + self.dag = self._create_dag() # type: ignore self.app.dag_bag = DagBag(os.devnull, include_examples=False) self.app.dag_bag.dags = {self.dag.dag_id: self.dag} # type: ignore @@ -94,6 +95,8 @@ def teardown_method(self) -> None: clear_db_runs() clear_db_xcom() + +class TestGetExtraLinks(BaseGetExtraLinks): def _create_dag(self): with 
DAG(dag_id="TEST_DAG_ID", schedule=None, default_args={"start_date": self.default_time}) as dag: CustomOperator(task_id="TEST_SINGLE_LINK", bash_command="TEST_LINK_VALUE") @@ -241,3 +244,60 @@ class AirflowTestPlugin(AirflowPlugin): "TEST_DAG_ID/TEST_SINGLE_LINK/2020-01-01T00%3A00%3A00%2B00%3A00" ), } == response.json + + +class TestMappedTaskExtraLinks(BaseGetExtraLinks): + def _create_dag(self): + with DAG(dag_id="TEST_DAG_ID", schedule=None, default_args={"start_date": self.default_time}) as dag: + # Mapped task expanded over a list of bash_commands + CustomOperator.partial(task_id="TEST_MAPPED_TASK").expand( + bash_command=["TEST_LINK_VALUE_3", "TEST_LINK_VALUE_4"] + ) + return SerializedBaseOperator.deserialize(SerializedBaseOperator.serialize(dag)) + + @pytest.mark.parametrize( + "map_index, expected_status, expected_json", + [ + ( + 0, + 200, + { + "Google Custom": "http://google.com/custom_base_link?search=TEST_LINK_VALUE_3", + "google": "https://www.google.com", + }, + ), + ( + 1, + 200, + { + "Google Custom": "http://google.com/custom_base_link?search=TEST_LINK_VALUE_4", + "google": "https://www.google.com", + }, + ), + (6, 404, {"detail": 'DAG Run with ID = "TEST_DAG_RUN_ID" not found'}), + ], + ) + @mock_plugin_manager(plugins=[]) + def test_mapped_task_links(self, map_index, expected_status, expected_json): + """Parameterized test for mapped task extra links.""" + # Set XCom data for different map indices + if map_index < 2: + XCom.set( + key="search_query", + value=f"TEST_LINK_VALUE_{map_index + 3}", + task_id="TEST_MAPPED_TASK", + dag_id="TEST_DAG_ID", + run_id="TEST_DAG_RUN_ID", + map_index=map_index, + ) + + response = self.client.get( + f"/api/v1/dags/TEST_DAG_ID/dagRuns/TEST_DAG_RUN_ID/taskInstances/TEST_MAPPED_TASK/links?map_index={map_index}", + environ_overrides={"REMOTE_USER": "test"}, + ) + + assert response.status_code == expected_status + if map_index < 2: + assert response.json == expected_json + else: + assert response.json["detail"] == expected_json["detail"] diff --git a/tests/conftest.py b/tests/conftest.py index 6d064fa0a9bd2..6c17c6f4036d4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1487,3 +1487,15 @@ def clean_dags_and_dagruns(): yield # Test runs here clear_db_dags() clear_db_runs() + + +@pytest.fixture +def clean_executor_loader(): + """Clean the executor_loader state, as it stores global variables in the module, causing side effects for some tests.""" + from airflow.executors.executor_loader import ExecutorLoader + from tests.test_utils.executor_loader import clean_executor_loader_module + + clean_executor_loader_module() + yield # Test runs here + clean_executor_loader_module() + ExecutorLoader.init_executors() diff --git a/tests/core/test_settings.py b/tests/core/test_settings.py index 483ef24e25f7f..3a0c33b08b6ab 100644 --- a/tests/core/test_settings.py +++ b/tests/core/test_settings.py @@ -31,7 +31,7 @@ from airflow.api_internal.internal_api_call import InternalApiConfig from airflow.configuration import conf from airflow.exceptions import AirflowClusterPolicyViolation, AirflowConfigException -from airflow.settings import _ENABLE_AIP_44, TracebackSession, is_usage_data_collection_enabled +from airflow.settings import _ENABLE_AIP_44, TracebackSession from airflow.utils.session import create_session from tests.test_utils.config import conf_vars @@ -368,26 +368,3 @@ def test_create_session_ctx_mgr_no_call_methods(mock_new, clear_internal_api): assert session == m method_calls = [x[0] for x in m.method_calls] assert method_calls == [] # 
commit and close not called when using internal API - - -@pytest.mark.parametrize( - "env_var, conf_setting, is_enabled", - [ - ("false", "True", False), # env forces disable - ("false", "False", False), # Both force disable - ("False ", "False", False), # Both force disable - ("true", "True", True), # Both enable - ("true", "False", False), # Conf forces disable - (None, "True", True), # Default env, conf enables - (None, "False", False), # Default env, conf disables - ], -) -def test_usage_data_collection_disabled(env_var, conf_setting, is_enabled, clear_internal_api): - conf_patch = conf_vars({("usage_data_collection", "enabled"): conf_setting}) - - if env_var is not None: - with conf_patch, patch.dict(os.environ, {"SCARF_ANALYTICS": env_var}): - assert is_usage_data_collection_enabled() == is_enabled - else: - with conf_patch: - assert is_usage_data_collection_enabled() == is_enabled diff --git a/tests/executors/test_executor_loader.py b/tests/executors/test_executor_loader.py index 2192487a01cf8..dc60b9cc507ae 100644 --- a/tests/executors/test_executor_loader.py +++ b/tests/executors/test_executor_loader.py @@ -17,7 +17,6 @@ from __future__ import annotations from contextlib import nullcontext -from importlib import reload from unittest import mock import pytest @@ -25,7 +24,7 @@ from airflow import plugins_manager from airflow.exceptions import AirflowConfigException from airflow.executors import executor_loader -from airflow.executors.executor_loader import ConnectorSource, ExecutorLoader, ExecutorName +from airflow.executors.executor_loader import ConnectorSource, ExecutorName from airflow.executors.local_executor import LocalExecutor from airflow.providers.amazon.aws.executors.ecs.ecs_executor import AwsEcsExecutor from airflow.providers.celery.executors.celery_executor import CeleryExecutor @@ -50,24 +49,12 @@ class FakePlugin(plugins_manager.AirflowPlugin): executors = [FakeExecutor] +@pytest.mark.usefixtures("clean_executor_loader") class TestExecutorLoader: - def setup_method(self) -> None: - from airflow.executors import executor_loader - - reload(executor_loader) - global ExecutorLoader - ExecutorLoader = executor_loader.ExecutorLoader # type: ignore - - def teardown_method(self) -> None: - from airflow.executors import executor_loader - - reload(executor_loader) - ExecutorLoader.init_executors() - def test_no_executor_configured(self): with conf_vars({("core", "executor"): None}): with pytest.raises(AirflowConfigException, match=r".*not found in config$"): - ExecutorLoader.get_default_executor() + executor_loader.ExecutorLoader.get_default_executor() @pytest.mark.parametrize( "executor_name", @@ -81,18 +68,20 @@ def test_no_executor_configured(self): ) def test_should_support_executor_from_core(self, executor_name): with conf_vars({("core", "executor"): executor_name}): - executor = ExecutorLoader.get_default_executor() + executor = executor_loader.ExecutorLoader.get_default_executor() assert executor is not None assert executor_name == executor.__class__.__name__ assert executor.name is not None - assert executor.name == ExecutorName(ExecutorLoader.executors[executor_name], alias=executor_name) + assert executor.name == ExecutorName( + executor_loader.ExecutorLoader.executors[executor_name], alias=executor_name + ) assert executor.name.connector_source == ConnectorSource.CORE @mock.patch("airflow.plugins_manager.plugins", [FakePlugin()]) @mock.patch("airflow.plugins_manager.executors_modules", None) def test_should_support_plugins(self): with conf_vars({("core", 
"executor"): f"{TEST_PLUGIN_NAME}.FakeExecutor"}): - executor = ExecutorLoader.get_default_executor() + executor = executor_loader.ExecutorLoader.get_default_executor() assert executor is not None assert "FakeExecutor" == executor.__class__.__name__ assert executor.name is not None @@ -101,7 +90,7 @@ def test_should_support_plugins(self): def test_should_support_custom_path(self): with conf_vars({("core", "executor"): "tests.executors.test_executor_loader.FakeExecutor"}): - executor = ExecutorLoader.get_default_executor() + executor = executor_loader.ExecutorLoader.get_default_executor() assert executor is not None assert "FakeExecutor" == executor.__class__.__name__ assert executor.name is not None @@ -172,17 +161,17 @@ def test_should_support_custom_path(self): ) def test_get_hybrid_executors_from_config(self, executor_config, expected_executors_list): with conf_vars({("core", "executor"): executor_config}): - executors = ExecutorLoader._get_executor_names() + executors = executor_loader.ExecutorLoader._get_executor_names() assert executors == expected_executors_list def test_init_executors(self): with conf_vars({("core", "executor"): "CeleryExecutor"}): - executors = ExecutorLoader.init_executors() - executor_name = ExecutorLoader.get_default_executor_name() + executors = executor_loader.ExecutorLoader.init_executors() + executor_name = executor_loader.ExecutorLoader.get_default_executor_name() assert len(executors) == 1 assert isinstance(executors[0], CeleryExecutor) - assert "CeleryExecutor" in ExecutorLoader.executors - assert ExecutorLoader.executors["CeleryExecutor"] == executor_name.module_path + assert "CeleryExecutor" in executor_loader.ExecutorLoader.executors + assert executor_loader.ExecutorLoader.executors["CeleryExecutor"] == executor_name.module_path assert isinstance(executor_loader._loaded_executors[executor_name], CeleryExecutor) @pytest.mark.parametrize( @@ -202,7 +191,7 @@ def test_get_hybrid_executors_from_config_duplicates_should_fail(self, executor_ with pytest.raises( AirflowConfigException, match=r".+Duplicate executors are not yet supported.+" ): - ExecutorLoader._get_executor_names() + executor_loader.ExecutorLoader._get_executor_names() @pytest.mark.parametrize( "executor_config", @@ -218,7 +207,7 @@ def test_get_hybrid_executors_from_config_duplicates_should_fail(self, executor_ def test_get_hybrid_executors_from_config_core_executors_bad_config_format(self, executor_config): with conf_vars({("core", "executor"): executor_config}): with pytest.raises(AirflowConfigException): - ExecutorLoader._get_executor_names() + executor_loader.ExecutorLoader._get_executor_names() @pytest.mark.parametrize( ("executor_config", "expected_value"), @@ -234,7 +223,7 @@ def test_get_hybrid_executors_from_config_core_executors_bad_config_format(self, ) def test_should_support_import_executor_from_core(self, executor_config, expected_value): with conf_vars({("core", "executor"): executor_config}): - executor, import_source = ExecutorLoader.import_default_executor_cls() + executor, import_source = executor_loader.ExecutorLoader.import_default_executor_cls() assert expected_value == executor.__name__ assert import_source == ConnectorSource.CORE @@ -249,7 +238,7 @@ def test_should_support_import_executor_from_core(self, executor_config, expecte ) def test_should_support_import_plugins(self, executor_config): with conf_vars({("core", "executor"): executor_config}): - executor, import_source = ExecutorLoader.import_default_executor_cls() + executor, import_source = 
executor_loader.ExecutorLoader.import_default_executor_cls() assert "FakeExecutor" == executor.__name__ assert import_source == ConnectorSource.PLUGIN @@ -263,7 +252,7 @@ def test_should_support_import_plugins(self, executor_config): ) def test_should_support_import_custom_path(self, executor_config): with conf_vars({("core", "executor"): executor_config}): - executor, import_source = ExecutorLoader.import_default_executor_cls() + executor, import_source = executor_loader.ExecutorLoader.import_default_executor_cls() assert "FakeExecutor" == executor.__name__ assert import_source == ConnectorSource.CUSTOM_PATH @@ -272,7 +261,7 @@ def test_should_support_import_custom_path(self, executor_config): @pytest.mark.parametrize("executor", [FakeExecutor, FakeSingleThreadedExecutor]) def test_validate_database_executor_compatibility_general(self, monkeypatch, executor): monkeypatch.delenv("_AIRFLOW__SKIP_DATABASE_EXECUTOR_COMPATIBILITY_CHECK") - ExecutorLoader.validate_database_executor_compatibility(executor) + executor_loader.ExecutorLoader.validate_database_executor_compatibility(executor) @pytest.mark.db_test @pytest.mark.backend("sqlite") @@ -290,24 +279,32 @@ def test_validate_database_executor_compatibility_general(self, monkeypatch, exe def test_validate_database_executor_compatibility_sqlite(self, monkeypatch, executor, expectation): monkeypatch.delenv("_AIRFLOW__SKIP_DATABASE_EXECUTOR_COMPATIBILITY_CHECK") with expectation: - ExecutorLoader.validate_database_executor_compatibility(executor) + executor_loader.ExecutorLoader.validate_database_executor_compatibility(executor) def test_load_executor(self): with conf_vars({("core", "executor"): "LocalExecutor"}): - ExecutorLoader.init_executors() - assert isinstance(ExecutorLoader.load_executor("LocalExecutor"), LocalExecutor) - assert isinstance(ExecutorLoader.load_executor(executor_loader._executor_names[0]), LocalExecutor) - assert isinstance(ExecutorLoader.load_executor(None), LocalExecutor) + executor_loader.ExecutorLoader.init_executors() + assert isinstance(executor_loader.ExecutorLoader.load_executor("LocalExecutor"), LocalExecutor) + assert isinstance( + executor_loader.ExecutorLoader.load_executor(executor_loader._executor_names[0]), + LocalExecutor, + ) + assert isinstance(executor_loader.ExecutorLoader.load_executor(None), LocalExecutor) def test_load_executor_alias(self): with conf_vars({("core", "executor"): "local_exec:airflow.executors.local_executor.LocalExecutor"}): - ExecutorLoader.init_executors() - assert isinstance(ExecutorLoader.load_executor("local_exec"), LocalExecutor) + executor_loader.ExecutorLoader.init_executors() + assert isinstance(executor_loader.ExecutorLoader.load_executor("local_exec"), LocalExecutor) + assert isinstance( + executor_loader.ExecutorLoader.load_executor( + "airflow.executors.local_executor.LocalExecutor" + ), + LocalExecutor, + ) assert isinstance( - ExecutorLoader.load_executor("airflow.executors.local_executor.LocalExecutor"), + executor_loader.ExecutorLoader.load_executor(executor_loader._executor_names[0]), LocalExecutor, ) - assert isinstance(ExecutorLoader.load_executor(executor_loader._executor_names[0]), LocalExecutor) @mock.patch("airflow.providers.amazon.aws.executors.ecs.ecs_executor.AwsEcsExecutor", autospec=True) def test_load_custom_executor_with_classname(self, mock_executor): @@ -319,15 +316,16 @@ def test_load_custom_executor_with_classname(self, mock_executor): ): "my_alias:airflow.providers.amazon.aws.executors.ecs.ecs_executor.AwsEcsExecutor" } ): - 
ExecutorLoader.init_executors() - assert isinstance(ExecutorLoader.load_executor("my_alias"), AwsEcsExecutor) - assert isinstance(ExecutorLoader.load_executor("AwsEcsExecutor"), AwsEcsExecutor) + executor_loader.ExecutorLoader.init_executors() + assert isinstance(executor_loader.ExecutorLoader.load_executor("my_alias"), AwsEcsExecutor) + assert isinstance(executor_loader.ExecutorLoader.load_executor("AwsEcsExecutor"), AwsEcsExecutor) assert isinstance( - ExecutorLoader.load_executor( + executor_loader.ExecutorLoader.load_executor( "airflow.providers.amazon.aws.executors.ecs.ecs_executor.AwsEcsExecutor" ), AwsEcsExecutor, ) assert isinstance( - ExecutorLoader.load_executor(executor_loader._executor_names[0]), AwsEcsExecutor + executor_loader.ExecutorLoader.load_executor(executor_loader._executor_names[0]), + AwsEcsExecutor, ) diff --git a/tests/models/test_baseoperatormeta.py b/tests/models/test_baseoperatormeta.py index 5244e86b2c386..52e45dd1cf325 100644 --- a/tests/models/test_baseoperatormeta.py +++ b/tests/models/test_baseoperatormeta.py @@ -18,6 +18,7 @@ from __future__ import annotations import datetime +import threading from typing import TYPE_CHECKING, Any from unittest.mock import patch @@ -211,3 +212,20 @@ def say_hello(**context): mock_log.warning.assert_called_once_with( "HelloWorldOperator.execute cannot be called outside TaskInstance!" ) + + def test_thread_local_executor_safeguard(self): + class TestExecutorSafeguardThread(threading.Thread): + def __init__(self): + threading.Thread.__init__(self) + self.executor_safeguard = ExecutorSafeguard() + + def run(self): + class Wrapper: + def wrapper_test_func(self, *args, **kwargs): + print("test") + + wrap_func = self.executor_safeguard.decorator(Wrapper.wrapper_test_func) + wrap_func(Wrapper(), Wrapper__sentinel="abc") + + # Test thread local caller value is set properly + TestExecutorSafeguardThread().start() diff --git a/tests/models/test_dag.py b/tests/models/test_dag.py index f23eac76b6e93..5f721b61d2691 100644 --- a/tests/models/test_dag.py +++ b/tests/models/test_dag.py @@ -28,7 +28,6 @@ import weakref from contextlib import redirect_stdout from datetime import timedelta -from importlib import reload from io import StringIO from pathlib import Path from typing import TYPE_CHECKING @@ -56,7 +55,6 @@ RemovedInAirflow3Warning, UnknownExecutorException, ) -from airflow.executors import executor_loader from airflow.executors.local_executor import LocalExecutor from airflow.executors.sequential_executor import SequentialExecutor from airflow.models.baseoperator import BaseOperator @@ -3324,10 +3322,10 @@ def test_dataset_expression(self, session: Session) -> None: ] } + @pytest.mark.usefixtures("clean_executor_loader") @mock.patch("airflow.models.dag.run_job") def test_dag_executors(self, run_job_mock): dag = DAG(dag_id="test", schedule=None) - reload(executor_loader) with conf_vars({("core", "executor"): "SequentialExecutor"}): dag.run() assert isinstance(run_job_mock.call_args_list[0].kwargs["job"].executor, SequentialExecutor) diff --git a/tests/plugins/test_plugins_manager.py b/tests/plugins/test_plugins_manager.py index 2426352fc8531..2e7ddd9bac848 100644 --- a/tests/plugins/test_plugins_manager.py +++ b/tests/plugins/test_plugins_manager.py @@ -28,6 +28,7 @@ import pytest +from airflow.exceptions import RemovedInAirflow3Warning from airflow.hooks.base import BaseHook from airflow.listeners.listener import get_listener_manager from airflow.plugins_manager import AirflowPlugin @@ -174,6 +175,11 @@ def clean_plugins(self): 
plugins_manager.loaded_plugins = set() plugins_manager.plugins = [] + yield + plugins_manager.loaded_plugins = set() + + plugins_manager.registered_ti_dep_classes = None + plugins_manager.plugins = None def test_no_log_when_no_plugins(self, caplog): with mock_plugin_manager(plugins=[]): @@ -270,6 +276,17 @@ class AirflowAdminMenuLinksPlugin(AirflowPlugin): ), ] + def test_deprecate_ti_deps(self): + class DeprecatedTIDeps(AirflowPlugin): + name = "ti_deps" + + ti_deps = [mock.MagicMock()] + + with mock_plugin_manager(plugins=[DeprecatedTIDeps()]), pytest.warns(RemovedInAirflow3Warning): + from airflow import plugins_manager + + plugins_manager.initialize_ti_deps_plugins() + def test_should_not_warning_about_fab_plugins(self, caplog): class AirflowAdminViewsPlugin(AirflowPlugin): name = "test_admin_views_plugin" diff --git a/tests/providers/amazon/aws/system/utils/test_helpers.py b/tests/providers/amazon/aws/system/utils/test_helpers.py index f48de1788b74c..3af3720688a09 100644 --- a/tests/providers/amazon/aws/system/utils/test_helpers.py +++ b/tests/providers/amazon/aws/system/utils/test_helpers.py @@ -24,7 +24,7 @@ import os import sys from io import StringIO -from unittest.mock import ANY, patch +from unittest.mock import patch import pytest from moto import mock_aws @@ -79,8 +79,15 @@ def test_fetch_variable_success( ) -> None: mock_getenv.return_value = env_value or ssm_value - result = utils.fetch_variable(ANY, default_value) if default_value else utils.fetch_variable(ANY_STR) + utils._fetch_from_ssm.cache_clear() + result = ( + utils.fetch_variable("some_key", default_value) + if default_value + else utils.fetch_variable(ANY_STR) + ) + + utils._fetch_from_ssm.cache_clear() assert result == expected_result def test_fetch_variable_no_value_found_raises_exception(self): diff --git a/tests/serialization/test_dag_serialization.py b/tests/serialization/test_dag_serialization.py index d7f09c20ff9d3..58f16d80f8c89 100644 --- a/tests/serialization/test_dag_serialization.py +++ b/tests/serialization/test_dag_serialization.py @@ -408,6 +408,21 @@ def timetable_plugin(monkeypatch): ) +@pytest.fixture +def custom_ti_dep(monkeypatch): + """Patch plugins manager to always and only return our custom timetable.""" + from test_plugin import CustomTestTriggerRule + + from airflow import plugins_manager + + monkeypatch.setattr(plugins_manager, "initialize_ti_deps_plugins", lambda: None) + monkeypatch.setattr( + plugins_manager, + "registered_ti_dep_classes", + {"test_plugin.CustomTestTriggerRule": CustomTestTriggerRule}, + ) + + # TODO: (potiuk) - AIP-44 - check why this test hangs @pytest.mark.skip_if_database_isolation_mode class TestStringifiedDAGs: @@ -430,6 +445,7 @@ def setup_test_cases(self): ) @pytest.mark.db_test + @pytest.mark.filterwarnings("ignore::airflow.exceptions.RemovedInAirflow3Warning") def test_serialization(self): """Serialization and deserialization should work for every DAG and Operator.""" dags = collect_dags() @@ -539,6 +555,7 @@ def sorted_serialized_dag(dag_dict: dict): return actual, expected @pytest.mark.db_test + @pytest.mark.filterwarnings("ignore::airflow.exceptions.RemovedInAirflow3Warning") def test_deserialization_across_process(self): """A serialized DAG can be deserialized in another process.""" @@ -1596,6 +1613,7 @@ def test_deps_sorted(self): "airflow.ti_deps.deps.trigger_rule_dep.TriggerRuleDep", ] + @pytest.mark.filterwarnings("ignore::airflow.exceptions.RemovedInAirflow3Warning") def test_error_on_unregistered_ti_dep_serialization(self): # trigger rule not 
registered through the plugin system will not be serialized class DummyTriggerRule(BaseTIDep): @@ -1634,6 +1652,8 @@ def test_error_on_unregistered_ti_dep_deserialization(self): SerializedBaseOperator.deserialize_operator(serialize_op) @pytest.mark.db_test + @pytest.mark.usefixtures("custom_ti_dep") + @pytest.mark.filterwarnings("ignore::airflow.exceptions.RemovedInAirflow3Warning") def test_serialize_and_deserialize_custom_ti_deps(self): from test_plugin import CustomTestTriggerRule diff --git a/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py b/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py index fcebc8c40a0d4..2b7bce2fecde8 100644 --- a/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py +++ b/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py @@ -127,7 +127,7 @@ def create_opensearch_policies(bedrock_role_arn: str, collection_name: str, poli def _create_security_policy(name, policy_type, policy): try: - aoss_client.create_security_policy(name=name, policy=json.dumps(policy), type=policy_type) + aoss_client.conn.create_security_policy(name=name, policy=json.dumps(policy), type=policy_type) except ClientError as e: if e.response["Error"]["Code"] == "ConflictException": log.info("OpenSearch security policy %s already exists.", name) @@ -135,7 +135,7 @@ def _create_security_policy(name, policy_type, policy): def _create_access_policy(name, policy_type, policy): try: - aoss_client.create_access_policy(name=name, policy=json.dumps(policy), type=policy_type) + aoss_client.conn.create_access_policy(name=name, policy=json.dumps(policy), type=policy_type) except ClientError as e: if e.response["Error"]["Code"] == "ConflictException": log.info("OpenSearch data access policy %s already exists.", name) @@ -204,9 +204,9 @@ def create_collection(collection_name: str): :param collection_name: The name of the Collection to create. """ log.info("\nCreating collection: %s.", collection_name) - return aoss_client.create_collection(name=collection_name, type="VECTORSEARCH")["createCollectionDetail"][ - "id" - ] + return aoss_client.conn.create_collection(name=collection_name, type="VECTORSEARCH")[ + "createCollectionDetail" + ]["id"] @task @@ -317,7 +317,7 @@ def get_collection_arn(collection_id: str): """ return next( colxn["arn"] - for colxn in aoss_client.list_collections()["collectionSummaries"] + for colxn in aoss_client.conn.list_collections()["collectionSummaries"] if colxn["id"] == collection_id ) @@ -336,7 +336,9 @@ def delete_data_source(knowledge_base_id: str, data_source_id: str): :param data_source_id: The unique identifier of the data source to delete. """ log.info("Deleting data source %s from Knowledge Base %s.", data_source_id, knowledge_base_id) - bedrock_agent_client.delete_data_source(dataSourceId=data_source_id, knowledgeBaseId=knowledge_base_id) + bedrock_agent_client.conn.delete_data_source( + dataSourceId=data_source_id, knowledgeBaseId=knowledge_base_id + ) # [END howto_operator_bedrock_delete_data_source] @@ -355,7 +357,7 @@ def delete_knowledge_base(knowledge_base_id: str): :param knowledge_base_id: The unique identifier of the knowledge base to delete. 
""" log.info("Deleting Knowledge Base %s.", knowledge_base_id) - bedrock_agent_client.delete_knowledge_base(knowledgeBaseId=knowledge_base_id) + bedrock_agent_client.conn.delete_knowledge_base(knowledgeBaseId=knowledge_base_id) # [END howto_operator_bedrock_delete_knowledge_base] @@ -393,7 +395,7 @@ def delete_collection(collection_id: str): :param collection_id: ID of the collection to be indexed. """ log.info("Deleting collection %s.", collection_id) - aoss_client.delete_collection(id=collection_id) + aoss_client.conn.delete_collection(id=collection_id) @task(trigger_rule=TriggerRule.ALL_DONE) @@ -404,7 +406,7 @@ def delete_opensearch_policies(collection_name: str): :param collection_name: All policies in the given collection name will be deleted. """ - access_policies = aoss_client.list_access_policies( + access_policies = aoss_client.conn.list_access_policies( type="data", resource=[f"collection/{collection_name}"] )["accessPolicySummaries"] log.info("Found access policies for %s: %s", collection_name, access_policies) @@ -412,10 +414,10 @@ def delete_opensearch_policies(collection_name: str): raise Exception("No access policies found?") for policy in access_policies: log.info("Deleting access policy for %s: %s", collection_name, policy["name"]) - aoss_client.delete_access_policy(name=policy["name"], type="data") + aoss_client.conn.delete_access_policy(name=policy["name"], type="data") for policy_type in ["encryption", "network"]: - policies = aoss_client.list_security_policies( + policies = aoss_client.conn.list_security_policies( type=policy_type, resource=[f"collection/{collection_name}"] )["securityPolicySummaries"] if not policies: @@ -423,7 +425,7 @@ def delete_opensearch_policies(collection_name: str): log.info("Found %s security policies for %s: %s", policy_type, collection_name, policies) for policy in policies: log.info("Deleting %s security policy for %s: %s", policy_type, collection_name, policy["name"]) - aoss_client.delete_security_policy(name=policy["name"], type=policy_type) + aoss_client.conn.delete_security_policy(name=policy["name"], type=policy_type) with DAG( @@ -436,8 +438,8 @@ def delete_opensearch_policies(collection_name: str): test_context = sys_test_context_task() env_id = test_context["ENV_ID"] - aoss_client = OpenSearchServerlessHook(aws_conn_id=None).conn - bedrock_agent_client = BedrockAgentHook(aws_conn_id=None).conn + aoss_client = OpenSearchServerlessHook(aws_conn_id=None) + bedrock_agent_client = BedrockAgentHook(aws_conn_id=None) region_name = boto3.session.Session().region_name diff --git a/tests/system/providers/amazon/aws/utils/__init__.py b/tests/system/providers/amazon/aws/utils/__init__.py index 8b4114fc90ad0..411f92ab7bf3a 100644 --- a/tests/system/providers/amazon/aws/utils/__init__.py +++ b/tests/system/providers/amazon/aws/utils/__init__.py @@ -16,6 +16,7 @@ # under the License. from __future__ import annotations +import functools import inspect import json import logging @@ -92,6 +93,7 @@ def _validate_env_id(env_id: str) -> str: return env_id.lower() +@functools.cache def _fetch_from_ssm(key: str, test_name: str | None = None) -> str: """ Test values are stored in the SSM Value as a JSON-encoded dict of key/value pairs. diff --git a/tests/test_utils/executor_loader.py b/tests/test_utils/executor_loader.py new file mode 100644 index 0000000000000..cc28223b7ce78 --- /dev/null +++ b/tests/test_utils/executor_loader.py @@ -0,0 +1,33 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
diff --git a/tests/test_utils/executor_loader.py b/tests/test_utils/executor_loader.py
new file mode 100644
index 0000000000000..cc28223b7ce78
--- /dev/null
+++ b/tests/test_utils/executor_loader.py
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import airflow.executors.executor_loader as executor_loader
+
+if TYPE_CHECKING:
+    from airflow.executors.executor_utils import ExecutorName
+
+
+def clean_executor_loader_module():
+    """Clean the executor_loader state, as it stores global variables in the module, causing side effects for some tests."""
+    executor_loader._alias_to_executors: dict[str, ExecutorName] = {}
+    executor_loader._module_to_executors: dict[str, ExecutorName] = {}
+    executor_loader._classname_to_executors: dict[str, ExecutorName] = {}
+    executor_loader._executor_names: list[ExecutorName] = []
diff --git a/tests/test_utils/mock_operators.py b/tests/test_utils/mock_operators.py
index cd816707a59f5..f254d22484c0f 100644
--- a/tests/test_utils/mock_operators.py
+++ b/tests/test_utils/mock_operators.py
@@ -22,6 +22,7 @@
 import attr

 from airflow.models.baseoperator import BaseOperator
+from airflow.models.mappedoperator import MappedOperator
 from airflow.models.xcom import XCom
 from tests.test_utils.compat import BaseOperatorLink

@@ -137,7 +138,11 @@ class CustomOpLink(BaseOperatorLink):

     def get_link(self, operator, *, ti_key):
         search_query = XCom.get_one(
-            task_id=ti_key.task_id, dag_id=ti_key.dag_id, run_id=ti_key.run_id, key="search_query"
+            task_id=ti_key.task_id,
+            dag_id=ti_key.dag_id,
+            run_id=ti_key.run_id,
+            map_index=ti_key.map_index,
+            key="search_query",
         )
         if not search_query:
             return None
@@ -153,7 +158,11 @@ def operator_extra_links(self):
         """
         Return operator extra links
         """
-        if isinstance(self.bash_command, str) or self.bash_command is None:
+        if (
+            isinstance(self, MappedOperator)
+            or isinstance(self.bash_command, str)
+            or self.bash_command is None
+        ):
             return (CustomOpLink(),)
         return (CustomBaseIndexOpLink(i) for i, _ in enumerate(self.bash_command))

diff --git a/tests/ti_deps/deps/test_ready_to_reschedule_dep.py b/tests/ti_deps/deps/test_ready_to_reschedule_dep.py
index 568d6abf025c7..9241145f7f532 100644
--- a/tests/ti_deps/deps/test_ready_to_reschedule_dep.py
+++ b/tests/ti_deps/deps/test_ready_to_reschedule_dep.py
@@ -48,6 +48,7 @@ def side_effect(*args, **kwargs):
         yield m


+@pytest.mark.usefixtures("clean_executor_loader")
 class TestNotInReschedulePeriodDep:
     @pytest.fixture(autouse=True)
     def setup_test_cases(self, request, create_task_instance):
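
The `clean_executor_loader` fixture requested via `@pytest.mark.usefixtures` above is not part of this diff. A plausible conftest.py wiring for it, assuming it simply calls the new `clean_executor_loader_module()` helper before and after each test (the exact fixture definition may differ), could look like this:

import pytest

from tests.test_utils.executor_loader import clean_executor_loader_module


@pytest.fixture
def clean_executor_loader():
    """Reset the module-level ExecutorLoader caches around a test to avoid cross-test leakage."""
    clean_executor_loader_module()  # start from a clean state
    yield
    clean_executor_loader_module()  # drop whatever executors the test loaded
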
diff --git a/tests/utils/test_decorators.py b/tests/utils/test_decorators.py
new file mode 100644
index 0000000000000..19d3ec31d0311
--- /dev/null
+++ b/tests/utils/test_decorators.py
@@ -0,0 +1,128 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+from airflow.decorators import task
+
+if TYPE_CHECKING:
+    from airflow.decorators.base import Task, TaskDecorator
+
+_CONDITION_DECORATORS = frozenset({"skip_if", "run_if"})
+_NO_SOURCE_DECORATORS = frozenset({"sensor"})
+DECORATORS = sorted(
+    set(x for x in dir(task) if not x.startswith("_")) - _CONDITION_DECORATORS - _NO_SOURCE_DECORATORS
+)
+DECORATORS_USING_SOURCE = ("external_python", "virtualenv", "branch_virtualenv", "branch_external_python")
+
+
+@pytest.fixture
+def decorator(request: pytest.FixtureRequest) -> TaskDecorator:
+    decorator_factory = getattr(task, request.param)
+
+    kwargs = {}
+    if "external" in request.param:
+        kwargs["python"] = "python3"
+    return decorator_factory(**kwargs)
+
+
+@pytest.mark.parametrize("decorator", DECORATORS_USING_SOURCE, indirect=["decorator"])
+def test_task_decorator_using_source(decorator: TaskDecorator):
+    @decorator
+    def f():
+        return ["some_task"]
+
+    assert parse_python_source(f, "decorator") == 'def f():\n    return ["some_task"]\n'
+
+
+@pytest.mark.parametrize("decorator", DECORATORS, indirect=["decorator"])
+def test_skip_if(decorator: TaskDecorator):
+    @task.skip_if(lambda context: True)
+    @decorator
+    def f():
+        return "hello world"
+
+    assert parse_python_source(f, "decorator") == 'def f():\n    return "hello world"\n'
+
+
+@pytest.mark.parametrize("decorator", DECORATORS, indirect=["decorator"])
+def test_run_if(decorator: TaskDecorator):
+    @task.run_if(lambda context: True)
+    @decorator
+    def f():
+        return "hello world"
+
+    assert parse_python_source(f, "decorator") == 'def f():\n    return "hello world"\n'
+
+
+def test_skip_if_and_run_if():
+    @task.skip_if(lambda context: True)
+    @task.run_if(lambda context: True)
+    @task.virtualenv()
+    def f():
+        return "hello world"
+
+    assert parse_python_source(f) == 'def f():\n    return "hello world"\n'
+
+
+def test_run_if_and_skip_if():
+    @task.run_if(lambda context: True)
+    @task.skip_if(lambda context: True)
+    @task.virtualenv()
+    def f():
+        return "hello world"
+
+    assert parse_python_source(f) == 'def f():\n    return "hello world"\n'
+
+
+def test_skip_if_allow_decorator():
+    def non_task_decorator(func):
+        return func
+
+    @task.skip_if(lambda context: True)
+    @task.virtualenv()
+    @non_task_decorator
+    def f():
+        return "hello world"
+
+    assert parse_python_source(f) == '@non_task_decorator\ndef f():\n    return "hello world"\n'
+
+
+def test_run_if_allow_decorator():
+    def non_task_decorator(func):
+        return func
+
+    @task.run_if(lambda context: True)
+    @task.virtualenv()
+    @non_task_decorator
+    def f():
+        return "hello world"
+
+    assert parse_python_source(f) == '@non_task_decorator\ndef f():\n    return "hello world"\n'
+
+
+def parse_python_source(task: Task, custom_operator_name: str | None = None) -> str:
+    operator = task().operator
+    if custom_operator_name:
+        custom_operator_name = (
+            custom_operator_name if custom_operator_name.startswith("@") else f"@{custom_operator_name}"
+        )
+        operator.__dict__["custom_operator_name"] = custom_operator_name
+    return operator.get_python_source()
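
The tests above check that stacking `@task.skip_if` / `@task.run_if` on top of other task decorators keeps `get_python_source()` intact. As a usage illustration of the pattern being tested (a sketch only; the DAG id and the run-type condition are made up, and the structure mirrors the `run_if` over `virtualenv` combination from the tests):

from __future__ import annotations

import pendulum

from airflow.decorators import dag, task


@dag(schedule=None, start_date=pendulum.datetime(2024, 1, 1, tz="UTC"), catchup=False)
def conditional_example():
    # The condition callable receives the task context; the task runs only for manual triggers,
    # otherwise it is skipped.
    @task.run_if(lambda context: context["dag_run"].run_type == "manual")
    @task.virtualenv()
    def only_on_manual_runs():
        return "hello world"

    only_on_manual_runs()


conditional_example()
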
diff --git a/tests/utils/test_log_handlers.py b/tests/utils/test_log_handlers.py
index d3651370d657e..95483f2285fa8 100644
--- a/tests/utils/test_log_handlers.py
+++ b/tests/utils/test_log_handlers.py
@@ -34,7 +34,7 @@
 from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG
 from airflow.exceptions import RemovedInAirflow3Warning
-from airflow.executors import executor_loader
+from airflow.executors import executor_constants, executor_loader
 from airflow.jobs.job import Job
 from airflow.jobs.triggerer_job_runner import TriggererJobRunner
 from airflow.models.dag import DAG
@@ -202,6 +202,95 @@ def task_callable(ti):
         # Remove the generated tmp log file.
         os.remove(log_filename)

+    @pytest.mark.parametrize(
+        "executor_name",
+        [
+            (executor_constants.LOCAL_KUBERNETES_EXECUTOR),
+            (executor_constants.CELERY_KUBERNETES_EXECUTOR),
+            (executor_constants.KUBERNETES_EXECUTOR),
+            (None),
+        ],
+    )
+    @conf_vars(
+        {
+            ("core", "EXECUTOR"): ",".join(
+                [
+                    executor_constants.LOCAL_KUBERNETES_EXECUTOR,
+                    executor_constants.CELERY_KUBERNETES_EXECUTOR,
+                    executor_constants.KUBERNETES_EXECUTOR,
+                ]
+            ),
+        }
+    )
+    @patch(
+        "airflow.executors.executor_loader.ExecutorLoader.load_executor",
+        wraps=executor_loader.ExecutorLoader.load_executor,
+    )
+    @patch(
+        "airflow.executors.executor_loader.ExecutorLoader.get_default_executor",
+        wraps=executor_loader.ExecutorLoader.get_default_executor,
+    )
+    def test_file_task_handler_with_multiple_executors(
+        self,
+        mock_get_default_executor,
+        mock_load_executor,
+        executor_name,
+        create_task_instance,
+        clean_executor_loader,
+    ):
+        executors_mapping = executor_loader.ExecutorLoader.executors
+        default_executor_name = executor_loader.ExecutorLoader.get_default_executor_name()
+        path_to_executor_class: str
+        if executor_name is None:
+            path_to_executor_class = executors_mapping.get(default_executor_name.alias)
+        else:
+            path_to_executor_class = executors_mapping.get(executor_name)
+
+        with patch(f"{path_to_executor_class}.get_task_log", return_value=([], [])) as mock_get_task_log:
+            mock_get_task_log.return_value = ([], [])
+            ti = create_task_instance(
+                dag_id="dag_for_testing_multiple_executors",
+                task_id="task_for_testing_multiple_executors",
+                run_type=DagRunType.SCHEDULED,
+                execution_date=DEFAULT_DATE,
+            )
+            if executor_name is not None:
+                ti.executor = executor_name
+            ti.try_number = 1
+            ti.state = TaskInstanceState.RUNNING
+            logger = ti.log
+            ti.log.disabled = False
+
+            file_handler = next(
+                (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
+            )
+            assert file_handler is not None
+
+            set_context(logger, ti)
+            # clear executor_instances cache
+            file_handler.executor_instances = {}
+            assert file_handler.handler is not None
+            # We expect set_context generates a file locally.
+            log_filename = file_handler.handler.baseFilename
+            assert os.path.isfile(log_filename)
+            assert log_filename.endswith("1.log"), log_filename
+
+            file_handler.flush()
+            file_handler.close()
+
+            assert hasattr(file_handler, "read")
+            file_handler.read(ti)
+            os.remove(log_filename)
+            mock_get_task_log.assert_called_once()
+
+        if executor_name is None:
+            mock_get_default_executor.assert_called_once()
+            # will be called in `ExecutorLoader.get_default_executor` method
+            mock_load_executor.assert_called_once_with(default_executor_name)
+        else:
+            mock_get_default_executor.assert_not_called()
+            mock_load_executor.assert_called_once_with(executor_name)
+
     def test_file_task_handler_running(self):
         def task_callable(ti):
             ti.log.info("test")
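
This new test leans on Airflow 2.10's hybrid-executor support: `[core] executor` can hold a comma-separated list, the first entry acts as the default, and a task instance can select another configured executor (the test assigns `ti.executor` directly; in DAG code this is normally expressed through the operator's `executor` argument). A hedged sketch of the DAG-author side, assuming the executors below are actually configured:

# airflow.cfg (illustrative):
#   [core]
#   executor = LocalExecutor,KubernetesExecutor
from airflow.decorators import task


@task(executor="KubernetesExecutor")  # route just this task to the second configured executor
def run_in_kubernetes():
    return "handled by KubernetesExecutor; other tasks fall back to the default executor"
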
@@ -296,6 +385,7 @@ def test__read_from_local(self, tmp_path):
     @mock.patch(
         "airflow.providers.cncf.kubernetes.executors.kubernetes_executor.KubernetesExecutor.get_task_log"
     )
+    @pytest.mark.usefixtures("clean_executor_loader")
     @pytest.mark.parametrize("state", [TaskInstanceState.RUNNING, TaskInstanceState.SUCCESS])
     def test__read_for_k8s_executor(self, mock_k8s_get_task_log, create_task_instance, state):
         """Test for k8s executor, the log is read from get_task_log method"""
@@ -309,6 +399,7 @@ def test__read_for_k8s_executor(self, mock_k8s_get_task_log, create_task_instanc
         )
         ti.state = state
         ti.triggerer_job = None
+        ti.executor = executor_name
         with conf_vars({("core", "executor"): executor_name}):
             reload(executor_loader)
             fth = FileTaskHandler("")
@@ -401,11 +492,12 @@ def test__read_served_logs_checked_when_done_and_no_local_or_remote_logs(
             pytest.param(k8s.V1Pod(metadata=k8s.V1ObjectMeta(name="pod-name-xxx")), "default"),
         ],
     )
-    @patch.dict("os.environ", AIRFLOW__CORE__EXECUTOR="KubernetesExecutor")
+    @conf_vars({("core", "executor"): "KubernetesExecutor"})
    @patch("airflow.providers.cncf.kubernetes.kube_client.get_kube_client")
     def test_read_from_k8s_under_multi_namespace_mode(
         self, mock_kube_client, pod_override, namespace_to_call
     ):
+        reload(executor_loader)
         mock_read_log = mock_kube_client.return_value.read_namespaced_pod_log
         mock_list_pod = mock_kube_client.return_value.list_namespaced_pod

@@ -426,6 +518,7 @@ def task_callable(ti):
         )
         ti = TaskInstance(task=task, run_id=dagrun.run_id)
         ti.try_number = 3
+        ti.executor = "KubernetesExecutor"

         logger = ti.log
         ti.log.disabled = False
@@ -434,6 +527,8 @@ def task_callable(ti):
         set_context(logger, ti)
         ti.run(ignore_ti_state=True)
         ti.state = TaskInstanceState.RUNNING
+        # clear executor_instances cache
+        file_handler.executor_instances = {}
         file_handler.read(ti, 2)

         # first we find pod name
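
Both tweaks in these hunks work around caching: `executor_loader` derives its executor mapping from `[core] executor` when first imported, so after patching the setting with `conf_vars` the tests reload the module, and `FileTaskHandler.executor_instances` is emptied so `read()` re-resolves the executor for the task instance. A minimal sketch of the reload pattern, using the same test utilities seen elsewhere in this diff (cache internals may differ between versions):

from importlib import reload

from airflow.executors import executor_loader
from tests.test_utils.config import conf_vars

with conf_vars({("core", "executor"): "KubernetesExecutor"}):
    # The module cached state based on the old setting at import time,
    # so re-execute it to pick up the patched configuration.
    reload(executor_loader)
    print(executor_loader.ExecutorLoader.get_default_executor_name())
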
diff --git a/tests/utils/test_usage_data_collection.py b/tests/utils/test_usage_data_collection.py
deleted file mode 100644
index 143bce39eca4d..0000000000000
--- a/tests/utils/test_usage_data_collection.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-from __future__ import annotations
-
-import platform
-from unittest import mock
-
-import pytest
-
-from airflow import __version__ as airflow_version
-from airflow.configuration import conf
-from airflow.utils.usage_data_collection import (
-    get_database_version,
-    get_python_version,
-    usage_data_collection,
-)
-
-
-@pytest.mark.parametrize("is_enabled, is_prerelease", [(False, True), (True, True)])
-@mock.patch("httpx.get")
-def test_scarf_analytics_disabled(mock_get, is_enabled, is_prerelease):
-    with mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=is_enabled), mock.patch(
-        "airflow.utils.usage_data_collection._version_is_prerelease", return_value=is_prerelease
-    ):
-        usage_data_collection()
-    mock_get.assert_not_called()
-
-
-@mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=True)
-@mock.patch("airflow.utils.usage_data_collection._version_is_prerelease", return_value=False)
-@mock.patch("airflow.utils.usage_data_collection._is_ci_environ", return_value=False)
-@mock.patch("airflow.utils.usage_data_collection.get_database_version", return_value="12.3")
-@mock.patch("airflow.utils.usage_data_collection.get_database_name", return_value="postgres")
-@mock.patch("httpx.get")
-def test_scarf_analytics(
-    mock_get,
-    mock_is_usage_data_collection_enabled,
-    mock_version_is_ci,
-    mock_version_is_prerelease,
-    get_database_version,
-    get_database_name,
-):
-    platform_sys = platform.system()
-    platform_machine = platform.machine()
-    python_version = get_python_version()
-    executor = conf.get("core", "EXECUTOR")
-    scarf_endpoint = "https://apacheairflow.gateway.scarf.sh/scheduler"
-    usage_data_collection()
-
-    expected_scarf_url = (
-        f"{scarf_endpoint}?version={airflow_version}"
-        f"&python_version={python_version}"
-        f"&platform={platform_sys}"
-        f"&arch={platform_machine}"
-        f"&database=postgres"
-        f"&db_version=12.3"
-        f"&executor={executor}"
-    )
-
-    mock_get.assert_called_once_with(expected_scarf_url, timeout=5.0)
-
-
-@pytest.mark.skip_if_database_isolation_mode
-@pytest.mark.db_test
-@pytest.mark.parametrize(
-    "version_info, expected_version",
-    [
-        ((1, 2, 3), "1.2"),  # Normal version tuple
-        (None, "None"),  # No version info available
-        ((1,), "1"),  # Single element version tuple
-        ((1, 2, 3, "beta", 4), "1.2"),  # Complex version tuple with strings
-    ],
-)
-def test_get_database_version(version_info, expected_version):
-    with mock.patch("airflow.settings.engine.dialect.server_version_info", new=version_info):
-        assert get_database_version() == expected_version
-
-
-@pytest.mark.parametrize(
-    "version_info, expected_version",
-    [
-        ("1.2.3", "1.2"),  # Normal version
-        ("4", "4"),  # Single element version
-        ("1.2.3.beta4", "1.2"),  # Complex version tuple with strings
-    ],
-)
-def test_get_python_version(version_info, expected_version):
-    with mock.patch("platform.python_version", return_value=version_info):
-        assert get_python_version() == expected_version