diff --git a/airflow/api/common/delete_dag.py b/airflow/api/common/delete_dag.py index 1cf7ffec8b9e4..11b046648c5a1 100644 --- a/airflow/api/common/delete_dag.py +++ b/airflow/api/common/delete_dag.py @@ -22,11 +22,11 @@ import logging from typing import TYPE_CHECKING -from sqlalchemy import and_, delete, or_, select +from sqlalchemy import delete, select from airflow import models from airflow.exceptions import AirflowException, DagNotFound -from airflow.models import DagModel, TaskFail +from airflow.models import DagModel from airflow.models.errors import ParseImportError from airflow.models.serialized_dag import SerializedDagModel from airflow.utils.db import get_sqla_model_classes @@ -64,18 +64,6 @@ def delete_dag(dag_id: str, keep_records_in_log: bool = True, session: Session = if dag is None: raise DagNotFound(f"Dag id {dag_id} not found") - # deleting a DAG should also delete all of its subdags - dags_to_delete_query = session.execute( - select(DagModel.dag_id).where( - or_( - DagModel.dag_id == dag_id, - and_(DagModel.dag_id.like(f"{dag_id}.%"), DagModel.is_subdag), - ) - ) - ) - - dags_to_delete = [dag_id for (dag_id,) in dags_to_delete_query] - # Scheduler removes DAGs without files from serialized_dag table every dag_dir_list_interval. # There may be a lag, so explicitly removes serialized DAG here. if SerializedDagModel.has_dag(dag_id=dag_id, session=session): @@ -86,15 +74,7 @@ def delete_dag(dag_id: str, keep_records_in_log: bool = True, session: Session = for model in get_sqla_model_classes(): if hasattr(model, "dag_id") and (not keep_records_in_log or model.__name__ != "Log"): count += session.execute( - delete(model) - .where(model.dag_id.in_(dags_to_delete)) - .execution_options(synchronize_session="fetch") - ).rowcount - if dag.is_subdag: - parent_dag_id, task_id = dag_id.rsplit(".", 1) - for model in TaskFail, models.TaskInstance: - count += session.execute( - delete(model).where(model.dag_id == parent_dag_id, model.task_id == task_id) + delete(model).where(model.dag_id == dag_id).execution_options(synchronize_session="fetch") ).rowcount # Delete entries in Import Errors table for a deleted DAG diff --git a/airflow/api/common/mark_tasks.py b/airflow/api/common/mark_tasks.py index fa6ce835a919e..d0be6b86d21cb 100644 --- a/airflow/api/common/mark_tasks.py +++ b/airflow/api/common/mark_tasks.py @@ -26,12 +26,10 @@ from airflow.models.dagrun import DagRun from airflow.models.taskinstance import TaskInstance -from airflow.operators.subdag import SubDagOperator from airflow.utils import timezone from airflow.utils.helpers import exactly_one from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import DagRunState, State, TaskInstanceState -from airflow.utils.types import DagRunType if TYPE_CHECKING: from datetime import datetime @@ -40,6 +38,7 @@ from airflow.models.dag import DAG from airflow.models.operator import Operator + from airflow.utils.types import DagRunType class _DagRunInfo(NamedTuple): @@ -101,14 +100,14 @@ def set_state( Can set state for future tasks (calculated from run_id) and retroactively for past tasks. Will verify integrity of past dag runs in order to create tasks that did not exist. It will not create dag runs that are missing - on the schedule (but it will, as for subdag, dag runs if needed). + on the schedule. :param tasks: the iterable of tasks or (task, map_index) tuples from which to work. 
``task.dag`` needs to be set :param run_id: the run_id of the dagrun to start looking from :param execution_date: the execution date from which to start looking (deprecated) :param upstream: Mark all parents (upstream tasks) - :param downstream: Mark all siblings (downstream tasks) of task_id, including SubDags + :param downstream: Mark all siblings (downstream tasks) of task_id :param future: Mark all future tasks on the interval of the dag up until last execution date. :param past: Retroactively mark all tasks starting from start_date of the DAG @@ -140,54 +139,20 @@ def set_state( dag_run_ids = get_run_ids(dag, run_id, future, past, session=session) task_id_map_index_list = list(find_task_relatives(tasks, downstream, upstream)) - task_ids = [task_id if isinstance(task_id, str) else task_id[0] for task_id in task_id_map_index_list] - - confirmed_infos = list(_iter_existing_dag_run_infos(dag, dag_run_ids, session=session)) - confirmed_dates = [info.logical_date for info in confirmed_infos] - - sub_dag_run_ids = ( - list( - _iter_subdag_run_ids(dag, session, DagRunState(state), task_ids, commit, confirmed_infos), - ) - if not state == TaskInstanceState.SKIPPED - else [] - ) - # now look for the task instances that are affected qry_dag = get_all_dag_task_query(dag, session, state, task_id_map_index_list, dag_run_ids) if commit: tis_altered = session.scalars(qry_dag.with_for_update()).all() - if sub_dag_run_ids: - qry_sub_dag = all_subdag_tasks_query(sub_dag_run_ids, session, state, confirmed_dates) - tis_altered += session.scalars(qry_sub_dag.with_for_update()).all() for task_instance in tis_altered: task_instance.set_state(state, session=session) session.flush() else: tis_altered = session.scalars(qry_dag).all() - if sub_dag_run_ids: - qry_sub_dag = all_subdag_tasks_query(sub_dag_run_ids, session, state, confirmed_dates) - tis_altered += session.scalars(qry_sub_dag).all() return tis_altered -def all_subdag_tasks_query( - sub_dag_run_ids: list[str], - session: SASession, - state: TaskInstanceState, - confirmed_dates: Iterable[datetime], -): - """Get *all* tasks of the sub dags.""" - qry_sub_dag = ( - select(TaskInstance) - .where(TaskInstance.dag_id.in_(sub_dag_run_ids), TaskInstance.execution_date.in_(confirmed_dates)) - .where(or_(TaskInstance.state.is_(None), TaskInstance.state != state)) - ) - return qry_sub_dag - - def get_all_dag_task_query( dag: DAG, session: SASession, @@ -208,71 +173,6 @@ def get_all_dag_task_query( return qry_dag -def _iter_subdag_run_ids( - dag: DAG, - session: SASession, - state: DagRunState, - task_ids: list[str], - commit: bool, - confirmed_infos: Iterable[_DagRunInfo], -) -> Iterator[str]: - """ - Go through subdag operators and create dag runs. - - We only work within the scope of the subdag. A subdag does not propagate to - its parent DAG, but parent propagates to subdags. 
- """ - dags = [dag] - while dags: - current_dag = dags.pop() - for task_id in task_ids: - if not current_dag.has_task(task_id): - continue - - current_task = current_dag.get_task(task_id) - if isinstance(current_task, SubDagOperator) or current_task.task_type == "SubDagOperator": - # this works as a kind of integrity check - # it creates missing dag runs for subdag operators, - # maybe this should be moved to dagrun.verify_integrity - if TYPE_CHECKING: - assert current_task.subdag - dag_runs = _create_dagruns( - current_task.subdag, - infos=confirmed_infos, - state=DagRunState.RUNNING, - run_type=DagRunType.BACKFILL_JOB, - ) - - verify_dagruns(dag_runs, commit, state, session, current_task) - - dags.append(current_task.subdag) - yield current_task.subdag.dag_id - - -def verify_dagruns( - dag_runs: Iterable[DagRun], - commit: bool, - state: DagRunState, - session: SASession, - current_task: Operator, -): - """ - Verify integrity of dag_runs. - - :param dag_runs: dag runs to verify - :param commit: whether dag runs state should be updated - :param state: state of the dag_run to set if commit is True - :param session: session to use - :param current_task: current task - """ - for dag_run in dag_runs: - dag_run.dag = current_task.subdag - dag_run.verify_integrity() - if commit: - dag_run.state = state - session.merge(dag_run) - - def _iter_existing_dag_run_infos(dag: DAG, run_ids: list[str], session: SASession) -> Iterator[_DagRunInfo]: for dag_run in DagRun.find(dag_id=dag.dag_id, run_id=run_ids, session=session): dag_run.dag = dag diff --git a/airflow/api/common/trigger_dag.py b/airflow/api/common/trigger_dag.py index f22755ec640ea..70fb999f542a2 100644 --- a/airflow/api/common/trigger_dag.py +++ b/airflow/api/common/trigger_dag.py @@ -43,7 +43,7 @@ def _trigger_dag( conf: dict | str | None = None, execution_date: datetime | None = None, replace_microseconds: bool = True, -) -> list[DagRun | None]: +) -> DagRun | None: """ Triggers DAG run. 
@@ -90,21 +90,17 @@ def _trigger_dag( if conf: run_conf = conf if isinstance(conf, dict) else json.loads(conf) - dag_runs = [] - dags_to_run = [dag, *dag.subdags] - for _dag in dags_to_run: - dag_run = _dag.create_dagrun( - run_id=run_id, - execution_date=execution_date, - state=DagRunState.QUEUED, - conf=run_conf, - external_trigger=True, - dag_hash=dag_bag.dags_hash.get(dag_id), - data_interval=data_interval, - ) - dag_runs.append(dag_run) + dag_run = dag.create_dagrun( + run_id=run_id, + execution_date=execution_date, + state=DagRunState.QUEUED, + conf=run_conf, + external_trigger=True, + dag_hash=dag_bag.dags_hash.get(dag_id), + data_interval=data_interval, + ) - return dag_runs + return dag_run @internal_api_call @@ -133,7 +129,7 @@ def trigger_dag( raise DagNotFound(f"Dag id {dag_id} not found in DagModel") dagbag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True) - triggers = _trigger_dag( + dr = _trigger_dag( dag_id=dag_id, dag_bag=dagbag, run_id=run_id, @@ -142,4 +138,4 @@ def trigger_dag( replace_microseconds=replace_microseconds, ) - return triggers[0] if triggers else None + return dr if dr else None diff --git a/airflow/api_connexion/endpoints/dag_endpoint.py b/airflow/api_connexion/endpoints/dag_endpoint.py index 1895bfeaec762..749c3bf14ddf3 100644 --- a/airflow/api_connexion/endpoints/dag_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_endpoint.py @@ -106,7 +106,7 @@ def get_dags( ) -> APIResponse: """Get all DAGs.""" allowed_attrs = ["dag_id"] - dags_query = select(DagModel).where(~DagModel.is_subdag) + dags_query = select(DagModel) if only_active: dags_query = dags_query.where(DagModel.is_active) if paused is not None: @@ -179,10 +179,9 @@ def patch_dags(limit, session, offset=0, only_active=True, tags=None, dag_id_pat update_mask = update_mask[0] patch_body_[update_mask] = patch_body[update_mask] patch_body = patch_body_ + dags_query = select(DagModel) if only_active: - dags_query = select(DagModel).where(~DagModel.is_subdag, DagModel.is_active) - else: - dags_query = select(DagModel).where(~DagModel.is_subdag) + dags_query = dags_query.where(DagModel.is_active) if dag_id_pattern == "~": dag_id_pattern = "%" diff --git a/airflow/api_connexion/endpoints/dag_run_endpoint.py b/airflow/api_connexion/endpoints/dag_run_endpoint.py index 96fdd42fa03b7..acf4b44493924 100644 --- a/airflow/api_connexion/endpoints/dag_run_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_run_endpoint.py @@ -425,8 +425,6 @@ def clear_dag_run(*, dag_id: str, dag_run_id: str, session: Session = NEW_SESSIO start_date=start_date, end_date=end_date, task_ids=None, - include_subdags=True, - include_parentdag=True, only_failed=False, dry_run=True, ) @@ -438,8 +436,6 @@ def clear_dag_run(*, dag_id: str, dag_run_id: str, session: Session = NEW_SESSIO start_date=start_date, end_date=end_date, task_ids=None, - include_subdags=True, - include_parentdag=True, only_failed=False, ) dag_run = session.execute(select(DagRun).where(DagRun.id == dag_run.id)).scalar_one() diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml index 0394da4f466cf..fbd9a64eacd89 100644 --- a/airflow/api_connexion/openapi/v1.yaml +++ b/airflow/api_connexion/openapi/v1.yaml @@ -3106,11 +3106,6 @@ components: Human centric display text for the DAG. *New in version 2.9.0* - root_dag_id: - type: string - readOnly: true - nullable: true - description: If the DAG is SubDAG then it is the top level DAG identifier. Otherwise, null. 
is_paused: type: boolean nullable: true @@ -3125,10 +3120,6 @@ components: nullable: true readOnly: true type: boolean - is_subdag: - description: Whether the DAG is SubDAG. - type: boolean - readOnly: true last_parsed_time: type: string format: date-time @@ -4903,14 +4894,6 @@ components: type: boolean default: false - include_subdags: - description: Clear tasks in subdags and clear external tasks indicated by ExternalTaskMarker. - type: boolean - - include_parentdag: - description: Clear tasks in the parent dag of the subdag. - type: boolean - reset_dag_runs: description: Set state of DAG runs to RUNNING. type: boolean diff --git a/airflow/api_connexion/schemas/dag_schema.py b/airflow/api_connexion/schemas/dag_schema.py index 799e4092ccaee..32eca2f0b8903 100644 --- a/airflow/api_connexion/schemas/dag_schema.py +++ b/airflow/api_connexion/schemas/dag_schema.py @@ -51,10 +51,8 @@ class Meta: dag_id = auto_field(dump_only=True) dag_display_name = fields.String(attribute="dag_display_name", dump_only=True) - root_dag_id = auto_field(dump_only=True) is_paused = auto_field() is_active = auto_field(dump_only=True) - is_subdag = auto_field(dump_only=True) last_parsed_time = auto_field(dump_only=True) last_pickled = auto_field(dump_only=True) last_expired = auto_field(dump_only=True) diff --git a/airflow/api_connexion/schemas/task_instance_schema.py b/airflow/api_connexion/schemas/task_instance_schema.py index 5d0eb72091272..74cd0585dcda8 100644 --- a/airflow/api_connexion/schemas/task_instance_schema.py +++ b/airflow/api_connexion/schemas/task_instance_schema.py @@ -177,8 +177,6 @@ class ClearTaskInstanceFormSchema(Schema): end_date = fields.DateTime(load_default=None, validate=validate_istimezone) only_failed = fields.Boolean(load_default=True) only_running = fields.Boolean(load_default=False) - include_subdags = fields.Boolean(load_default=False) - include_parentdag = fields.Boolean(load_default=False) reset_dag_runs = fields.Boolean(load_default=False) task_ids = fields.List(fields.String(), validate=validate.Length(min=1)) dag_run_id = fields.Str(load_default=None) diff --git a/airflow/api_connexion/schemas/task_schema.py b/airflow/api_connexion/schemas/task_schema.py index 03bf4b59ef2e2..e78c3ef4af1b2 100644 --- a/airflow/api_connexion/schemas/task_schema.py +++ b/airflow/api_connexion/schemas/task_schema.py @@ -26,7 +26,6 @@ TimeDeltaSchema, WeightRuleField, ) -from airflow.api_connexion.schemas.dag_schema import DAGSchema from airflow.models.mappedoperator import MappedOperator if TYPE_CHECKING: @@ -61,7 +60,6 @@ class TaskSchema(Schema): ui_color = ColorField(dump_only=True) ui_fgcolor = ColorField(dump_only=True) template_fields = fields.List(fields.String(), dump_only=True) - sub_dag = fields.Nested(DAGSchema, dump_only=True) downstream_task_ids = fields.List(fields.String(), dump_only=True) params = fields.Method("_get_params", dump_only=True) is_mapped = fields.Method("_get_is_mapped", dump_only=True) diff --git a/airflow/cli/cli_config.py b/airflow/cli/cli_config.py index 7814c86bd6d18..269916548401d 100644 --- a/airflow/cli/cli_config.py +++ b/airflow/cli/cli_config.py @@ -449,12 +449,6 @@ def string_lower_type(val): ARG_ONLY_FAILED = Arg(("-f", "--only-failed"), help="Only failed jobs", action="store_true") ARG_ONLY_RUNNING = Arg(("-r", "--only-running"), help="Only running jobs", action="store_true") ARG_DOWNSTREAM = Arg(("-d", "--downstream"), help="Include downstream tasks", action="store_true") -ARG_EXCLUDE_SUBDAGS = Arg(("-x", "--exclude-subdags"), help="Exclude subdags", 
action="store_true") -ARG_EXCLUDE_PARENTDAG = Arg( - ("-X", "--exclude-parentdag"), - help="Exclude ParentDAGS if the task cleared is a part of a SubDAG", - action="store_true", -) ARG_DAG_REGEX = Arg( ("-R", "--dag-regex"), help="Search dag_id as regex instead of exact string", action="store_true" ) @@ -1330,8 +1324,6 @@ class GroupCommand(NamedTuple): ARG_YES, ARG_ONLY_FAILED, ARG_ONLY_RUNNING, - ARG_EXCLUDE_SUBDAGS, - ARG_EXCLUDE_PARENTDAG, ARG_DAG_REGEX, ARG_VERBOSE, ), diff --git a/airflow/cli/commands/dag_command.py b/airflow/cli/commands/dag_command.py index dac61d0da5971..2f300ebef2144 100644 --- a/airflow/cli/commands/dag_command.py +++ b/airflow/cli/commands/dag_command.py @@ -101,7 +101,6 @@ def _run_dag_backfill(dags: list[DAG], args) -> None: start_date=args.start_date, end_date=args.end_date, confirm_prompt=not args.yes, - include_subdags=True, dag_run_state=DagRunState.QUEUED, ) @@ -334,10 +333,8 @@ def _get_dagbag_dag_details(dag: DAG) -> dict: return { "dag_id": dag.dag_id, "dag_display_name": dag.dag_display_name, - "root_dag_id": dag.parent_dag.dag_id if dag.parent_dag else None, "is_paused": dag.get_is_paused(), "is_active": dag.get_is_active(), - "is_subdag": dag.is_subdag, "last_parsed_time": None, "last_pickled": None, "last_expired": None, diff --git a/airflow/cli/commands/task_command.py b/airflow/cli/commands/task_command.py index 6e0fc80fbb300..c4c6db386a4e9 100644 --- a/airflow/cli/commands/task_command.py +++ b/airflow/cli/commands/task_command.py @@ -764,8 +764,6 @@ def task_clear(args) -> None: only_failed=args.only_failed, only_running=args.only_running, confirm_prompt=not args.yes, - include_subdags=not args.exclude_subdags, - include_parentdag=not args.exclude_parentdag, ) diff --git a/airflow/dag_processing/processor.py b/airflow/dag_processing/processor.py index 86db0b5b881a3..0b19d8f2db76c 100644 --- a/airflow/dag_processing/processor.py +++ b/airflow/dag_processing/processor.py @@ -688,8 +688,6 @@ def get_pools(dag) -> dict[str, set[str]]: pool_dict: dict[str, set[str]] = {} for dag in dagbag.dags.values(): pool_dict.update(get_pools(dag)) - for subdag in dag.subdags: - pool_dict.update(get_pools(subdag)) dag_ids = {dag.dag_id for dag in dagbag.dags.values()} return DagFileProcessor._validate_task_pools_and_update_dag_warnings(pool_dict, dag_ids) diff --git a/airflow/example_dags/example_subdag_operator.py b/airflow/example_dags/example_subdag_operator.py deleted file mode 100644 index 196012024c2c6..0000000000000 --- a/airflow/example_dags/example_subdag_operator.py +++ /dev/null @@ -1,70 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Example DAG demonstrating the usage of the SubDagOperator.""" - -from __future__ import annotations - -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message=r"This class is deprecated\. Please use `airflow\.utils\.task_group\.TaskGroup`\.", - ) - - # [START example_subdag_operator] - import datetime - - from airflow.example_dags.subdags.subdag import subdag - from airflow.models.dag import DAG - from airflow.operators.empty import EmptyOperator - from airflow.operators.subdag import SubDagOperator - - DAG_NAME = "example_subdag_operator" - - with DAG( - dag_id=DAG_NAME, - default_args={"retries": 2}, - start_date=datetime.datetime(2022, 1, 1), - schedule="@once", - tags=["example"], - ) as dag: - start = EmptyOperator( - task_id="start", - ) - - section_1 = SubDagOperator( - task_id="section-1", - subdag=subdag(DAG_NAME, "section-1", dag.default_args), - ) - - some_other_task = EmptyOperator( - task_id="some-other-task", - ) - - section_2 = SubDagOperator( - task_id="section-2", - subdag=subdag(DAG_NAME, "section-2", dag.default_args), - ) - - end = EmptyOperator( - task_id="end", - ) - - start >> section_1 >> some_other_task >> section_2 >> end - # [END example_subdag_operator] diff --git a/airflow/example_dags/subdags/__init__.py b/airflow/example_dags/subdags/__init__.py deleted file mode 100644 index 217e5db960782..0000000000000 --- a/airflow/example_dags/subdags/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/airflow/example_dags/subdags/subdag.py b/airflow/example_dags/subdags/subdag.py deleted file mode 100644 index 748582f4b84ae..0000000000000 --- a/airflow/example_dags/subdags/subdag.py +++ /dev/null @@ -1,56 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Helper function to generate a DAG and operators given some arguments.""" - -from __future__ import annotations - -# [START subdag] -import pendulum - -from airflow.models.dag import DAG -from airflow.operators.empty import EmptyOperator - - -def subdag(parent_dag_name, child_dag_name, args) -> DAG: - """ - Generate a DAG to be used as a subdag. - - :param str parent_dag_name: Id of the parent DAG - :param str child_dag_name: Id of the child DAG - :param dict args: Default arguments to provide to the subdag - :return: DAG to use as a subdag - """ - dag_subdag = DAG( - dag_id=f"{parent_dag_name}.{child_dag_name}", - default_args=args, - start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), - catchup=False, - schedule="@daily", - ) - - for i in range(5): - EmptyOperator( - task_id=f"{child_dag_name}-task-{i + 1}", - default_args=args, - dag=dag_subdag, - ) - - return dag_subdag - - -# [END subdag] diff --git a/airflow/jobs/backfill_job_runner.py b/airflow/jobs/backfill_job_runner.py index 961c4b7e020b3..028d494219677 100644 --- a/airflow/jobs/backfill_job_runner.py +++ b/airflow/jobs/backfill_job_runner.py @@ -65,7 +65,7 @@ class BackfillJobRunner(BaseJobRunner, LoggingMixin): """ - A backfill job runner consists of a dag or subdag for a specific time range. + A backfill job runner consists of a dag for a specific time range. It triggers a set of task instance runs, in the right order and lasts for as long as it takes for the set of task instance to be completed. @@ -327,7 +327,7 @@ def _manage_executor_state( def _iter_task_needing_expansion() -> Iterator[AbstractOperator]: from airflow.models.mappedoperator import AbstractOperator - for node in self.dag.get_task(ti.task_id, include_subdags=True).iter_mapped_dependants(): + for node in self.dag.get_task(ti.task_id).iter_mapped_dependants(): if isinstance(node, AbstractOperator): yield node else: # A (mapped) task group. All its children need expansion. 
@@ -359,8 +359,7 @@ def _get_dag_run( """ run_date = dagrun_info.logical_date - # consider max_active_runs but ignore when running subdags - respect_dag_max_active_limit = bool(dag.timetable.can_be_scheduled and not dag.is_subdag) + respect_dag_max_active_limit = bool(dag.timetable.can_be_scheduled) current_active_dag_count = dag.get_num_active_runs(external_trigger=False) @@ -500,7 +499,7 @@ def _process_backfill_task_instances( def _per_task_process(key, ti: TaskInstance, session): ti.refresh_from_db(lock_for_update=True, session=session) - task = self.dag.get_task(ti.task_id, include_subdags=True) + task = self.dag.get_task(ti.task_id) ti.task = task self.log.debug("Task instance to run %s state %s", ti, ti.state) @@ -636,7 +635,7 @@ def _per_task_process(key, ti: TaskInstance, session): ti_status.not_ready.add(key) try: - for task in self.dag.topological_sort(include_subdag_tasks=True): + for task in self.dag.topological_sort(): for key, ti in list(ti_status.to_run.items()): # Attempt to workaround deadlock on backfill by attempting to commit the transaction # state update few times before giving up @@ -839,9 +838,6 @@ def tabulate_ti_keys_set(ti_keys: Iterable[TaskInstanceKey]) -> str: yield "\n\nThese tasks are deadlocked:\n" yield tabulate_ti_keys_set([ti.key for ti in ti_status.deadlocked]) - def _get_dag_with_subdags(self) -> list[DAG]: - return [self.dag, *self.dag.subdags] - @provide_session def _execute_dagruns( self, @@ -863,12 +859,11 @@ def _execute_dagruns( :param session: the current session object """ for dagrun_info in dagrun_infos: - for dag in self._get_dag_with_subdags(): - dag_run = self._get_dag_run(dagrun_info, dag, session=session) - if dag_run is not None: - tis_map = self._task_instances_for_dag_run(dag, dag_run, session=session) - ti_status.active_runs.add(dag_run) - ti_status.to_run.update(tis_map or {}) + dag_run = self._get_dag_run(dagrun_info, self.dag, session=session) + if dag_run is not None: + tis_map = self._task_instances_for_dag_run(self.dag, dag_run, session=session) + ti_status.active_runs.add(dag_run) + ti_status.to_run.update(tis_map or {}) tis_missing_executor = [] for ti in ti_status.to_run.values(): @@ -948,9 +943,8 @@ def _execute(self, session: Session = NEW_SESSION) -> None: return dagrun_infos = [DagRunInfo.interval(dagrun_start_date, dagrun_end_date)] - dag_with_subdags_ids = [d.dag_id for d in self._get_dag_with_subdags()] running_dagruns = DagRun.find( - dag_id=dag_with_subdags_ids, + dag_id=self.dag.dag_id, execution_start_date=self.bf_start_date, execution_end_date=self.bf_end_date, no_backfills=True, diff --git a/airflow/migrations/versions/0003_3_0_0_remove_is_subdag.py b/airflow/migrations/versions/0003_3_0_0_remove_is_subdag.py new file mode 100644 index 0000000000000..eab9954b329ab --- /dev/null +++ b/airflow/migrations/versions/0003_3_0_0_remove_is_subdag.py @@ -0,0 +1,76 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Remove SubDAGs: ``is_subdag`` & ``root_dag_id`` columns from DAG table. + +Revision ID: d0f1c55954fa +Revises: 044f740568ec +Create Date: 2024-08-11 21:32:40.576172 + +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op + +from airflow.migrations.db_types import StringID + +# revision identifiers, used by Alembic. +revision = "d0f1c55954fa" +down_revision = "044f740568ec" +branch_labels = None +depends_on = None +airflow_version = "3.0.0" + + +def _column_exists(inspector, column_name): + return column_name in [col["name"] for col in inspector.get_columns("dag")] + + +def _index_exists(inspector, index_name): + return index_name in [index["name"] for index in inspector.get_indexes("dag")] + + +def upgrade(): + """Remove ``is_subdag`` column from DAGs table.""" + conn = op.get_bind() + inspector = sa.inspect(conn) + + with op.batch_alter_table("dag", schema=None) as batch_op: + if _index_exists(inspector, "idx_root_dag_id"): + batch_op.drop_index("idx_root_dag_id") + if _column_exists(inspector, "is_subdag"): + batch_op.drop_column("is_subdag") + if _column_exists(inspector, "root_dag_id"): + batch_op.drop_column("root_dag_id") + + +def downgrade(): + """Add ``is_subdag`` column in DAGs table.""" + conn = op.get_bind() + inspector = sa.inspect(conn) + + with op.batch_alter_table("dag", schema=None) as batch_op: + if not _column_exists(inspector, "is_subdag"): + batch_op.add_column(sa.Column("is_subdag", sa.BOOLEAN(), nullable=True)) + if not _column_exists(inspector, "root_dag_id"): + batch_op.add_column(sa.Column("root_dag_id", StringID(), nullable=True)) + if not _index_exists(inspector, "idx_root_dag_id"): + batch_op.create_index("idx_root_dag_id", ["root_dag_id"], unique=False) diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 7ffa596ec67a1..ea100cd4e2abf 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -854,10 +854,6 @@ def say_hello_world(**context): _dag: DAG | None = None task_group: TaskGroup | None = None - # subdag parameter is only set for SubDagOperator. - # Setting it to None by default as other Operators do not have that field - subdag: DAG | None = None - start_date: pendulum.DateTime | None = None end_date: pendulum.DateTime | None = None @@ -1724,7 +1720,6 @@ def get_serialized_fields(cls): "end_date", "_task_type", "_operator_name", - "subdag", "ui_color", "ui_fgcolor", "template_ext", diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 1c9d351c1d292..7b762aa18dbf8 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -497,7 +497,6 @@ class DAG(LoggingMixin): _comps = { "dag_id", "task_ids", - "parent_dag", "start_date", "end_date", "schedule_interval", @@ -510,14 +509,12 @@ class DAG(LoggingMixin): fileloc: str """ - File path that needs to be imported to load this DAG or subdag. + File path that needs to be imported to load this DAG. This may not be an actual file on disk in the case when this DAG is loaded from a ZIP file or other DAG distribution format. 
""" - parent_dag: DAG | None = None # Gets set when DAGs are loaded - # NOTE: When updating arguments here, please also keep arguments in @dag() # below in sync. (Search for 'def dag(' in this file.) def __init__( @@ -1135,10 +1132,6 @@ def next_dagrun_info( :return: DagRunInfo of the next dagrun, or None if a dagrun is not going to be scheduled. """ - # Never schedule a subdag. It will be scheduled by its parent dag. - if self.is_subdag: - return None - data_interval = None if isinstance(last_automated_dagrun, datetime): warnings.warn( @@ -1217,8 +1210,7 @@ def iter_dagrun_infos_between( If ``align`` is ``False``, the first run will happen immediately on ``earliest``, even if it does not fall on the logical timetable schedule. - The default is ``True``, but subdags will ignore this value and always - behave as if this is set to ``False`` for backward compatibility. + The default is ``True``. Example: A DAG is scheduled to run every midnight (``0 0 * * *``). If ``earliest`` is ``2021-06-03 23:00:00``, the first DagRunInfo would be @@ -1234,15 +1226,6 @@ def iter_dagrun_infos_between( restriction = TimeRestriction(earliest, latest, catchup=True) - # HACK: Sub-DAGs are currently scheduled differently. For example, say - # the schedule is @daily and start is 2021-06-03 22:16:00, a top-level - # DAG should be first scheduled to run on midnight 2021-06-04, but a - # sub-DAG should be first scheduled to run RIGHT NOW. We can change - # this, but since sub-DAGs are going away in 3.0 anyway, let's keep - # compatibility for now and remove this entirely later. - if self.is_subdag: - align = False - try: info = self.timetable.next_dagrun_info( last_automated_data_interval=None, @@ -1347,10 +1330,6 @@ def dag_id(self) -> str: def dag_id(self, value: str) -> None: self._dag_id = value - @property - def is_subdag(self) -> bool: - return self.parent_dag is not None - @property def full_filepath(self) -> str: """ @@ -1755,25 +1734,6 @@ def latest_execution_date(self): ) return self.get_latest_execution_date() - @property - def subdags(self): - """Return a list of the subdag objects associated to this DAG.""" - # Check SubDag for class but don't check class directly - from airflow.operators.subdag import SubDagOperator - - subdag_lst = [] - for task in self.tasks: - if ( - isinstance(task, SubDagOperator) - or - # TODO remove in Airflow 2.0 - type(task).__name__ == "SubDagOperator" - or task.task_type == "SubDagOperator" - ): - subdag_lst.append(task.subdag) - subdag_lst += task.subdag.subdags - return subdag_lst - def resolve_template_files(self): for t in self.tasks: t.resolve_template_files() @@ -1866,8 +1826,6 @@ def get_task_instances( end_date=end_date, run_id=None, state=state or (), - include_subdags=False, - include_parentdag=False, include_dependent_dags=False, exclude_task_ids=(), session=session, @@ -1883,8 +1841,6 @@ def _get_task_instances( end_date: datetime | None, run_id: str | None, state: TaskInstanceState | Sequence[TaskInstanceState], - include_subdags: bool, - include_parentdag: bool, include_dependent_dags: bool, exclude_task_ids: Collection[str | tuple[str, int]] | None, session: Session, @@ -1901,8 +1857,6 @@ def _get_task_instances( end_date: datetime | None, run_id: str | None, state: TaskInstanceState | Sequence[TaskInstanceState], - include_subdags: bool, - include_parentdag: bool, include_dependent_dags: bool, exclude_task_ids: Collection[str | tuple[str, int]] | None, session: Session, @@ -1921,8 +1875,6 @@ def _get_task_instances( end_date: datetime | None, run_id: str | 
None, state: TaskInstanceState | Sequence[TaskInstanceState], - include_subdags: bool, - include_parentdag: bool, include_dependent_dags: bool, exclude_task_ids: Collection[str | tuple[str, int]] | None, session: Session, @@ -1933,7 +1885,7 @@ def _get_task_instances( ) -> Iterable[TaskInstance] | set[TaskInstanceKey]: TI = TaskInstance - # If we are looking at subdags/dependent dags we want to avoid UNION calls + # If we are looking at dependent dags we want to avoid UNION calls # in SQL (it doesn't play nice with fields that have no equality operator, # like JSON types), we instead build our result set separately. # @@ -1948,15 +1900,7 @@ def _get_task_instances( tis = select(TaskInstance) tis = tis.join(TaskInstance.dag_run) - if include_subdags: - # Crafting the right filter for dag_id and task_ids combo - conditions = [] - for dag in [*self.subdags, self]: - conditions.append( - (TaskInstance.dag_id == dag.dag_id) & TaskInstance.task_id.in_(dag.task_ids) - ) - tis = tis.where(or_(*conditions)) - elif self.partial: + if self.partial: tis = tis.where(TaskInstance.dag_id == self.dag_id, TaskInstance.task_id.in_(self.task_ids)) else: tis = tis.where(TaskInstance.dag_id == self.dag_id) @@ -1990,36 +1934,6 @@ def _get_task_instances( else: tis = tis.where(TaskInstance.state.in_(state)) - # Next, get any of them from our parent DAG (if there is one) - if include_parentdag and self.parent_dag is not None: - if visited_external_tis is None: - visited_external_tis = set() - - p_dag = self.parent_dag.partial_subset( - task_ids_or_regex=r"^{}$".format(self.dag_id.split(".")[1]), - include_upstream=False, - include_downstream=True, - ) - result.update( - p_dag._get_task_instances( - task_ids=task_ids, - start_date=start_date, - end_date=end_date, - run_id=None, - state=state, - include_subdags=include_subdags, - include_parentdag=False, - include_dependent_dags=include_dependent_dags, - as_pk_tuple=True, - exclude_task_ids=exclude_task_ids, - session=session, - dag_bag=dag_bag, - recursion_depth=recursion_depth, - max_recursion_depth=max_recursion_depth, - visited_external_tis=visited_external_tis, - ) - ) - if include_dependent_dags: # Recursively find external tasks indicated by ExternalTaskMarker from airflow.sensors.external_task import ExternalTaskMarker @@ -2089,9 +2003,7 @@ def _get_task_instances( start_date=None, end_date=None, state=state, - include_subdags=include_subdags, include_dependent_dags=include_dependent_dags, - include_parentdag=False, as_pk_tuple=True, exclude_task_ids=exclude_task_ids, dag_bag=dag_bag, @@ -2103,7 +2015,7 @@ def _get_task_instances( ) if result or as_pk_tuple: - # Only execute the `ti` query if we have also collected some other results (i.e. subdags etc.) + # Only execute the `ti` query if we have also collected some other results if as_pk_tuple: tis_query = session.execute(tis).all() result.update(TaskInstanceKey(**cols._mapping) for cols in tis_query) @@ -2218,8 +2130,6 @@ def set_task_instance_state( subdag.clear( start_date=start_date, end_date=end_date, - include_subdags=True, - include_parentdag=True, only_failed=True, session=session, # Exclude the task itself from being cleared @@ -2319,8 +2229,6 @@ def set_task_group_state( task_subset.clear( start_date=start_date, end_date=end_date, - include_subdags=True, - include_parentdag=True, only_failed=True, session=session, # Exclude the task from the current group from being cleared @@ -2339,7 +2247,7 @@ def leaves(self) -> list[Operator]: """Return nodes with no children. 
These are last to execute and are called leaves or leaf nodes.""" return [task for task in self.tasks if not task.downstream_list] - def topological_sort(self, include_subdag_tasks: bool = False): + def topological_sort(self): """ Sorts tasks in topographical order, such that a task comes after any of its upstream dependencies. @@ -2348,7 +2256,7 @@ def topological_sort(self, include_subdag_tasks: bool = False): from airflow.utils.task_group import TaskGroup def nested_topo(group): - for node in group.topological_sort(_include_subdag_tasks=include_subdag_tasks): + for node in group.topological_sort(): if isinstance(node, TaskGroup): yield from nested_topo(node) else: @@ -2387,8 +2295,6 @@ def clear( only_failed: bool = False, only_running: bool = False, confirm_prompt: bool = False, - include_subdags: bool = True, - include_parentdag: bool = True, dag_run_state: DagRunState = DagRunState.QUEUED, dry_run: bool = False, session: Session = NEW_SESSION, @@ -2407,14 +2313,11 @@ def clear( :param only_failed: Only clear failed tasks :param only_running: Only clear running tasks. :param confirm_prompt: Ask for confirmation - :param include_subdags: Clear tasks in subdags and clear external tasks - indicated by ExternalTaskMarker - :param include_parentdag: Clear tasks in the parent dag of the subdag. :param dag_run_state: state to set DagRun to. If set to False, dagrun state will not be changed. :param dry_run: Find the tasks to clear but don't clear them. :param session: The sqlalchemy session to use - :param dag_bag: The DagBag used to find the dags subdags (Optional) + :param dag_bag: The DagBag used to find the dags (Optional) :param exclude_task_ids: A set of ``task_id`` or (``task_id``, ``map_index``) tuples that should not be cleared """ @@ -2452,9 +2355,7 @@ def clear( end_date=end_date, run_id=None, state=state, - include_subdags=include_subdags, - include_parentdag=include_parentdag, - include_dependent_dags=include_subdags, # compat, yes this is not a typo + include_dependent_dags=True, session=session, dag_bag=dag_bag, exclude_task_ids=exclude_task_ids, @@ -2497,8 +2398,6 @@ def clear_dags( only_failed=False, only_running=False, confirm_prompt=False, - include_subdags=True, - include_parentdag=False, dag_run_state=DagRunState.QUEUED, dry_run=False, ): @@ -2510,8 +2409,6 @@ def clear_dags( only_failed=only_failed, only_running=only_running, confirm_prompt=False, - include_subdags=include_subdags, - include_parentdag=include_parentdag, dag_run_state=dag_run_state, dry_run=True, ) @@ -2538,7 +2435,6 @@ def clear_dags( only_failed=only_failed, only_running=only_running, confirm_prompt=False, - include_subdags=include_subdags, dag_run_state=dag_run_state, dry_run=False, ) @@ -2563,15 +2459,6 @@ def __deepcopy__(self, memo): result._log = self._log return result - def sub_dag(self, *args, **kwargs): - """Use `airflow.models.DAG.partial_subset`, this method is deprecated.""" - warnings.warn( - "This method is deprecated and will be removed in a future version. 
Please use partial_subset", - RemovedInAirflow3Warning, - stacklevel=2, - ) - return self.partial_subset(*args, **kwargs) - def partial_subset( self, task_ids_or_regex: str | Pattern | Iterable[str], @@ -2714,13 +2601,9 @@ def has_task_group(self, task_group_id: str) -> bool: def task_group_dict(self): return {k: v for k, v in self._task_group.get_task_group_dict().items() if k is not None} - def get_task(self, task_id: str, include_subdags: bool = False) -> Operator: + def get_task(self, task_id: str) -> Operator: if task_id in self.task_dict: return self.task_dict[task_id] - if include_subdags: - for dag in self.subdags: - if task_id in dag.task_dict: - return dag.task_dict[task_id] raise TaskNotFound(f"Task {task_id} not found") def pickle_info(self): @@ -3198,8 +3081,6 @@ def bulk_write_to_db( """ Ensure the DagModel rows for the given dags are up-to-date in the dag table in the DB. - Note that this method can be called for both DAGs and SubDAGs. A SubDag is actually a SubDagOperator. - :param dags: the DAG objects to save to the DB :return: None """ @@ -3251,15 +3132,8 @@ def bulk_write_to_db( for orm_dag in sorted(orm_dags, key=lambda d: d.dag_id): dag = dag_by_ids[orm_dag.dag_id] filelocs.append(dag.fileloc) - if dag.is_subdag: - orm_dag.is_subdag = True - orm_dag.fileloc = dag.parent_dag.fileloc # type: ignore - orm_dag.root_dag_id = dag.parent_dag.dag_id # type: ignore - orm_dag.owners = dag.parent_dag.owner # type: ignore - else: - orm_dag.is_subdag = False - orm_dag.fileloc = dag.fileloc - orm_dag.owners = dag.owner + orm_dag.fileloc = dag.fileloc + orm_dag.owners = dag.owner orm_dag.is_active = True orm_dag.has_import_errors = False orm_dag.last_parsed_time = timezone.utcnow() @@ -3474,9 +3348,6 @@ def bulk_write_to_db( # decide when to commit session.flush() - for dag in dags: - cls.bulk_write_to_db(dag.subdags, processor_subdir=processor_subdir, session=session) - @classmethod def _get_latest_runs_stmt(cls, dags: list[str]) -> Select: """ @@ -3526,8 +3397,6 @@ def sync_to_db(self, processor_subdir: str | None = None, session=NEW_SESSION): """ Save attributes about this DAG to the DB. - Note that this method can be called for both DAGs and SubDAGs. A SubDag is actually a SubDagOperator. - :return: None """ self.bulk_write_to_db([self], processor_subdir=processor_subdir, session=session) @@ -3621,7 +3490,6 @@ def get_serialized_fields(cls): """Stringified DAGs and operators contain exactly these fields.""" if not cls.__serialized_fields: exclusion_list = { - "parent_dag", "schedule_dataset_references", "schedule_dataset_alias_references", "task_outlet_dataset_references", @@ -3753,13 +3621,10 @@ class DagModel(Base): These items are stored in the database for state related information """ dag_id = Column(StringID(), primary_key=True) - root_dag_id = Column(StringID()) # A DAG can be paused from the UI / DB # Set this default value of is_paused based on a configuration value! is_paused_at_creation = airflow_conf.getboolean("core", "dags_are_paused_at_creation") is_paused = Column(Boolean, default=is_paused_at_creation) - # Whether the DAG is a subdag - is_subdag = Column(Boolean, default=False) # Whether that DAG was seen on the last DagBag load is_active = Column(Boolean, default=False) # Last time the scheduler started @@ -3818,14 +3683,8 @@ class DagModel(Base): # Earliest time at which this ``next_dagrun`` can be created. 
next_dagrun_create_after = Column(UtcDateTime) - __table_args__ = ( - Index("idx_root_dag_id", root_dag_id, unique=False), - Index("idx_next_dagrun_create_after", next_dagrun_create_after, unique=False), - ) + __table_args__ = (Index("idx_next_dagrun_create_after", next_dagrun_create_after, unique=False),) - parent_dag = relationship( - "DagModel", remote_side=[dag_id], primaryjoin=root_dag_id == dag_id, foreign_keys=[root_dag_id] - ) schedule_dataset_references = relationship( "DagScheduleDatasetReference", back_populates="dag", @@ -3898,7 +3757,6 @@ def get_dagmodel(dag_id: str, session: Session = NEW_SESSION) -> DagModel | None return session.get( DagModel, dag_id, - options=[joinedload(DagModel.parent_dag)], ) @classmethod @@ -3963,19 +3821,17 @@ def relative_fileloc(self) -> pathlib.Path | None: return path @provide_session - def set_is_paused(self, is_paused: bool, including_subdags: bool = True, session=NEW_SESSION) -> None: + def set_is_paused(self, is_paused: bool, session=NEW_SESSION) -> None: """ Pause/Un-pause a DAG. :param is_paused: Is the DAG paused - :param including_subdags: whether to include the DAG's subdags :param session: session """ filter_query = [ DagModel.dag_id == self.dag_id, ] - if including_subdags: - filter_query.append(DagModel.root_dag_id == self.dag_id) + session.execute( update(DagModel) .where(or_(*filter_query)) diff --git a/airflow/models/dagbag.py b/airflow/models/dagbag.py index f384bfcd84ea8..3fa3af180c92d 100644 --- a/airflow/models/dagbag.py +++ b/airflow/models/dagbag.py @@ -259,8 +259,6 @@ def get_dag(self, dag_id, session: Session = None): root_dag_id = dag_id if dag_id in self.dags: dag = self.dags[dag_id] - if dag.parent_dag: - root_dag_id = dag.parent_dag.dag_id # If DAG Model is absent, we can't check last_expired property. Is the DAG not yet synchronized? orm_dag = DagModel.get_current(root_dag_id, session=session) @@ -272,11 +270,7 @@ def get_dag(self, dag_id, session: Session = None): is_expired = orm_dag.last_expired and dag and dag.last_loaded < orm_dag.last_expired if is_expired: # Remove associated dags so we can re-add them. - self.dags = { - key: dag - for key, dag in self.dags.items() - if root_dag_id != key and not (dag.parent_dag and root_dag_id == dag.parent_dag.dag_id) - } + self.dags = {key: dag for key, dag in self.dags.items()} if is_missing or is_expired: # Reprocess source file. found_dags = self.process_file( @@ -300,8 +294,6 @@ def _add_dag_from_db(self, dag_id: str, session: Session): row.load_op_links = self.load_op_links dag = row.dag - for subdag in dag.subdags: - self.dags[subdag.dag_id] = subdag self.dags[dag.dag_id] = dag self.dags_last_fetched[dag.dag_id] = timezone.utcnow() self.dags_hash[dag.dag_id] = row.dag_hash @@ -476,7 +468,7 @@ def _process_modules(self, filepath, mods, file_last_changed_on_disk): dag.fileloc = mod.__file__ try: dag.validate() - self.bag_dag(dag=dag, root_dag=dag) + self.bag_dag(dag=dag) except AirflowClusterPolicySkipDag: pass except Exception as e: @@ -485,25 +477,15 @@ def _process_modules(self, filepath, mods, file_last_changed_on_disk): self.file_last_changed[dag.fileloc] = file_last_changed_on_disk else: found_dags.append(dag) - found_dags += dag.subdags return found_dags - def bag_dag(self, dag, root_dag): + def bag_dag(self, dag): """ - Add the DAG into the bag, recurses into sub dags. + Add the DAG into the bag. :raises: AirflowDagCycleException if a cycle is detected in this dag or its subdags. 
:raises: AirflowDagDuplicatedIdException if this dag or its subdags already exists in the bag. """ - self._bag_dag(dag=dag, root_dag=root_dag, recursive=True) - - def _bag_dag(self, *, dag, root_dag, recursive): - """ - Actual implementation of bagging a dag. - - The only purpose of this is to avoid exposing ``recursive`` in ``bag_dag()``, - intended to only be used by the ``_bag_dag()`` implementation. - """ check_cycle(dag) # throws if a task cycle is found dag.resolve_template_files() @@ -531,17 +513,7 @@ def _bag_dag(self, *, dag, root_dag, recursive): self.log.exception(e) raise AirflowClusterPolicyError(e) - subdags = dag.subdags - try: - # DAG.subdags automatically performs DFS search, so we don't recurse - # into further _bag_dag() calls. - if recursive: - for subdag in subdags: - subdag.fileloc = dag.fileloc - subdag.parent_dag = dag - self._bag_dag(dag=subdag, root_dag=root_dag, recursive=False) - prev_dag = self.dags.get(dag.dag_id) if prev_dag and prev_dag.fileloc != dag.fileloc: raise AirflowDagDuplicatedIdException( @@ -554,12 +526,6 @@ def _bag_dag(self, *, dag, root_dag, recursive): except (AirflowDagCycleException, AirflowDagDuplicatedIdException): # There was an error in bagging the dag. Remove it from the list of dags self.log.exception("Exception bagging dag: %s", dag.dag_id) - # Only necessary at the root level since DAG.subdags automatically - # performs DFS to search through all subdags - if recursive: - for subdag in subdags: - if subdag.dag_id in self.dags: - del self.dags[subdag.dag_id] raise def collect_dags( @@ -627,15 +593,6 @@ def collect_dags_from_db(self): # from the table by the scheduler job. self.dags = SerializedDagModel.read_all_dags() - # Adds subdags. - # DAG post-processing steps such as self.bag_dag and croniter are not needed as - # they are done by scheduler before serialization. 
- subdags = {} - for dag in self.dags.values(): - for subdag in dag.subdags: - subdags[subdag.dag_id] = subdag - self.dags.update(subdags) - def dagbag_report(self): """Print a report around DagBag loading stats.""" stats = self.dagbag_stats @@ -678,8 +635,6 @@ def _serialize_dag_capturing_errors(dag, session, processor_subdir): We can't place them directly in import_errors, as this may be retried, and work the next time """ - if dag.is_subdag: - return [] try: # We can't use bulk_write_to_db as we want to capture each error individually dag_was_updated = SerializedDagModel.write_dag( @@ -739,13 +694,13 @@ def sync_to_db(self, processor_subdir: str | None = None, session: Session = NEW @provide_session def _sync_perm_for_dag(cls, dag: DAG, session: Session = NEW_SESSION): """Sync DAG specific permissions.""" - root_dag_id = dag.parent_dag.dag_id if dag.parent_dag else dag.dag_id + dag_id = dag.dag_id - cls.logger().debug("Syncing DAG permissions: %s to the DB", root_dag_id) + cls.logger().debug("Syncing DAG permissions: %s to the DB", dag_id) from airflow.www.security_appless import ApplessAirflowSecurityManager security_manager = ApplessAirflowSecurityManager(session=session) - security_manager.sync_perm_for_dag(root_dag_id, dag.access_control) + security_manager.sync_perm_for_dag(dag_id, dag.access_control) def generate_md5_hash(context): diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index 523f94cded38f..1ff2f6316a5a7 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -590,7 +590,6 @@ def _check_last_n_dagruns_failed(self, dag_id, max_consecutive_failed_dag_runs, ) filter_query = [ DagModel.dag_id == self.dag_id, - DagModel.root_dag_id == self.dag_id, # for sub-dags ] session.execute( update(DagModel) diff --git a/airflow/models/mappedoperator.py b/airflow/models/mappedoperator.py index 2377fdab00756..2cb7d993fc9f9 100644 --- a/airflow/models/mappedoperator.py +++ b/airflow/models/mappedoperator.py @@ -157,10 +157,6 @@ class OperatorPartial: _expand_called: bool = False # Set when expand() is called to ease user debugging. def __attrs_post_init__(self): - from airflow.operators.subdag import SubDagOperator - - if issubclass(self.operator_class, SubDagOperator): - raise TypeError("Mapping over deprecated SubDagOperator is not supported") validate_mapping_kwargs(self.operator_class, "partial", self.kwargs) def __repr__(self) -> str: @@ -306,7 +302,6 @@ class MappedOperator(AbstractOperator): This should be a name to call ``getattr()`` on. """ - subdag: None = None # Since we don't support SubDagOperator, this is always None. supports_lineage: bool = False HIDE_ATTRS_FROM_UI: ClassVar[frozenset[str]] = AbstractOperator.HIDE_ATTRS_FROM_UI | frozenset( @@ -347,7 +342,6 @@ def get_serialized_fields(cls): "dag", "deps", "expand_input", # This is needed to be able to accept XComArg. - "subdag", "task_group", "upstream_task_ids", "supports_lineage", diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 99495f81b2ca1..dec843451a98a 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -305,8 +305,6 @@ def get(cls, dag_id: str, session: Session = NEW_SESSION) -> SerializedDagModel """ Get the SerializedDAG for the given dag ID. - It will cope with being passed the ID of a subdag by looking up the root dag_id from the DAG table. 
- :param dag_id: the DAG to fetch :param session: ORM Session """ @@ -314,11 +312,7 @@ def get(cls, dag_id: str, session: Session = NEW_SESSION) -> SerializedDagModel if row: return row - # If we didn't find a matching DAG id then ask the DAG table to find - # out the root dag - root_dag_id = session.scalar(select(DagModel.root_dag_id).where(DagModel.dag_id == dag_id)) - - return session.scalar(select(cls).where(cls.dag_id == root_dag_id)) + return session.scalar(select(cls).where(cls.dag_id == dag_id)) @staticmethod @provide_session @@ -337,13 +331,12 @@ def bulk_sync_to_db( :return: None """ for dag in dags: - if not dag.is_subdag: - SerializedDagModel.write_dag( - dag=dag, - min_update_interval=MIN_SERIALIZED_DAG_UPDATE_INTERVAL, - processor_subdir=processor_subdir, - session=session, - ) + SerializedDagModel.write_dag( + dag=dag, + min_update_interval=MIN_SERIALIZED_DAG_UPDATE_INTERVAL, + processor_subdir=processor_subdir, + session=session, + ) @classmethod @provide_session diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 284b313cdae4d..cedc25423900e 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -2098,12 +2098,7 @@ def _command_as_list( should_pass_filepath = not pickle_id and dag path: PurePath | None = None if should_pass_filepath: - if dag.is_subdag: - if TYPE_CHECKING: - assert dag.parent_dag is not None - path = dag.parent_dag.relative_fileloc - else: - path = dag.relative_fileloc + path = dag.relative_fileloc if path: if not path.is_absolute(): diff --git a/airflow/operators/__init__.py b/airflow/operators/__init__.py index 7fc63e1c8f507..f3195123b6a48 100644 --- a/airflow/operators/__init__.py +++ b/airflow/operators/__init__.py @@ -190,10 +190,6 @@ "sqlite_operator": { "SqliteOperator": "airflow.providers.sqlite.operators.sqlite.SqliteOperator", }, - "subdag_operator": { - "SkippedStatePropagationOptions": "airflow.operators.subdag.SkippedStatePropagationOptions", - "SubDagOperator": "airflow.operators.subdag.SubDagOperator", - }, } add_deprecated_classes(__deprecated_classes, __name__) diff --git a/airflow/operators/subdag.py b/airflow/operators/subdag.py deleted file mode 100644 index 7cbfa03198758..0000000000000 --- a/airflow/operators/subdag.py +++ /dev/null @@ -1,247 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -This module is deprecated. Please use :mod:`airflow.utils.task_group`. - -The module which provides a way to nest your DAGs and so your levels of complexity. 
-""" - -from __future__ import annotations - -import warnings -from enum import Enum -from typing import TYPE_CHECKING - -from sqlalchemy import select - -from airflow.api.common.experimental.get_task_instance import get_task_instance -from airflow.api_internal.internal_api_call import InternalApiConfig -from airflow.exceptions import AirflowException, RemovedInAirflow3Warning, TaskInstanceNotFound -from airflow.models import DagRun -from airflow.models.dag import DagContext -from airflow.models.pool import Pool -from airflow.models.taskinstance import TaskInstance -from airflow.sensors.base import BaseSensorOperator -from airflow.utils.session import NEW_SESSION, create_session, provide_session -from airflow.utils.state import DagRunState, TaskInstanceState -from airflow.utils.types import DagRunType - -if TYPE_CHECKING: - from datetime import datetime - - from sqlalchemy.orm.session import Session - - from airflow.models.dag import DAG - from airflow.utils.context import Context - - -class SkippedStatePropagationOptions(Enum): - """Available options for skipped state propagation of subdag's tasks to parent dag tasks.""" - - ALL_LEAVES = "all_leaves" - ANY_LEAF = "any_leaf" - - -class SubDagOperator(BaseSensorOperator): - """ - This class is deprecated, please use :class:`airflow.utils.task_group.TaskGroup`. - - This runs a sub dag. By convention, a sub dag's dag_id - should be prefixed by its parent and a dot. As in `parent.child`. - Although SubDagOperator can occupy a pool/concurrency slot, - user can specify the mode=reschedule so that the slot will be - released periodically to avoid potential deadlock. - - :param subdag: the DAG object to run as a subdag of the current DAG. - :param session: sqlalchemy session - :param conf: Configuration for the subdag - :param propagate_skipped_state: by setting this argument you can define - whether the skipped state of leaf task(s) should be propagated to the - parent dag's downstream task. - """ - - ui_color = "#555" - ui_fgcolor = "#fff" - - subdag: DAG - - @provide_session - def __init__( - self, - *, - subdag: DAG, - session: Session = NEW_SESSION, - conf: dict | None = None, - propagate_skipped_state: SkippedStatePropagationOptions | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.subdag = subdag - self.conf = conf - self.propagate_skipped_state = propagate_skipped_state - - self._validate_dag(kwargs) - if not InternalApiConfig.get_use_internal_api(): - self._validate_pool(session) - - warnings.warn( - """This class is deprecated. Please use `airflow.utils.task_group.TaskGroup`.""", - RemovedInAirflow3Warning, - stacklevel=4, - ) - - def _validate_dag(self, kwargs): - dag = kwargs.get("dag") or DagContext.get_current_dag() - - if not dag: - raise AirflowException("Please pass in the `dag` param or call within a DAG context manager") - - if dag.dag_id + "." + kwargs["task_id"] != self.subdag.dag_id: - raise AirflowException( - f"The subdag's dag_id should have the form '{{parent_dag_id}}.{{this_task_id}}'. " - f"Expected '{dag.dag_id}.{kwargs['task_id']}'; received '{self.subdag.dag_id}'." 
- ) - - def _validate_pool(self, session): - if self.pool: - conflicts = [t for t in self.subdag.tasks if t.pool == self.pool] - if conflicts: - # only query for pool conflicts if one may exist - pool = session.scalar(select(Pool).where(Pool.slots == 1, Pool.pool == self.pool)) - if pool and any(t.pool == self.pool for t in self.subdag.tasks): - raise AirflowException( - f"SubDagOperator {self.task_id} and subdag task{'s' if len(conflicts) > 1 else ''} " - f"{', '.join(t.task_id for t in conflicts)} both use pool {self.pool}, " - f"but the pool only has 1 slot. The subdag tasks will never run." - ) - - def _get_dagrun(self, execution_date): - dag_runs = DagRun.find( - dag_id=self.subdag.dag_id, - execution_date=execution_date, - ) - return dag_runs[0] if dag_runs else None - - def _reset_dag_run_and_task_instances(self, dag_run: DagRun, execution_date: datetime) -> None: - """ - Set task instance states to allow for execution. - - The state of the DAG run will be set to RUNNING, and failed task - instances to ``None`` for scheduler to pick up. - - :param dag_run: DAG run to reset. - :param execution_date: Execution date to select task instances. - """ - with create_session() as session: - dag_run.state = DagRunState.RUNNING - session.merge(dag_run) - failed_task_instances = session.scalars( - select(TaskInstance) - .where(TaskInstance.dag_id == self.subdag.dag_id) - .where(TaskInstance.execution_date == execution_date) - .where(TaskInstance.state.in_((TaskInstanceState.FAILED, TaskInstanceState.UPSTREAM_FAILED))) - ) - - for task_instance in failed_task_instances: - task_instance.state = None - session.merge(task_instance) - session.commit() - - def pre_execute(self, context): - super().pre_execute(context) - execution_date = context["execution_date"] - dag_run = self._get_dagrun(execution_date) - - if dag_run is None: - if context["data_interval_start"] is None or context["data_interval_end"] is None: - data_interval: tuple[datetime, datetime] | None = None - else: - data_interval = (context["data_interval_start"], context["data_interval_end"]) - dag_run = self.subdag.create_dagrun( - run_type=DagRunType.SCHEDULED, - execution_date=execution_date, - state=DagRunState.RUNNING, - conf=self.conf, - external_trigger=True, - data_interval=data_interval, - ) - self.log.info("Created DagRun: %s", dag_run.run_id) - else: - self.log.info("Found existing DagRun: %s", dag_run.run_id) - if dag_run.state == DagRunState.FAILED: - self._reset_dag_run_and_task_instances(dag_run, execution_date) - - def poke(self, context: Context): - execution_date = context["execution_date"] - dag_run = self._get_dagrun(execution_date=execution_date) - return dag_run.state != DagRunState.RUNNING - - def post_execute(self, context, result=None): - super().post_execute(context) - execution_date = context["execution_date"] - dag_run = self._get_dagrun(execution_date=execution_date) - self.log.info("Execution finished. State is %s", dag_run.state) - - if dag_run.state != DagRunState.SUCCESS: - raise AirflowException(f"Expected state: SUCCESS. 
Actual state: {dag_run.state}") - - if self.propagate_skipped_state and self._check_skipped_states(context): - self._skip_downstream_tasks(context) - - def _check_skipped_states(self, context): - leaves_tis = self._get_leaves_tis(context["execution_date"]) - - if self.propagate_skipped_state == SkippedStatePropagationOptions.ANY_LEAF: - return any(ti.state == TaskInstanceState.SKIPPED for ti in leaves_tis) - if self.propagate_skipped_state == SkippedStatePropagationOptions.ALL_LEAVES: - return all(ti.state == TaskInstanceState.SKIPPED for ti in leaves_tis) - raise AirflowException( - f"Unimplemented SkippedStatePropagationOptions {self.propagate_skipped_state} used." - ) - - def _get_leaves_tis(self, execution_date): - leaves_tis = [] - for leaf in self.subdag.leaves: - try: - ti = get_task_instance( - dag_id=self.subdag.dag_id, task_id=leaf.task_id, execution_date=execution_date - ) - leaves_tis.append(ti) - except TaskInstanceNotFound: - continue - return leaves_tis - - def _skip_downstream_tasks(self, context): - self.log.info( - "Skipping downstream tasks because propagate_skipped_state is set to %s " - "and skipped task(s) were found.", - self.propagate_skipped_state, - ) - - downstream_tasks = context["task"].downstream_list - self.log.debug("Downstream task_ids %s", downstream_tasks) - - if downstream_tasks: - self.skip( - context["dag_run"], - context["execution_date"], - downstream_tasks, - map_index=context["ti"].map_index, - ) - - self.log.info("Done.") diff --git a/airflow/providers/celery/executors/celery_executor_utils.py b/airflow/providers/celery/executors/celery_executor_utils.py index 5dd2d59ab0ead..8f25f040c90ad 100644 --- a/airflow/providers/celery/executors/celery_executor_utils.py +++ b/airflow/providers/celery/executors/celery_executor_utils.py @@ -112,8 +112,7 @@ def on_celery_import_modules(*args, **kwargs): import airflow.jobs.local_task_job_runner import airflow.macros import airflow.operators.bash - import airflow.operators.python - import airflow.operators.subdag # noqa: F401 + import airflow.operators.python # noqa: F401 with contextlib.suppress(ImportError): import numpy # noqa: F401 diff --git a/airflow/providers/cncf/kubernetes/operators/pod.py b/airflow/providers/cncf/kubernetes/operators/pod.py index 921fdaa92762f..6b3b635d220c6 100644 --- a/airflow/providers/cncf/kubernetes/operators/pod.py +++ b/airflow/providers/cncf/kubernetes/operators/pod.py @@ -495,8 +495,9 @@ def _get_ti_pod_labels(context: Context | None = None, include_try_number: bool if include_try_number: labels.update(try_number=ti.try_number) # In the case of sub dags this is just useful - if context["dag"].parent_dag: - labels["parent_dag_id"] = context["dag"].parent_dag.dag_id + if getattr(context["dag"], "parent_dag", False): + labels["parent_dag_id"] = context["dag"].parent_dag.dag_id # type: ignore[attr-defined] + # Ensure that label is valid for Kube, # and if not truncate/remove invalid chars and replace with short hash. 
for label_id, label in labels.items(): diff --git a/airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py b/airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py index 82df0a2ec90c0..d8a2867c64aca 100644 --- a/airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py +++ b/airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py @@ -202,7 +202,7 @@ def create_labels_for_pod(context: dict | None = None, include_try_number: bool labels.update(try_number=ti.try_number) # In the case of sub dags this is just useful - if context["dag"].is_subdag: + if getattr(context["dag"], "is_subdag", False): labels["parent_dag_id"] = context["dag"].parent_dag.dag_id # Ensure that label is valid for Kube, # and if not truncate/remove invalid chars and replace with short hash. diff --git a/airflow/providers/fab/auth_manager/fab_auth_manager.py b/airflow/providers/fab/auth_manager/fab_auth_manager.py index 344df7588de7d..ceec5c0e37bd3 100644 --- a/airflow/providers/fab/auth_manager/fab_auth_manager.py +++ b/airflow/providers/fab/auth_manager/fab_auth_manager.py @@ -503,7 +503,7 @@ def _get_root_dag_id(self, dag_id: str) -> str: :meta private: """ - if "." in dag_id: + if "." in dag_id and hasattr(DagModel, "root_dag_id"): return self.appbuilder.get_session.scalar( select(DagModel.dag_id, DagModel.root_dag_id).where(DagModel.dag_id == dag_id).limit(1) ) diff --git a/airflow/providers/fab/auth_manager/security_manager/override.py b/airflow/providers/fab/auth_manager/security_manager/override.py index e2208e5fb409f..86d76de76ff2b 100644 --- a/airflow/providers/fab/auth_manager/security_manager/override.py +++ b/airflow/providers/fab/auth_manager/security_manager/override.py @@ -1073,7 +1073,7 @@ def create_dag_specific_permissions(self) -> None: dags = dagbag.dags.values() for dag in dags: - root_dag_id = dag.parent_dag.dag_id if dag.parent_dag else dag.dag_id + root_dag_id = (getattr(dag, "parent_dag", None) or dag).dag_id for resource_name, resource_values in self.RESOURCE_DETAILS_MAP.items(): dag_resource_name = self._resource_name(root_dag_id, resource_name) for action_name in resource_values["actions"]: @@ -2828,7 +2828,7 @@ def filter_roles_by_perm_with_action(self, action_name: str, role_ids: list[int] ).all() def _get_root_dag_id(self, dag_id: str) -> str: - if "." in dag_id: + if "." 
in dag_id and hasattr(DagModel, "root_dag_id"): dm = self.appbuilder.get_session.execute( select(DagModel.dag_id, DagModel.root_dag_id).where(DagModel.dag_id == dag_id) ).one() diff --git a/airflow/serialization/pydantic/dag.py b/airflow/serialization/pydantic/dag.py index fa1cac535f60b..a1fea6384aade 100644 --- a/airflow/serialization/pydantic/dag.py +++ b/airflow/serialization/pydantic/dag.py @@ -108,10 +108,8 @@ class DagModelPydantic(BaseModelPydantic): """Serializable representation of the DagModel ORM SqlAlchemyModel used by internal API.""" dag_id: str - root_dag_id: Optional[str] is_paused_at_creation: bool = airflow_conf.getboolean("core", "dags_are_paused_at_creation") is_paused: bool = is_paused_at_creation - is_subdag: Optional[bool] = False is_active: Optional[bool] = False last_parsed_time: Optional[datetime] last_pickled: Optional[datetime] @@ -127,7 +125,6 @@ class DagModelPydantic(BaseModelPydantic): timetable_description: Optional[str] tags: List[DagTagPydantic] # noqa: UP006 dag_owner_links: List[DagOwnerAttributesPydantic] # noqa: UP006 - parent_dag: Optional[PydanticDag] max_active_tasks: int max_active_runs: Optional[int] diff --git a/airflow/serialization/schema.json b/airflow/serialization/schema.json index 84b2e2ed4a75f..d76bfcb1a40bd 100644 --- a/airflow/serialization/schema.json +++ b/airflow/serialization/schema.json @@ -157,7 +157,6 @@ } }, "catchup": { "type": "boolean" }, - "is_subdag": { "type": "boolean" }, "fileloc": { "type" : "string"}, "_processor_dags_folder": { "anyOf": [ diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index d110271c3da08..a10916852b653 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -1273,8 +1273,6 @@ def populate_operator(cls, op: Operator, encoded_op: dict[str, Any]) -> None: continue elif k == "downstream_task_ids": v = set(v) - elif k == "subdag": - v = SerializedDAG.deserialize_dag(v) elif k in {"retry_delay", "execution_timeout", "sla", "max_retry_delay"}: v = cls._deserialize_timedelta(v) elif k in encoded_op["template_fields"]: @@ -1359,9 +1357,6 @@ def set_task_dag_references(task: Operator, dag: DAG) -> None: if getattr(task, date_attr, None) is None: setattr(task, date_attr, getattr(dag, date_attr, None)) - if task.subdag is not None: - task.subdag.parent_dag = dag - # Dereference expand_input and op_kwargs_expand_input. for k in ("expand_input", "op_kwargs_expand_input"): if isinstance(kwargs_ref := getattr(task, k, None), _ExpandInputRef): diff --git a/airflow/utils/db.py b/airflow/utils/db.py index b5e722cb50056..a86ca5dbf5628 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -95,7 +95,7 @@ class MappedClassProtocol(Protocol): _REVISION_HEADS_MAP = { "2.10.0": "22ed7efa9da2", - "3.0.0": "044f740568ec", + "3.0.0": "d0f1c55954fa", } diff --git a/airflow/utils/task_group.py b/airflow/utils/task_group.py index f3d46ea6eac7c..69a5d015bd426 100644 --- a/airflow/utils/task_group.py +++ b/airflow/utils/task_group.py @@ -491,7 +491,7 @@ def hierarchical_alphabetical_sort(self): self.children.values(), key=lambda node: (not isinstance(node, TaskGroup), node.node_id) ) - def topological_sort(self, _include_subdag_tasks: bool = False): + def topological_sort(self): """ Sorts children in topographical order, such that a task comes after any of its upstream dependencies. 
@@ -499,8 +499,6 @@ def topological_sort(self, _include_subdag_tasks: bool = False): """ # This uses a modified version of Kahn's Topological Sort algorithm to # not have to pre-compute the "in-degree" of the nodes. - from airflow.operators.subdag import SubDagOperator # Avoid circular import - graph_unsorted = copy.copy(self.children) graph_sorted: list[DAGNode] = [] @@ -539,10 +537,6 @@ def topological_sort(self, _include_subdag_tasks: bool = False): acyclic = True del graph_unsorted[node.node_id] graph_sorted.append(node) - if _include_subdag_tasks and isinstance(node, SubDagOperator): - graph_sorted.extend( - node.subdag.task_group.topological_sort(_include_subdag_tasks=True) - ) if not acyclic: raise AirflowDagCycleException(f"A cyclic dependency occurred in dag: {self.dag_id}") diff --git a/airflow/www/static/js/dag/details/taskInstance/Nav.tsx b/airflow/www/static/js/dag/details/taskInstance/Nav.tsx index e24088085a9f9..22d7fdd696fa8 100644 --- a/airflow/www/static/js/dag/details/taskInstance/Nav.tsx +++ b/airflow/www/static/js/dag/details/taskInstance/Nav.tsx @@ -20,7 +20,7 @@ import React, { forwardRef } from "react"; import { Flex } from "@chakra-ui/react"; -import { getMetaValue, appendSearchParams } from "src/utils"; +import { getMetaValue } from "src/utils"; import LinkButton from "src/components/LinkButton"; import type { Task } from "src/types"; import URLSearchParamsWrapper from "src/utils/URLSearchParamWrapper"; @@ -28,18 +28,16 @@ import URLSearchParamsWrapper from "src/utils/URLSearchParamWrapper"; const dagId = getMetaValue("dag_id"); const taskInstancesUrl = getMetaValue("task_instances_list_url"); const taskUrl = getMetaValue("task_url"); -const gridUrl = getMetaValue("grid_url"); interface Props { taskId: Task["id"]; executionDate: string; - operator?: string; isMapped?: boolean; mapIndex?: number; } const Nav = forwardRef( - ({ taskId, executionDate, operator, isMapped = false, mapIndex }, ref) => { + ({ taskId, executionDate, isMapped = false, mapIndex }, ref) => { if (!taskId) return null; const params = new URLSearchParamsWrapper({ task_id: taskId, @@ -52,32 +50,16 @@ const Nav = forwardRef( _flt_3_task_id: taskId, _oc_TaskInstanceModelView: "dag_run.execution_date", }); - const subDagParams = new URLSearchParamsWrapper({ - execution_date: executionDate, - }).toString(); if (mapIndex !== undefined && mapIndex >= 0) listParams.append("_flt_0_map_index", mapIndex.toString()); const allInstancesLink = `${taskInstancesUrl}?${listParams.toString()}`; - const subDagLink = appendSearchParams( - gridUrl.replace(dagId, `${dagId}.${taskId}`), - subDagParams - ); - - // TODO: base subdag zooming as its own attribute instead of via operator name - const isSubDag = operator === "SubDagOperator"; - return ( {(!isMapped || mapIndex !== undefined) && ( - <> - More Details - {isSubDag && ( - Zoom into SubDag - )} - + More Details )} List All Instances diff --git a/airflow/www/static/js/dag/details/taskInstance/index.tsx b/airflow/www/static/js/dag/details/taskInstance/index.tsx index 74f317867aea3..44311249b0cec 100644 --- a/airflow/www/static/js/dag/details/taskInstance/index.tsx +++ b/airflow/www/static/js/dag/details/taskInstance/index.tsx @@ -54,7 +54,6 @@ const TaskInstance = ({ taskId, runId, mapIndex }: Props) => { const children = group?.children; const isMapped = group?.isMapped; - const operator = group?.operator; const isMappedTaskSummary = !!isMapped && !isMapIndexDefined && taskId; const isGroup = !!children; @@ -90,7 +89,6 @@ const TaskInstance = ({ taskId, runId, 
mapIndex }: Props) => { isMapped={isMapped} mapIndex={mapIndex} executionDate={run?.executionDate} - operator={operator} /> )} {!isGroupOrMappedTaskSummary && } diff --git a/airflow/www/static/js/types/api-generated.ts b/airflow/www/static/js/types/api-generated.ts index a892e327ace07..30948df332bb1 100644 --- a/airflow/www/static/js/types/api-generated.ts +++ b/airflow/www/static/js/types/api-generated.ts @@ -1031,8 +1031,6 @@ export interface components { * *New in version 2.9.0* */ dag_display_name?: string; - /** @description If the DAG is SubDAG then it is the top level DAG identifier. Otherwise, null. */ - root_dag_id?: string | null; /** @description Whether the DAG is paused. */ is_paused?: boolean | null; /** @@ -1043,8 +1041,6 @@ export interface components { * *Changed in version 2.2.0*: Field is read-only. */ is_active?: boolean | null; - /** @description Whether the DAG is SubDAG. */ - is_subdag?: boolean; /** * Format: date-time * @description The last time the DAG was parsed. @@ -2090,10 +2086,6 @@ export interface components { * @default false */ only_running?: boolean; - /** @description Clear tasks in subdags and clear external tasks indicated by ExternalTaskMarker. */ - include_subdags?: boolean; - /** @description Clear tasks in the parent dag of the subdag. */ - include_parentdag?: boolean; /** @description Set state of DAG runs to RUNNING. */ reset_dag_runs?: boolean; /** @description The DagRun ID for this task instance */ diff --git a/airflow/www/templates/airflow/dag.html b/airflow/www/templates/airflow/dag.html index d3a7995440c05..973ca812e3fb2 100644 --- a/airflow/www/templates/airflow/dag.html +++ b/airflow/www/templates/airflow/dag.html @@ -110,35 +110,25 @@ {% endblock %} {% block content %} - {% if dag.parent_dag is defined and dag.parent_dag %} - - - DAG: {{ dag.parent_dag.dag_display_name }} - {% endif %} -

- {% if dag.parent_dag is defined and dag.parent_dag %} - SUBDAG: {{ dag.dag_id }} + {% if can_edit_dag %} + {% set switch_tooltip = 'Pause/Unpause DAG' %} {% else %} - {% if can_edit_dag %} - {% set switch_tooltip = 'Pause/Unpause DAG' %} - {% else %} - {% set switch_tooltip = 'DAG is Paused' if dag_is_paused else 'DAG is Active' %} - {% endif %} - - DAG: {{ dag.dag_display_name }} - {{ dag.description[0:150] + '…' if dag.description and dag.description|length > 150 else dag.description|default('', true) }} - {% if dag_model is defined and dag_model.max_consecutive_failed_dag_runs is defined and dag_model.max_consecutive_failed_dag_runs > 0 %} - - {% endif %} + {% set switch_tooltip = 'DAG is Paused' if dag_is_paused else 'DAG is Active' %} + {% endif %} + + DAG: {{ dag.dag_display_name }} + {{ dag.description[0:150] + '…' if dag.description and dag.description|length > 150 else dag.description|default('', true) }} + {% if dag_model is defined and dag_model.max_consecutive_failed_dag_runs is defined and dag_model.max_consecutive_failed_dag_runs > 0 %} + {% endif %}

diff --git a/airflow/www/views.py b/airflow/www/views.py index a485f84ed4b1c..d7b670303c86f 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -844,7 +844,7 @@ def index(self): with create_session() as session: # read orm_dags from the db - dags_query = select(DagModel).where(~DagModel.is_subdag, DagModel.is_active) + dags_query = select(DagModel).where(DagModel.is_active) if arg_search_query: escaped_arg_search_query = arg_search_query.replace("_", r"\_") @@ -2341,8 +2341,6 @@ def _clear_dag_tis( start_date=start_date, end_date=end_date, task_ids=task_ids, - include_subdags=recursive, - include_parentdag=recursive, only_failed=only_failed, session=session, ) @@ -2355,8 +2353,6 @@ def _clear_dag_tis( start_date=start_date, end_date=end_date, task_ids=task_ids, - include_subdags=recursive, - include_parentdag=recursive, only_failed=only_failed, dry_run=True, session=session, @@ -5488,8 +5484,6 @@ def _clear_task_instances( start_date=dag_run.execution_date, end_date=dag_run.execution_date, task_ids=downstream_task_ids_to_clear, - include_subdags=False, - include_parentdag=False, session=session, dry_run=True, ) @@ -5638,7 +5632,6 @@ def autocomplete(self, session: Session = NEW_SESSION): DagModel.dag_id.label("name"), DagModel._dag_display_property_value.label("dag_display_name"), ).where( - ~DagModel.is_subdag, DagModel.is_active, or_( DagModel.dag_id.ilike(f"%{query}%"), @@ -5653,7 +5646,7 @@ def autocomplete(self, session: Session = NEW_SESSION): sqla.literal(None).label("dag_display_name"), ) .distinct() - .where(~DagModel.is_subdag, DagModel.is_active, DagModel.owners.ilike(f"%{query}%")) + .where(DagModel.is_active, DagModel.owners.ilike(f"%{query}%")) ) # Hide DAGs if not showing status: "all" diff --git a/docs/apache-airflow/administration-and-deployment/pools.rst b/docs/apache-airflow/administration-and-deployment/pools.rst index 5a906d3f6fed2..56a830c33caa5 100644 --- a/docs/apache-airflow/administration-and-deployment/pools.rst +++ b/docs/apache-airflow/administration-and-deployment/pools.rst @@ -83,8 +83,3 @@ for the heavy task to complete before they are executed. Here, in terms of resou This implementation can prevent overwhelming system resources, which (in this example) could occur when a heavy and a light task are running concurrently. On the other hand, both light tasks can run concurrently since they only occupy one pool slot each, while the heavy task would have to wait for two pool slots to become available before getting executed. - -.. warning:: - - Pools and SubDAGs do not interact as you might first expect. SubDAGs will *not* honor any pool you set on them at - the top level; pools must be set on the tasks *inside* the SubDAG directly. diff --git a/docs/apache-airflow/core-concepts/dags.rst b/docs/apache-airflow/core-concepts/dags.rst index 482b604f33ed9..acc7b0ff16f48 100644 --- a/docs/apache-airflow/core-concepts/dags.rst +++ b/docs/apache-airflow/core-concepts/dags.rst @@ -543,7 +543,7 @@ TaskGroups A TaskGroup can be used to organize tasks into hierarchical groups in Graph view. It is useful for creating repeating patterns and cutting down visual clutter. -Unlike :ref:`concepts:subdags`, TaskGroups are purely a UI grouping concept. Tasks in TaskGroups live on the same original DAG, and honor all the DAG settings and pool configurations. +Tasks in TaskGroups live on the same original DAG, and honor all the DAG settings and pool configurations. .. 
image:: /img/task_group.gif @@ -680,96 +680,6 @@ This is especially useful if your tasks are built dynamically from configuration """ -.. _concepts:subdags: - -SubDAGs -------- - -.. note:: - - SubDAG is deprecated hence TaskGroup is always the preferred choice. - - -Sometimes, you will find that you are regularly adding exactly the same set of tasks to every DAG, or you want to group a lot of tasks into a single, logical unit. This is what SubDAGs are for. - -For example, here's a DAG that has a lot of parallel tasks in two sections: - -.. image:: /img/subdag_before.png - -We can combine all of the parallel ``task-*`` operators into a single SubDAG, so that the resulting DAG resembles the following: - -.. image:: /img/subdag_after.png - -Note that SubDAG operators should contain a factory method that returns a DAG object. This will prevent the SubDAG from being treated like a separate DAG in the main UI - remember, if Airflow sees a DAG at the top level of a Python file, it will :ref:`load it as its own DAG `. For example: - -.. exampleinclude:: /../../airflow/example_dags/subdags/subdag.py - :language: python - :start-after: [START subdag] - :end-before: [END subdag] - -This SubDAG can then be referenced in your main DAG file: - -.. exampleinclude:: /../../airflow/example_dags/example_subdag_operator.py - :language: python - :dedent: 4 - :start-after: [START example_subdag_operator] - :end-before: [END example_subdag_operator] - -You can zoom into a :class:`~airflow.operators.subdag.SubDagOperator` from the graph view of the main DAG to show the tasks contained within the SubDAG: - -.. image:: /img/subdag_zoom.png - -Some other tips when using SubDAGs: - -- By convention, a SubDAG's ``dag_id`` should be prefixed by the name of its parent DAG and a dot (``parent.child``) -- You should share arguments between the main DAG and the SubDAG by passing arguments to the SubDAG operator (as demonstrated above) -- SubDAGs must have a schedule and be enabled. If the SubDAG's schedule is set to ``None`` or ``@once``, the SubDAG will succeed without having done anything. -- Clearing a :class:`~airflow.operators.subdag.SubDagOperator` also clears the state of the tasks within it. -- Marking success on a :class:`~airflow.operators.subdag.SubDagOperator` does not affect the state of the tasks within it. -- Refrain from using :ref:`concepts:depends-on-past` in tasks within the SubDAG as this can be confusing. -- You can specify an executor for the SubDAG. It is common to use the SequentialExecutor if you want to run the SubDAG in-process and effectively limit its parallelism to one. Using LocalExecutor can be problematic as it may over-subscribe your worker, running multiple tasks in a single slot. - -See ``airflow/example_dags`` for a demonstration. - - -.. note:: - - Parallelism is *not honored* by :class:`~airflow.operators.subdag.SubDagOperator`, and so resources could be consumed by SubdagOperators beyond any limits you may have set. - - - -TaskGroups vs SubDAGs ----------------------- - -SubDAGs, while serving a similar purpose as TaskGroups, introduces both performance and functional issues due to its implementation. - -* The SubDagOperator starts a BackfillJob, which ignores existing parallelism configurations potentially oversubscribing the worker environment. -* SubDAGs have their own DAG attributes. When the SubDAG DAG attributes are inconsistent with its parent DAG, unexpected behavior can occur. -* Unable to see the "full" DAG in one view as SubDAGs exists as a full fledged DAG. 
-* SubDAGs introduces all sorts of edge cases and caveats. This can disrupt user experience and expectation.
-
-TaskGroups, on the other hand, is a better option given that it is purely a UI grouping concept. All tasks within the TaskGroup still behave as any other tasks outside of the TaskGroup.
-
-You can see the core differences between these two constructs.
-
-+--------------------------------------------------------+--------------------------------------------------------+
-| TaskGroup                                              | SubDAG                                                 |
-+========================================================+========================================================+
-| Repeating patterns as part of the same DAG             | Repeating patterns as a separate DAG                   |
-+--------------------------------------------------------+--------------------------------------------------------+
-| One set of views and statistics for the DAG            | Separate set of views and statistics between parent    |
-|                                                        | and child DAGs                                         |
-+--------------------------------------------------------+--------------------------------------------------------+
-| One set of DAG configuration                           | Several sets of DAG configurations                     |
-+--------------------------------------------------------+--------------------------------------------------------+
-| Honors parallelism configurations through existing     | Does not honor parallelism configurations due to       |
-| SchedulerJob                                           | newly spawned BackfillJob                               |
-+--------------------------------------------------------+--------------------------------------------------------+
-| Simple construct declaration with context manager      | Complex DAG factory with naming restrictions           |
-+--------------------------------------------------------+--------------------------------------------------------+
-
-
-
 Packaging DAGs
 --------------
diff --git a/docs/apache-airflow/core-concepts/overview.rst b/docs/apache-airflow/core-concepts/overview.rst
index 767b7e8990fc4..e10cf9d5785d8 100644
--- a/docs/apache-airflow/core-concepts/overview.rst
+++ b/docs/apache-airflow/core-concepts/overview.rst
@@ -232,7 +232,7 @@ To pass data between tasks you have three options:
 
 Airflow sends out Tasks to run on Workers as space becomes available, so there's no guarantee all the tasks in your DAG will run on the same worker or the same machine.
 
-As you build out your DAGs, they are likely to get very complex, so Airflow provides several mechanisms for making this more sustainable - :ref:`SubDAGs <concepts:subdags>` let you make "reusable" DAGs you can embed into other ones, and :ref:`concepts:taskgroups` let you visually group tasks in the UI.
+As you build out your DAGs, they are likely to get very complex, so Airflow provides several mechanisms for making this more sustainable; for example, :ref:`concepts:taskgroups` lets you visually group tasks in the UI.
 
 There are also features for letting you easily pre-configure access to a central resource, like a datastore, in the form of :doc:`../authoring-and-scheduling/connections`, and for limiting concurrency, via :doc:`../administration-and-deployment/pools`.
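For orientation only (not part of this change set): a minimal sketch of the TaskGroup pattern that the updated docs recommend in place of SubDAGs. The DAG id, task ids, and schedule below are illustrative assumptions, not values taken from this diff::

    from __future__ import annotations

    import pendulum

    from airflow import DAG
    from airflow.operators.empty import EmptyOperator
    from airflow.utils.task_group import TaskGroup

    with DAG(
        dag_id="example_taskgroup_instead_of_subdag",  # illustrative id, not from this change
        schedule=None,
        start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
        catchup=False,
    ) as dag:
        start = EmptyOperator(task_id="start")
        end = EmptyOperator(task_id="end")

        # Grouped tasks remain ordinary tasks on this DAG and honor its pools
        # and parallelism settings; the removed SubDagOperator instead ran its
        # tasks in a separate child DAG via a BackfillJob.
        with TaskGroup(group_id="section_1") as section_1:
            task_1 = EmptyOperator(task_id="task_1")
            task_2 = EmptyOperator(task_id="task_2")
            task_1 >> task_2

        start >> section_1 >> end

Because the grouped tasks stay on the parent DAG, one set of views, statistics, and DAG configuration applies, which is the distinction the removed comparison table in ``dags.rst`` described.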
diff --git a/docs/apache-airflow/img/airflow_erd.sha256 b/docs/apache-airflow/img/airflow_erd.sha256
index 4e832da49f0c4..e7e8667275258 100644
--- a/docs/apache-airflow/img/airflow_erd.sha256
+++ b/docs/apache-airflow/img/airflow_erd.sha256
@@ -1 +1 @@
-bbe537329f9e97dcb4a395e3f3e5d9df4ccd51b657aaa714ce27b2b80f9ca79a
\ No newline at end of file
+37de7143e49532e7650ba09d0172788c23e132b57123a5f6e863e7234cf4f79e
\ No newline at end of file
diff --git a/docs/apache-airflow/img/airflow_erd.svg b/docs/apache-airflow/img/airflow_erd.svg
index a7d8e961dca79..9f253a53db27f 100644
--- a/docs/apache-airflow/img/airflow_erd.svg
+++ b/docs/apache-airflow/img/airflow_erd.svg
[Regenerated ERD image source omitted: the only schema-visible change is that the ``is_subdag`` and ``root_dag_id`` columns are dropped from the ``dag`` table; the remainder of the SVG diff is re-rendered layout for otherwise unchanged tables and relationships.]
diff --git a/docs/apache-airflow/img/subdag_after.png b/docs/apache-airflow/img/subdag_after.png
deleted file mode 100644
index 166a6ded314ca..0000000000000
Binary files a/docs/apache-airflow/img/subdag_after.png and /dev/null differ
diff --git a/docs/apache-airflow/img/subdag_before.png b/docs/apache-airflow/img/subdag_before.png
deleted file mode 100644
index ebc3e589b2221..0000000000000
Binary files a/docs/apache-airflow/img/subdag_before.png and /dev/null differ
diff --git a/docs/apache-airflow/img/subdag_zoom.png b/docs/apache-airflow/img/subdag_zoom.png
deleted file mode 100644
index e6d4728602181..0000000000000
Binary files a/docs/apache-airflow/img/subdag_zoom.png and /dev/null differ
diff --git a/docs/apache-airflow/migrations-ref.rst b/docs/apache-airflow/migrations-ref.rst
index 1dbe67bf271f6..37640e2b5ed86 100644
--- a/docs/apache-airflow/migrations-ref.rst
+++ b/docs/apache-airflow/migrations-ref.rst
@@ -36,13 +36,16 @@ Here's the list of all the Database Migrations that are executed via when you ru
 
 .. All table elements are scraped from migration files
 .. Beginning of auto-generated table
 
-+-------------------------+------------------+-------------------+-------------------------------------------------+
-| Revision ID             | Revises ID       | Airflow Version   | Description                                     |
-+=========================+==================+===================+=================================================+
-| ``044f740568ec`` (head) | ``22ed7efa9da2`` | ``3.0.0``         | Drop ab_user.id foreign key.                    |
-+-------------------------+------------------+-------------------+-------------------------------------------------+
-| ``22ed7efa9da2`` (base) | ``None``         | ``2.10.0``        | Add dag_schedule_dataset_alias_reference table.
| -+-------------------------+------------------+-------------------+-------------------------------------------------+ ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| Revision ID | Revises ID | Airflow Version | Description | ++=========================+==================+===================+==============================================================+ +| ``d0f1c55954fa`` (head) | ``044f740568ec`` | ``3.0.0`` | Remove SubDAGs: ``is_subdag`` & ``root_dag_id`` columns from | +| | | | DAG table. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``044f740568ec`` | ``22ed7efa9da2`` | ``3.0.0`` | Drop ab_user.id foreign key. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``22ed7efa9da2`` (base) | ``None`` | ``2.10.0`` | Add dag_schedule_dataset_alias_reference table. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ .. End of auto-generated table diff --git a/docs/apache-airflow/operators-and-hooks-ref.rst b/docs/apache-airflow/operators-and-hooks-ref.rst index 9fdf9d97cc510..6742559a02304 100644 --- a/docs/apache-airflow/operators-and-hooks-ref.rst +++ b/docs/apache-airflow/operators-and-hooks-ref.rst @@ -74,9 +74,6 @@ For details see: :doc:`apache-airflow-providers:operators-and-hooks-ref/index`. * - :mod:`airflow.operators.python` - :doc:`How to use ` - * - :mod:`airflow.operators.subdag` - - - * - :mod:`airflow.operators.trigger_dagrun` - diff --git a/newsfragments/41390.significant.rst b/newsfragments/41390.significant.rst new file mode 100644 index 0000000000000..37ddf0732449e --- /dev/null +++ b/newsfragments/41390.significant.rst @@ -0,0 +1,14 @@ +Support for SubDags is removed + +**Breaking Change** + +Subdags have been removed from the following locations: + +- CLI +- API +- ``SubDagOperator`` + +This removal marks the end of Subdag support across all interfaces. Users +should transition to using TaskGroups as a more efficient and maintainable +alternative. Please ensure your DAGs are updated to +remove any usage of Subdags to maintain compatibility with future Airflow releases. diff --git a/tests/always/test_example_dags.py b/tests/always/test_example_dags.py index 2b5f37631a427..f81bbf82caacd 100644 --- a/tests/always/test_example_dags.py +++ b/tests/always/test_example_dags.py @@ -33,7 +33,6 @@ AIRFLOW_SOURCES_ROOT = Path(__file__).resolve().parents[2] AIRFLOW_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "airflow" / "providers" CURRENT_PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}" -NO_DB_QUERY_EXCEPTION = ("/airflow/example_dags/example_subdag_operator.py",) PROVIDERS_PREFIXES = ("airflow/providers/", "tests/system/providers/") OPTIONAL_PROVIDERS_DEPENDENCIES: dict[str, dict[str, str | None]] = { # Some examples or system tests may depend on additional packages @@ -163,10 +162,6 @@ def example_not_excluded_dags(xfail_db_exception: bool = False): if not result: param_marks.append(pytest.mark.skip(reason=reason)) - if xfail_db_exception and candidate.endswith(NO_DB_QUERY_EXCEPTION): - # Use strict XFAIL for excluded tests. So if it is not failed, we should remove from the list. 
- param_marks.append(pytest.mark.xfail(reason="Expected DB call", strict=True)) - if candidate.startswith(providers_folders): # Do not raise an error for airflow.exceptions.RemovedInAirflow3Warning. # We should not rush to enforce new syntax updates in providers diff --git a/tests/api_connexion/endpoints/test_dag_endpoint.py b/tests/api_connexion/endpoints/test_dag_endpoint.py index a546e6bec4751..d3192723a7077 100644 --- a/tests/api_connexion/endpoints/test_dag_endpoint.py +++ b/tests/api_connexion/endpoints/test_dag_endpoint.py @@ -195,9 +195,7 @@ def test_should_respond_200(self): "file_token": "Ii90bXAvZGFnXzEucHki.EnmIdPaUPo26lHQClbWMbDFD1Pk", "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": {"__type": "CronExpression", "value": "2 2 * * *"}, "tags": [], "next_dagrun": None, @@ -238,9 +236,7 @@ def test_should_respond_200_with_schedule_interval_none(self, session): "file_token": "Ii90bXAvZGFnXzEucHki.EnmIdPaUPo26lHQClbWMbDFD1Pk", "is_paused": False, "is_active": False, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": None, "tags": [], "next_dagrun": None, @@ -371,7 +367,6 @@ def test_should_respond_200(self, url_safe_serializer): "is_active": True, "is_paused": False, "is_paused_upon_creation": None, - "is_subdag": False, "last_expired": None, "last_parsed": last_parsed, "last_parsed_time": None, @@ -395,7 +390,6 @@ def test_should_respond_200(self, url_safe_serializer): }, "pickle_id": None, "render_template_as_native_obj": False, - "root_dag_id": None, "schedule_interval": {"__type": "CronExpression", "value": "2 2 * * *"}, "scheduler_lock": None, "start_date": "2020-06-15T00:00:00+00:00", @@ -437,7 +431,6 @@ def test_should_respond_200_with_dataset_expression(self, url_safe_serializer): "is_active": True, "is_paused": False, "is_paused_upon_creation": None, - "is_subdag": False, "last_expired": None, "last_parsed": last_parsed, "last_parsed_time": None, @@ -461,7 +454,6 @@ def test_should_respond_200_with_dataset_expression(self, url_safe_serializer): }, "pickle_id": None, "render_template_as_native_obj": False, - "root_dag_id": None, "schedule_interval": {"__type": "CronExpression", "value": "2 2 * * *"}, "scheduler_lock": None, "start_date": "2020-06-15T00:00:00+00:00", @@ -498,7 +490,6 @@ def test_should_response_200_with_doc_md_none(self, url_safe_serializer): "is_active": True, "is_paused": False, "is_paused_upon_creation": None, - "is_subdag": False, "last_expired": None, "last_parsed": last_parsed, "last_parsed_time": None, @@ -515,7 +506,6 @@ def test_should_response_200_with_doc_md_none(self, url_safe_serializer): "params": {}, "pickle_id": None, "render_template_as_native_obj": False, - "root_dag_id": None, "schedule_interval": {"__type": "CronExpression", "value": "2 2 * * *"}, "scheduler_lock": None, "start_date": "2020-06-15T00:00:00+00:00", @@ -552,7 +542,6 @@ def test_should_response_200_for_null_start_date(self, url_safe_serializer): "is_active": True, "is_paused": False, "is_paused_upon_creation": None, - "is_subdag": False, "last_expired": None, "last_parsed": last_parsed, "last_parsed_time": None, @@ -569,7 +558,6 @@ def test_should_response_200_for_null_start_date(self, url_safe_serializer): "params": {}, "pickle_id": None, "render_template_as_native_obj": False, - "root_dag_id": None, "schedule_interval": {"__type": "CronExpression", "value": "2 2 * * *"}, "scheduler_lock": None, "start_date": None, @@ -609,7 +597,6 @@ def 
test_should_respond_200_serialized(self, url_safe_serializer): "is_active": True, "is_paused": False, "is_paused_upon_creation": None, - "is_subdag": False, "last_expired": None, "last_parsed_time": None, "last_pickled": None, @@ -632,7 +619,6 @@ def test_should_respond_200_serialized(self, url_safe_serializer): }, "pickle_id": None, "render_template_as_native_obj": False, - "root_dag_id": None, "schedule_interval": {"__type": "CronExpression", "value": "2 2 * * *"}, "scheduler_lock": None, "start_date": "2020-06-15T00:00:00+00:00", @@ -673,7 +659,6 @@ def test_should_respond_200_serialized(self, url_safe_serializer): "is_active": True, "is_paused": False, "is_paused_upon_creation": None, - "is_subdag": False, "last_expired": None, "last_parsed_time": None, "last_pickled": None, @@ -696,7 +681,6 @@ def test_should_respond_200_serialized(self, url_safe_serializer): }, "pickle_id": None, "render_template_as_native_obj": False, - "root_dag_id": None, "schedule_interval": {"__type": "CronExpression", "value": "2 2 * * *"}, "scheduler_lock": None, "start_date": "2020-06-15T00:00:00+00:00", @@ -773,7 +757,7 @@ def test_should_respond_200(self, session, url_safe_serializer): self._create_dag_models(2) self._create_deactivated_dag() - dags_query = session.query(DagModel).filter(~DagModel.is_subdag) + dags_query = session.query(DagModel) assert len(dags_query.all()) == 3 response = self.client.get("api/v1/dags", environ_overrides={"REMOTE_USER": "test"}) @@ -791,9 +775,7 @@ def test_should_respond_200(self, session, url_safe_serializer): "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -824,9 +806,7 @@ def test_should_respond_200(self, session, url_safe_serializer): "file_token": file_token2, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -869,9 +849,7 @@ def test_only_active_true_returns_active_dags(self, url_safe_serializer): "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -915,9 +893,7 @@ def test_only_active_false_returns_all_dags(self, url_safe_serializer): "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -948,9 +924,7 @@ def test_only_active_false_returns_all_dags(self, url_safe_serializer): "file_token": file_token_2, "is_paused": False, "is_active": False, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1119,9 +1093,7 @@ def test_paused_true_returns_paused_dags(self, url_safe_serializer): "file_token": file_token, "is_paused": True, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1164,9 +1136,7 @@ def test_paused_false_returns_unpaused_dags(self, url_safe_serializer): "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1209,9 +1179,7 @@ def test_paused_none_returns_all_dags(self, 
url_safe_serializer): "file_token": file_token, "is_paused": True, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1242,9 +1210,7 @@ def test_paused_none_returns_all_dags(self, url_safe_serializer): "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1306,7 +1272,7 @@ def test_with_auth_role_public_set(self, set_auto_role_public, expected_status_c self._create_dag_models(2) self._create_deactivated_dag() - dags_query = session.query(DagModel).filter(~DagModel.is_subdag) + dags_query = session.query(DagModel) assert len(dags_query.all()) == 3 response = self.client.get("api/v1/dags") @@ -1333,9 +1299,7 @@ def test_should_respond_200_on_patch_is_paused(self, url_safe_serializer, sessio "file_token": file_token, "is_paused": False, "is_active": False, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1470,9 +1434,7 @@ def test_should_respond_200_with_update_mask(self, url_safe_serializer): "file_token": file_token, "is_paused": False, "is_active": False, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1566,7 +1528,7 @@ def test_should_respond_200_on_patch_is_paused(self, session, url_safe_serialize self._create_dag_models(2) self._create_deactivated_dag() - dags_query = session.query(DagModel).filter(~DagModel.is_subdag) + dags_query = session.query(DagModel) assert len(dags_query.all()) == 3 response = self.client.patch( @@ -1588,9 +1550,7 @@ def test_should_respond_200_on_patch_is_paused(self, session, url_safe_serialize "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1621,9 +1581,7 @@ def test_should_respond_200_on_patch_is_paused(self, session, url_safe_serialize "file_token": file_token2, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1657,7 +1615,7 @@ def test_should_respond_200_on_patch_is_paused_using_update_mask(self, session, self._create_dag_models(2) self._create_deactivated_dag() - dags_query = session.query(DagModel).filter(~DagModel.is_subdag) + dags_query = session.query(DagModel) assert len(dags_query.all()) == 3 response = self.client.patch( @@ -1679,9 +1637,7 @@ def test_should_respond_200_on_patch_is_paused_using_update_mask(self, session, "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1712,9 +1668,7 @@ def test_should_respond_200_on_patch_is_paused_using_update_mask(self, session, "file_token": file_token2, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1746,7 +1700,7 @@ def test_wrong_value_as_update_mask_rasise(self, session): self._create_dag_models(2) self._create_deactivated_dag() - dags_query = session.query(DagModel).filter(~DagModel.is_subdag) + dags_query = session.query(DagModel) assert 
len(dags_query.all()) == 3 response = self.client.patch( @@ -1769,7 +1723,7 @@ def test_invalid_request_body_raises_badrequest(self, session): self._create_dag_models(2) self._create_deactivated_dag() - dags_query = session.query(DagModel).filter(~DagModel.is_subdag) + dags_query = session.query(DagModel) assert len(dags_query.all()) == 3 response = self.client.patch( @@ -1810,9 +1764,7 @@ def test_only_active_true_returns_active_dags(self, url_safe_serializer, session "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1864,9 +1816,7 @@ def test_only_active_false_returns_all_dags(self, url_safe_serializer, session): "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -1897,9 +1847,7 @@ def test_only_active_false_returns_all_dags(self, url_safe_serializer, session): "file_token": file_token_2, "is_paused": False, "is_active": False, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -2114,9 +2062,7 @@ def test_should_respond_200_and_pause_dags(self, url_safe_serializer): "file_token": file_token, "is_paused": True, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -2147,9 +2093,7 @@ def test_should_respond_200_and_pause_dags(self, url_safe_serializer): "file_token": file_token2, "is_paused": True, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -2201,9 +2145,7 @@ def test_should_respond_200_and_pause_dag_pattern(self, session, url_safe_serial "file_token": file_token, "is_paused": True, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -2234,9 +2176,7 @@ def test_should_respond_200_and_pause_dag_pattern(self, session, url_safe_serial "file_token": file_token10, "is_paused": True, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -2290,9 +2230,7 @@ def test_should_respond_200_and_reverse_ordering(self, session, url_safe_seriali "file_token": file_token10, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -2323,9 +2261,7 @@ def test_should_respond_200_and_reverse_ordering(self, session, url_safe_seriali "file_token": file_token, "is_paused": False, "is_active": True, - "is_subdag": False, "owners": [], - "root_dag_id": None, "schedule_interval": { "__type": "CronExpression", "value": "2 2 * * *", @@ -2376,7 +2312,7 @@ def test_with_auth_role_public_set( self._create_dag_models(2) self._create_deactivated_dag() - dags_query = session.query(DagModel).filter(~DagModel.is_subdag) + dags_query = session.query(DagModel) assert len(dags_query.all()) == 3 response = self.client.patch( diff --git a/tests/api_connexion/endpoints/test_dag_run_endpoint.py b/tests/api_connexion/endpoints/test_dag_run_endpoint.py index dc77648784ce5..d1e965aa0b1d2 100644 --- 
a/tests/api_connexion/endpoints/test_dag_run_endpoint.py +++ b/tests/api_connexion/endpoints/test_dag_run_endpoint.py @@ -147,7 +147,7 @@ def _create_dag(self, dag_id): with create_session() as session: session.add(dag_instance) dag = DAG(dag_id=dag_id, schedule=None, params={"validated_number": Param(1, minimum=1, maximum=10)}) - self.app.dag_bag.bag_dag(dag, root_dag=dag) + self.app.dag_bag.bag_dag(dag) return dag_instance def _create_test_dag_run(self, state=DagRunState.RUNNING, extra_dag=False, commit=True, idx_start=1): @@ -1690,7 +1690,7 @@ def test_should_respond_200(self, state, run_type, dag_maker, session): dag_run_id = "TEST_DAG_RUN_ID" with dag_maker(dag_id) as dag: task = EmptyOperator(task_id="task_id", dag=dag) - self.app.dag_bag.bag_dag(dag, root_dag=dag) + self.app.dag_bag.bag_dag(dag) dr = dag_maker.create_dagrun(run_id=dag_run_id, run_type=run_type) ti = dr.get_task_instance(task_id="task_id") ti.task = task @@ -1734,7 +1734,7 @@ def test_schema_validation_error_raises(self, dag_maker, session): dag_run_id = "TEST_DAG_RUN_ID" with dag_maker(dag_id) as dag: EmptyOperator(task_id="task_id", dag=dag) - self.app.dag_bag.bag_dag(dag, root_dag=dag) + self.app.dag_bag.bag_dag(dag) dag_maker.create_dagrun(run_id=dag_run_id) response = self.client.patch( @@ -1814,7 +1814,7 @@ def test_with_auth_role_public_set(self, set_auto_role_public, expected_status_c dag_run_id = "TEST_DAG_RUN_ID" with dag_maker(dag_id) as dag: task = EmptyOperator(task_id="task_id", dag=dag) - self.app.dag_bag.bag_dag(dag, root_dag=dag) + self.app.dag_bag.bag_dag(dag) dr = dag_maker.create_dagrun(run_id=dag_run_id, run_type=DagRunType.SCHEDULED) ti = dr.get_task_instance(task_id="task_id") ti.task = task @@ -1836,7 +1836,7 @@ def test_should_respond_200(self, dag_maker, session): dag_run_id = "TEST_DAG_RUN_ID" with dag_maker(dag_id) as dag: task = EmptyOperator(task_id="task_id", dag=dag) - self.app.dag_bag.bag_dag(dag, root_dag=dag) + self.app.dag_bag.bag_dag(dag) dr = dag_maker.create_dagrun(run_id=dag_run_id, state=DagRunState.FAILED) ti = dr.get_task_instance(task_id="task_id") ti.task = task @@ -1879,7 +1879,7 @@ def test_schema_validation_error_raises_for_invalid_fields(self, dag_maker, sess dag_run_id = "TEST_DAG_RUN_ID" with dag_maker(dag_id) as dag: EmptyOperator(task_id="task_id", dag=dag) - self.app.dag_bag.bag_dag(dag, root_dag=dag) + self.app.dag_bag.bag_dag(dag) dag_maker.create_dagrun(run_id=dag_run_id, state=DagRunState.FAILED) response = self.client.post( f"api/v1/dags/{dag_id}/dagRuns/{dag_run_id}/clear", @@ -1900,7 +1900,7 @@ def test_dry_run(self, dag_maker, session): dag_run_id = "TEST_DAG_RUN_ID" with dag_maker(dag_id) as dag: task = EmptyOperator(task_id="task_id", dag=dag) - self.app.dag_bag.bag_dag(dag, root_dag=dag) + self.app.dag_bag.bag_dag(dag) dr = dag_maker.create_dagrun(run_id=dag_run_id) ti = dr.get_task_instance(task_id="task_id") ti.task = task @@ -1974,7 +1974,7 @@ def test_with_auth_role_public_set(self, set_auto_role_public, expected_status_c dag_run_id = "TEST_DAG_RUN_ID" with dag_maker(dag_id) as dag: task = EmptyOperator(task_id="task_id", dag=dag) - self.app.dag_bag.bag_dag(dag, root_dag=dag) + self.app.dag_bag.bag_dag(dag) dr = dag_maker.create_dagrun(run_id=dag_run_id, run_type=DagRunType.SCHEDULED) ti = dr.get_task_instance(task_id="task_id") ti.task = task diff --git a/tests/api_connexion/endpoints/test_dag_stats_endpoint.py b/tests/api_connexion/endpoints/test_dag_stats_endpoint.py index 2e36701bd8014..36fc54d3a5b17 100644 --- 
a/tests/api_connexion/endpoints/test_dag_stats_endpoint.py +++ b/tests/api_connexion/endpoints/test_dag_stats_endpoint.py @@ -77,7 +77,7 @@ def _create_dag(self, dag_id): with create_session() as session: session.add(dag_instance) dag = DAG(dag_id=dag_id, schedule=None) - self.app.dag_bag.bag_dag(dag, root_dag=dag) + self.app.dag_bag.bag_dag(dag) return dag_instance def test_should_respond_200(self, session): diff --git a/tests/api_connexion/endpoints/test_log_endpoint.py b/tests/api_connexion/endpoints/test_log_endpoint.py index 19390d7f46d7e..7c4fb613e4843 100644 --- a/tests/api_connexion/endpoints/test_log_endpoint.py +++ b/tests/api_connexion/endpoints/test_log_endpoint.py @@ -92,7 +92,7 @@ def add_one(x: int): start_date=timezone.parse(self.default_time), ) - configured_app.dag_bag.bag_dag(dag, root_dag=dag) + configured_app.dag_bag.bag_dag(dag) # Add dummy dag for checking picking correct log with same task_id and different dag_id case. with dag_maker( @@ -105,7 +105,7 @@ def add_one(x: int): execution_date=timezone.parse(self.default_time), start_date=timezone.parse(self.default_time), ) - configured_app.dag_bag.bag_dag(dummy_dag, root_dag=dummy_dag) + configured_app.dag_bag.bag_dag(dummy_dag) for ti in dr.task_instances: ti.try_number = 1 @@ -286,7 +286,7 @@ def test_get_logs_of_removed_task(self, request_url, expected_filename, extra_qu dagbag = self.app.dag_bag dag = DAG(self.DAG_ID, start_date=timezone.parse(self.default_time)) del dagbag.dags[self.DAG_ID] - dagbag.bag_dag(dag=dag, root_dag=dag) + dagbag.bag_dag(dag=dag) key = self.app.config["SECRET_KEY"] serializer = URLSafeSerializer(key) diff --git a/tests/api_connexion/endpoints/test_task_instance_endpoint.py b/tests/api_connexion/endpoints/test_task_instance_endpoint.py index 6d04cbf3989e1..4fcd66affea71 100644 --- a/tests/api_connexion/endpoints/test_task_instance_endpoint.py +++ b/tests/api_connexion/endpoints/test_task_instance_endpoint.py @@ -1219,57 +1219,6 @@ class TestPostClearTaskInstances(TestTaskInstanceEndpoint): 2, id="clear by task ids", ), - pytest.param( - "example_subdag_operator", - [ - {"execution_date": DEFAULT_DATETIME_1, "state": State.FAILED}, - { - "execution_date": DEFAULT_DATETIME_1 + dt.timedelta(days=1), - "state": State.FAILED, - }, - { - "execution_date": DEFAULT_DATETIME_1 + dt.timedelta(days=2), - "state": State.FAILED, - }, - { - "execution_date": DEFAULT_DATETIME_1 + dt.timedelta(days=3), - "state": State.FAILED, - }, - { - "execution_date": DEFAULT_DATETIME_1 + dt.timedelta(days=4), - "state": State.FAILED, - }, - ], - "example_subdag_operator.section-1", - {"dry_run": True, "include_parentdag": True}, - 4, - id="include parent dag", - ), - pytest.param( - "example_subdag_operator.section-1", - [ - {"execution_date": DEFAULT_DATETIME_1, "state": State.FAILED}, - { - "execution_date": DEFAULT_DATETIME_1 + dt.timedelta(days=1), - "state": State.FAILED, - }, - { - "execution_date": DEFAULT_DATETIME_1 + dt.timedelta(days=2), - "state": State.FAILED, - }, - { - "execution_date": DEFAULT_DATETIME_1 + dt.timedelta(days=3), - "state": State.FAILED, - }, - ], - "example_subdag_operator", - { - "dry_run": True, - "include_subdags": True, - }, - 4, - id="include sub dag", - ), pytest.param( "example_python_operator", [ @@ -1321,7 +1270,7 @@ def test_clear_taskinstance_is_called_with_queued_dr_state(self, mock_clearti, s """Test that if reset_dag_runs is True, then clear_task_instances is called with State.QUEUED""" self.create_task_instances(session) dag_id = "example_python_operator" - payload = 
{"include_subdags": True, "reset_dag_runs": True, "dry_run": False} + payload = {"reset_dag_runs": True, "dry_run": False} self.app.dag_bag.sync_to_db() response = self.client.post( f"/api/v1/dags/{dag_id}/clearTaskInstances", @@ -1362,7 +1311,6 @@ def test_should_respond_200_with_reset_dag_run(self, session): "reset_dag_runs": True, "only_failed": False, "only_running": True, - "include_subdags": True, } task_instances = [ {"execution_date": DEFAULT_DATETIME_1, "state": State.RUNNING}, @@ -1454,7 +1402,6 @@ def test_should_respond_200_with_dag_run_id(self, session): "reset_dag_runs": False, "only_failed": False, "only_running": True, - "include_subdags": True, "dag_run_id": "TEST_DAG_RUN_ID_0", } task_instances = [ @@ -1514,7 +1461,6 @@ def test_should_respond_200_with_include_past(self, session): "only_failed": False, "include_past": True, "only_running": True, - "include_subdags": True, } task_instances = [ {"execution_date": DEFAULT_DATETIME_1, "state": State.RUNNING}, @@ -1693,7 +1639,6 @@ def test_should_respond_404_for_nonexistent_dagrun_id(self, session): "reset_dag_runs": False, "only_failed": False, "only_running": True, - "include_subdags": True, "dag_run_id": "TEST_DAG_RUN_ID_100", } task_instances = [ @@ -1732,7 +1677,6 @@ def test_should_raises_401_unauthenticated(self): "reset_dag_runs": True, "only_failed": False, "only_running": True, - "include_subdags": True, }, ) assert_401(response) @@ -1747,7 +1691,6 @@ def test_should_raise_403_forbidden(self, username: str): "reset_dag_runs": True, "only_failed": False, "only_running": True, - "include_subdags": True, }, ) assert response.status_code == 403 @@ -1794,7 +1737,6 @@ def test_raises_404_for_non_existent_dag(self): "reset_dag_runs": True, "only_failed": False, "only_running": True, - "include_subdags": True, }, ) assert response.status_code == 404 diff --git a/tests/api_connexion/schemas/test_dag_schema.py b/tests/api_connexion/schemas/test_dag_schema.py index 1e91972d1fa65..ae3a87db151e8 100644 --- a/tests/api_connexion/schemas/test_dag_schema.py +++ b/tests/api_connexion/schemas/test_dag_schema.py @@ -37,10 +37,8 @@ def test_serialize_test_dag_schema(url_safe_serializer): dag_model = DagModel( dag_id="test_dag_id", - root_dag_id="test_root_dag_id", is_paused=True, is_active=True, - is_subdag=False, fileloc="/root/airflow/dags/my_dag.py", owners="airflow1,airflow2", description="The description", @@ -57,9 +55,7 @@ def test_serialize_test_dag_schema(url_safe_serializer): "file_token": url_safe_serializer.dumps("/root/airflow/dags/my_dag.py"), "is_paused": True, "is_active": True, - "is_subdag": False, "owners": ["airflow1", "airflow2"], - "root_dag_id": "test_root_dag_id", "schedule_interval": {"__type": "CronExpression", "value": "5 4 * * *"}, "tags": [{"name": "tag-1"}, {"name": "tag-2"}], "next_dagrun": None, @@ -95,10 +91,8 @@ def test_serialize_test_dag_collection_schema(url_safe_serializer): "fileloc": "/tmp/a.py", "file_token": url_safe_serializer.dumps("/tmp/a.py"), "is_paused": None, - "is_subdag": None, "is_active": None, "owners": [], - "root_dag_id": None, "schedule_interval": None, "tags": [], "next_dagrun": None, @@ -126,9 +120,7 @@ def test_serialize_test_dag_collection_schema(url_safe_serializer): "file_token": url_safe_serializer.dumps("/tmp/a.py"), "is_active": None, "is_paused": None, - "is_subdag": None, "owners": [], - "root_dag_id": None, "schedule_interval": None, "tags": [], "next_dagrun": None, @@ -181,7 +173,6 @@ def test_serialize_test_dag_detail_schema(url_safe_serializer): "file_token": 
url_safe_serializer.dumps(__file__), "is_active": None, "is_paused": None, - "is_subdag": False, "orientation": "LR", "owners": [], "params": { @@ -240,7 +231,6 @@ def test_serialize_test_dag_with_dataset_schedule_detail_schema(url_safe_seriali "file_token": url_safe_serializer.dumps(__file__), "is_active": None, "is_paused": None, - "is_subdag": False, "orientation": "LR", "owners": [], "params": { diff --git a/tests/api_experimental/common/test_delete_dag.py b/tests/api_experimental/common/test_delete_dag.py index 693a534bc6dea..e7c249ee02ac5 100644 --- a/tests/api_experimental/common/test_delete_dag.py +++ b/tests/api_experimental/common/test_delete_dag.py @@ -67,10 +67,7 @@ class TestDeleteDAGSuccessfulDelete: dag_file_path = "/usr/local/airflow/dags/test_dag_8.py" key = "test_dag_id" - def setup_dag_models(self, for_sub_dag=False): - if for_sub_dag: - self.key = "test_dag_id.test_subdag" - + def setup_dag_models(self): task = EmptyOperator( task_id="dummy", dag=DAG(dag_id=self.key, default_args={"start_date": timezone.datetime(2022, 1, 1)}), @@ -79,7 +76,7 @@ def setup_dag_models(self, for_sub_dag=False): test_date = timezone.datetime(2022, 1, 1) with create_session() as session: - session.add(DagModel(dag_id=self.key, fileloc=self.dag_file_path, is_subdag=for_sub_dag)) + session.add(DagModel(dag_id=self.key, fileloc=self.dag_file_path)) dr = DR(dag_id=self.key, run_type=DagRunType.MANUAL, run_id="test", execution_date=test_date) ti = TI(task=task, state=State.SUCCESS) ti.dag_run = dr @@ -158,21 +155,16 @@ def test_delete_dag_successful_delete_not_keeping_records_in_log(self): delete_dag(dag_id=self.key, keep_records_in_log=False) self.check_dag_models_removed(expect_logs=0) - def test_delete_subdag_successful_delete(self): - self.setup_dag_models(for_sub_dag=True) - self.check_dag_models_exists() - delete_dag(dag_id=self.key, keep_records_in_log=False) - self.check_dag_models_removed(expect_logs=0) - def test_delete_dag_preserves_other_dags(self): self.setup_dag_models() with create_session() as session: session.add(DagModel(dag_id=self.key + ".other_dag", fileloc=self.dag_file_path)) - session.add(DagModel(dag_id=self.key + ".subdag", fileloc=self.dag_file_path, is_subdag=True)) + session.add(DagModel(dag_id=self.key + ".other_dag2", fileloc=self.dag_file_path)) delete_dag(self.key) with create_session() as session: assert session.query(DagModel).filter(DagModel.dag_id == self.key + ".other_dag").count() == 1 - assert session.query(DagModel).filter(DagModel.dag_id.like(self.key + "%")).count() == 1 + assert session.query(DagModel).filter(DagModel.dag_id == self.key + ".other_dag2").count() == 1 + assert session.query(DagModel).filter(DagModel.dag_id == self.key).count() == 0 diff --git a/tests/api_experimental/common/test_mark_tasks.py b/tests/api_experimental/common/test_mark_tasks.py index e90938ca69877..578fe7b04602f 100644 --- a/tests/api_experimental/common/test_mark_tasks.py +++ b/tests/api_experimental/common/test_mark_tasks.py @@ -61,7 +61,7 @@ class TestMarkTasks: @classmethod def create_dags(cls, dagbag): cls.dag1 = dagbag.get_dag("miscellaneous_test_dag") - cls.dag2 = dagbag.get_dag("example_subdag_operator") + cls.dag2 = dagbag.get_dag("example_python_operator") cls.dag3 = dagbag.get_dag("example_trigger_target_dag") cls.dag4 = dagbag.get_dag("test_mapped_classic") cls.execution_dates = [timezone.datetime(2022, 1, 1), timezone.datetime(2022, 1, 2)] @@ -408,33 +408,6 @@ def test_mark_tasks_multiple(self): self.dag1, [task.task_id for task in tasks], 
[self.execution_dates[0]], State.SUCCESS, snapshot ) - # TODO: this backend should be removed once a fixing solution is found later - # We skip it here because this test case is working with Postgres & SQLite - # but not with MySQL - @pytest.mark.backend("sqlite", "postgres") - def test_mark_tasks_subdag(self): - # set one task to success towards end of scheduled dag runs - snapshot = TestMarkTasks.snapshot_state(self.dag2, self.execution_dates) - task = self.dag2.get_task("section-1") - relatives = task.get_flat_relatives(upstream=False) - task_ids = [t.task_id for t in relatives] - task_ids.append(task.task_id) - dr = DagRun.find(dag_id=self.dag2.dag_id, execution_date=self.execution_dates[0])[0] - - altered = set_state( - tasks=[task], - run_id=dr.run_id, - upstream=False, - downstream=True, - future=False, - past=False, - state=State.SUCCESS, - commit=True, - ) - assert len(altered) == 14 - - self.verify_state(self.dag2, task_ids, [self.execution_dates[0]], State.SUCCESS, snapshot) - def test_mark_mapped_task_instance_state(self, session): # set mapped task instance to success mapped = self.dag4.get_task("consumer") @@ -481,7 +454,7 @@ def setup_class(cls): dagbag = models.DagBag(include_examples=True, read_dags_from_db=False) cls.dag1 = dagbag.dags["miscellaneous_test_dag"] cls.dag1.sync_to_db() - cls.dag2 = dagbag.dags["example_subdag_operator"] + cls.dag2 = dagbag.dags["example_python_operator"] cls.dag2.sync_to_db() cls.execution_dates = [ timezone.datetime(2022, 1, 1), @@ -766,14 +739,7 @@ def test_set_state_with_multiple_dagruns(self, session=None): altered = set_dag_run_state_to_success(dag=self.dag2, run_id=dr2.run_id, commit=True) - # Recursively count number of tasks in the dag - def count_dag_tasks(dag): - count = len(dag.tasks) - subdag_counts = [count_dag_tasks(subdag) for subdag in dag.subdags] - count += sum(subdag_counts) - return count - - assert len(altered) == count_dag_tasks(self.dag2) + assert len(altered) == len(self.dag2.tasks) self._verify_dag_run_state(self.dag2, self.execution_dates[1], State.SUCCESS) # Make sure other dag status are not changed diff --git a/tests/api_experimental/common/test_trigger_dag.py b/tests/api_experimental/common/test_trigger_dag.py index 8d4dc47e25a92..e65a3dad6ea14 100644 --- a/tests/api_experimental/common/test_trigger_dag.py +++ b/tests/api_experimental/common/test_trigger_dag.py @@ -55,38 +55,6 @@ def test_trigger_dag_dag_run_exist(self, dag_bag_mock, dag_run_mock): with pytest.raises(AirflowException): _trigger_dag(dag_id, dag_bag_mock) - @mock.patch("airflow.models.DAG") - @mock.patch("airflow.api.common.trigger_dag.DagRun", spec=DagRun) - @mock.patch("airflow.models.DagBag") - def test_trigger_dag_include_subdags(self, dag_bag_mock, dag_run_mock, dag_mock): - dag_id = "trigger_dag" - dag_bag_mock.dags = [dag_id] - dag_bag_mock.get_dag.return_value = dag_mock - dag_run_mock.find_duplicate.return_value = None - dag1 = mock.MagicMock(subdags=[]) - dag2 = mock.MagicMock(subdags=[]) - dag_mock.subdags = [dag1, dag2] - - triggers = _trigger_dag(dag_id, dag_bag_mock) - - assert 3 == len(triggers) - - @mock.patch("airflow.models.DAG") - @mock.patch("airflow.api.common.trigger_dag.DagRun", spec=DagRun) - @mock.patch("airflow.models.DagBag") - def test_trigger_dag_include_nested_subdags(self, dag_bag_mock, dag_run_mock, dag_mock): - dag_id = "trigger_dag" - dag_bag_mock.dags = [dag_id] - dag_bag_mock.get_dag.return_value = dag_mock - dag_run_mock.find_duplicate.return_value = None - dag1 = mock.MagicMock(subdags=[]) - dag2 = 
mock.MagicMock(subdags=[dag1]) - dag_mock.subdags = [dag1, dag2] - - triggers = _trigger_dag(dag_id, dag_bag_mock) - - assert 3 == len(triggers) - @mock.patch("airflow.models.DagBag") def test_trigger_dag_with_too_early_start_date(self, dag_bag_mock): dag_id = "trigger_dag_with_too_early_start_date" @@ -105,9 +73,9 @@ def test_trigger_dag_with_valid_start_date(self, dag_bag_mock): dag_bag_mock.get_dag.return_value = dag dag_bag_mock.dags_hash = {} - triggers = _trigger_dag(dag_id, dag_bag_mock, execution_date=timezone.datetime(2018, 7, 5, 10, 10, 0)) + dagrun = _trigger_dag(dag_id, dag_bag_mock, execution_date=timezone.datetime(2018, 7, 5, 10, 10, 0)) - assert len(triggers) == 1 + assert dagrun @pytest.mark.parametrize( "conf, expected_conf", @@ -126,6 +94,6 @@ def test_trigger_dag_with_conf(self, dag_bag_mock, conf, expected_conf): dag_bag_mock.dags_hash = {} - triggers = _trigger_dag(dag_id, dag_bag_mock, conf=conf) + dagrun = _trigger_dag(dag_id, dag_bag_mock, conf=conf) - assert triggers[0].conf == expected_conf + assert dagrun.conf == expected_conf diff --git a/tests/cli/commands/test_task_command.py b/tests/cli/commands/test_task_command.py index eed9c400c7c74..869d5a5a22005 100644 --- a/tests/cli/commands/test_task_command.py +++ b/tests/cli/commands/test_task_command.py @@ -692,22 +692,6 @@ def test_task_states_for_dag_run_when_dag_run_not_exists(self): ) ) - def test_subdag_clear(self): - args = self.parser.parse_args(["tasks", "clear", "example_subdag_operator", "--yes"]) - task_command.task_clear(args) - args = self.parser.parse_args( - ["tasks", "clear", "example_subdag_operator", "--yes", "--exclude-subdags"] - ) - task_command.task_clear(args) - - def test_parentdag_downstream_clear(self): - args = self.parser.parse_args(["tasks", "clear", "example_subdag_operator.section-1", "--yes"]) - task_command.task_clear(args) - args = self.parser.parse_args( - ["tasks", "clear", "example_subdag_operator.section-1", "--yes", "--exclude-parentdag"] - ) - task_command.task_clear(args) - def _set_state_and_try_num(ti, session): ti.state = TaskInstanceState.QUEUED diff --git a/tests/conftest.py b/tests/conftest.py index 0206ab9a9f714..d41ac095c2858 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -844,6 +844,13 @@ def get_serialized_data(self): return json.loads(data) return data + def _bag_dag_compat(self, dag): + # This is a compatibility shim for the old bag_dag method in Airflow <3.0 + # TODO: Remove this when we drop support for Airflow <3.0 in Providers + if hasattr(dag, "parent_dag"): + return self.dagbag.bag_dag(dag, root_dag=dag) + return self.dagbag.bag_dag(dag) + def __exit__(self, type, value, traceback): from airflow.models import DagModel from airflow.models.serialized_dag import SerializedDagModel @@ -863,10 +870,10 @@ def __exit__(self, type, value, traceback): ) self.session.merge(self.serialized_model) serialized_dag = self._serialized_dag() - self.dagbag.bag_dag(serialized_dag, root_dag=serialized_dag) + self._bag_dag_compat(serialized_dag) self.session.flush() else: - self.dagbag.bag_dag(self.dag, self.dag) + self._bag_dag_compat(self.dag) def create_dagrun(self, **kwargs): from airflow.utils import timezone diff --git a/tests/core/test_impersonation_tests.py b/tests/core/test_impersonation_tests.py index b17900820d504..721d180ce7d3f 100644 --- a/tests/core/test_impersonation_tests.py +++ b/tests/core/test_impersonation_tests.py @@ -217,11 +217,6 @@ def test_default_impersonation(self, monkeypatch): monkeypatch.setenv("AIRFLOW__CORE__DEFAULT_IMPERSONATION", 
TEST_USER) self.run_backfill("test_default_impersonation", "test_deelevated_user") - @pytest.mark.execution_timeout(150) - def test_impersonation_subdag(self): - """Tests that impersonation using a subdag correctly passes the right configuration.""" - self.run_backfill("impersonation_subdag", "test_subdag_operation") - class TestImpersonationWithCustomPythonPath(BaseImpersonationTest): @pytest.fixture(autouse=True) diff --git a/tests/dags/test_clear_subdag.py b/tests/dags/test_clear_subdag.py deleted file mode 100644 index bd41ead6a0dc9..0000000000000 --- a/tests/dags/test_clear_subdag.py +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -import datetime -import warnings - -import pytest - -from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator -from airflow.operators.subdag import SubDagOperator - -pytestmark = pytest.mark.db_test - - -def create_subdag_opt(main_dag): - subdag_name = "daily_job" - subdag = DAG( - dag_id=f"{dag_name}.{subdag_name}", - start_date=start_date, - schedule=None, - max_active_tasks=2, - ) - BashOperator(bash_command="echo 1", task_id="daily_job_subdag_task", dag=subdag) - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message=r"This class is deprecated\. Please use `airflow\.utils\.task_group\.TaskGroup`\.", - ) - return SubDagOperator( - task_id=subdag_name, - subdag=subdag, - dag=main_dag, - ) - - -dag_name = "clear_subdag_test_dag" - -start_date = datetime.datetime(2016, 1, 1) - -dag = DAG(dag_id=dag_name, max_active_tasks=3, start_date=start_date, schedule="0 0 * * *") - -daily_job_irrelevant = BashOperator( - bash_command="echo 1", - task_id="daily_job_irrelevant", - dag=dag, -) - -daily_job_downstream = BashOperator( - bash_command="echo 1", - task_id="daily_job_downstream", - dag=dag, -) - -daily_job = create_subdag_opt(main_dag=dag) - -daily_job >> daily_job_downstream diff --git a/tests/dags/test_impersonation_subdag.py b/tests/dags/test_impersonation_subdag.py deleted file mode 100644 index 7b006f3f96909..0000000000000 --- a/tests/dags/test_impersonation_subdag.py +++ /dev/null @@ -1,55 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -import warnings - -from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator -from airflow.operators.python import PythonOperator -from airflow.operators.subdag import SubDagOperator -from airflow.utils import timezone - -DEFAULT_DATE = timezone.datetime(2016, 1, 1) - -default_args = {"owner": "airflow", "start_date": DEFAULT_DATE, "run_as_user": "airflow_test_user"} - -dag = DAG(dag_id="impersonation_subdag", default_args=default_args) - - -def print_today(): - print(f"Today is {timezone.utcnow()}") - - -subdag = DAG("impersonation_subdag.test_subdag_operation", default_args=default_args) - - -PythonOperator(python_callable=print_today, task_id="exec_python_fn", dag=subdag) - - -BashOperator(task_id="exec_bash_operator", bash_command='echo "Running within SubDag"', dag=subdag) - - -with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message=r"This class is deprecated\. Please use `airflow\.utils\.task_group\.TaskGroup`\.", - ) - subdag_operator = SubDagOperator( - task_id="test_subdag_operation", subdag=subdag, mode="reschedule", poke_interval=1, dag=dag - ) diff --git a/tests/dags/test_subdag.py b/tests/dags/test_subdag.py deleted file mode 100644 index 9a2ebb91d395c..0000000000000 --- a/tests/dags/test_subdag.py +++ /dev/null @@ -1,86 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -A DAG with subdag for testing purpose. -""" - -from __future__ import annotations - -import warnings -from datetime import datetime, timedelta - -from airflow.models.dag import DAG -from airflow.operators.empty import EmptyOperator -from airflow.operators.subdag import SubDagOperator - -DAG_NAME = "test_subdag_operator" - -DEFAULT_TASK_ARGS = { - "owner": "airflow", - "start_date": datetime(2019, 1, 1), - "max_active_runs": 1, -} - - -def subdag(parent_dag_name, child_dag_name, args): - """ - Create a subdag. 
- """ - dag_subdag = DAG( - dag_id=f"{parent_dag_name}.{child_dag_name}", - default_args=args, - schedule="@daily", - ) - - for i in range(2): - EmptyOperator( - task_id=f"{child_dag_name}-task-{i + 1}", - default_args=args, - dag=dag_subdag, - ) - - return dag_subdag - - -with DAG( - dag_id=DAG_NAME, - start_date=datetime(2019, 1, 1), - max_active_runs=1, - default_args=DEFAULT_TASK_ARGS, - schedule=timedelta(minutes=1), -): - start = EmptyOperator( - task_id="start", - ) - - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message=r"This class is deprecated\. Please use `airflow\.utils\.task_group\.TaskGroup`\.", - ) - section_1 = SubDagOperator( - task_id="section-1", - subdag=subdag(DAG_NAME, "section-1", DEFAULT_TASK_ARGS), - default_args=DEFAULT_TASK_ARGS, - ) - - some_other_task = EmptyOperator( - task_id="some-other-task", - ) - - start >> section_1 >> some_other_task diff --git a/tests/jobs/test_backfill_job.py b/tests/jobs/test_backfill_job.py index e46f27ed62459..e18e71e7d7e4b 100644 --- a/tests/jobs/test_backfill_job.py +++ b/tests/jobs/test_backfill_job.py @@ -68,7 +68,6 @@ set_default_pool_slots, ) from tests.test_utils.mock_executor import MockExecutor -from tests.test_utils.timetables import cron_timetable pytestmark = [pytest.mark.db_test, pytest.mark.skip_if_database_isolation_mode] @@ -1447,153 +1446,6 @@ def test_backfill_fill_blanks(self, dag_maker, mock_executor): elif ti.task_id == op5.task_id: assert ti.state == State.UPSTREAM_FAILED - def test_backfill_execute_subdag(self, mock_executor): - dag = self.dagbag.get_dag("example_subdag_operator") - subdag_op_task = dag.get_task("section-1") - - subdag = subdag_op_task.subdag - subdag.timetable = cron_timetable("@daily") - - start_date = timezone.utcnow() - executor = mock_executor - job = Job() - job_runner = BackfillJobRunner( - job=job, - dag=subdag, - start_date=start_date, - end_date=start_date, - donot_pickle=True, - ) - run_job(job=job, execute_callable=job_runner._execute) - - subdag_op_task.pre_execute(context={"execution_date": start_date}) - subdag_op_task.execute(context={"execution_date": start_date}) - subdag_op_task.post_execute(context={"execution_date": start_date}) - - history = executor.history - subdag_history = history[0] - - # check that all 5 task instances of the subdag 'section-1' were executed - assert 5 == len(subdag_history) - for sdh in subdag_history: - ti = sdh[3] - assert "section-1-task-" in ti.task_id - - with create_session() as session: - successful_subdag_runs = ( - session.query(DagRun) - .filter(DagRun.dag_id == subdag.dag_id) - .filter(DagRun.execution_date == start_date) - .filter(DagRun.state == State.SUCCESS) - .count() - ) - - assert 1 == successful_subdag_runs - - subdag.clear() - dag.clear() - - def test_subdag_clear_parentdag_downstream_clear(self, mock_executor): - dag = self.dagbag.get_dag("clear_subdag_test_dag") - subdag_op_task = dag.get_task("daily_job") - - subdag = subdag_op_task.subdag - - job = Job() - job_runner = BackfillJobRunner( - job=job, - dag=dag, - start_date=DEFAULT_DATE, - end_date=DEFAULT_DATE, - donot_pickle=True, - ) - - with timeout(seconds=30): - run_job(job=job, execute_callable=job_runner._execute) - - run_id = f"backfill__{DEFAULT_DATE.isoformat()}" - ti_subdag = TI(task=dag.get_task("daily_job"), run_id=run_id) - ti_subdag.refresh_from_db() - assert ti_subdag.state == State.SUCCESS - - ti_irrelevant = TI(task=dag.get_task("daily_job_irrelevant"), run_id=run_id) - ti_irrelevant.refresh_from_db() - assert 
ti_irrelevant.state == State.SUCCESS - - ti_downstream = TI(task=dag.get_task("daily_job_downstream"), run_id=run_id) - ti_downstream.refresh_from_db() - assert ti_downstream.state == State.SUCCESS - - sdag = subdag.partial_subset( - task_ids_or_regex="daily_job_subdag_task", include_downstream=True, include_upstream=False - ) - - sdag.clear(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, include_parentdag=True) - - ti_subdag.refresh_from_db() - assert State.NONE == ti_subdag.state - - ti_irrelevant.refresh_from_db() - assert State.SUCCESS == ti_irrelevant.state - - ti_downstream.refresh_from_db() - assert State.NONE == ti_downstream.state - - subdag.clear() - dag.clear() - - def test_backfill_execute_subdag_with_removed_task(self, mock_executor): - """ - Ensure that subdag operators execute properly in the case where - an associated task of the subdag has been removed from the dag - definition, but has instances in the database from previous runs. - """ - dag = self.dagbag.get_dag("example_subdag_operator") - subdag = dag.get_task("section-1").subdag - - session = settings.Session() - job = Job() - job_runner = BackfillJobRunner( - job=job, - dag=subdag, - start_date=DEFAULT_DATE, - end_date=DEFAULT_DATE, - donot_pickle=True, - ) - dr = DagRun( - dag_id=subdag.dag_id, execution_date=DEFAULT_DATE, run_id="test", run_type=DagRunType.BACKFILL_JOB - ) - session.add(dr) - - removed_task_ti = TI( - task=EmptyOperator(task_id="removed_task"), run_id=dr.run_id, state=State.REMOVED - ) - removed_task_ti.dag_id = subdag.dag_id - dr.task_instances.append(removed_task_ti) - - session.commit() - - with timeout(seconds=30): - run_job(job=job, execute_callable=job_runner._execute) - - for task in subdag.tasks: - instance = ( - session.query(TI) - .filter( - TI.dag_id == subdag.dag_id, TI.task_id == task.task_id, TI.execution_date == DEFAULT_DATE - ) - .first() - ) - - assert instance is not None - assert instance.state == State.SUCCESS - - removed_task_ti.refresh_from_db() - assert removed_task_ti.state == State.REMOVED - - subdag.clear() - dag.clear() - def test_update_counters(self, dag_maker, session): with dag_maker(dag_id="test_manage_executor_state", start_date=DEFAULT_DATE, session=session) as dag: task1 = EmptyOperator(task_id="dummy", owner="airflow") diff --git a/tests/jobs/test_local_task_job.py b/tests/jobs/test_local_task_job.py index a4e7c4f387752..91217703e2d0f 100644 --- a/tests/jobs/test_local_task_job.py +++ b/tests/jobs/test_local_task_job.py @@ -863,7 +863,7 @@ def test_fast_follow( scheduler_job = Job() scheduler_job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) - scheduler_job_runner.dagbag.bag_dag(dag, root_dag=dag) + scheduler_job_runner.dagbag.bag_dag(dag) dag_run = dag.create_dagrun(run_id="test_dagrun_fast_follow", state=State.RUNNING) diff --git a/tests/jobs/test_scheduler_job.py b/tests/jobs/test_scheduler_job.py index 2e96728d5ecae..8fdbf4826db7a 100644 --- a/tests/jobs/test_scheduler_job.py +++ b/tests/jobs/test_scheduler_job.py @@ -3097,12 +3097,12 @@ def test_scheduler_task_start_date(self, configs): dag_id = "test_task_start_date_scheduling" dag = self.dagbag.get_dag(dag_id) dag.is_paused_upon_creation = False - dagbag.bag_dag(dag=dag, root_dag=dag) + dagbag.bag_dag(dag=dag) # Deactivate other dags in this file so the scheduler doesn't waste time processing them other_dag = self.dagbag.get_dag("test_start_date_scheduling") other_dag.is_paused_upon_creation = True - dagbag.bag_dag(dag=other_dag, root_dag=other_dag) + dagbag.bag_dag(dag=other_dag) 
dagbag.sync_to_db() diff --git a/tests/models/test_dag.py b/tests/models/test_dag.py index 3d39a7290d909..2b3961f1e6c6f 100644 --- a/tests/models/test_dag.py +++ b/tests/models/test_dag.py @@ -84,7 +84,6 @@ from airflow.operators.bash import BashOperator from airflow.operators.empty import EmptyOperator from airflow.operators.python import PythonOperator -from airflow.operators.subdag import SubDagOperator from airflow.security import permissions from airflow.templates import NativeEnvironment, SandboxedEnvironment from airflow.timetables.base import DagRunInfo, DataInterval, TimeRestriction, Timetable @@ -96,7 +95,7 @@ ) from airflow.utils import timezone from airflow.utils.file import list_py_file_paths -from airflow.utils.session import create_session, provide_session +from airflow.utils.session import create_session from airflow.utils.state import DagRunState, State, TaskInstanceState from airflow.utils.task_group import TaskGroup, TaskGroupContext from airflow.utils.timezone import datetime as datetime_tz @@ -292,42 +291,6 @@ def test_dag_as_context_manager(self): assert op8.dag == dag assert op9.dag == dag2 - def test_dag_topological_sort_include_subdag_tasks(self): - child_dag = DAG( - "parent_dag.child_dag", - schedule="@daily", - start_date=DEFAULT_DATE, - ) - - with child_dag: - EmptyOperator(task_id="a_child") - EmptyOperator(task_id="b_child") - - parent_dag = DAG( - "parent_dag", - schedule="@daily", - start_date=DEFAULT_DATE, - ) - - # a_parent -> child_dag -> (a_child | b_child) -> b_parent - with parent_dag: - op1 = EmptyOperator(task_id="a_parent") - with pytest.warns( - RemovedInAirflow3Warning, match="Please use `airflow.utils.task_group.TaskGroup`." - ): - op2 = SubDagOperator(task_id="child_dag", subdag=child_dag) - op3 = EmptyOperator(task_id="b_parent") - - op1 >> op2 >> op3 - - topological_list = parent_dag.topological_sort(include_subdag_tasks=True) - - assert self._occur_before("a_parent", "child_dag", topological_list) - assert self._occur_before("child_dag", "a_child", topological_list) - assert self._occur_before("child_dag", "b_child", topological_list) - assert self._occur_before("a_child", "b_parent", topological_list) - assert self._occur_before("b_child", "b_parent", topological_list) - def test_dag_topological_sort_dag_without_tasks(self): dag = DAG("dag", start_date=DEFAULT_DATE, default_args={"owner": "owner1"}) @@ -1374,16 +1337,7 @@ def test_sync_to_db(self): ) with dag: EmptyOperator(task_id="task", owner="owner1") - subdag = DAG( - "dag.subtask", - start_date=DEFAULT_DATE, - ) - # parent_dag and is_subdag was set by DagBag. We don't use DagBag, so this value is not set. - subdag.parent_dag = dag - with pytest.warns( - RemovedInAirflow3Warning, match="Please use `airflow.utils.task_group.TaskGroup`." 
- ): - SubDagOperator(task_id="subtask", owner="owner2", subdag=subdag) + EmptyOperator(task_id="task2", owner="owner2") session = settings.Session() dag.sync_to_db(session=session) @@ -1393,12 +1347,6 @@ def test_sync_to_db(self): assert orm_dag.default_view is not None assert orm_dag.default_view == conf.get("webserver", "dag_default_view").lower() assert orm_dag.safe_dag_id == "dag" - - orm_subdag = session.query(DagModel).filter(DagModel.dag_id == "dag.subtask").one() - assert set(orm_subdag.owners.split(", ")) == {"owner1", "owner2"} - assert orm_subdag.is_active - assert orm_subdag.safe_dag_id == "dag__dot__subtask" - assert orm_subdag.fileloc == orm_dag.fileloc session.close() def test_sync_to_db_default_view(self): @@ -1409,17 +1357,6 @@ def test_sync_to_db_default_view(self): ) with dag: EmptyOperator(task_id="task", owner="owner1") - with pytest.warns( - RemovedInAirflow3Warning, match="Please use `airflow.utils.task_group.TaskGroup`." - ): - SubDagOperator( - task_id="subtask", - owner="owner2", - subdag=DAG( - "dag.subtask", - start_date=DEFAULT_DATE, - ), - ) session = settings.Session() dag.sync_to_db(session=session) @@ -1428,81 +1365,6 @@ def test_sync_to_db_default_view(self): assert orm_dag.default_view == "graph" session.close() - @provide_session - def test_is_paused_subdag(self, session): - subdag_id = "dag.subdag" - subdag = DAG( - subdag_id, - start_date=DEFAULT_DATE, - ) - with subdag: - EmptyOperator( - task_id="dummy_task", - ) - - dag_id = "dag" - dag = DAG( - dag_id, - start_date=DEFAULT_DATE, - ) - - with dag, pytest.warns( - RemovedInAirflow3Warning, match="Please use `airflow.utils.task_group.TaskGroup`." - ): - SubDagOperator(task_id="subdag", subdag=subdag) - - # parent_dag and is_subdag was set by DagBag. We don't use DagBag, so this value is not set. 
- subdag.parent_dag = dag - - session.query(DagModel).filter(DagModel.dag_id.in_([subdag_id, dag_id])).delete( - synchronize_session=False - ) - - dag.sync_to_db(session=session) - - unpaused_dags = ( - session.query(DagModel.dag_id, DagModel.is_paused) - .filter( - DagModel.dag_id.in_([subdag_id, dag_id]), - ) - .all() - ) - - assert { - (dag_id, False), - (subdag_id, False), - } == set(unpaused_dags) - - DagModel.get_dagmodel(dag.dag_id).set_is_paused(is_paused=True, including_subdags=False) - - paused_dags = ( - session.query(DagModel.dag_id, DagModel.is_paused) - .filter( - DagModel.dag_id.in_([subdag_id, dag_id]), - ) - .all() - ) - - assert { - (dag_id, True), - (subdag_id, False), - } == set(paused_dags) - - DagModel.get_dagmodel(dag.dag_id).set_is_paused(is_paused=True) - - paused_dags = ( - session.query(DagModel.dag_id, DagModel.is_paused) - .filter( - DagModel.dag_id.in_([subdag_id, dag_id]), - ) - .all() - ) - - assert { - (dag_id, True), - (subdag_id, True), - } == set(paused_dags) - def test_existing_dag_is_paused_upon_creation(self): dag = DAG("dag_paused") dag.sync_to_db() @@ -2151,8 +2013,6 @@ def test_clear_set_dagrun_state(self, dag_run_state): start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + datetime.timedelta(days=1), dag_run_state=dag_run_state, - include_subdags=False, - include_parentdag=False, session=session, ) @@ -2213,8 +2073,6 @@ def consumer(value): start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + datetime.timedelta(days=1), dag_run_state=dag_run_state, - include_subdags=False, - include_parentdag=False, session=session, ) session.refresh(upstream_ti) @@ -2357,96 +2215,6 @@ def check_task(): path.write_text(test_connections_string) dag.test(conn_file_path=os.fspath(path)) - def _make_test_subdag(self, session): - dag_id = "test_subdag" - self._clean_up(dag_id) - task_id = "t1" - dag = DAG(dag_id, start_date=DEFAULT_DATE, max_active_runs=1) - t_1 = EmptyOperator(task_id=task_id, dag=dag) - subdag = DAG(dag_id + ".test", start_date=DEFAULT_DATE, max_active_runs=1) - with pytest.warns( - RemovedInAirflow3Warning, - match="This class is deprecated. 
Please use `airflow.utils.task_group.TaskGroup`.", - ): - SubDagOperator(task_id="test", subdag=subdag, dag=dag) - t_2 = EmptyOperator(task_id="task", dag=subdag) - subdag.parent_dag = dag - - dag.sync_to_db() - - session = settings.Session() - dag.create_dagrun( - run_type=DagRunType.MANUAL, - state=State.FAILED, - start_date=DEFAULT_DATE, - execution_date=DEFAULT_DATE, - session=session, - data_interval=(DEFAULT_DATE, DEFAULT_DATE), - ) - subdag.create_dagrun( - run_type=DagRunType.MANUAL, - state=State.FAILED, - start_date=DEFAULT_DATE, - execution_date=DEFAULT_DATE, - session=session, - data_interval=(DEFAULT_DATE, DEFAULT_DATE), - ) - task_instance_1 = TI(t_1, run_id=f"manual__{DEFAULT_DATE.isoformat()}", state=State.RUNNING) - task_instance_2 = TI(t_2, run_id=f"manual__{DEFAULT_DATE.isoformat()}", state=State.RUNNING) - session.merge(task_instance_1) - session.merge(task_instance_2) - - return dag, subdag - - @pytest.mark.parametrize("dag_run_state", [DagRunState.QUEUED, DagRunState.RUNNING]) - def test_clear_set_dagrun_state_for_subdag(self, dag_run_state): - session = settings.Session() - dag, subdag = self._make_test_subdag(session) - session.flush() - - dag.clear( - start_date=DEFAULT_DATE, - end_date=DEFAULT_DATE + datetime.timedelta(days=1), - dag_run_state=dag_run_state, - include_subdags=True, - include_parentdag=False, - session=session, - ) - - dagrun = ( - session.query( - DagRun, - ) - .filter(DagRun.dag_id == subdag.dag_id) - .one() - ) - assert dagrun.state == dag_run_state - session.rollback() - - @pytest.mark.parametrize("dag_run_state", [DagRunState.QUEUED, DagRunState.RUNNING]) - def test_clear_set_dagrun_state_for_parent_dag(self, dag_run_state): - session = settings.Session() - dag, subdag = self._make_test_subdag(session) - session.flush() - - subdag.clear( - start_date=DEFAULT_DATE, - end_date=DEFAULT_DATE + datetime.timedelta(days=1), - dag_run_state=dag_run_state, - include_subdags=True, - include_parentdag=True, - session=session, - ) - - dagrun = ( - session.query( - DagRun, - ) - .filter(DagRun.dag_id == dag.dag_id) - .one() - ) - assert dagrun.state == dag_run_state - @pytest.mark.parametrize( "ti_state_begin, ti_state_end", [ @@ -2726,50 +2494,6 @@ def test_next_dagrun_after_auto_align(self): assert next_info assert next_info.logical_date == timezone.datetime(2016, 1, 1, 10, 10) - def test_next_dagrun_after_not_for_subdags(self): - """ - Test the subdags are never marked to have dagruns created, as they are - handled by the SubDagOperator, not the scheduler - """ - - def subdag(parent_dag_name, child_dag_name, args): - """ - Create a subdag. - """ - dag_subdag = DAG( - dag_id=f"{parent_dag_name}.{child_dag_name}", - schedule="@daily", - default_args=args, - ) - - for i in range(2): - EmptyOperator(task_id=f"{child_dag_name}-task-{i + 1}", dag=dag_subdag) - - return dag_subdag - - with DAG( - dag_id="test_subdag_operator", - start_date=datetime.datetime(2019, 1, 1), - max_active_runs=1, - schedule=timedelta(minutes=1), - ) as dag, pytest.warns( - RemovedInAirflow3Warning, match="Please use `airflow.utils.task_group.TaskGroup`." - ): - section_1 = SubDagOperator( - task_id="section-1", - subdag=subdag(dag.dag_id, "section-1", {"start_date": dag.start_date}), - ) - - subdag = section_1.subdag - # parent_dag and is_subdag was set by DagBag. We don't use DagBag, so this value is not set. 
- subdag.parent_dag = dag - - next_parent_info = dag.next_dagrun_info(None) - assert next_parent_info.logical_date == timezone.datetime(2019, 1, 1, 0, 0) - - next_subdag_info = subdag.next_dagrun_info(None) - assert next_subdag_info is None, "SubDags should never have DagRuns created by the scheduler" - def test_next_dagrun_info_on_29_feb(self): dag = DAG( "test_scheduler_dagrun_29_feb", start_date=timezone.datetime(2024, 1, 1), schedule="0 0 29 2 *" diff --git a/tests/models/test_dagbag.py b/tests/models/test_dagbag.py index 936852dd082e4..5ace985adde5d 100644 --- a/tests/models/test_dagbag.py +++ b/tests/models/test_dagbag.py @@ -36,7 +36,7 @@ import airflow.example_dags from airflow import settings -from airflow.exceptions import RemovedInAirflow3Warning, SerializationError +from airflow.exceptions import SerializationError from airflow.models.dag import DAG, DagModel from airflow.models.dagbag import DagBag from airflow.models.serialized_dag import SerializedDagModel @@ -364,14 +364,6 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): {"example_bash_operator": "airflow/example_dags/example_bash_operator.py"}, id="example_bash_operator", ), - pytest.param( - TEST_DAGS_FOLDER / "test_subdag.py", - { - "test_subdag_operator": "dags/test_subdag.py", - "test_subdag_operator.section-1": "dags/test_subdag.py", - }, - id="test_subdag_operator", - ), ), ) def test_get_dag_registration(self, file_to_load, expected): @@ -500,161 +492,18 @@ def process_dag(self, create_dag, tmp_path): found_dags = dagbag.process_file(os.fspath(path)) return dagbag, found_dags, os.fspath(path) - def validate_dags(self, expected_parent_dag, actual_found_dags, actual_dagbag, should_be_found=True): - expected_dag_ids = [dag.dag_id for dag in expected_parent_dag.subdags] - expected_dag_ids.append(expected_parent_dag.dag_id) - + def validate_dags(self, expected_dag, actual_found_dags, actual_dagbag, should_be_found=True): actual_found_dag_ids = [dag.dag_id for dag in actual_found_dags] - - for dag_id in expected_dag_ids: - actual_dagbag.log.info("validating %s", dag_id) - assert (dag_id in actual_found_dag_ids) == should_be_found, ( - f"dag \"{dag_id}\" should {'' if should_be_found else 'not '}" - f'have been found after processing dag "{expected_parent_dag.dag_id}"' - ) - assert (dag_id in actual_dagbag.dags) == should_be_found, ( - f"dag \"{dag_id}\" should {'' if should_be_found else 'not '}" - f'be in dagbag.dags after processing dag "{expected_parent_dag.dag_id}"' - ) - - def test_load_subdags(self, tmp_path): - # Define Dag to load - def standard_subdag(): - import datetime - - from airflow.models.dag import DAG - from airflow.operators.empty import EmptyOperator - from airflow.operators.subdag import SubDagOperator - - dag_name = "parent" - default_args = {"owner": "owner1", "start_date": datetime.datetime(2016, 1, 1)} - dag = DAG(dag_name, default_args=default_args) - - # parent: - # A -> opSubDag_0 - # parent.opsubdag_0: - # -> subdag_0.task - # A -> opSubDag_1 - # parent.opsubdag_1: - # -> subdag_1.task - - with dag: - - def subdag_0(): - subdag_0 = DAG("parent.op_subdag_0", default_args=default_args) - EmptyOperator(task_id="subdag_0.task", dag=subdag_0) - return subdag_0 - - def subdag_1(): - subdag_1 = DAG("parent.op_subdag_1", default_args=default_args) - EmptyOperator(task_id="subdag_1.task", dag=subdag_1) - return subdag_1 - - op_subdag_0 = SubDagOperator(task_id="op_subdag_0", dag=dag, subdag=subdag_0()) - op_subdag_1 = SubDagOperator(task_id="op_subdag_1", dag=dag, 
subdag=subdag_1()) - - op_a = EmptyOperator(task_id="A") - op_a.set_downstream(op_subdag_0) - op_a.set_downstream(op_subdag_1) - return dag - - test_dag = standard_subdag() - # coherence check to make sure DAG.subdag is still functioning properly - assert len(test_dag.subdags) == 2 - - # Perform processing dag - dagbag, found_dags, _ = self.process_dag(standard_subdag, tmp_path) - - # Validate correctness - # all dags from test_dag should be listed - self.validate_dags(test_dag, found_dags, dagbag) - - # Define Dag to load - def nested_subdags(): - import datetime - - from airflow.models.dag import DAG - from airflow.operators.empty import EmptyOperator - from airflow.operators.subdag import SubDagOperator - - dag_name = "parent" - default_args = {"owner": "owner1", "start_date": datetime.datetime(2016, 1, 1)} - dag = DAG(dag_name, default_args=default_args) - - # parent: - # A -> op_subdag_0 - # parent.op_subdag_0: - # -> opSubDag_A - # parent.op_subdag_0.opSubdag_A: - # -> subdag_a.task - # -> opSubdag_B - # parent.op_subdag_0.opSubdag_B: - # -> subdag_b.task - # A -> op_subdag_1 - # parent.op_subdag_1: - # -> opSubdag_C - # parent.op_subdag_1.opSubdag_C: - # -> subdag_c.task - # -> opSubDag_D - # parent.op_subdag_1.opSubdag_D: - # -> subdag_d.task - - with dag: - - def subdag_a(): - subdag_a = DAG("parent.op_subdag_0.opSubdag_A", default_args=default_args) - EmptyOperator(task_id="subdag_a.task", dag=subdag_a) - return subdag_a - - def subdag_b(): - subdag_b = DAG("parent.op_subdag_0.opSubdag_B", default_args=default_args) - EmptyOperator(task_id="subdag_b.task", dag=subdag_b) - return subdag_b - - def subdag_c(): - subdag_c = DAG("parent.op_subdag_1.opSubdag_C", default_args=default_args) - EmptyOperator(task_id="subdag_c.task", dag=subdag_c) - return subdag_c - - def subdag_d(): - subdag_d = DAG("parent.op_subdag_1.opSubdag_D", default_args=default_args) - EmptyOperator(task_id="subdag_d.task", dag=subdag_d) - return subdag_d - - def subdag_0(): - subdag_0 = DAG("parent.op_subdag_0", default_args=default_args) - SubDagOperator(task_id="opSubdag_A", dag=subdag_0, subdag=subdag_a()) - SubDagOperator(task_id="opSubdag_B", dag=subdag_0, subdag=subdag_b()) - return subdag_0 - - def subdag_1(): - subdag_1 = DAG("parent.op_subdag_1", default_args=default_args) - SubDagOperator(task_id="opSubdag_C", dag=subdag_1, subdag=subdag_c()) - SubDagOperator(task_id="opSubdag_D", dag=subdag_1, subdag=subdag_d()) - return subdag_1 - - op_subdag_0 = SubDagOperator(task_id="op_subdag_0", dag=dag, subdag=subdag_0()) - op_subdag_1 = SubDagOperator(task_id="op_subdag_1", dag=dag, subdag=subdag_1()) - - op_a = EmptyOperator(task_id="A") - op_a.set_downstream(op_subdag_0) - op_a.set_downstream(op_subdag_1) - - return dag - - test_dag = nested_subdags() - # coherence check to make sure DAG.subdag is still functioning properly - assert len(test_dag.subdags) == 6 - - # Perform processing dag - dagbag, found_dags, filename = self.process_dag(nested_subdags, tmp_path) - - # Validate correctness - # all dags from test_dag should be listed - self.validate_dags(test_dag, found_dags, dagbag) - - for dag in dagbag.dags.values(): - assert dag.fileloc == filename + dag_id = expected_dag.dag_id + actual_dagbag.log.info("validating %s", dag_id) + assert (dag_id in actual_found_dag_ids) == should_be_found, ( + f"dag \"{dag_id}\" should {'' if should_be_found else 'not '}" + f'have been found after processing dag "{expected_dag.dag_id}"' + ) + assert (dag_id in actual_dagbag.dags) == should_be_found, ( + f"dag \"{dag_id}\" 
should {'' if should_be_found else 'not '}" + f'be in dagbag.dags after processing dag "{expected_dag.dag_id}"' + ) def test_skip_cycle_dags(self, tmp_path): """ @@ -681,8 +530,6 @@ def basic_cycle(): return dag test_dag = basic_cycle() - # coherence check to make sure DAG.subdag is still functioning properly - assert len(test_dag.subdags) == 0 # Perform processing dag dagbag, found_dags, file_path = self.process_dag(basic_cycle, tmp_path) @@ -692,97 +539,6 @@ def basic_cycle(): self.validate_dags(test_dag, found_dags, dagbag, should_be_found=False) assert file_path in dagbag.import_errors - # Define Dag to load - def nested_subdag_cycle(): - import datetime - - from airflow.models.dag import DAG - from airflow.operators.empty import EmptyOperator - from airflow.operators.subdag import SubDagOperator - - dag_name = "nested_cycle" - default_args = {"owner": "owner1", "start_date": datetime.datetime(2016, 1, 1)} - dag = DAG(dag_name, default_args=default_args) - - # cycle: - # A -> op_subdag_0 - # cycle.op_subdag_0: - # -> opSubDag_A - # cycle.op_subdag_0.opSubdag_A: - # -> subdag_a.task - # -> opSubdag_B - # cycle.op_subdag_0.opSubdag_B: - # -> subdag_b.task - # A -> op_subdag_1 - # cycle.op_subdag_1: - # -> opSubdag_C - # cycle.op_subdag_1.opSubdag_C: - # -> subdag_c.task -> subdag_c.task >Invalid Loop< - # -> opSubDag_D - # cycle.op_subdag_1.opSubdag_D: - # -> subdag_d.task - - with dag: - - def subdag_a(): - subdag_a = DAG("nested_cycle.op_subdag_0.opSubdag_A", default_args=default_args) - EmptyOperator(task_id="subdag_a.task", dag=subdag_a) - return subdag_a - - def subdag_b(): - subdag_b = DAG("nested_cycle.op_subdag_0.opSubdag_B", default_args=default_args) - EmptyOperator(task_id="subdag_b.task", dag=subdag_b) - return subdag_b - - def subdag_c(): - subdag_c = DAG("nested_cycle.op_subdag_1.opSubdag_C", default_args=default_args) - op_subdag_c_task = EmptyOperator(task_id="subdag_c.task", dag=subdag_c) - # introduce a loop in opSubdag_C - op_subdag_c_task.set_downstream(op_subdag_c_task) - return subdag_c - - def subdag_d(): - subdag_d = DAG("nested_cycle.op_subdag_1.opSubdag_D", default_args=default_args) - EmptyOperator(task_id="subdag_d.task", dag=subdag_d) - return subdag_d - - def subdag_0(): - subdag_0 = DAG("nested_cycle.op_subdag_0", default_args=default_args) - SubDagOperator(task_id="opSubdag_A", dag=subdag_0, subdag=subdag_a()) - SubDagOperator(task_id="opSubdag_B", dag=subdag_0, subdag=subdag_b()) - return subdag_0 - - def subdag_1(): - subdag_1 = DAG("nested_cycle.op_subdag_1", default_args=default_args) - SubDagOperator(task_id="opSubdag_C", dag=subdag_1, subdag=subdag_c()) - SubDagOperator(task_id="opSubdag_D", dag=subdag_1, subdag=subdag_d()) - return subdag_1 - - op_subdag_0 = SubDagOperator(task_id="op_subdag_0", dag=dag, subdag=subdag_0()) - op_subdag_1 = SubDagOperator(task_id="op_subdag_1", dag=dag, subdag=subdag_1()) - - op_a = EmptyOperator(task_id="A") - op_a.set_downstream(op_subdag_0) - op_a.set_downstream(op_subdag_1) - - return dag - - with pytest.warns( - RemovedInAirflow3Warning, - match="This class is deprecated. 
Please use `airflow.utils.task_group.TaskGroup`.", - ): - test_dag = nested_subdag_cycle() - # coherence check to make sure DAG.subdag is still functioning properly - assert len(test_dag.subdags) == 6 - - # Perform processing dag - dagbag, found_dags, file_path = self.process_dag(nested_subdag_cycle, tmp_path) - - # Validate correctness - # None of the dags should be found - self.validate_dags(test_dag, found_dags, dagbag, should_be_found=False) - assert file_path in dagbag.import_errors - def test_process_file_with_none(self, tmp_path): """ test that process_file can handle Nones @@ -870,7 +626,6 @@ def test_sync_to_db_is_retried(self, mock_bulk_write_to_db, mock_s10n_write_dag, dagbag = DagBag("/dev/null") mock_dag = mock.MagicMock(spec=DAG) - mock_dag.is_subdag = False dagbag.dags["mock_dag"] = mock_dag op_error = OperationalError(statement=mock.ANY, params=mock.ANY, orig=mock.ANY) diff --git a/tests/models/test_dagcode.py b/tests/models/test_dagcode.py index 1d30c94863c8a..e566e9ceed080 100644 --- a/tests/models/test_dagcode.py +++ b/tests/models/test_dagcode.py @@ -105,8 +105,6 @@ def test_detecting_duplicate_key(self, mock_hash): def _compare_example_dags(self, example_dags): with create_session() as session: for dag in example_dags.values(): - if dag.is_subdag: - dag.fileloc = dag.parent_dag.fileloc assert DagCode.has_dag(dag.fileloc) dag_fileloc_hash = DagCode.dag_fileloc_hash(dag.fileloc) result = ( diff --git a/tests/models/test_serialized_dag.py b/tests/models/test_serialized_dag.py index 531ffb031925e..83da1916378df 100644 --- a/tests/models/test_serialized_dag.py +++ b/tests/models/test_serialized_dag.py @@ -164,11 +164,8 @@ def test_read_dags(self): def test_remove_dags_by_id(self): """DAGs can be removed from database.""" example_dags_list = list(self._write_example_dags().values()) - # Remove SubDags from the list as they are not stored in DB in a separate row - # and are directly added in Json blob of the main DAG - filtered_example_dags_list = [dag for dag in example_dags_list if not dag.is_subdag] # Tests removing by dag_id. - dag_removed_by_id = filtered_example_dags_list[0] + dag_removed_by_id = example_dags_list[0] SDM.remove_dag(dag_removed_by_id.dag_id) assert not SDM.has_dag(dag_removed_by_id.dag_id) @@ -176,13 +173,10 @@ def test_remove_dags_by_id(self): def test_remove_dags_by_filepath(self): """DAGs can be removed from database.""" example_dags_list = list(self._write_example_dags().values()) - # Remove SubDags from the list as they are not stored in DB in a separate row - # and are directly added in Json blob of the main DAG - filtered_example_dags_list = [dag for dag in example_dags_list if not dag.is_subdag] # Tests removing by file path. 
- dag_removed_by_file = filtered_example_dags_list[0] + dag_removed_by_file = example_dags_list[0] # remove repeated files for those DAGs that define multiple dags in the same file (set comprehension) - example_dag_files = list({dag.fileloc for dag in filtered_example_dags_list}) + example_dag_files = list({dag.fileloc for dag in example_dags_list}) example_dag_files.remove(dag_removed_by_file.fileloc) SDM.remove_deleted_dags(example_dag_files, processor_subdir="/tmp/test") assert not SDM.has_dag(dag_removed_by_file.dag_id) diff --git a/tests/operators/test_subdag_operator.py b/tests/operators/test_subdag_operator.py deleted file mode 100644 index ca669a9e4e8cf..0000000000000 --- a/tests/operators/test_subdag_operator.py +++ /dev/null @@ -1,376 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from unittest import mock -from unittest.mock import Mock - -import pytest - -import airflow -from airflow.exceptions import AirflowException, RemovedInAirflow3Warning -from airflow.models.dag import DAG -from airflow.models.dagrun import DagRun -from airflow.models.taskinstance import TaskInstance -from airflow.operators.empty import EmptyOperator -from airflow.operators.subdag import SkippedStatePropagationOptions, SubDagOperator -from airflow.utils.session import create_session -from airflow.utils.state import State -from airflow.utils.timezone import datetime -from airflow.utils.types import DagRunType -from tests.test_utils.db import clear_db_runs - -pytestmark = pytest.mark.db_test - -DEFAULT_DATE = datetime(2016, 1, 1) - -default_args = {"start_date": DEFAULT_DATE} - -WARNING_MESSAGE = """This class is deprecated. 
Please use `airflow.utils.task_group.TaskGroup`.""" - - -class TestSubDagOperator: - def setup_method(self): - clear_db_runs() - self.dag_run_running = DagRun() - self.dag_run_running.state = State.RUNNING - self.dag_run_success = DagRun() - self.dag_run_success.state = State.SUCCESS - self.dag_run_failed = DagRun() - self.dag_run_failed.state = State.FAILED - - def teardown_class(self): - clear_db_runs() - - def test_subdag_name(self): - """ - Subdag names must be {parent_dag}.{subdag task} - """ - dag = DAG("parent", default_args=default_args) - subdag_good = DAG("parent.test", default_args=default_args) - subdag_bad1 = DAG("parent.bad", default_args=default_args) - subdag_bad2 = DAG("bad.test", default_args=default_args) - subdag_bad3 = DAG("bad.bad", default_args=default_args) - - with pytest.warns(RemovedInAirflow3Warning, match=WARNING_MESSAGE): - SubDagOperator(task_id="test", dag=dag, subdag=subdag_good) - with pytest.raises(AirflowException): - SubDagOperator(task_id="test", dag=dag, subdag=subdag_bad1) - with pytest.raises(AirflowException): - SubDagOperator(task_id="test", dag=dag, subdag=subdag_bad2) - with pytest.raises(AirflowException): - SubDagOperator(task_id="test", dag=dag, subdag=subdag_bad3) - - def test_subdag_in_context_manager(self): - """ - Creating a sub DAG within a main DAG's context manager - """ - with DAG("parent", default_args=default_args) as dag: - subdag = DAG("parent.test", default_args=default_args) - with pytest.warns(RemovedInAirflow3Warning, match=WARNING_MESSAGE): - op = SubDagOperator(task_id="test", subdag=subdag) - - assert op.dag == dag - assert op.subdag == subdag - - def test_subdag_pools(self, dag_maker): - """ - Subdags and subdag tasks can't both have a pool with 1 slot - """ - with dag_maker("parent", default_args=default_args, serialized=True) as dag: - pass - - pool_1 = airflow.models.Pool(pool="test_pool_1", slots=1, include_deferred=False) - pool_10 = airflow.models.Pool(pool="test_pool_10", slots=10, include_deferred=False) - dag_maker.session.add(pool_1) - dag_maker.session.add(pool_10) - dag_maker.session.commit() - - with dag_maker("parent.child", default_args=default_args, serialized=True) as subdag: - EmptyOperator(task_id="dummy", pool="test_pool_1") - - with pytest.raises(AirflowException): - SubDagOperator(task_id="child", dag=dag, subdag=subdag, pool="test_pool_1") - - # recreate dag because failed subdagoperator was already added - dag = DAG("parent", default_args=default_args) - with pytest.warns(RemovedInAirflow3Warning, match=WARNING_MESSAGE): - SubDagOperator(task_id="child", dag=dag, subdag=subdag, pool="test_pool_10") - - dag_maker.session.delete(pool_1) - dag_maker.session.delete(pool_10) - dag_maker.session.commit() - - def test_subdag_pools_no_possible_conflict(self): - """ - Subdags and subdag tasks with no pool overlap, should not to query - pools - """ - dag = DAG("parent", default_args=default_args) - subdag = DAG("parent.child", default_args=default_args) - - session = airflow.settings.Session() - pool_1 = airflow.models.Pool(pool="test_pool_1", slots=1, include_deferred=False) - pool_10 = airflow.models.Pool(pool="test_pool_10", slots=10, include_deferred=False) - session.add(pool_1) - session.add(pool_10) - session.commit() - - EmptyOperator(task_id="dummy", dag=subdag, pool="test_pool_10") - - mock_session = Mock() - with pytest.warns(RemovedInAirflow3Warning, match=WARNING_MESSAGE): - SubDagOperator(task_id="child", dag=dag, subdag=subdag, pool="test_pool_1", session=mock_session) - assert not 
mock_session.query.called - - session.delete(pool_1) - session.delete(pool_10) - session.commit() - - def test_execute_create_dagrun_wait_until_success(self): - """ - When SubDagOperator executes, it creates a DagRun if there is no existing one - and wait until the DagRun succeeds. - """ - dag = DAG("parent", default_args=default_args) - subdag = DAG("parent.test", default_args=default_args) - with pytest.warns(RemovedInAirflow3Warning, match=WARNING_MESSAGE): - subdag_task = SubDagOperator(task_id="test", subdag=subdag, dag=dag, poke_interval=1) - - subdag.create_dagrun = Mock() - subdag.create_dagrun.return_value = self.dag_run_running - - subdag_task._get_dagrun = Mock() - subdag_task._get_dagrun.side_effect = [None, self.dag_run_success, self.dag_run_success] - - context = { - "data_interval_start": None, - "data_interval_end": None, - "execution_date": DEFAULT_DATE, - } - - subdag_task.pre_execute(context=context) - subdag_task.execute(context=context) - subdag_task.post_execute(context=context) - - subdag.create_dagrun.assert_called_once_with( - run_type=DagRunType.SCHEDULED, - execution_date=DEFAULT_DATE, - data_interval=None, - conf=None, - state=State.RUNNING, - external_trigger=True, - ) - - assert 3 == subdag_task._get_dagrun.call_count - - def test_execute_create_dagrun_with_conf(self): - """ - When SubDagOperator executes, it creates a DagRun if there is no existing one - and wait until the DagRun succeeds. - """ - conf = {"key": "value"} - dag = DAG("parent", default_args=default_args) - subdag = DAG("parent.test", default_args=default_args) - with pytest.warns(RemovedInAirflow3Warning, match=WARNING_MESSAGE): - subdag_task = SubDagOperator(task_id="test", subdag=subdag, dag=dag, poke_interval=1, conf=conf) - - subdag.create_dagrun = Mock() - subdag.create_dagrun.return_value = self.dag_run_running - - subdag_task._get_dagrun = Mock() - subdag_task._get_dagrun.side_effect = [None, self.dag_run_success, self.dag_run_success] - - context = { - "data_interval_start": None, - "data_interval_end": None, - "execution_date": DEFAULT_DATE, - } - - subdag_task.pre_execute(context=context) - subdag_task.execute(context=context) - subdag_task.post_execute(context=context) - - subdag.create_dagrun.assert_called_once_with( - run_type=DagRunType.SCHEDULED, - execution_date=DEFAULT_DATE, - data_interval=None, - conf=conf, - state=State.RUNNING, - external_trigger=True, - ) - - assert 3 == subdag_task._get_dagrun.call_count - - def test_execute_dagrun_failed(self): - """ - When the DagRun failed during the execution, it raises an Airflow Exception. - """ - dag = DAG("parent", default_args=default_args) - subdag = DAG("parent.test", default_args=default_args) - with pytest.warns(RemovedInAirflow3Warning, match=WARNING_MESSAGE): - subdag_task = SubDagOperator(task_id="test", subdag=subdag, dag=dag, poke_interval=1) - - subdag.create_dagrun = Mock() - subdag.create_dagrun.return_value = self.dag_run_running - - subdag_task._get_dagrun = Mock() - subdag_task._get_dagrun.side_effect = [None, self.dag_run_failed, self.dag_run_failed] - - context = { - "data_interval_start": None, - "data_interval_end": None, - "execution_date": DEFAULT_DATE, - } - - subdag_task.pre_execute(context=context) - subdag_task.execute(context=context) - with pytest.raises(AirflowException): - subdag_task.post_execute(context=context) - - def test_execute_skip_if_dagrun_success(self): - """ - When there is an existing DagRun in SUCCESS state, skip the execution. 
- """ - dag = DAG("parent", default_args=default_args) - subdag = DAG("parent.test", default_args=default_args) - - subdag.create_dagrun = Mock() - with pytest.warns(RemovedInAirflow3Warning, match=WARNING_MESSAGE): - subdag_task = SubDagOperator(task_id="test", subdag=subdag, dag=dag, poke_interval=1) - subdag_task._get_dagrun = Mock() - subdag_task._get_dagrun.return_value = self.dag_run_success - - context = { - "data_interval_start": None, - "data_interval_end": None, - "execution_date": DEFAULT_DATE, - } - - subdag_task.pre_execute(context=context) - subdag_task.execute(context=context) - subdag_task.post_execute(context=context) - - subdag.create_dagrun.assert_not_called() - assert 3 == subdag_task._get_dagrun.call_count - - @pytest.mark.skip_if_database_isolation_mode # this uses functions which operate directly on DB - def test_rerun_failed_subdag(self, dag_maker): - """ - When there is an existing DagRun with failed state, reset the DagRun and the - corresponding TaskInstances - """ - with create_session() as session: - with dag_maker("parent.test", default_args=default_args, session=session) as subdag: - dummy_task = EmptyOperator(task_id="dummy") - sub_dagrun = dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, - execution_date=DEFAULT_DATE, - state=State.FAILED, - external_trigger=True, - ) - - (dummy_task_instance,) = sub_dagrun.task_instances - dummy_task_instance.refresh_from_task(dummy_task) - dummy_task_instance.state == State.FAILED - - with dag_maker("parent", default_args=default_args, session=session), pytest.warns( - RemovedInAirflow3Warning, match=WARNING_MESSAGE - ): - subdag_task = SubDagOperator(task_id="test", subdag=subdag, poke_interval=1) - dag_maker.create_dagrun(execution_date=DEFAULT_DATE, run_type=DagRunType.SCHEDULED) - - subdag_task._reset_dag_run_and_task_instances(sub_dagrun, execution_date=DEFAULT_DATE) - - dummy_task_instance.refresh_from_db() - assert dummy_task_instance.state == State.NONE - - sub_dagrun.refresh_from_db() - assert sub_dagrun.state == State.RUNNING - - @pytest.mark.parametrize( - "propagate_option, states, skip_parent", - [ - (SkippedStatePropagationOptions.ALL_LEAVES, [State.SKIPPED, State.SKIPPED], True), - (SkippedStatePropagationOptions.ALL_LEAVES, [State.SKIPPED, State.SUCCESS], False), - (SkippedStatePropagationOptions.ANY_LEAF, [State.SKIPPED, State.SUCCESS], True), - (SkippedStatePropagationOptions.ANY_LEAF, [State.FAILED, State.SKIPPED], True), - (None, [State.SKIPPED, State.SKIPPED], False), - ], - ) - @mock.patch("airflow.operators.subdag.SubDagOperator.skip") - @mock.patch("airflow.operators.subdag.get_task_instance") - def test_subdag_with_propagate_skipped_state( - self, - mock_get_task_instance, - mock_skip, - dag_maker, - propagate_option, - states, - skip_parent, - ): - """ - Tests that skipped state of leaf tasks propagates to the parent dag. - Note that the skipped state propagation only takes affect when the dagrun's state is SUCCESS. 
- """ - with dag_maker("parent.test", default_args=default_args) as subdag: - dummy_subdag_tasks = [EmptyOperator(task_id=f"dummy_subdag_{i}") for i in range(len(states))] - dag_maker.create_dagrun(execution_date=DEFAULT_DATE) - - with dag_maker("parent", default_args=default_args): - with pytest.warns(RemovedInAirflow3Warning, match=WARNING_MESSAGE): - subdag_task = SubDagOperator( - task_id="test", - subdag=subdag, - poke_interval=1, - propagate_skipped_state=propagate_option, - ) - dummy_dag_task = EmptyOperator(task_id="dummy_dag") - subdag_task >> dummy_dag_task - dag_run = dag_maker.create_dagrun(execution_date=DEFAULT_DATE) - - subdag_task._get_dagrun = Mock(return_value=self.dag_run_success) - - mock_get_task_instance.side_effect = [ - TaskInstance(task=task, run_id=dag_run.run_id, state=state) - for task, state in zip(dummy_subdag_tasks, states) - ] - - context = { - "execution_date": DEFAULT_DATE, - "dag_run": dag_run, - "task": subdag_task, - "ti": mock.MagicMock(map_index=-1), - } - subdag_task.post_execute(context) - - if skip_parent: - mock_skip.assert_called_once_with( - context["dag_run"], context["execution_date"], [dummy_dag_task], map_index=-1 - ) - else: - mock_skip.assert_not_called() - - def test_deprecation_warning(self): - dag = DAG("parent", default_args=default_args) - subdag = DAG("parent.test", default_args=default_args) - warning_message = """This class is deprecated. Please use `airflow.utils.task_group.TaskGroup`.""" - - with pytest.warns(DeprecationWarning) as warnings: - SubDagOperator(task_id="test", subdag=subdag, dag=dag) - assert warning_message == str(warnings[0].message) diff --git a/tests/operators/test_trigger_dagrun.py b/tests/operators/test_trigger_dagrun.py index 349bba463800f..9ec22e7e7a3de 100644 --- a/tests/operators/test_trigger_dagrun.py +++ b/tests/operators/test_trigger_dagrun.py @@ -76,7 +76,7 @@ def setup_method(self): def re_sync_triggered_dag_to_db(self, dag, dag_maker): TracebackSessionForTests.set_allow_db_access(dag_maker.session, True) dagbag = DagBag(self.f_name, read_dags_from_db=False, include_examples=False) - dagbag.bag_dag(dag, root_dag=dag) + dagbag.bag_dag(dag) dagbag.sync_to_db(session=dag_maker.session) TracebackSessionForTests.set_allow_db_access(dag_maker.session, False) diff --git a/tests/providers/fab/auth_manager/test_security.py b/tests/providers/fab/auth_manager/test_security.py index 5ff7f34d018c0..6dd48cc6e6e29 100644 --- a/tests/providers/fab/auth_manager/test_security.py +++ b/tests/providers/fab/auth_manager/test_security.py @@ -1008,46 +1008,6 @@ def test_prefixed_dag_id_is_deprecated(security_manager): security_manager.prefixed_dag_id("hello") -def test_parent_dag_access_applies_to_subdag(app, security_manager, assert_user_has_dag_perms, session): - username = "dag_permission_user" - role_name = "dag_permission_role" - parent_dag_name = "parent_dag" - subdag_name = parent_dag_name + ".subdag" - subsubdag_name = parent_dag_name + ".subdag.subsubdag" - with app.app_context(): - mock_roles = [ - { - "role": role_name, - "perms": [ - (permissions.ACTION_CAN_READ, f"DAG:{parent_dag_name}"), - (permissions.ACTION_CAN_EDIT, f"DAG:{parent_dag_name}"), - ], - } - ] - with create_user_scope( - app, - username=username, - role_name=role_name, - ) as user: - dag1 = DagModel(dag_id=parent_dag_name) - dag2 = DagModel(dag_id=subdag_name, is_subdag=True, root_dag_id=parent_dag_name) - dag3 = DagModel(dag_id=subsubdag_name, is_subdag=True, root_dag_id=parent_dag_name) - session.add_all([dag1, dag2, dag3]) - session.commit() 
- security_manager.bulk_sync_roles(mock_roles) - for _ in [dag1, dag2, dag3]: - security_manager._sync_dag_view_permissions( - parent_dag_name, access_control={role_name: READ_WRITE} - ) - - assert_user_has_dag_perms(perms=["GET", "PUT"], dag_id=parent_dag_name, user=user) - assert_user_has_dag_perms(perms=["GET", "PUT"], dag_id=parent_dag_name + ".subdag", user=user) - assert_user_has_dag_perms( - perms=["GET", "PUT"], dag_id=parent_dag_name + ".subdag.subsubdag", user=user - ) - session.query(DagModel).delete() - - def test_permissions_work_for_dags_with_dot_in_dagname( app, security_manager, assert_user_has_dag_perms, assert_user_does_not_have_dag_perms, session ): diff --git a/tests/sensors/test_external_task_sensor.py b/tests/sensors/test_external_task_sensor.py index 8c5ed3c42920b..91b224a0de44d 100644 --- a/tests/sensors/test_external_task_sensor.py +++ b/tests/sensors/test_external_task_sensor.py @@ -1205,7 +1205,7 @@ def dag_bag_ext(): task_a_3 >> task_b_3 for dag in [dag_0, dag_1, dag_2, dag_3]: - dag_bag.bag_dag(dag=dag, root_dag=dag) + dag_bag.bag_dag(dag=dag) yield dag_bag @@ -1254,7 +1254,7 @@ def dag_bag_parent_child(): ) for dag in [dag_0, dag_1]: - dag_bag.bag_dag(dag=dag, root_dag=dag) + dag_bag.bag_dag(dag=dag) yield dag_bag @@ -1479,7 +1479,7 @@ def _factory(depth: int) -> DagBag: task_a >> task_b for dag in dags: - dag_bag.bag_dag(dag=dag, root_dag=dag) + dag_bag.bag_dag(dag=dag) return dag_bag @@ -1531,8 +1531,8 @@ def dag_bag_multiple(): dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) daily_dag = DAG("daily_dag", start_date=DEFAULT_DATE, schedule="@daily") agg_dag = DAG("agg_dag", start_date=DEFAULT_DATE, schedule="@daily") - dag_bag.bag_dag(dag=daily_dag, root_dag=daily_dag) - dag_bag.bag_dag(dag=agg_dag, root_dag=agg_dag) + dag_bag.bag_dag(dag=daily_dag) + dag_bag.bag_dag(dag=agg_dag) daily_task = EmptyOperator(task_id="daily_tas", dag=daily_dag) @@ -1603,7 +1603,7 @@ def dag_bag_head_tail(): ) head >> body >> tail - dag_bag.bag_dag(dag=dag, root_dag=dag) + dag_bag.bag_dag(dag=dag) return dag_bag @@ -1687,7 +1687,7 @@ def fake_task(x: int): ) head >> body >> tail - dag_bag.bag_dag(dag=dag, root_dag=dag) + dag_bag.bag_dag(dag=dag) return dag_bag diff --git a/tests/serialization/test_dag_serialization.py b/tests/serialization/test_dag_serialization.py index e9c8ceaf03979..8ef09349275ea 100644 --- a/tests/serialization/test_dag_serialization.py +++ b/tests/serialization/test_dag_serialization.py @@ -364,9 +364,6 @@ def collect_dags(dag_folder=None): if any([directory.startswith(excluded_pattern) for excluded_pattern in excluded_patterns]): continue dags.update(make_example_dags(directory)) - - # Filter subdags as they are stored in same row in Serialized Dag table - dags = {dag_id: dag for dag_id, dag in dags.items() if not dag.is_subdag} return dags @@ -640,7 +637,6 @@ def validate_deserialized_task( # Checked separately "_task_type", "_operator_name", - "subdag", # Type is excluded, so don't check it "_log", # List vs tuple. 
Check separately @@ -714,14 +710,6 @@ def validate_deserialized_task( original_partial_kwargs = {**default_partial_kwargs, **task.partial_kwargs} assert serialized_partial_kwargs == original_partial_kwargs - # Check that for Deserialized task, task.subdag is None for all other Operators - # except for the SubDagOperator where task.subdag is an instance of DAG object - if task.task_type == "SubDagOperator": - assert serialized_task.subdag is not None - assert isinstance(serialized_task.subdag, DAG) - else: - assert serialized_task.subdag is None - @pytest.mark.parametrize( "dag_start_date, task_start_date, expected_task_start_date", [ @@ -1254,7 +1242,6 @@ def test_dag_serialized_fields_with_schema(self): # The parameters we add manually in Serialization need to be ignored ignored_keys: set = { - "is_subdag", "tasks", "has_on_success_callback", "has_on_failure_callback", diff --git a/tests/utils/test_cli_util.py b/tests/utils/test_cli_util.py index 395db77e0392f..25003eec6d0eb 100644 --- a/tests/utils/test_cli_util.py +++ b/tests/utils/test_cli_util.py @@ -83,12 +83,9 @@ def test_process_subdir_path_with_placeholder(self): assert os.path.join(settings.DAGS_FOLDER, "abc") == cli.process_subdir("DAGS_FOLDER/abc") def test_get_dags(self): - dags = cli.get_dags(None, "example_subdag_operator") + dags = cli.get_dags(None, "example_bash_operator") assert len(dags) == 1 - dags = cli.get_dags(None, "subdag", True) - assert len(dags) > 1 - with pytest.raises(AirflowException): cli.get_dags(None, "foobar", True) diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py index 44f11d3f033a5..ae18bdc943981 100644 --- a/tests/www/views/test_views.py +++ b/tests/www/views/test_views.py @@ -367,7 +367,7 @@ def get_task_instance(session, task): session.commit() test_app.dag_bag = DagBag(dag_folder="/dev/null", include_examples=False) - test_app.dag_bag.bag_dag(dag=dag, root_dag=dag) + test_app.dag_bag.bag_dag(dag=dag) with test_app.test_request_context(): view = Airflow() @@ -469,7 +469,7 @@ def get_task_instance(session, task): session.commit() test_app.dag_bag = DagBag(dag_folder="/dev/null", include_examples=False) - test_app.dag_bag.bag_dag(dag=dag, root_dag=dag) + test_app.dag_bag.bag_dag(dag=dag) with test_app.test_request_context(): view = Airflow() diff --git a/tests/www/views/test_views_acl.py b/tests/www/views/test_views_acl.py index 17700749f6e32..400ac2a6bf384 100644 --- a/tests/www/views/test_views_acl.py +++ b/tests/www/views/test_views_acl.py @@ -147,7 +147,7 @@ def init_dagruns(acl_app, reset_dagruns): start_date=timezone.utcnow(), state=State.RUNNING, ) - acl_app.dag_bag.get_dag("example_subdag_operator").create_dagrun( + acl_app.dag_bag.get_dag("example_python_operator").create_dagrun( run_type=DagRunType.SCHEDULED, execution_date=DEFAULT_DATE, start_date=timezone.utcnow(), @@ -238,14 +238,14 @@ def client_all_dags(acl_app, user_all_dags): def test_index_for_all_dag_user(client_all_dags): # The all dag user can access/view all dags. resp = client_all_dags.get("/", follow_redirects=True) - check_content_in_response("example_subdag_operator", resp) + check_content_in_response("example_python_operator", resp) check_content_in_response("example_bash_operator", resp) def test_index_failure(dag_test_client): # This user can only access/view example_bash_operator dag. 
resp = dag_test_client.get("/", follow_redirects=True) - check_content_not_in_response("example_subdag_operator", resp) + check_content_not_in_response("example_python_operator", resp) def test_dag_autocomplete_success(client_all_dags): @@ -376,12 +376,12 @@ def test_dag_stats_success(client_all_dags_dagruns): def test_task_stats_failure(dag_test_client): resp = dag_test_client.post("task_stats", follow_redirects=True) - check_content_not_in_response("example_subdag_operator", resp) + check_content_not_in_response("example_python_operator", resp) def test_dag_stats_success_for_all_dag_user(client_all_dags_dagruns): resp = client_all_dags_dagruns.post("dag_stats", follow_redirects=True) - check_content_in_response("example_subdag_operator", resp) + check_content_in_response("example_python_operator", resp) check_content_in_response("example_bash_operator", resp) @@ -413,18 +413,18 @@ def client_all_dags_dagruns_tis(acl_app, user_all_dags_dagruns_tis): def test_task_stats_empty_success(client_all_dags_dagruns_tis): resp = client_all_dags_dagruns_tis.post("task_stats", follow_redirects=True) check_content_in_response("example_bash_operator", resp) - check_content_in_response("example_subdag_operator", resp) + check_content_in_response("example_python_operator", resp) @pytest.mark.parametrize( "dags_to_run, unexpected_dag_ids", [ ( - ["example_subdag_operator"], + ["example_python_operator"], ["example_bash_operator", "example_xcom"], ), ( - ["example_subdag_operator", "example_bash_operator"], + ["example_python_operator", "example_bash_operator"], ["example_xcom"], ), ], @@ -484,7 +484,7 @@ def test_code_failure(dag_test_client): @pytest.mark.parametrize( "dag_id", - ["example_bash_operator", "example_subdag_operator"], + ["example_bash_operator", "example_python_operator"], ) def test_code_success_for_all_dag_user(client_all_dags_codes, dag_id): url = f"code?dag_id={dag_id}" @@ -494,7 +494,7 @@ def test_code_success_for_all_dag_user(client_all_dags_codes, dag_id): @pytest.mark.parametrize( "dag_id", - ["example_bash_operator", "example_subdag_operator"], + ["example_bash_operator", "example_python_operator"], ) def test_dag_details_success_for_all_dag_user(client_all_dags_dagruns, dag_id): url = f"dag_details?dag_id={dag_id}" @@ -673,7 +673,7 @@ def test_blocked_success(client_all_dags_dagruns): def test_blocked_success_for_all_dag_user(all_dag_user_client): resp = all_dag_user_client.post("blocked") check_content_in_response("example_bash_operator", resp) - check_content_in_response("example_subdag_operator", resp) + check_content_in_response("example_python_operator", resp) def test_blocked_viewer(viewer_client): @@ -685,11 +685,11 @@ def test_blocked_viewer(viewer_client): "dags_to_block, unexpected_dag_ids", [ ( - ["example_subdag_operator"], + ["example_python_operator"], ["example_bash_operator", "example_xcom"], ), ( - ["example_subdag_operator", "example_bash_operator"], + ["example_python_operator", "example_bash_operator"], ["example_xcom"], ), ], diff --git a/tests/www/views/test_views_blocked.py b/tests/www/views/test_views_blocked.py deleted file mode 100644 index c3e8cd4e88cf1..0000000000000 --- a/tests/www/views/test_views_blocked.py +++ /dev/null @@ -1,90 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -import pytest - -from airflow.models import DagModel -from airflow.models.dagbag import DagBag -from airflow.models.serialized_dag import SerializedDagModel -from airflow.operators.empty import EmptyOperator -from airflow.operators.subdag import SubDagOperator -from airflow.utils import timezone -from airflow.utils.session import create_session -from airflow.utils.state import State -from tests.test_utils.db import clear_db_runs - -pytestmark = pytest.mark.db_test - - -@pytest.fixture -def running_subdag(admin_client, dag_maker): - with dag_maker(dag_id="running_dag.subdag") as subdag: - EmptyOperator(task_id="empty") - - with pytest.deprecated_call(), dag_maker(dag_id="running_dag") as dag: - SubDagOperator(task_id="subdag", subdag=subdag) - - dag_bag = DagBag(include_examples=False) - dag_bag.bag_dag(dag, root_dag=dag) - - with create_session() as session: - # This writes both DAGs to DagModel, but only serialize the parent DAG. - dag_bag.sync_to_db(session=session) - - # Simulate triggering the SubDagOperator to run the subdag. - logical_date = timezone.datetime(2016, 1, 1) - subdag.create_dagrun( - run_id="blocked_run_example_bash_operator", - state=State.RUNNING, - execution_date=logical_date, - data_interval=(logical_date, logical_date), - start_date=timezone.datetime(2016, 1, 1), - session=session, - ) - - # Now delete the parent DAG but leave the subdag. - session.query(DagModel).filter(DagModel.dag_id == dag.dag_id).delete() - session.query(SerializedDagModel).filter(SerializedDagModel.dag_id == dag.dag_id).delete() - - yield subdag - - with create_session() as session: - session.query(DagModel).filter(DagModel.dag_id == subdag.dag_id).delete() - clear_db_runs() - - -def test_blocked_subdag_success(admin_client, running_subdag): - """Test the /blocked endpoint works when a DAG is deleted. - - When a DAG is bagged, it is written to both DagModel and SerializedDagModel, - but its subdags are only written to DagModel (without serialization). Thus, - ``DagBag.get_dag(subdag_id)`` would raise ``SerializedDagNotFound`` if the - subdag was not previously bagged in the dagbag (perhaps due to its root DAG - being deleted). ``DagBag.get_dag()`` calls should catch the exception and - properly handle this situation. - """ - resp = admin_client.post("/blocked", data={"dag_ids": [running_subdag.dag_id]}) - assert resp.status_code == 200 - assert resp.json == [ - { - "dag_id": running_subdag.dag_id, - "active_dag_run": 1, - "max_active_runs": 0, # Default value for an unserialized DAG. 
- }, - ] diff --git a/tests/www/views/test_views_decorators.py b/tests/www/views/test_views_decorators.py index f10b3d66847f2..fb095b78d2fd0 100644 --- a/tests/www/views/test_views_decorators.py +++ b/tests/www/views/test_views_decorators.py @@ -42,18 +42,13 @@ def bash_dag(dagbag): return dagbag.get_dag("example_bash_operator") -@pytest.fixture(scope="module") -def sub_dag(dagbag): - return dagbag.get_dag("example_subdag_operator") - - @pytest.fixture(scope="module") def xcom_dag(dagbag): return dagbag.get_dag("example_xcom") @pytest.fixture(autouse=True) -def dagruns(bash_dag, sub_dag, xcom_dag): +def dagruns(bash_dag, xcom_dag): bash_dagrun = bash_dag.create_dagrun( run_type=DagRunType.SCHEDULED, execution_date=EXAMPLE_DAG_DEFAULT_DATE, @@ -62,14 +57,6 @@ def dagruns(bash_dag, sub_dag, xcom_dag): state=State.RUNNING, ) - sub_dagrun = sub_dag.create_dagrun( - run_type=DagRunType.SCHEDULED, - execution_date=EXAMPLE_DAG_DEFAULT_DATE, - data_interval=(EXAMPLE_DAG_DEFAULT_DATE, EXAMPLE_DAG_DEFAULT_DATE), - start_date=timezone.utcnow(), - state=State.RUNNING, - ) - xcom_dagrun = xcom_dag.create_dagrun( run_type=DagRunType.SCHEDULED, execution_date=EXAMPLE_DAG_DEFAULT_DATE, @@ -78,7 +65,7 @@ def dagruns(bash_dag, sub_dag, xcom_dag): state=State.RUNNING, ) - yield bash_dagrun, sub_dagrun, xcom_dagrun + yield bash_dagrun, xcom_dagrun clear_db_runs() diff --git a/tests/www/views/test_views_log.py b/tests/www/views/test_views_log.py index 2607317c5fccc..56df22eb050a6 100644 --- a/tests/www/views/test_views_log.py +++ b/tests/www/views/test_views_log.py @@ -139,8 +139,8 @@ def dags(log_app, create_dummy_dag, session): ) bag = DagBag(include_examples=False) - bag.bag_dag(dag=dag, root_dag=dag) - bag.bag_dag(dag=dag_removed, root_dag=dag_removed) + bag.bag_dag(dag=dag) + bag.bag_dag(dag=dag_removed) bag.sync_to_db(session=session) log_app.dag_bag = bag diff --git a/tests/www/views/test_views_tasks.py b/tests/www/views/test_views_tasks.py index 4e4b8d27afc83..d0e7c168e59a2 100644 --- a/tests/www/views/test_views_tasks.py +++ b/tests/www/views/test_views_tasks.py @@ -76,14 +76,7 @@ def init_dagruns(app): start_date=timezone.utcnow(), state=State.RUNNING, ) - XCom.set( - key="return_value", - value="{'x':1}", - task_id="runme_0", - dag_id="example_bash_operator", - run_id=DEFAULT_DAGRUN, - ) - app.dag_bag.get_dag("example_subdag_operator").create_dagrun( + app.dag_bag.get_dag("example_python_operator").create_dagrun( run_id=DEFAULT_DAGRUN, run_type=DagRunType.SCHEDULED, execution_date=DEFAULT_DATE, @@ -91,6 +84,13 @@ def init_dagruns(app): start_date=timezone.utcnow(), state=State.RUNNING, ) + XCom.set( + key="return_value", + value="{'x':1}", + task_id="runme_0", + dag_id="example_bash_operator", + run_id=DEFAULT_DAGRUN, + ) app.dag_bag.get_dag("example_xcom").create_dagrun( run_id=DEFAULT_DAGRUN, run_type=DagRunType.SCHEDULED, @@ -200,21 +200,11 @@ def client_ti_without_dag_edit(app): ["runme_1"], id="graph-data", ), - pytest.param( - "object/graph_data?dag_id=example_subdag_operator.section-1", - ["section-1-task-1"], - id="graph-data-subdag", - ), pytest.param( "object/grid_data?dag_id=example_bash_operator", ["runme_1"], id="grid-data", ), - pytest.param( - "object/grid_data?dag_id=example_subdag_operator.section-1", - ["section-1-task-1"], - id="grid-data-subdag", - ), pytest.param( "duration?days=30&dag_id=example_bash_operator", ["example_bash_operator"], @@ -459,22 +449,21 @@ def test_last_dagruns(admin_client): def test_last_dagruns_success_when_selecting_dags(admin_client): resp = 
admin_client.post( - "last_dagruns", data={"dag_ids": ["example_subdag_operator"]}, follow_redirects=True + "last_dagruns", data={"dag_ids": ["example_python_operator"]}, follow_redirects=True ) assert resp.status_code == 200 stats = json.loads(resp.data.decode("utf-8")) assert "example_bash_operator" not in stats - assert "example_subdag_operator" in stats + assert "example_python_operator" in stats # Multiple resp = admin_client.post( "last_dagruns", - data={"dag_ids": ["example_subdag_operator", "example_bash_operator"]}, + data={"dag_ids": ["example_python_operator", "example_bash_operator"]}, follow_redirects=True, ) stats = json.loads(resp.data.decode("utf-8")) assert "example_bash_operator" in stats - assert "example_subdag_operator" in stats check_content_not_in_response("example_xcom", resp) @@ -972,15 +961,8 @@ def test_task_instance_set_state_failure(admin_client, action): check_content_in_response("Failed to set state", resp) -@pytest.mark.parametrize( - "task_search_tuples", - [ - [("example_xcom", "bash_push"), ("example_bash_operator", "run_this_last")], - [("example_subdag_operator", "some-other-task")], - ], - ids=["multiple_tasks", "one_task"], -) -def test_action_muldelete_task_instance(session, admin_client, task_search_tuples): +def test_action_muldelete_task_instance(session, admin_client): + task_search_tuples = [("example_xcom", "bash_push"), ("example_bash_operator", "run_this_last")] # get task instances to delete tasks_to_delete = [] for task_search_tuple in task_search_tuples: