Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 3 additions & 13 deletions airflow/jobs/scheduler_job_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@
from collections import Counter
from dataclasses import dataclass
from datetime import timedelta
from functools import lru_cache, partial
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Collection, Iterable, Iterator
from typing import TYPE_CHECKING, Any, Collection, Iterable, Iterator

from sqlalchemy import and_, delete, func, not_, or_, select, text, update
from sqlalchemy.exc import OperationalError
Expand Down Expand Up @@ -1064,12 +1063,8 @@ def _do_scheduling(self, session: Session) -> int:
callback_tuples = self._schedule_all_dag_runs(guard, dag_runs, session)

# Send the callbacks after we commit to ensure the context is up to date when it gets run
# cache saves time during scheduling of many dag_runs for same dag
cached_get_dag: Callable[[str], DAG | None] = lru_cache()(
partial(self.dagbag.get_dag, session=session)
)
for dag_run, callback_to_run in callback_tuples:
dag = cached_get_dag(dag_run.dag_id)
dag = dag_run.dag or self.dagbag.get_dag(dag_run.dag_id, session=session)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For instance, just before this loop are these two calls:

            dag_runs = self._get_next_dagruns_to_examine(DagRunState.RUNNING, session)
            # Bulk fetch the currently active dag runs for the dags we are
            # examining, rather than making one query per DagRun
            callback_tuples = self._schedule_all_dag_runs(guard, dag_runs, session)

Both of those calls get the dag out of the dagbag directly, without going through the LRU cache, and every dag_run we have here must already have been handled in the call to _schedule_all_dag_runs.

if dag:
# Sending callbacks there as in standalone_dag_processor they are adding to the database,
# so it must be done outside of prohibit_commit.
Expand Down Expand Up @@ -1373,13 +1368,8 @@ def _update_state(dag: DAG, dag_run: DagRun):
tags={"dag_id": dag.dag_id},
)

# cache saves time during scheduling of many dag_runs for same dag
cached_get_dag: Callable[[str], DAG | None] = lru_cache()(
partial(self.dagbag.get_dag, session=session)
)

for dag_run in dag_runs:
dag = dag_run.dag = cached_get_dag(dag_run.dag_id)
dag = dag_run.dag = self.dagbag.get_dag(dag_run.dag_id, session=session)

if not dag:
self.log.error("DAG '%s' not found in serialized_dag table", dag_run.dag_id)
Expand Down