2727from mock import MagicMock , PropertyMock
2828
2929from airflow .configuration import conf , mkdir_p
30- from airflow .jobs import DagFileProcessor
31- from airflow .jobs import LocalTaskJob as LJ
30+ from airflow .jobs import DagFileProcessor , LocalTaskJob as LJ
3231from airflow .models import DagBag , TaskInstance as TI
3332from airflow .utils import timezone
34- from airflow .utils .dag_processing import (DagFileProcessorAgent , DagFileProcessorManager ,
35- DagFileStat , SimpleTaskInstance , correct_maybe_zipped )
33+ from airflow .utils .dag_processing import (
34+ DagFileProcessorAgent , DagFileProcessorManager , DagFileStat , SimpleTaskInstance , correct_maybe_zipped ,
35+ )
3636from airflow .utils .db import create_session
3737from airflow .utils .state import State
38+ from tests .test_utils .config import conf_vars
39+ from tests .test_utils .db import clear_db_runs
3840
3941TEST_DAG_FOLDER = os .path .join (
4042 os .path .dirname (os .path .realpath (__file__ )), os .pardir , 'dags' )
@@ -133,6 +135,9 @@ def __exit__(self, *exc_info):
133135
134136
135137class TestDagFileProcessorManager (unittest .TestCase ):
138+ def setUp (self ):
139+ clear_db_runs ()
140+
136141 def test_set_file_paths_when_processor_file_path_not_in_new_file_paths (self ):
137142 manager = DagFileProcessorManager (
138143 dag_directory = 'directory' ,
@@ -202,7 +207,8 @@ def test_find_zombies(self):
202207
203208 manager ._last_zombie_query_time = timezone .utcnow () - timedelta (
204209 seconds = manager ._zombie_threshold_secs + 1 )
205- zombies = manager ._find_zombies ()
210+ manager ._find_zombies ()
211+ zombies = manager ._zombies
206212 self .assertEqual (1 , len (zombies ))
207213 self .assertIsInstance (zombies [0 ], SimpleTaskInstance )
208214 self .assertEqual (ti .dag_id , zombies [0 ].dag_id )
@@ -212,6 +218,87 @@ def test_find_zombies(self):
212218 session .query (TI ).delete ()
213219 session .query (LJ ).delete ()
214220
221+ def test_zombies_are_correctly_passed_to_dag_file_processor (self ):
222+ """
223+ Check that the same set of zombies are passed to the dag
224+ file processors until the next zombie detection logic is invoked.
225+ """
226+ with conf_vars ({('scheduler' , 'max_threads' ): '1' ,
227+ ('core' , 'load_examples' ): 'False' }):
228+ dagbag = DagBag (os .path .join (TEST_DAG_FOLDER , 'test_example_bash_operator.py' ))
229+ with create_session () as session :
230+ session .query (LJ ).delete ()
231+ dag = dagbag .get_dag ('test_example_bash_operator' )
232+ task = dag .get_task (task_id = 'run_this_last' )
233+
234+ ti = TI (task , DEFAULT_DATE , State .RUNNING )
235+ lj = LJ (ti )
236+ lj .state = State .SHUTDOWN
237+ lj .id = 1
238+ ti .job_id = lj .id
239+
240+ session .add (lj )
241+ session .add (ti )
242+ session .commit ()
243+ fake_zombies = [SimpleTaskInstance (ti )]
244+
245+ class FakeDagFIleProcessor (DagFileProcessor ):
246+ # This fake processor will return the zombies it received in constructor
247+ # as its processing result w/o actually parsing anything.
248+ def __init__ (self , file_path , pickle_dags , dag_id_white_list , zombies ):
249+ super (FakeDagFIleProcessor , self ).__init__ (
250+ file_path , pickle_dags , dag_id_white_list , zombies
251+ )
252+
253+ self ._result = zombies , 0
254+
255+ def start (self ):
256+ pass
257+
258+ @property
259+ def start_time (self ):
260+ return DEFAULT_DATE
261+
262+ @property
263+ def pid (self ):
264+ return 1234
265+
266+ @property
267+ def done (self ):
268+ return True
269+
270+ @property
271+ def result (self ):
272+ return self ._result
273+
274+ def processor_factory (file_path , zombies ):
275+ return FakeDagFIleProcessor (file_path ,
276+ False ,
277+ [],
278+ zombies )
279+
280+ test_dag_path = os .path .join (TEST_DAG_FOLDER ,
281+ 'test_example_bash_operator.py' )
282+ async_mode = 'sqlite' not in conf .get ('core' , 'sql_alchemy_conn' )
283+ processor_agent = DagFileProcessorAgent (test_dag_path ,
284+ [],
285+ 1 ,
286+ processor_factory ,
287+ timedelta .max ,
288+ async_mode )
289+ processor_agent .start ()
290+ parsing_result = []
291+ if not async_mode :
292+ processor_agent .heartbeat ()
293+ while not processor_agent .done :
294+ if not async_mode :
295+ processor_agent .wait_until_finished ()
296+ parsing_result .extend (processor_agent .harvest_simple_dags ())
297+
298+ self .assertEqual (len (fake_zombies ), len (parsing_result ))
299+ self .assertEqual (set ([zombie .key for zombie in fake_zombies ]),
300+ set ([result .key for result in parsing_result ]))
301+
215302 @mock .patch ("airflow.jobs.DagFileProcessor.pid" , new_callable = PropertyMock )
216303 @mock .patch ("airflow.jobs.DagFileProcessor.kill" )
217304 def test_kill_timed_out_processors_kill (self , mock_kill , mock_pid ):
0 commit comments