From 3ff34313efd49bce7e6d7e701d863e9b1f377499 Mon Sep 17 00:00:00 2001 From: Ash Berlin-Taylor Date: Fri, 28 Sep 2018 23:45:29 +0100 Subject: [PATCH] [AIRFLOW-XXX] Speed up DagBagTest cases I noticed that many of the tests of DagBags operate on a specific DAG only, and don't need to load the example or test dags. By not loading the dags we don't need, this shaves about 10-20s off the test time. Nothing huge, but every little bit counts. **Before**: + nosetests tests.models.DagBagTest ............ ---------------------------------------------------------------------- Ran 12 tests in 30.855s OK [2018-09-28 22:42:17,846] {settings.py:193} DEBUG - Disposing DB connection pool (PID 1467) real 0m47.582s user 0m8.300s sys 0m4.700s **After**: + nosetests tests.models.DagBagTest ............ ---------------------------------------------------------------------- Ran 12 tests in 7.784s OK [2018-09-28 22:44:39,475] {settings.py:193} DEBUG - Disposing DB connection pool (PID 1490) real 0m30.399s user 0m9.960s sys 0m3.180s --- tests/models.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/models.py b/tests/models.py index 7e3c2929e0f3b..838a47f938eaa 100644 --- a/tests/models.py +++ b/tests/models.py @@ -1159,12 +1159,19 @@ def with_all_tasks_removed(dag): class DagBagTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.empty_dir = mkdtemp() + + @classmethod + def tearDownClass(cls): + os.rmdir(cls.empty_dir) def test_get_existing_dag(self): """ test that were're able to parse some example DAGs and retrieve them """ - dagbag = models.DagBag(include_examples=True) + dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=True) some_expected_dag_ids = ["example_bash_operator", "example_branch_operator"] @@ -1181,7 +1188,7 @@ def test_get_non_existing_dag(self): """ test that retrieving a non existing dag id returns None without crashing """ - dagbag = models.DagBag(include_examples=True) + dagbag = 
models.DagBag(dag_folder=self.empty_dir, include_examples=False) non_existing_dag_id = "non_existing_dag_id" self.assertIsNone(dagbag.get_dag(non_existing_dag_id)) @@ -1194,7 +1201,7 @@ def test_process_file_that_contains_multi_bytes_char(self): f.write('\u3042'.encode('utf8')) # write multi-byte char (hiragana) f.flush() - dagbag = models.DagBag(include_examples=True) + dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=False) self.assertEqual([], dagbag.process_file(f.name)) def test_zip_skip_log(self): @@ -1216,7 +1223,7 @@ def test_zip(self): """ test the loading of a DAG within a zip file that includes dependencies """ - dagbag = models.DagBag() + dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=False) dagbag.process_file(os.path.join(TEST_DAGS_FOLDER, "test_zip.zip")) self.assertTrue(dagbag.get_dag("test_zip_dag")) @@ -1226,7 +1233,7 @@ def test_process_file_cron_validity_check(self): as schedule interval can be identified """ invalid_dag_files = ["test_invalid_cron.py", "test_zip_invalid_cron.zip"] - dagbag = models.DagBag(dag_folder=mkdtemp()) + dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=False) self.assertEqual(len(dagbag.import_errors), 0) for d in invalid_dag_files: @@ -1290,7 +1297,7 @@ def process_dag(self, create_dag): f.write(source.encode('utf8')) f.flush() - dagbag = models.DagBag(include_examples=False) + dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=False) found_dags = dagbag.process_file(f.name) return dagbag, found_dags, f.name @@ -1601,7 +1608,7 @@ def test_process_file_with_none(self): """ test that process_file can handle Nones """ - dagbag = models.DagBag(include_examples=True) + dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=False) self.assertEqual([], dagbag.process_file(None))