From 52cb6b643c9a9bdcbea2fca5feb1b74779882d88 Mon Sep 17 00:00:00 2001 From: XD-DENG Date: Thu, 27 Sep 2018 21:50:26 +0800 Subject: [PATCH] [AIRFLOW-3104] Add .airflowignore info into doc .airflowignore is a nice feature, but it was not mentioned at all in the documentation. --- airflow/models.py | 8 +++++--- docs/concepts.rst | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/airflow/models.py b/airflow/models.py index 6154ca64f480b..6ea4638a51d24 100755 --- a/airflow/models.py +++ b/airflow/models.py @@ -522,10 +522,12 @@ def collect_dags( Given a file path or a folder, this method looks for python modules, imports them and adds them to the dagbag collection. - Note that if a .airflowignore file is found while processing, - the directory, it will behaves much like a .gitignore does, + Note that if a ``.airflowignore`` file is found while processing + the directory, it will behave much like a ``.gitignore``, ignoring files that match any of the regex patterns specified - in the file. **Note**: The patterns in .airflowignore are treated as + in the file. + + **Note**: The patterns in .airflowignore are treated as un-anchored regexes, not shell-like glob patterns. """ start_dttm = timezone.utcnow() diff --git a/docs/concepts.rst b/docs/concepts.rst index 73164772251cc..a30a7026bc414 100644 --- a/docs/concepts.rst +++ b/docs/concepts.rst @@ -863,3 +863,32 @@ do the same, but then it is more to use a virtualenv and pip. to be available on the system if a module needs those. In other words only pure python modules can be packaged. + +.airflowignore +'''''''''''''' + +A ``.airflowignore`` file specifies the directories or files in ``DAG_FOLDER`` +that Airflow should intentionally ignore. Each line in ``.airflowignore`` +specifies a regular expression pattern, and directories or files whose names +(not DAG id) match any of the patterns would be ignored (under the hood, +``re.findall()`` is used to match the pattern). 
Overall it works like a +``.gitignore`` file. + +A ``.airflowignore`` file should be put in your ``DAG_FOLDER``. +For example, you can prepare a ``.airflowignore`` file with the following contents: + +.. code:: + + project_a + tenant_[\d] + + +Then files like "project_a_dag_1.py", "TESTING_project_a.py", "tenant_1.py", +"project_a/dag_1.py", and "tenant_1/dag_1.py" in your ``DAG_FOLDER`` would be ignored. +(If a directory's name matches any of the patterns, this directory and all its subfolders +would not be scanned by Airflow at all. This improves the efficiency of DAG finding.) + +The scope of a ``.airflowignore`` file is the directory it is in plus all its subfolders. +You can also prepare a ``.airflowignore`` file for a subfolder in ``DAG_FOLDER`` and it +would only be applicable for that subfolder. +