From 3e08e405bc8947e031d701601a5f6a1d2c19e08a Mon Sep 17 00:00:00 2001 From: Andrew Godwin Date: Fri, 16 Apr 2021 16:16:13 -0600 Subject: [PATCH] An initial rework of the "Concepts" docs --- .../databricks/operators/databricks.py | 4 +- .../operators/ads.rst | 4 +- .../operators/cloud/bigquery_dts.rst | 8 +- .../operators/cloud/cloud_build.rst | 2 +- .../operators/cloud/cloud_memorystore.rst | 20 +- .../operators/cloud/datacatalog.rst | 42 +- .../operators/cloud/datafusion.rst | 20 +- .../operators/cloud/gcs.rst | 2 +- .../operators/cloud/mlengine.rst | 4 +- .../operators/cloud/natural_language.rst | 8 +- .../operators/firebase/firestore.rst | 2 +- .../marketing_platform/analytics.rst | 6 +- .../marketing_platform/campaign_manager.rst | 14 +- .../marketing_platform/display_video.rst | 20 +- .../marketing_platform/search_ads.rst | 6 +- .../operators/suite/sheets.rst | 2 +- .../operators/transfer/gcs_to_gdrive.rst | 2 +- .../operators/transfer/gcs_to_sftp.rst | 2 +- .../operators/transfer/gcs_to_sheets.rst | 2 +- .../operators/transfer/gdrive_to_gcs.rst | 2 +- .../operators/transfer/sftp_to_gcs.rst | 2 +- .../operators/transfer/sheets_to_gcs.rst | 2 +- .../operators.rst | 2 +- docs/apache-airflow/best-practices.rst | 2 +- docs/apache-airflow/concepts.rst | 1708 ----------------- .../concepts/cluster-policies.rst | 92 + docs/apache-airflow/concepts/connections.rst | 37 + docs/apache-airflow/concepts/dags.rst | 587 ++++++ docs/apache-airflow/concepts/index.rst | 54 + docs/apache-airflow/concepts/operators.rst | 143 ++ docs/apache-airflow/concepts/overview.rst | 96 + docs/apache-airflow/concepts/pools.rst | 46 + .../{ => concepts}/scheduler.rst | 2 +- docs/apache-airflow/concepts/sensors.rst | 33 + .../smart-sensors.rst} | 14 +- docs/apache-airflow/concepts/taskflow.rst | 72 + docs/apache-airflow/concepts/tasks.rst | 167 ++ docs/apache-airflow/concepts/variables.rst | 48 + docs/apache-airflow/concepts/xcoms.rst | 49 + docs/apache-airflow/executor/index.rst | 54 +- docs/apache-airflow/faq.rst | 2 +- docs/apache-airflow/howto/connection.rst | 2 +- docs/apache-airflow/howto/custom-operator.rst | 4 +- docs/apache-airflow/howto/operator/bash.rst | 2 +- docs/apache-airflow/howto/operator/index.rst | 2 +- docs/apache-airflow/howto/operator/python.rst | 2 +- docs/apache-airflow/howto/variable.rst | 2 +- docs/apache-airflow/img/arch-diag-basic.png | Bin 16303 -> 16357 bytes docs/apache-airflow/img/basic-dag.png | Bin 0 -> 5393 bytes docs/apache-airflow/img/task_stages.png | Bin 12919 -> 0 bytes docs/apache-airflow/index.rst | 4 +- docs/apache-airflow/installation.rst | 2 +- docs/apache-airflow/macros-ref.rst | 2 +- .../operators-and-hooks-ref.rst | 2 +- docs/apache-airflow/redirects.txt | 5 + .../apache-airflow/security/secrets/index.rst | 4 +- docs/apache-airflow/start/docker.rst | 4 +- docs/apache-airflow/tutorial.rst | 5 +- docs/apache-airflow/tutorial_taskflow_api.rst | 8 +- docs/spelling_wordlist.txt | 4 + 60 files changed, 1592 insertions(+), 1846 deletions(-) delete mode 100644 docs/apache-airflow/concepts.rst create mode 100644 docs/apache-airflow/concepts/cluster-policies.rst create mode 100644 docs/apache-airflow/concepts/connections.rst create mode 100644 docs/apache-airflow/concepts/dags.rst create mode 100644 docs/apache-airflow/concepts/index.rst create mode 100644 docs/apache-airflow/concepts/operators.rst create mode 100644 docs/apache-airflow/concepts/overview.rst create mode 100644 docs/apache-airflow/concepts/pools.rst rename docs/apache-airflow/{ => concepts}/scheduler.rst (99%) 
create mode 100644 docs/apache-airflow/concepts/sensors.rst rename docs/apache-airflow/{smart-sensor.rst => concepts/smart-sensors.rst} (93%) create mode 100644 docs/apache-airflow/concepts/taskflow.rst create mode 100644 docs/apache-airflow/concepts/tasks.rst create mode 100644 docs/apache-airflow/concepts/variables.rst create mode 100644 docs/apache-airflow/concepts/xcoms.rst mode change 100644 => 100755 docs/apache-airflow/img/arch-diag-basic.png create mode 100755 docs/apache-airflow/img/basic-dag.png delete mode 100644 docs/apache-airflow/img/task_stages.png diff --git a/airflow/providers/databricks/operators/databricks.py b/airflow/providers/databricks/operators/databricks.py index 0a81f3bfd1e47..b365bf441ba26 100644 --- a/airflow/providers/databricks/operators/databricks.py +++ b/airflow/providers/databricks/operators/databricks.py @@ -162,7 +162,7 @@ class DatabricksSubmitRunOperator(BaseOperator): take precedence and override the top level json keys. (templated) .. seealso:: - For more information about templating see :ref:`jinja-templating`. + For more information about templating see :ref:`concepts:jinja-templating`. https://docs.databricks.com/api/latest/jobs.html#runs-submit :type json: dict :param spark_jar_task: The main class and parameters for the JAR task. Note that @@ -394,7 +394,7 @@ class DatabricksRunNowOperator(BaseOperator): take precedence and override the top level json keys. (templated) .. seealso:: - For more information about templating see :ref:`jinja-templating`. + For more information about templating see :ref:`concepts:jinja-templating`. https://docs.databricks.com/api/latest/jobs.html#run-now :type json: dict :param notebook_params: A dict from keys to values for jobs with notebook task, diff --git a/docs/apache-airflow-providers-google/operators/ads.rst b/docs/apache-airflow-providers-google/operators/ads.rst index 95de1614241cb..eee7dcdf485ac 100644 --- a/docs/apache-airflow-providers-google/operators/ads.rst +++ b/docs/apache-airflow-providers-google/operators/ads.rst @@ -43,7 +43,7 @@ To query the Google Ads API and generate a CSV report of the results use :start-after: [START howto_google_ads_to_gcs_operator] :end-before: [END howto_google_ads_to_gcs_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.ads.transfers.ads_to_gcs.GoogleAdsToGcsOperator` parameters which allow you to dynamically determine values. The result is saved to :ref:`XCom `, which allows the result to be used by other operators. @@ -62,7 +62,7 @@ To upload Google Ads accounts to Google Cloud Storage bucket use the :start-after: [START howto_ads_list_accounts_operator] :end-before: [END howto_ads_list_accounts_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.ads.transfers.ads_to_gcs.GoogleAdsToGcsOperator` parameters which allow you to dynamically determine values. The result is saved to :ref:`XCom `, which allows the result to be used by other operators. diff --git a/docs/apache-airflow-providers-google/operators/cloud/bigquery_dts.rst b/docs/apache-airflow-providers-google/operators/cloud/bigquery_dts.rst index c3e74a2e358b8..ed63ac6de0e2a 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/bigquery_dts.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/bigquery_dts.rst @@ -67,7 +67,7 @@ it will be retrieved from the Google Cloud connection used. 
Basic usage of the o :start-after: [START howto_bigquery_create_data_transfer] :end-before: [END howto_bigquery_create_data_transfer] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.bigquery_dts.BigQueryCreateDataTransferOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. Additionally, id of the new config is accessible in @@ -90,7 +90,7 @@ Basic usage of the operator: :start-after: [START howto_bigquery_delete_data_transfer] :end-before: [END howto_bigquery_delete_data_transfer] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.bigquery_dts.BigQueryCreateDataTransferOperator` parameters which allows you to dynamically determine values. @@ -111,7 +111,7 @@ Basic usage of the operator: :start-after: [START howto_bigquery_start_transfer] :end-before: [END howto_bigquery_start_transfer] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.bigquery_dts.BigQueryDataTransferServiceStartTransferRunsOperator` parameters which allows you to dynamically determine values. @@ -124,7 +124,7 @@ To check if operation succeeded you can use :start-after: [START howto_bigquery_dts_sensor] :end-before: [END howto_bigquery_dts_sensor] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.sensors.bigquery_dts.BigQueryDataTransferServiceTransferRunSensor` parameters which allows you to dynamically determine values. diff --git a/docs/apache-airflow-providers-google/operators/cloud/cloud_build.rst b/docs/apache-airflow-providers-google/operators/cloud/cloud_build.rst index 72f25eb4f56b4..3861cd9a9554c 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/cloud_build.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/cloud_build.rst @@ -106,7 +106,7 @@ Trigger a build is performed with the :start-after: [START howto_operator_create_build_from_storage] :end-before: [END howto_operator_create_build_from_storage] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_build.CloudBuildCreateBuildOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. diff --git a/docs/apache-airflow-providers-google/operators/cloud/cloud_memorystore.rst b/docs/apache-airflow-providers-google/operators/cloud/cloud_memorystore.rst index 068f9e65b1fef..33c778ad23b8f 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/cloud_memorystore.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/cloud_memorystore.rst @@ -86,7 +86,7 @@ Create a instance is performed with the :start-after: [START howto_operator_create_instance] :end-before: [END howto_operator_create_instance] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreCreateInstanceOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. 
@@ -112,7 +112,7 @@ Delete a instance is performed with the :start-after: [START howto_operator_delete_instance] :end-before: [END howto_operator_delete_instance] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreDeleteInstanceOperator` parameters which allows you to dynamically determine values. @@ -130,7 +130,7 @@ Delete a instance is performed with the :start-after: [START howto_operator_export_instance] :end-before: [END howto_operator_export_instance] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreExportInstanceOperator` parameters which allows you to dynamically determine values. @@ -148,7 +148,7 @@ Delete a instance is performed with the :start-after: [START howto_operator_failover_instance] :end-before: [END howto_operator_failover_instance] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreFailoverInstanceOperator` parameters which allows you to dynamically determine values. @@ -166,7 +166,7 @@ Delete a instance is performed with the :start-after: [START howto_operator_get_instance] :end-before: [END howto_operator_get_instance] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreGetInstanceOperator` parameters which allows you to dynamically determine values. @@ -184,7 +184,7 @@ Delete a instance is performed with the :start-after: [START howto_operator_import_instance] :end-before: [END howto_operator_import_instance] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreImportOperator` parameters which allows you to dynamically determine values. @@ -202,7 +202,7 @@ List a instances is performed with the :start-after: [START howto_operator_list_instances] :end-before: [END howto_operator_list_instances] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreListInstancesOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -227,7 +227,7 @@ Update a instance is performed with the :start-after: [START howto_operator_update_instance] :end-before: [END howto_operator_update_instance] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreUpdateInstanceOperator` parameters which allows you to dynamically determine values. @@ -246,7 +246,7 @@ Scale a instance is performed with the :start-after: [START howto_operator_scale_instance] :end-before: [END howto_operator_scale_instance] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreScaleInstanceOperator` parameters which allows you to dynamically determine values. 
@@ -266,7 +266,7 @@ Export and delete instance If you want to export data and immediately delete instances then you can use :class:`~airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreExportAndDeleteInstanceOperator` operator. -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.cloud_memorystore.CloudMemorystoreScaleInstanceOperator` parameters which allows you to dynamically determine values. diff --git a/docs/apache-airflow-providers-google/operators/cloud/datacatalog.rst b/docs/apache-airflow-providers-google/operators/cloud/datacatalog.rst index 314db785c6173..6b18e445850ef 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/datacatalog.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/datacatalog.rst @@ -69,7 +69,7 @@ The ``CloudDataCatalogGetEntryOperator`` use Project ID, Entry Group ID, Entry I :start-after: [START howto_operator_gcp_datacatalog_get_entry] :end-before: [END howto_operator_gcp_datacatalog_get_entry] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetEntryOperator` parameters which allows you to dynamically determine values. @@ -89,7 +89,7 @@ The ``CloudDataCatalogLookupEntryOperator`` use the resource name to get the ent :start-after: [START howto_operator_gcp_datacatalog_lookup_entry_linked_resource] :end-before: [END howto_operator_gcp_datacatalog_lookup_entry_linked_resource] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogLookupEntryOperator` parameters which allows you to dynamically determine values. @@ -115,7 +115,7 @@ operator create the entry. :start-after: [START howto_operator_gcp_datacatalog_create_entry_gcs] :end-before: [END howto_operator_gcp_datacatalog_create_entry_gcs] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateEntryOperator` parameters which allows you to dynamically determine values. @@ -149,7 +149,7 @@ operator update the entry. :start-after: [START howto_operator_gcp_datacatalog_update_entry] :end-before: [END howto_operator_gcp_datacatalog_update_entry] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateEntryOperator` parameters which allows you to dynamically determine values. @@ -167,7 +167,7 @@ operator delete the entry. :start-after: [START howto_operator_gcp_datacatalog_delete_entry] :end-before: [END howto_operator_gcp_datacatalog_delete_entry] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryOperator` parameters which allows you to dynamically determine values. @@ -196,7 +196,7 @@ operator create the entry group. 
:start-after: [START howto_operator_gcp_datacatalog_create_entry_group] :end-before: [END howto_operator_gcp_datacatalog_create_entry_group] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateEntryGroupOperator` parameters which allows you to dynamically determine values. @@ -230,7 +230,7 @@ operator get the entry group. :start-after: [START howto_operator_gcp_datacatalog_get_entry_group] :end-before: [END howto_operator_gcp_datacatalog_get_entry_group] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetEntryGroupOperator` parameters which allows you to dynamically determine values. @@ -256,7 +256,7 @@ operator delete the entry group. :start-after: [START howto_operator_gcp_datacatalog_delete_entry_group] :end-before: [END howto_operator_gcp_datacatalog_delete_entry_group] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryGroupOperator` parameters which allows you to dynamically determine values. @@ -285,7 +285,7 @@ operator get the tag template. :start-after: [START howto_operator_gcp_datacatalog_create_tag_template] :end-before: [END howto_operator_gcp_datacatalog_create_tag_template] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagTemplateOperator` parameters which allows you to dynamically determine values. @@ -319,7 +319,7 @@ operator delete the tag template. :start-after: [START howto_operator_gcp_datacatalog_delete_tag_template] :end-before: [END howto_operator_gcp_datacatalog_delete_tag_template] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateOperator` parameters which allows you to dynamically determine values. @@ -338,7 +338,7 @@ operator get the tag template. :start-after: [START howto_operator_gcp_datacatalog_get_tag_template] :end-before: [END howto_operator_gcp_datacatalog_get_tag_template] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetTagTemplateOperator` parameters which allows you to dynamically determine values. @@ -364,7 +364,7 @@ operator update the tag template. :start-after: [START howto_operator_gcp_datacatalog_update_tag_template] :end-before: [END howto_operator_gcp_datacatalog_update_tag_template] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagTemplateOperator` parameters which allows you to dynamically determine values. @@ -393,7 +393,7 @@ operator get the tag template. :start-after: [START howto_operator_gcp_datacatalog_create_tag] :end-before: [END howto_operator_gcp_datacatalog_create_tag] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagOperator` parameters which allows you to dynamically determine values. 
@@ -427,7 +427,7 @@ operator update the tag template. :start-after: [START howto_operator_gcp_datacatalog_update_tag_template] :end-before: [END howto_operator_gcp_datacatalog_update_tag_template] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagOperator` parameters which allows you to dynamically determine values. @@ -445,7 +445,7 @@ operator delete the tag template. :start-after: [START howto_operator_gcp_datacatalog_delete_tag_template] :end-before: [END howto_operator_gcp_datacatalog_delete_tag_template] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagOperator` parameters which allows you to dynamically determine values. @@ -463,7 +463,7 @@ operator get list of the tags on the entry. :start-after: [START howto_operator_gcp_datacatalog_list_tags] :end-before: [END howto_operator_gcp_datacatalog_list_tags] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogListTagsOperator` parameters which allows you to dynamically determine values. @@ -501,7 +501,7 @@ operator get the tag template field. :start-after: [START howto_operator_gcp_datacatalog_create_tag_template_field] :end-before: [END howto_operator_gcp_datacatalog_create_tag_template_field] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagTemplateFieldOperator` parameters which allows you to dynamically determine values. @@ -535,7 +535,7 @@ operator rename the tag template field. :start-after: [START howto_operator_gcp_datacatalog_rename_tag_template_field] :end-before: [END howto_operator_gcp_datacatalog_rename_tag_template_field] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogRenameTagTemplateFieldOperator` parameters which allows you to dynamically determine values. @@ -553,7 +553,7 @@ operator get the tag template field. :start-after: [START howto_operator_gcp_datacatalog_update_tag_template_field] :end-before: [END howto_operator_gcp_datacatalog_update_tag_template_field] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagTemplateFieldOperator` parameters which allows you to dynamically determine values. @@ -572,7 +572,7 @@ operator delete the tag template field. :start-after: [START howto_operator_gcp_datacatalog_delete_tag_template_field] :end-before: [END howto_operator_gcp_datacatalog_delete_tag_template_field] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateFieldOperator` parameters which allows you to dynamically determine values. 
@@ -593,7 +593,7 @@ The ``query`` parameters should defined using `search syntax ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogSearchCatalogOperator` parameters which allows you to dynamically determine values. diff --git a/docs/apache-airflow-providers-google/operators/cloud/datafusion.rst b/docs/apache-airflow-providers-google/operators/cloud/datafusion.rst index b1527e7e5c020..b32f8a3d0c093 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/datafusion.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/datafusion.rst @@ -50,7 +50,7 @@ To restart Data Fusion instance use: :start-after: [START howto_cloud_data_fusion_restart_instance_operator] :end-before: [END howto_cloud_data_fusion_restart_instance_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionRestartInstanceOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -69,7 +69,7 @@ To delete Data Fusion instance use: :start-after: [START howto_cloud_data_fusion_delete_instance_operator] :end-before: [END howto_cloud_data_fusion_delete_instance_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionDeleteInstanceOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -89,7 +89,7 @@ To create Data Fusion instance use: :start-after: [START howto_cloud_data_fusion_create_instance_operator] :end-before: [END howto_cloud_data_fusion_create_instance_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionCreateInstanceOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -109,7 +109,7 @@ To update Data Fusion instance use: :start-after: [START howto_cloud_data_fusion_update_instance_operator] :end-before: [END howto_cloud_data_fusion_update_instance_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionUpdateInstanceOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -128,7 +128,7 @@ To retrieve Data Fusion instance use: :start-after: [START howto_cloud_data_fusion_get_instance_operator] :end-before: [END howto_cloud_data_fusion_get_instance_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionGetInstanceOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. 
@@ -148,7 +148,7 @@ To create Data Fusion pipeline use: :start-after: [START howto_cloud_data_fusion_create_pipeline] :end-before: [END howto_cloud_data_fusion_create_pipeline] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionCreatePipelineOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -167,7 +167,7 @@ To start Data Fusion pipeline use: :start-after: [START howto_cloud_data_fusion_start_pipeline] :end-before: [END howto_cloud_data_fusion_start_pipeline] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionStartPipelineOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -186,7 +186,7 @@ To stop Data Fusion pipeline use: :start-after: [START howto_cloud_data_fusion_stop_pipeline] :end-before: [END howto_cloud_data_fusion_stop_pipeline] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionStopPipelineOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -205,7 +205,7 @@ To delete Data Fusion pipeline use: :start-after: [START howto_cloud_data_fusion_delete_pipeline] :end-before: [END howto_cloud_data_fusion_delete_pipeline] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionDeletePipelineOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -225,7 +225,7 @@ To list Data Fusion pipelines use: :start-after: [START howto_cloud_data_fusion_list_pipelines] :end-before: [END howto_cloud_data_fusion_list_pipelines] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.datafusion.CloudDataFusionListPipelinesOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. diff --git a/docs/apache-airflow-providers-google/operators/cloud/gcs.rst b/docs/apache-airflow-providers-google/operators/cloud/gcs.rst index eaa4d235ca4e4..5fa330c7a7d6c 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/gcs.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/gcs.rst @@ -155,7 +155,7 @@ It is performed through the :end-before: [END howto_operator_gcs_delete_bucket] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.gcs.GCSDeleteBucketOperator` parameters which allows you to dynamically determine values. 
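The "Jinja templating with template fields" pattern that these provider pages keep referencing simply means passing a Jinja template string as an operator argument and letting Airflow render it when the task runs. A minimal sketch of the idea, using the ``GCSDeleteBucketOperator`` touched above (the DAG id and bucket name are illustrative placeholders, and this assumes ``bucket_name`` is among the operator's templated fields):

.. code-block:: python

    from airflow import DAG
    from airflow.providers.google.cloud.operators.gcs import GCSDeleteBucketOperator
    from airflow.utils.dates import days_ago

    with DAG(
        dag_id="example_templated_gcs_cleanup",
        start_date=days_ago(1),
        schedule_interval=None,
    ) as dag:
        # "{{ ds_nodash }}" is rendered by Jinja at run time, e.g. "20210416",
        # so each run targets a date-stamped bucket.
        delete_bucket = GCSDeleteBucketOperator(
            task_id="delete_bucket",
            bucket_name="my-staging-bucket-{{ ds_nodash }}",
        )

The same approach applies to any parameter listed in an operator's ``template_fields``; non-templated parameters receive the literal string instead.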
diff --git a/docs/apache-airflow-providers-google/operators/cloud/mlengine.rst b/docs/apache-airflow-providers-google/operators/cloud/mlengine.rst index cd986c07eaae1..fe86f5788e5ee 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/mlengine.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/mlengine.rst @@ -79,7 +79,7 @@ must be defined in the operator. :start-after: [START howto_operator_gcp_mlengine_get_model] :end-before: [END howto_operator_gcp_mlengine_get_model] -You can use :ref:`Jinja templating ` with the ``project_id`` and ``model`` +You can use :ref:`Jinja templating ` with the ``project_id`` and ``model`` fields to dynamically determine their values. The result are saved to :ref:`XCom `, allowing them to be used by other operators. In this case, the :class:`~airflow.operators.bash.BashOperator` is used to print the model information. @@ -140,7 +140,7 @@ while specifying the ``model_name`` parameter. :start-after: [START howto_operator_gcp_mlengine_list_versions] :end-before: [END howto_operator_gcp_mlengine_list_versions] -You can use :ref:`Jinja templating ` with the ``project_id`` and ``model`` +You can use :ref:`Jinja templating ` with the ``project_id`` and ``model`` fields to dynamically determine their values. The result are saved to :ref:`XCom `, allowing them to be used by other operators. In this case, the :class:`~airflow.operators.bash.BashOperator` is used to print the version information. diff --git a/docs/apache-airflow-providers-google/operators/cloud/natural_language.rst b/docs/apache-airflow-providers-google/operators/cloud/natural_language.rst index 3d405140cfb16..c3485a7e9b350 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/natural_language.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/natural_language.rst @@ -76,7 +76,7 @@ Entity analysis is performed with the :start-after: [START howto_operator_gcp_natural_language_analyze_entities] :end-before: [END howto_operator_gcp_natural_language_analyze_entities] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.natural_language.CloudNaturalLanguageAnalyzeEntitiesOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -104,7 +104,7 @@ operator. :start-after: [START howto_operator_gcp_natural_language_analyze_entity_sentiment] :end-before: [END howto_operator_gcp_natural_language_analyze_entity_sentiment] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.natural_language.CloudNaturalLanguageAnalyzeEntitiesOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -133,7 +133,7 @@ operator. :start-after: [START howto_operator_gcp_natural_language_analyze_sentiment] :end-before: [END howto_operator_gcp_natural_language_analyze_sentiment] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.natural_language.CloudNaturalLanguageAnalyzeSentimentOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -161,7 +161,7 @@ operator. 
:start-after: [START howto_operator_gcp_natural_language_analyze_classify_text] :end-before: [END howto_operator_gcp_natural_language_analyze_classify_text] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.operators.natural_language.CloudNaturalLanguageClassifyTextOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. diff --git a/docs/apache-airflow-providers-google/operators/firebase/firestore.rst b/docs/apache-airflow-providers-google/operators/firebase/firestore.rst index afee6107dda44..6267ef0569519 100644 --- a/docs/apache-airflow-providers-google/operators/firebase/firestore.rst +++ b/docs/apache-airflow-providers-google/operators/firebase/firestore.rst @@ -51,7 +51,7 @@ Exports a copy of all or a subset of documents from Google Cloud Firestore to Go :start-after: [START howto_operator_export_database_to_gcs] :end-before: [END howto_operator_export_database_to_gcs] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.firebase.operators.firestore.CloudFirestoreExportDatabaseOperator` parameters which allows you to dynamically determine values. diff --git a/docs/apache-airflow-providers-google/operators/marketing_platform/analytics.rst b/docs/apache-airflow-providers-google/operators/marketing_platform/analytics.rst index 231b72a04e680..964ca51bcebdc 100644 --- a/docs/apache-airflow-providers-google/operators/marketing_platform/analytics.rst +++ b/docs/apache-airflow-providers-google/operators/marketing_platform/analytics.rst @@ -46,7 +46,7 @@ To list accounts from Analytics you can use the :start-after: [START howto_marketing_platform_list_accounts_operator] :end-before: [END howto_marketing_platform_list_accounts_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.analytics.GoogleAnalyticsListAccountsOperator` .. _howto/operator:GoogleAnalyticsGetAdsLinkOperator: @@ -64,7 +64,7 @@ To list web property-Google Ads link you can use the :start-after: [START howto_marketing_platform_get_ads_link_operator] :end-before: [END howto_marketing_platform_get_ads_link_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.analytics.GoogleAnalyticsGetAdsLinkOperator` .. _howto/operator:GoogleAnalyticsRetrieveAdsLinksListOperator: @@ -82,5 +82,5 @@ To list Google Ads links you can use the :start-after: [START howto_marketing_platform_retrieve_ads_links_list_operator] :end-before: [END howto_marketing_platform_retrieve_ads_links_list_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.analytics.GoogleAnalyticsRetrieveAdsLinksListOperator` diff --git a/docs/apache-airflow-providers-google/operators/marketing_platform/campaign_manager.rst b/docs/apache-airflow-providers-google/operators/marketing_platform/campaign_manager.rst index 1613b22c5eb97..67e86fcc5db98 100644 --- a/docs/apache-airflow-providers-google/operators/marketing_platform/campaign_manager.rst +++ b/docs/apache-airflow-providers-google/operators/marketing_platform/campaign_manager.rst @@ -47,7 +47,7 @@ It deletes a report by its unique ID. 
:start-after: [START howto_campaign_manager_delete_report_operator] :end-before: [END howto_campaign_manager_delete_report_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.campaign_manager.GoogleCampaignManagerDeleteReportOperator` parameters which allows you to dynamically determine values. @@ -65,7 +65,7 @@ allows you to download a Campaign Manager to Google Cloud Storage bucket. :start-after: [START howto_campaign_manager_get_report_operator] :end-before: [END howto_campaign_manager_get_report_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.campaign_manager.GoogleCampaignManagerDownloadReportOperator` parameters which allows you to dynamically determine values. @@ -83,7 +83,7 @@ you can use :class:`~airflow.providers.google.marketing_platform.sensors.campaig :start-after: [START howto_campaign_manager_wait_for_operation] :end-before: [END howto_campaign_manager_wait_for_operation] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.sensors.campaign_manager.GoogleCampaignManagerReportSensor` parameters which allows you to dynamically determine values. @@ -102,7 +102,7 @@ Running this operator creates a new report. :start-after: [START howto_campaign_manager_insert_report_operator] :end-before: [END howto_campaign_manager_insert_report_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.campaign_manager.GoogleCampaignManagerInsertReportOperator` parameters which allows you to dynamically determine values. You can provide report definition using ``.json`` file as this operator supports this template extension. @@ -122,7 +122,7 @@ To run Campaign Manager report you can use the :start-after: [START howto_campaign_manager_run_report_operator] :end-before: [END howto_campaign_manager_run_report_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.campaign_manager.GoogleCampaignManagerRunReportOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. @@ -141,7 +141,7 @@ To insert Campaign Manager conversions you can use the :start-after: [START howto_campaign_manager_insert_conversions] :end-before: [END howto_campaign_manager_insert_conversions] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.campaign_manager.GoogleCampaignManagerBatchInsertConversionsOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. 
@@ -160,7 +160,7 @@ To update Campaign Manager conversions you can use the :start-after: [START howto_campaign_manager_update_conversions] :end-before: [END howto_campaign_manager_update_conversions] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.campaign_manager.GoogleCampaignManagerBatchUpdateConversionsOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. diff --git a/docs/apache-airflow-providers-google/operators/marketing_platform/display_video.rst b/docs/apache-airflow-providers-google/operators/marketing_platform/display_video.rst index bfdee88dc10e4..c3c1abd36bf3e 100644 --- a/docs/apache-airflow-providers-google/operators/marketing_platform/display_video.rst +++ b/docs/apache-airflow-providers-google/operators/marketing_platform/display_video.rst @@ -43,7 +43,7 @@ To create Display&Video 360 report use :start-after: [START howto_google_display_video_createquery_report_operator] :end-before: [END howto_google_display_video_createquery_report_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360CreateReportOperator` parameters which allow you to dynamically determine values. You can provide body definition using `` .json`` file as this operator supports this template extension. @@ -63,7 +63,7 @@ To delete Display&Video 360 report use :start-after: [START howto_google_display_video_deletequery_report_operator] :end-before: [END howto_google_display_video_deletequery_report_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360DeleteReportOperator` parameters which allow you to dynamically determine values. @@ -81,7 +81,7 @@ To wait for the report use :start-after: [START howto_google_display_video_wait_report_operator] :end-before: [END howto_google_display_video_wait_report_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.sensors.display_video.GoogleDisplayVideo360ReportSensor` parameters which allow you to dynamically determine values. @@ -99,7 +99,7 @@ To download a report to GCS bucket use :start-after: [START howto_google_display_video_getquery_report_operator] :end-before: [END howto_google_display_video_getquery_report_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360DownloadReportOperator` parameters which allow you to dynamically determine values. @@ -118,7 +118,7 @@ To run Display&Video 360 report use :start-after: [START howto_google_display_video_runquery_report_operator] :end-before: [END howto_google_display_video_runquery_report_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360RunReportOperator` parameters which allow you to dynamically determine values. 
@@ -147,7 +147,7 @@ To download line items in CSV format report use :start-after: [START howto_google_display_video_download_line_items_operator] :end-before: [END howto_google_display_video_download_line_items_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360DownloadLineItemsOperator` parameters which allow you to dynamically determine values. @@ -166,7 +166,7 @@ To run Display&Video 360 uploading line items use :start-after: [START howto_google_display_video_upload_line_items_operator] :end-before: [END howto_google_display_video_upload_line_items_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360UploadLineItemsOperator` parameters which allow you to dynamically determine values. @@ -184,7 +184,7 @@ To create SDF download task use :start-after: [START howto_google_display_video_create_sdf_download_task_operator] :end-before: [END howto_google_display_video_create_sdf_download_task_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360CreateSDFDownloadTaskOperator` parameters which allow you to dynamically determine values. @@ -203,7 +203,7 @@ To save SDF files and save them in the Google Cloud Storage use :start-after: [START howto_google_display_video_save_sdf_in_gcs_operator] :end-before: [END howto_google_display_video_save_sdf_in_gcs_operator] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360SDFtoGCSOperator` parameters which allow you to dynamically determine values. @@ -221,6 +221,6 @@ Wait for SDF operation is executed by: :start-after: [START howto_google_display_video_wait_for_operation_sensor] :end-before: [END howto_google_display_video_wait_for_operation_sensor] -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.sensors.display_video.GoogleDisplayVideo360GetSDFDownloadOperationSensor` parameters which allow you to dynamically determine values. diff --git a/docs/apache-airflow-providers-google/operators/marketing_platform/search_ads.rst b/docs/apache-airflow-providers-google/operators/marketing_platform/search_ads.rst index 72dee98e26629..d19e8717d5966 100644 --- a/docs/apache-airflow-providers-google/operators/marketing_platform/search_ads.rst +++ b/docs/apache-airflow-providers-google/operators/marketing_platform/search_ads.rst @@ -44,7 +44,7 @@ To insert a Search Ads report use the :start-after: [START howto_search_ads_generate_report_operator] :end-before: [END howto_search_ads_generate_report_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.search_ads.GoogleSearchAdsInsertReportOperator` parameters which allows you to dynamically determine values. You can provide report definition using `` .json`` file as this operator supports this template extension. 
@@ -70,7 +70,7 @@ To wait for a report to be ready for download use :start-after: [START howto_search_ads_get_report_operator] :end-before: [END howto_search_ads_get_report_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.sensors.search_ads.GoogleSearchAdsReportSensor` parameters which allows you to dynamically determine values. @@ -88,7 +88,7 @@ To download a Search Ads report to Google Cloud Storage bucket use the :start-after: [START howto_search_ads_getfile_report_operator] :end-before: [END howto_search_ads_getfile_report_operator] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.search_ads.GoogleSearchAdsDownloadReportOperator` parameters which allows you to dynamically determine values. The result is saved to :ref:`XCom `, which allows it to be used by other operators. diff --git a/docs/apache-airflow-providers-google/operators/suite/sheets.rst b/docs/apache-airflow-providers-google/operators/suite/sheets.rst index df2810cee6fe7..498459e147e21 100644 --- a/docs/apache-airflow-providers-google/operators/suite/sheets.rst +++ b/docs/apache-airflow-providers-google/operators/suite/sheets.rst @@ -55,7 +55,7 @@ To create new spreadsheet you can use the :start-after: [START create_spreadsheet] :end-before: [END create_spreadsheet] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.suite.operators.sheets.GoogleSheetsCreateSpreadsheetOperator`. To get the URL of newly created spreadsheet use XCom value: diff --git a/docs/apache-airflow-providers-google/operators/transfer/gcs_to_gdrive.rst b/docs/apache-airflow-providers-google/operators/transfer/gcs_to_gdrive.rst index 09ed6425dd27f..e7d32dcce106e 100644 --- a/docs/apache-airflow-providers-google/operators/transfer/gcs_to_gdrive.rst +++ b/docs/apache-airflow-providers-google/operators/transfer/gcs_to_gdrive.rst @@ -43,7 +43,7 @@ Operator Transfer files between Google Storage and Google Drive is performed with the :class:`~airflow.providers.google.suite.transfers.gcs_to_gdrive.GCSToGoogleDriveOperator` operator. -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.suite.transfers.gcs_to_gdrive.GCSToGoogleDriveOperator` parameters which allows you to dynamically determine values. diff --git a/docs/apache-airflow-providers-google/operators/transfer/gcs_to_sftp.rst b/docs/apache-airflow-providers-google/operators/transfer/gcs_to_sftp.rst index 66d57a56a62ce..b84e01b50873b 100644 --- a/docs/apache-airflow-providers-google/operators/transfer/gcs_to_sftp.rst +++ b/docs/apache-airflow-providers-google/operators/transfer/gcs_to_sftp.rst @@ -43,7 +43,7 @@ Operator Transfer files between SFTP and Google Storage is performed with the :class:`~airflow.providers.google.cloud.transfers.gcs_to_sftp.GCSToSFTPOperator` operator. -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.transfers.gcs_to_sftp.GCSToSFTPOperator` to define values dynamically. 
diff --git a/docs/apache-airflow-providers-google/operators/transfer/gcs_to_sheets.rst b/docs/apache-airflow-providers-google/operators/transfer/gcs_to_sheets.rst index 88330c6dec563..dd709c02b6468 100644 --- a/docs/apache-airflow-providers-google/operators/transfer/gcs_to_sheets.rst +++ b/docs/apache-airflow-providers-google/operators/transfer/gcs_to_sheets.rst @@ -48,5 +48,5 @@ To upload data from Google Cloud Storage to Google Spreadsheet you can use the :start-after: [START upload_gcs_to_sheets] :end-before: [END upload_gcs_to_sheets] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.suite.transfers.gcs_to_sheets.GCSToGoogleSheetsOperator`. diff --git a/docs/apache-airflow-providers-google/operators/transfer/gdrive_to_gcs.rst b/docs/apache-airflow-providers-google/operators/transfer/gdrive_to_gcs.rst index 459721ae71b0f..08ffd97556fb0 100644 --- a/docs/apache-airflow-providers-google/operators/transfer/gdrive_to_gcs.rst +++ b/docs/apache-airflow-providers-google/operators/transfer/gdrive_to_gcs.rst @@ -58,7 +58,7 @@ drive to both the ``folder_id`` and ``drive_id`` parameters. :start-after: [START upload_gdrive_to_gcs] :end-before: [END upload_gdrive_to_gcs] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.transfers.gdrive_to_gcs.GoogleDriveToGCSOperator` parameters which allows you to dynamically determine values. diff --git a/docs/apache-airflow-providers-google/operators/transfer/sftp_to_gcs.rst b/docs/apache-airflow-providers-google/operators/transfer/sftp_to_gcs.rst index dd498d0c01824..a4becde35d2b5 100644 --- a/docs/apache-airflow-providers-google/operators/transfer/sftp_to_gcs.rst +++ b/docs/apache-airflow-providers-google/operators/transfer/sftp_to_gcs.rst @@ -42,7 +42,7 @@ Operator Transfer files between SFTP and Google Storage is performed with the :class:`~airflow.providers.google.cloud.transfers.sftp_to_gcs.SFTPToGCSOperator` operator. -Use :ref:`Jinja templating ` with +Use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.transfers.sftp_to_gcs.SFTPToGCSOperator` to define values dynamically. diff --git a/docs/apache-airflow-providers-google/operators/transfer/sheets_to_gcs.rst b/docs/apache-airflow-providers-google/operators/transfer/sheets_to_gcs.rst index 37c609660357c..bae193d840a3f 100644 --- a/docs/apache-airflow-providers-google/operators/transfer/sheets_to_gcs.rst +++ b/docs/apache-airflow-providers-google/operators/transfer/sheets_to_gcs.rst @@ -48,5 +48,5 @@ To upload data from Google Spreadsheet to Google Cloud Storage you can use the :start-after: [START upload_sheet_to_gcs] :end-before: [END upload_sheet_to_gcs] -You can use :ref:`Jinja templating ` with +You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.cloud.transfers.sheets_to_gcs.GoogleSheetsToGCSOperator`. diff --git a/docs/apache-airflow-providers-jdbc/operators.rst b/docs/apache-airflow-providers-jdbc/operators.rst index 648fc2fc42692..e0da3ff5a8a84 100644 --- a/docs/apache-airflow-providers-jdbc/operators.rst +++ b/docs/apache-airflow-providers-jdbc/operators.rst @@ -88,7 +88,7 @@ each command (default is ``False``) Templating ---------- -You can use :ref:`Jinja templates ` to parameterize +You can use :ref:`Jinja templates ` to parameterize ``sql``. .. 
exampleinclude:: /../../airflow/providers/jdbc/example_dags/example_jdbc_queries.py diff --git a/docs/apache-airflow/best-practices.rst b/docs/apache-airflow/best-practices.rst index 2d5b409a0a4ea..4827036efa203 100644 --- a/docs/apache-airflow/best-practices.rst +++ b/docs/apache-airflow/best-practices.rst @@ -75,7 +75,7 @@ For example, if we have a task that stores processed data in S3 that task can pu and the downstream tasks can pull the path from XCom and use it to read the data. The tasks should also not store any authentication parameters such as passwords or token inside them. -Where at all possible, use :ref:`Connections ` to store data securely in Airflow backend and retrieve them using a unique connection id. +Where at all possible, use :doc:`Connections ` to store data securely in Airflow backend and retrieve them using a unique connection id. Variables diff --git a/docs/apache-airflow/concepts.rst b/docs/apache-airflow/concepts.rst deleted file mode 100644 index 4712007ad5137..0000000000000 --- a/docs/apache-airflow/concepts.rst +++ /dev/null @@ -1,1708 +0,0 @@ - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -.. _concepts: - -Concepts -######## - -The Airflow platform is a tool for describing, executing, and monitoring -workflows. - -.. _architecture: - -Basic Airflow architecture -'''''''''''''''''''''''''' - -Primarily intended for development use, the basic Airflow architecture with the Local and Sequential executors is an -excellent starting point for understanding the architecture of Apache Airflow. - -.. image:: img/arch-diag-basic.png - - -There are a few components to note: - -* **Metadata Database**: Airflow uses a SQL database to store metadata about the data pipelines being run. In the - diagram above, this is represented as Postgres which is extremely popular with Airflow. - Alternate databases supported with Airflow include MySQL. - -* **Web Server** and **Scheduler**: The Airflow web server and Scheduler are separate processes run (in this case) - on the local machine and interact with the database mentioned above. - -* The **Executor** is shown separately above, since it is commonly discussed within Airflow and in the documentation, but - in reality it is NOT a separate process, but run within the Scheduler. - -* The **Worker(s)** are separate processes which also interact with the other components of the Airflow architecture and - the metadata repository. - -* ``airflow.cfg`` is the Airflow configuration file which is accessed by the Web Server, Scheduler, and Workers. - -* **DAGs** refers to the DAG files containing Python code, representing the data pipelines to be run by Airflow. The - location of these files is specified in the Airflow configuration file, but they need to be accessible by the - Web Server, Scheduler, and Workers. 
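To make the last point concrete, a DAG file is just an ordinary Python module that instantiates a ``DAG`` object and lives in the configured DAG folder. A minimal sketch of such a file (the DAG id, task ids and schedule are illustrative placeholders) could look like this:

.. code-block:: python

    from datetime import datetime

    from airflow import DAG
    from airflow.operators.dummy import DummyOperator

    with DAG(
        dag_id="minimal_example",
        start_date=datetime(2021, 1, 1),
        schedule_interval="@daily",
    ) as dag:
        # Two placeholder tasks; ">>" declares that "start" must finish before "finish" runs.
        start = DummyOperator(task_id="start")
        finish = DummyOperator(task_id="finish")
        start >> finish

Placing a file like this where the Scheduler, Web Server, and Workers can read it is enough for Airflow to discover, schedule, and display it.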
- -Core Ideas -'''''''''' - -DAGs -==== - -In Airflow, a ``DAG`` -- or a Directed Acyclic Graph -- is a collection of all -the tasks you want to run, organized in a way that reflects their relationships -and dependencies. - -A DAG is defined in a Python script, which represents the DAGs structure (tasks -and their dependencies) as code. - -For example, a simple DAG could consist of three tasks: A, B, and C. It could -say that A has to run successfully before B can run, but C can run anytime. It -could say that task A times out after 5 minutes, and B can be restarted up to 5 -times in case it fails. It might also say that the workflow will run every night -at 10pm, but should not start until a certain date. - -In this way, a DAG describes *how* you want to carry out your workflow; but -notice that we haven't said anything about *what* we actually want to do! A, B, -and C could be anything. Maybe A prepares data for B to analyze while C sends an -email. Or perhaps A monitors your location so B can open your garage door while -C turns on your house lights. The important thing is that the DAG isn't -concerned with what its constituent tasks do; its job is to make sure that -whatever they do happens at the right time, or in the right order, or with the -right handling of any unexpected issues. - -DAGs are defined in standard Python files that are placed in Airflow's -``DAG_FOLDER``. Airflow will execute the code in each file to dynamically build -the ``DAG`` objects. You can have as many DAGs as you want, each describing an -arbitrary number of tasks. In general, each one should correspond to a single -logical workflow. - -.. note:: When searching for DAGs, Airflow only considers Python files - that contain the strings "airflow" and "dag" by default (case-insensitive). - To consider all Python files instead, disable the ``DAG_DISCOVERY_SAFE_MODE`` - configuration flag. - -.. _concepts:scope: - -Scope ------ - -Airflow will load any ``DAG`` object it can import from a DAG file. Critically, -that means the DAG must appear in ``globals()``. Consider the following two -DAGs. Only ``dag_1`` will be loaded; the other one only appears in a local -scope. - -.. code-block:: python - - dag_1 = DAG('this_dag_will_be_discovered') - - def my_function(): - dag_2 = DAG('but_this_dag_will_not') - - my_function() - -Sometimes this can be put to good use. For example, a common pattern with -:class:`~airflow.operators.subdag.SubDagOperator` is to define the subdag inside a function so that Airflow -doesn't try to load it as a standalone DAG. - -.. _default-args: - -Default Arguments ------------------ - -If a dictionary of ``default_args`` is passed to a DAG, it will apply them to -any of its operators. This makes it easy to apply a common parameter to many operators without having to type it many times. - -.. code-block:: python - - default_args = { - 'start_date': datetime(2016, 1, 1), - 'owner': 'airflow' - } - - dag = DAG('my_dag', default_args=default_args) - op = DummyOperator(task_id='dummy', dag=dag) - print(op.owner) # airflow - -.. _concepts:context_manager: - -Context Manager ---------------- - -*Added in Airflow 1.8* - -DAGs can be used as context managers to automatically assign new operators to that DAG. - -.. code-block:: python - - with DAG('my_dag', start_date=datetime(2016, 1, 1)) as dag: - op = DummyOperator('op') - - op.dag is dag # True - -.. _concepts:task_flow_api: - -TaskFlow API ------------- - -.. 
versionadded:: 2.0.0 - -Airflow 2.0 adds a new style of authoring dags called the TaskFlow API which removes a lot of the boilerplate -around creating PythonOperators, managing dependencies between task and accessing XCom values (During -development this feature was called "Functional DAGs", so if you see or hear any references to that, it's the -same thing). - -Outputs and inputs are sent between tasks using :ref:`XCom values `. In addition, you can wrap -functions as tasks using the :ref:`task decorator `. Airflow will also automatically -add dependencies between tasks to ensure that XCom messages are available when operators are executed. - -Example DAG built with the TaskFlow API - -.. code-block:: python - - with DAG( - 'send_server_ip', default_args=default_args, schedule_interval=None - ) as dag: - - # Using default connection as it's set to httpbin.org by default - get_ip = SimpleHttpOperator( - task_id='get_ip', endpoint='get', method='GET', do_xcom_push=True - ) - - @dag.task(multiple_outputs=True) - def prepare_email(raw_json: str) -> Dict[str, str]: - external_ip = json.loads(raw_json)['origin'] - return { - 'subject':f'Server connected from {external_ip}', - 'body': f'Seems like today your server executing Airflow is connected from the external IP {external_ip}
' - } - - email_info = prepare_email(get_ip.output) - - send_email = EmailOperator( - task_id='send_email', - to='example@example.com', - subject=email_info['subject'], - html_content=email_info['body'] - ) - -To retrieve current Task execution context dictionary and use it in the function check: -:ref:`Accessing context `. - -DAG decorator -------------- - -.. versionadded:: 2.0.0 - -In addition to creating DAGs using :ref:`context manager `, in Airflow 2.0 you can also -create DAGs from a function. DAG decorator creates a DAG generator function. Any function decorated with ``@dag`` -returns a DAG object. - -DAG decorator also sets up the parameters you have in the function as DAG params. This allows you to parameterize -your DAGs and set the parameters when triggering the DAG manually. See -:ref:`Passing Parameters when triggering dags ` to learn how to pass parameters when triggering DAGs. - -You can also use the parameters on jinja templates by using the ``{{context.params}}`` dictionary. - -Example DAG with decorator: - -.. exampleinclude:: /../../airflow/example_dags/example_dag_decorator.py - :language: python - :start-after: [START dag_decorator_usage] - :end-before: [END dag_decorator_usage] - -.. note:: Note that Airflow will only load DAGs that appear in ``globals()`` as noted in :ref:`scope section `. - This means you need to make sure to have a variable for your returned DAG in the module scope. - Otherwise Airflow won't detect your decorated DAG. - -.. _concepts:executor_config: - -``executor_config`` -=================== - -The ``executor_config`` is an argument placed into operators that allow Airflow users to override tasks -before launch. Currently this is primarily used by the :class:`KubernetesExecutor`, but will soon be available -for other overrides. - -.. _concepts:dagruns: - -DAG Runs -======== - -A DAG run is an instantiation of a DAG, containing task instances that run for a specific ``execution_date``. - -A DAG run is usually created by the Airflow scheduler, but can also be created by an external trigger. -Multiple DAG runs may be running at once for a particular DAG, each of them having a different ``execution_date``. -For example, we might currently have two DAG runs that are in progress for 2016-01-01 and 2016-01-02 respectively. - -.. _concepts:execution_date: - -execution_date --------------- - -The ``execution_date`` is the *logical* date and time which the DAG Run, and its task instances, are running for. - -This allows task instances to process data for the desired *logical* date & time. -While a task instance or DAG run might have an *actual* start date of now, -their *logical* date might be 3 months ago because we are busy reloading something. - -In the prior example the ``execution_date`` was 2016-01-01 for the first DAG Run and 2016-01-02 for the second. - -A DAG run and all task instances created within it are instanced with the same ``execution_date``, so -that logically you can think of a DAG run as simulating the DAG running all of its tasks at some -previous date & time specified by the ``execution_date``. - -.. _concepts:tasks: - -Tasks -===== - -A Task defines a unit of work within a DAG; it is represented as a node in the DAG graph, and it is written in Python. - -Each task is an implementation of an Operator, for example a ``PythonOperator`` to execute some Python code, -or a ``BashOperator`` to run a Bash command. 
- -The task implements an operator by defining specific values for that operator, -such as a Python callable in the case of ``PythonOperator`` or a Bash command in the case of ``BashOperator``. - -Relations between Tasks ------------------------ - -Consider the following DAG with two tasks. -Each task is a node in our DAG, and there is a dependency from task_1 to task_2: - -.. code-block:: python - - with DAG('my_dag', start_date=datetime(2016, 1, 1)) as dag: - task_1 = DummyOperator('task_1') - task_2 = DummyOperator('task_2') - task_1 >> task_2 # Define dependencies - -We can say that task_1 is *upstream* of task_2, and conversely task_2 is *downstream* of task_1. -When a DAG Run is created, task_1 will start running and task_2 waits for task_1 to complete successfully before it may start. - -.. _concepts:task_decorator: - -Python task decorator ---------------------- - -.. versionadded:: 2.0.0 - -Airflow ``task`` decorator converts any Python function to an Airflow operator. -The decorated function can be called once to set the arguments and key arguments for operator execution. - -.. code-block:: python - - with DAG('my_dag', start_date=datetime(2020, 5, 15)) as dag: - @dag.task - def hello_world(): - print('hello world!') - - - # Also... - from airflow.decorators import task - - - @task - def hello_name(name: str): - print(f'hello {name}!') - - - hello_name('Airflow users') - -Task decorator captures returned values and sends them to the :ref:`XCom backend `. By default, -the returned value is saved as a single XCom value. You can set ``multiple_outputs`` key argument to ``True`` -to unroll dictionaries, lists or tuples into separate XCom values. This can be used with regular operators to -create :ref:`DAGs with Task Flow API `. - -Calling a decorated function returns an ``XComArg`` instance. You can use it to set templated fields on downstream -operators. - -You can call a decorated function more than once in a DAG. The decorated function will automatically generate -a unique ``task_id`` for each generated operator. - -.. code-block:: python - - with DAG('my_dag', start_date=datetime(2020, 5, 15)) as dag: - - @dag.task - def update_user(user_id: int): - ... - - # Avoid generating this list dynamically to keep DAG topology stable between DAG runs - for user_id in user_ids: - update_user(user_id) - - # This will generate an operator for each user_id - -Task ids are generated by appending a number at the end of the original task id. For the above example, the DAG will have -the following task ids: ``[update_user, update_user__1, update_user__2, ... update_user__n]``. - -Due to dynamic nature of the ids generations users should be aware that changing a DAG by adding or removing additional -invocations of task-decorated function may change ``task_id`` of other task of the same type within a single DAG. - -For example, if there are many task-decorated tasks without explicitly given task_id. Their ``task_id`` will be -generated sequentially: ``task__1``, ``task__2``, ``task__3``, etc. After the DAG goes into production, one day -someone inserts a new task before ``task__2``. The ``task_id`` after that will all be shifted forward by one place. -This is going to produce ``task__1``, ``task__2``, ``task__3``, ``task__4``. But at this point the ``task__3`` is -no longer the same ``task__3`` as before. This may create confusion when analyzing history logs / DagRuns of a DAG -that changed over time. - -.. 
_concepts:accessing_context: - -Accessing current context -------------------------- - -To retrieve current execution context you can use ``get_current_context`` method. In this way -you can gain access to context dictionary from within your operators. This is especially helpful when -using ``@task`` decorator. - -.. code-block:: python - - from airflow.decorators import task - from airflow.operators.python import get_current_context - - @task - def my_task(): - context = get_current_context() - ti = context["ti"] - -Current context is accessible only during the task execution. The context is not accessible during -``pre_execute`` or ``post_execute``. Calling this method outside execution context will raise an error. - -The context dictionary contains the keys mentioned in the table: :doc:`macros-ref`. - -Task Instances -============== - -A task instance represents a specific run of a task and is characterized as the -combination of a DAG, a task, and a point in time (``execution_date``). Task instances -also have an indicative state, which could be "running", "success", "failed", "skipped", "up -for retry", etc. - -Tasks are defined in DAGs, and both are written in Python code to define what you want to do. -Task Instances belong to DAG Runs, have an associated ``execution_date``, and are instantiated, runnable entities. - -Relations between Task Instances --------------------------------- - -Again consider the following tasks, defined for some DAG: - -.. code-block:: python - - with DAG('my_dag', start_date=datetime(2016, 1, 1)) as dag: - task_1 = DummyOperator('task_1') - task_2 = DummyOperator('task_2') - task_1 >> task_2 # Define dependencies - -When we enable this DAG, the scheduler creates several DAG Runs - one with ``execution_date`` of 2016-01-01, -one with ``execution_date`` of 2016-01-02, and so on up to the current date. - -Each DAG Run will contain a task_1 Task Instance and a task_2 Task instance. Both Task Instances will -have ``execution_date`` equal to the DAG Run's ``execution_date``, and each task_2 will be *downstream* of -(depends on) its task_1. - -We can also say that task_1 for 2016-01-01 is the *previous* task instance of the task_1 for 2016-01-02. -Or that the DAG Run for 2016-01-01 is the *previous* DAG Run to the DAG Run of 2016-01-02. -Here, *previous* refers to the logical past/prior ``execution_date``, that runs independently of other runs, -and *upstream* refers to a dependency within the same run and having the same ``execution_date``. - -.. note:: - The Airflow documentation sometimes refers to *previous* instead of *upstream* in places, and vice-versa. - If you find any occurrences of this, please help us improve by contributing some corrections! - -Task Lifecycle -============== - -A task goes through various stages from start to completion. In the Airflow UI -(graph and tree views), these stages are displayed by a color representing each -stage: - -.. image:: img/task_stages.png - -The complete lifecycle of the task looks like this: - -.. image:: img/task_lifecycle_diagram.png - -The happy flow consists of the following stages: - -1. No status (scheduler created empty task instance) -2. Scheduled (scheduler determined task instance needs to run) -3. Queued (scheduler sent task to executor to run on the queue) -4. Running (worker picked up a task and is now running it) -5. Success (task completed) - -.. _concepts:operators: - -Operators -========= - -While DAGs describe *how* to run a workflow, ``Operators`` determine what -actually gets done by a task. 
- -An operator describes a single task in a workflow. Operators are usually (but -not always) atomic, meaning they can stand on their own and don't need to share -resources with any other operators. The DAG will make sure that operators run in -the correct order; other than those dependencies, operators generally -run independently. In fact, they may run on two completely different machines. - -This is a subtle but very important point: in general, if two operators need to -share information, like a filename or small amount of data, you should consider -combining them into a single operator. If it absolutely can't be avoided, -Airflow does have a feature for operator cross-communication called XCom that is -described in the section :ref:`XComs `. - -Airflow provides many built-in operators for many common tasks, including: - -- :class:`~airflow.operators.bash.BashOperator` - executes a bash command -- :class:`~airflow.operators.python.PythonOperator` - calls an arbitrary Python function -- :class:`~airflow.operators.email.EmailOperator` - sends an email - -There are also other, commonly used operators that are installed together with airflow automatically, -by pre-installing some :doc:`apache-airflow-providers:index` packages (they are always available no -matter which extras you chose when installing Apache Airflow): - -- :class:`~airflow.providers.http.operators.http.SimpleHttpOperator` - sends an HTTP request -- :class:`~airflow.providers.sqlite.operators.sqlite.SqliteOperator` - SQLite DB operator - -In addition to these basic building blocks, there are many more specific operators developed by the -community that you can install additionally by installing community-maintained provider packages. You -can install them by adding an extra (for example (``[mysql]``) when installing Airflow or by installing -additional packages manually (for example ``apache-airflow-providers-mysql`` package). - -Some examples of popular operators are: - -- :class:`~airflow.providers.mysql.operators.mysql.MySqlOperator` -- :class:`~airflow.providers.postgres.operators.postgres.PostgresOperator` -- :class:`~airflow.providers.microsoft.mssql.operators.mssql.MsSqlOperator` -- :class:`~airflow.providers.oracle.operators.oracle.OracleOperator` -- :class:`~airflow.providers.jdbc.operators.jdbc.JdbcOperator` -- :class:`~airflow.providers.docker.operators.docker.DockerOperator` -- :class:`~airflow.providers.apache.hive.operators.hive.HiveOperator` -- :class:`~airflow.providers.amazon.aws.operators.s3_file_transform.S3FileTransformOperator` -- :class:`~airflow.providers.mysql.transfers.presto_to_mysql.PrestoToMySqlOperator` -- :class:`~airflow.providers.slack.operators.slack.SlackAPIOperator` - -But there are many, many more - you can see the list of those by following the providers documentation -at :doc:`apache-airflow-providers:index`. - -Operators are only loaded by Airflow if they are assigned to a DAG. - -.. seealso:: - - :ref:`List Airflow operators ` - - :doc:`How-to guides for some Airflow operators` - -.. _concepts:sensors: - -Sensors -------- - -``Sensor`` is an Operator that waits (polls) for a certain time, file, database row, S3 key, another DAG/task, etc... - -There are currently 3 different modes for how a sensor operates: - - -.. list-table:: - :header-rows: 1 - - * - Schedule Mode - - Description - - Use case - * - ``poke`` (default) - - The sensor is taking up a worker slot for its whole execution time and sleeps between pokes. 
- - Use this mode if the expected runtime of the sensor is short or if a short poke interval is required. - Note that the sensor will hold onto a worker slot and a pool slot for the duration of the sensor's - runtime in this mode. - * - ``reschedule`` - - The sensor task frees the worker slot when the criteria is not yet met and it's rescheduled at a later time. - - Use this mode if the time before the criteria is met is expected to be quite long. - The poke interval should be more than one minute to prevent too much load on the scheduler. - * - ``smart sensor`` - - smart sensor is a service (run by a builtin DAG) which consolidate the execution of sensors in batches. - Instead of holding a long running process for each sensor and poking periodically, a sensor will only - store poke context at ``sensor_instance`` table and then exits with a 'sensing' state. - - Use this mode if you have a large amount of sensor tasks running in your airflow cluster. - This can largely reduce airflow’s infrastructure cost and improve cluster stability - reduce meta database load. - -How to use: - -For ``poke|reschedule`` mode, you can configure them at the task level by supplying the ``mode`` parameter, -i.e. ``S3KeySensor(task_id='check-bucket', mode='reschedule', ...)``. - -For ``smart sensor``, you need to configure it in ``airflow.cfg``, for example: - -.. code-block:: ini - - [smart_sensor] - use_smart_sensor = true - shard_code_upper_limit = 10000 - - # Users can change the following config based on their requirements - shards = 5 - sensors_enabled = NamedHivePartitionSensor, MetastorePartitionSensor - -For more information on how to configure ``smart sensor`` and its architecture, see: -:doc:`Smart Sensor Architecture and Configuration` - -DAG Assignment --------------- - -*Added in Airflow 1.8* - -Operators do not have to be assigned to DAGs immediately (previously ``dag`` was -a required argument). However, once an operator is assigned to a DAG, it can not -be transferred or unassigned. DAG assignment can be done explicitly when the -operator is created, through deferred assignment, or even inferred from other -operators. - -.. code-block:: python - - dag = DAG('my_dag', start_date=datetime(2016, 1, 1)) - - # sets the DAG explicitly - explicit_op = DummyOperator(task_id='op1', dag=dag) - - # deferred DAG assignment - deferred_op = DummyOperator(task_id='op2') - deferred_op.dag = dag - - # inferred DAG assignment (linked operators must be in the same DAG) - inferred_op = DummyOperator(task_id='op3') - inferred_op.set_upstream(deferred_op) - - -Bitshift Composition --------------------- - -*Added in Airflow 1.8* - -We recommend you setting operator relationships with bitshift operators rather than ``set_upstream()`` -and ``set_downstream()``. - -Traditionally, operator relationships are set with the ``set_upstream()`` and -``set_downstream()`` methods. In Airflow 1.8, this can be done with the Python -bitshift operators ``>>`` and ``<<``. The following four statements are all -functionally equivalent: - -.. code-block:: python - - op1 >> op2 - op1.set_downstream(op2) - - op2 << op1 - op2.set_upstream(op1) - -When using the bitshift to compose operators, the relationship is set in the -direction that the bitshift operator points. For example, ``op1 >> op2`` means -that ``op1`` runs first and ``op2`` runs second. Multiple operators can be -composed -- keep in mind the chain is executed left-to-right and the rightmost -object is always returned. For example: - -.. 
code-block:: python - - op1 >> op2 >> op3 << op4 - -is equivalent to: - -.. code-block:: python - - op1.set_downstream(op2) - op2.set_downstream(op3) - op3.set_upstream(op4) - -We can put this all together to build a simple pipeline: - -.. code-block:: python - - with DAG('my_dag', start_date=datetime(2016, 1, 1)) as dag: - ( - DummyOperator(task_id='dummy_1') - >> BashOperator( - task_id='bash_1', - bash_command='echo "HELLO!"') - >> PythonOperator( - task_id='python_1', - python_callable=lambda: print("GOODBYE!")) - ) - -Bitshift can also be used with lists. For example: - -.. code-block:: python - - op1 >> [op2, op3] >> op4 - -is equivalent to: - -.. code-block:: python - - op1 >> op2 >> op4 - op1 >> op3 >> op4 - -and equivalent to: - -.. code-block:: python - - op1.set_downstream([op2, op3]) - op4.set_upstream([op2, op3]) - - -Relationship Builders ---------------------- - -*Moved in Airflow 2.0* - -``chain`` and ``cross_downstream`` function provide easier ways to set relationships -between operators in specific situation. - -In Airflow 2.0 those two methods moved from ``airflow.utils.helpers`` to ``airflow.models.baseoperator``. - -When setting a relationship between two lists, -if we want all operators in one list to be upstream to all operators in the other, -we cannot use a single bitshift composition. Instead we have to split one of the lists: - -.. code-block:: python - - [op1, op2, op3] >> op4 - [op1, op2, op3] >> op5 - [op1, op2, op3] >> op6 - -``cross_downstream`` could handle list relationships easier. - -.. code-block:: python - - cross_downstream([op1, op2, op3], [op4, op5, op6]) - -When setting single direction relationships to many operators, we could -concat them with bitshift composition. - -.. code-block:: python - - op1 >> op2 >> op3 >> op4 >> op5 - -This can be accomplished using ``chain`` - -.. code-block:: python - - chain(op1, op2, op3, op4, op5) - -even without operator's name - -.. code-block:: python - - chain([DummyOperator(task_id='op' + i, dag=dag) for i in range(1, 6)]) - -``chain`` can handle a list of operators - -.. code-block:: python - - chain(op1, [op2, op3], op4) - -is equivalent to: - -.. code-block:: python - - op1 >> [op2, op3] >> op4 - -When ``chain`` sets relationships between two lists of operators, they must have the same size. - -.. code-block:: python - - chain(op1, [op2, op3], [op4, op5], op6) - -is equivalent to: - -.. code-block:: python - - op1 >> [op2, op3] - op2 >> op4 - op3 >> op5 - [op4, op5] >> op6 - - -Workflows -========= - -You're now familiar with the core building blocks of Airflow. -Some of the concepts may sound very similar, but the vocabulary can -be conceptualized like this: - -- DAG: The work (tasks), and the order in which - work should take place (dependencies), written in Python. -- DAG Run: An instance of a DAG for a particular logical date and time. -- Operator: A class that acts as a template for carrying out some work. -- Task: Defines work by implementing an operator, written in Python. -- Task Instance: An instance of a task - that has been assigned to a DAG and has a - state associated with a specific DAG run (i.e. for a specific execution_date). -- execution_date: The logical date and time for a DAG Run and its Task Instances. - -By combining ``DAGs`` and ``Operators`` to create ``TaskInstances``, you can -build complex workflows. 
- -Additional Functionality -'''''''''''''''''''''''' - -In addition to the core Airflow objects, there are a number of more complex -features that enable behaviors like limiting simultaneous access to resources, -cross-communication, conditional execution, and more. - -Hooks -===== - -Hooks are interfaces to external platforms and databases like Hive, S3, -MySQL, Postgres, HDFS, and Pig. Hooks implement a common interface when -possible, and act as a building block for operators. They also use -the ``airflow.models.connection.Connection`` model to retrieve hostnames -and authentication information. Hooks keep authentication code and -information out of pipelines, centralized in the metadata database. - -Hooks are also very useful on their own to use in Python scripts, -Airflow airflow.operators.PythonOperator, and in interactive environments -like iPython or Jupyter Notebook. - -.. seealso:: - :ref:`List Airflow hooks ` - -Pools -===== - -Some systems can get overwhelmed when too many processes hit them at the same -time. Airflow pools can be used to **limit the execution parallelism** on -arbitrary sets of tasks. The list of pools is managed in the UI -(``Menu -> Admin -> Pools``) by giving the pools a name and assigning -it a number of worker slots. Tasks can then be associated with -one of the existing pools by using the ``pool`` parameter when -creating tasks (i.e., instantiating operators). - -.. code-block:: python - - aggregate_db_message_job = BashOperator( - task_id='aggregate_db_message_job', - execution_timeout=timedelta(hours=3), - pool='ep_data_pipeline_db_msg_agg', - bash_command=aggregate_db_message_job_cmd, - dag=dag) - aggregate_db_message_job.set_upstream(wait_for_empty_queue) - -The ``pool`` parameter can -be used in conjunction with ``priority_weight`` to define priorities -in the queue, and which tasks get executed first as slots open up in the -pool. The default ``priority_weight`` is ``1``, and can be bumped to any -number. When sorting the queue to evaluate which task should be executed -next, we use the ``priority_weight``, summed up with all of the -``priority_weight`` values from tasks downstream from this task. You can -use this to bump a specific important task and the whole path to that task -gets prioritized accordingly. - -Tasks will be scheduled as usual while the slots fill up. Once capacity is -reached, runnable tasks get queued and their state will show as such in the -UI. As slots free up, queued tasks start running based on the -``priority_weight`` (of the task and its descendants). - -Note that if tasks are not given a pool, they are assigned to a default -pool ``default_pool``. ``default_pool`` is initialized with 128 slots and -can be changed through the UI or CLI (though it cannot be removed). - -To combine Pools with SubDAGs see the `SubDAGs`_ section. - -.. _concepts-connections: - -Connections -=========== - -The information needed to connect to external systems is stored in the Airflow metastore database and can be -managed in the UI (``Menu -> Admin -> Connections``). A ``conn_id`` is defined there, and hostname / login / -password / schema information attached to it. Airflow pipelines retrieve centrally-managed connections -information by specifying the relevant ``conn_id``. - -Airflow also provides a mechanism to store connections outside the database, e.g. in :ref:`environment variables `. -Additional sources may be enabled, e.g. :ref:`AWS SSM Parameter Store `, or you may -:ref:`roll your own secrets backend `. 
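Wherever a connection is stored, pipeline code refers to it only by its ``conn_id``. As a rough sketch (the connection id here is made up), a connection can be looked up like this:

.. code-block:: python

    from airflow.hooks.base import BaseHook

    # 'my_postgres' is an illustrative conn_id configured in the UI or a secrets backend
    conn = BaseHook.get_connection('my_postgres')
    print(conn.host, conn.login, conn.schema)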
- -Many hooks have a default ``conn_id``, where operators using that hook do not -need to supply an explicit connection ID. For example, the default -``conn_id`` for the :class:`~airflow.providers.postgres.hooks.postgres.PostgresHook` is -``postgres_default``. - -See :doc:`howto/connection` for details on creating and managing connections. - -.. _concepts:xcom: - -XComs -===== - -XComs let tasks exchange messages, allowing more nuanced forms of control and -shared state. The name is an abbreviation of "cross-communication". XComs are -principally defined by a key, value, and timestamp, but also track attributes -like the task/DAG that created the XCom and when it should become visible. Any -object that can be pickled can be used as an XCom value, so users should make -sure to use objects of appropriate size. - -XComs can be "pushed" (sent) or "pulled" (received). When a task pushes an -XCom, it makes it generally available to other tasks. Tasks can push XComs at -any time by calling the ``xcom_push()`` method. In addition, if a task returns -a value (either from its Operator's ``execute()`` method, or from a -PythonOperator's ``python_callable`` function), then an XCom containing that -value is automatically pushed. - -Tasks call ``xcom_pull()`` to retrieve XComs, optionally applying filters -based on criteria like ``key``, source ``task_ids``, and source ``dag_id``. By -default, ``xcom_pull()`` filters for the keys that are automatically given to -XComs when they are pushed by being returned from execute functions (as -opposed to XComs that are pushed manually). - -If ``xcom_pull`` is passed a single string for ``task_ids``, then the most -recent XCom value from that task is returned; if a list of ``task_ids`` is -passed, then a corresponding list of XCom values is returned. - -.. code-block:: python - - # inside a PythonOperator called 'pushing_task' - def push_function(): - return value - - # inside another PythonOperator - def pull_function(task_instance): - value = task_instance.xcom_pull(task_ids='pushing_task') - -When specifying arguments that are part of the context, they will be -automatically passed to the function. - -It is also possible to pull XCom directly in a template, here's an example -of what this may look like: - -.. code-block:: jinja - - SELECT * FROM {{ task_instance.xcom_pull(task_ids='foo', key='table_name') }} - -Note that XComs are similar to `Variables`_, but are specifically designed -for inter-task communication rather than global settings. - -Custom XCom backend -------------------- - -It is possible to change ``XCom`` behaviour of serialization and deserialization of tasks' result. -To do this one have to change ``xcom_backend`` parameter in Airflow config. Provided value should point -to a class that is subclass of :class:`~airflow.models.xcom.BaseXCom`. To alter the serialization / -deserialization mechanism the custom class should override ``serialize_value`` and ``deserialize_value`` -methods. - -It is also possible to override the ``orm_deserialize_value`` method which is used for deserialization when -recreating ORM XCom object. This happens every time we query the XCom table, for example when we want to populate -XCom list view in webserver. If your XCom backend performs expensive operations, or has large values that are not -useful to show in such a view, override this method to provide an alternative representation. By default Airflow will -use ``BaseXCom.orm_deserialize_value`` method which returns the value stored in Airflow database. 
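A minimal sketch of a custom backend might look like the following; the compression scheme is only an example, and the class name is a placeholder:

.. code-block:: python

    import json
    import zlib

    from airflow.models.xcom import BaseXCom


    class CompressedXComBackend(BaseXCom):
        """Illustrative XCom backend that stores values as compressed JSON."""

        @staticmethod
        def serialize_value(value):
            # Compress the JSON-encoded value before it is written to the metadata database
            return zlib.compress(json.dumps(value).encode('utf-8'))

        @staticmethod
        def deserialize_value(result):
            # ``result`` is the stored XCom row; its ``value`` column holds the compressed bytes
            return json.loads(zlib.decompress(result.value).decode('utf-8'))

You would then point the ``xcom_backend`` option in your Airflow config at this class, for example ``xcom_backend = my_company.xcom.CompressedXComBackend`` (the module path here is hypothetical).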
- -See :doc:`modules_management` for details on how Python and Airflow manage modules. - -.. _concepts:variables: - -Variables -========= - -Variables are a generic way to store and retrieve arbitrary content or -settings as a simple key value store within Airflow. Variables can be -listed, created, updated and deleted from the UI (``Admin -> Variables``), -code or CLI. In addition, json settings files can be bulk uploaded through -the UI. While your pipeline code definition and most of your constants -and variables should be defined in code and stored in source control, -it can be useful to have some variables or configuration items -accessible and modifiable through the UI. - - -.. code-block:: python - - from airflow.models import Variable - foo = Variable.get("foo") - bar = Variable.get("bar", deserialize_json=True) - baz = Variable.get("baz", default_var=None) - -The second call assumes ``json`` content and will be deserialized into -``bar``. Note that ``Variable`` is a sqlalchemy model and can be used -as such. The third call uses the ``default_var`` parameter with the value -``None``, which either returns an existing value or ``None`` if the variable -isn't defined. The get function will throw a ``KeyError`` if the variable -doesn't exist and no default is provided. - -You can use a variable from a jinja template with the syntax : - -.. code-block:: bash - - echo {{ var.value. }} - -or if you need to deserialize a json object from the variable : - -.. code-block:: bash - - echo {{ var.json. }} - -See :doc:`howto/variable` for details on managing variables. - -Branching -========= - -Sometimes you need a workflow to branch, or only go down a certain path -based on an arbitrary condition which is typically related to something -that happened in an upstream task. One way to do this is by using the -``BranchPythonOperator``. - -The ``BranchPythonOperator`` is much like the PythonOperator except that it -expects a ``python_callable`` that returns a task_id (or list of task_ids). The -task_id returned is followed, and all of the other paths are skipped. -The task_id returned by the Python function has to reference a task -directly downstream from the BranchPythonOperator task. - -Note that when a path is a downstream task of the returned task (list), it will -not be skipped: - -.. image:: img/branch_note.png - -Paths of the branching task are ``branch_a``, ``join`` and ``branch_b``. Since -``join`` is a downstream task of ``branch_a``, it will be excluded from the skipped -tasks when ``branch_a`` is returned by the Python callable. - -The ``BranchPythonOperator`` can also be used with XComs allowing branching -context to dynamically decide what branch to follow based on upstream tasks. -For example: - -.. 
code-block:: python - - def branch_func(ti): - xcom_value = int(ti.xcom_pull(task_ids='start_task')) - if xcom_value >= 5: - return 'continue_task' - else: - return 'stop_task' - - start_op = BashOperator( - task_id='start_task', - bash_command="echo 5", - xcom_push=True, - dag=dag) - - branch_op = BranchPythonOperator( - task_id='branch_task', - python_callable=branch_func, - dag=dag) - - continue_op = DummyOperator(task_id='continue_task', dag=dag) - stop_op = DummyOperator(task_id='stop_task', dag=dag) - - start_op >> branch_op >> [continue_op, stop_op] - -If you wish to implement your own operators with branching functionality, you -can inherit from :class:`~airflow.operators.branch.BaseBranchOperator`, -which behaves similarly to ``BranchPythonOperator`` but expects you to provide -an implementation of the method ``choose_branch``. As with the callable for -``BranchPythonOperator``, this method should return the ID of a downstream task, -or a list of task IDs, which will be run, and all others will be skipped. - -.. code-block:: python - - class MyBranchOperator(BaseBranchOperator): - def choose_branch(self, context): - """ - Run an extra branch on the first day of the month - """ - if context['execution_date'].day == 1: - return ['daily_task_id', 'monthly_task_id'] - else: - return 'daily_task_id' - - -SubDAGs -======= - -SubDAGs are perfect for repeating patterns. Defining a function that returns a -DAG object is a nice design pattern when using Airflow. - -Airbnb uses the *stage-check-exchange* pattern when loading data. Data is staged -in a temporary table, after which data quality checks are performed against -that table. Once the checks all pass the partition is moved into the production -table. - -As another example, consider the following DAG: - -.. image:: img/subdag_before.png - -We can combine all of the parallel ``task-*`` operators into a single SubDAG, -so that the resulting DAG resembles the following: - -.. image:: img/subdag_after.png - -Note that SubDAG operators should contain a factory method that returns a DAG -object. This will prevent the SubDAG from being treated like a separate DAG in -the main UI. For example: - -.. exampleinclude:: /../../airflow/example_dags/subdags/subdag.py - :language: python - :start-after: [START subdag] - :end-before: [END subdag] - -This SubDAG can then be referenced in your main DAG file: - -.. exampleinclude:: /../../airflow/example_dags/example_subdag_operator.py - :language: python - :start-after: [START example_subdag_operator] - :end-before: [END example_subdag_operator] - -You can zoom into a :class:`~airflow.operators.subdag.SubDagOperator` from the graph view of the main DAG to show -the tasks contained within the SubDAG: - -.. image:: img/subdag_zoom.png - -Some other tips when using SubDAGs: - -- by convention, a SubDAG's ``dag_id`` should be prefixed by its parent and - a dot. As in ``parent.child`` -- share arguments between the main DAG and the SubDAG by passing arguments to - the SubDAG operator (as demonstrated above) -- SubDAGs must have a schedule and be enabled. 
If the SubDAG's schedule is - set to ``None`` or ``@once``, the SubDAG will succeed without having done - anything -- clearing a :class:`~airflow.operators.subdag.SubDagOperator` also clears the state of the tasks within -- marking success on a :class:`~airflow.operators.subdag.SubDagOperator` does not affect the state of the tasks - within -- refrain from using ``depends_on_past=True`` in tasks within the SubDAG as - this can be confusing -- it is possible to specify an executor for the SubDAG. It is common to use - the SequentialExecutor if you want to run the SubDAG in-process and - effectively limit its parallelism to one. Using LocalExecutor can be - problematic as it may over-subscribe your worker, running multiple tasks in - a single slot - -See ``airflow/example_dags`` for a demonstration. - -Note that airflow pool is not honored by :class:`~airflow.operators.subdag.SubDagOperator`. Hence -resources could be consumed by SubdagOperators. - - -TaskGroup -========= -TaskGroup can be used to organize tasks into hierarchical groups in Graph View. It is -useful for creating repeating patterns and cutting down visual clutter. Unlike -:class:`~airflow.operators.subdag.SubDagOperator`, TaskGroup is a UI grouping concept. -Tasks in TaskGroups live on the same original DAG. They honor all the pool configurations. - -Dependency relationships can be applied across all tasks in a TaskGroup with the ``>>`` and ``<<`` -operators. For example, the following code puts ``task1`` and ``task2`` in TaskGroup ``group1`` -and then puts both tasks upstream of ``task3``: - -.. code-block:: python - - with TaskGroup("group1") as group1: - task1 = DummyOperator(task_id="task1") - task2 = DummyOperator(task_id="task2") - - task3 = DummyOperator(task_id="task3") - - group1 >> task3 - -.. note:: - By default, child tasks and TaskGroups have their task_id and group_id prefixed with the - group_id of their parent TaskGroup. This ensures uniqueness of group_id and task_id throughout - the DAG. To disable the prefixing, pass ``prefix_group_id=False`` when creating the TaskGroup. - This then gives the user full control over the actual group_id and task_id. They have to ensure - group_id and task_id are unique throughout the DAG. The option ``prefix_group_id=False`` is - mainly useful for putting tasks on existing DAGs into TaskGroup without altering their task_id. - -Here is a more complicated example DAG with multiple levels of nested TaskGroups: - -.. exampleinclude:: /../../airflow/example_dags/example_task_group.py - :language: python - :start-after: [START howto_task_group] - :end-before: [END howto_task_group] - -This animated gif shows the UI interactions. TaskGroups are expanded or collapsed when clicked: - -.. image:: img/task_group.gif - -TaskGroup can be created using ``@task_group`` decorator, it takes one argument ``group_id`` which is same as constructor of TaskGroup class, if not given it copies function name as ``group_id``. It works exactly same as creating TaskGroup using context manager ``with TaskGroup('groupid') as section:``. - -.. 
exampleinclude:: /../../airflow/example_dags/example_task_group_decorator.py - :language: python - :start-after: [START howto_task_group_decorator] - :end-before: [END howto_task_group_decorator] - - -Edge Labels -=========== - -As well as grouping tasks into groups, you can also label the edges between -different tasks in the Graph View - this can be especially useful for branching -areas of your DAG, so you can label the conditions under which certain branches -might run. - -To add labels, you can either pass a Label object to -``set_upstream``/``set_downstream``: - -.. code-block:: python - - from airflow.utils.edgemodifier import Label - my_task.set_downstream(other_task, Label("When empty")) - -Or, you can use them directly inline with the ``>>`` and ``<<`` operators: - -.. code-block:: python - - from airflow.utils.edgemodifier import Label - my_task >> Label("When empty") >> other_task - -Here's an example DAG which illustrates labeling different branches: - -.. image:: img/edge_label_example.png - -.. exampleinclude:: /../../airflow/example_dags/example_branch_labels.py - -SLAs -==== - -Service Level Agreements, or time by which a task or DAG should have -succeeded, can be set at a task level as a ``timedelta``. If -one or many instances have not succeeded by that time, an alert email is sent -detailing the list of tasks that missed their SLA. The event is also recorded -in the database and made available in the web UI under ``Browse->SLA Misses`` -where events can be analyzed and documented. - -SLAs can be configured for scheduled tasks by using the ``sla`` parameter. -In addition to sending alerts to the addresses specified in a task's ``email`` parameter, -the ``sla_miss_callback`` specifies an additional ``Callable`` -object to be invoked when the SLA is not met. - -If you don't want to check SLAs, you can disable globally (all the DAGs) by -setting ``check_slas=False`` under ``[core]`` section in ``airflow.cfg`` file: - -.. code-block:: ini - - [core] - check_slas = False - -.. note:: - For information on the email configuration, see :doc:`howto/email-config` - -.. _concepts/trigger_rule: - -Trigger Rules -============= - -Though the normal workflow behavior is to trigger tasks when all their -directly upstream tasks have succeeded, Airflow allows for more complex -dependency settings. - -All operators have a ``trigger_rule`` argument which defines the rule by which -the generated task get triggered. The default value for ``trigger_rule`` is -``all_success`` and can be defined as "trigger this task when all directly -upstream tasks have succeeded". All other rules described here are based -on direct parent tasks and are values that can be passed to any operator -while creating tasks: - -* ``all_success``: (default) all parents have succeeded -* ``all_failed``: all parents are in a ``failed`` or ``upstream_failed`` state -* ``all_done``: all parents are done with their execution -* ``one_failed``: fires as soon as at least one parent has failed, it does not wait for all parents to be done -* ``one_success``: fires as soon as at least one parent succeeds, it does not wait for all parents to be done -* ``none_failed``: all parents have not failed (``failed`` or ``upstream_failed``) i.e. all parents have succeeded or been skipped -* ``none_failed_or_skipped``: all parents have not failed (``failed`` or ``upstream_failed``) and at least one parent has succeeded. -* ``none_skipped``: no parent is in a ``skipped`` state, i.e. 
all parents are in a ``success``, ``failed``, or ``upstream_failed`` state -* ``dummy``: dependencies are just for show, trigger at will - -Note that these can be used in conjunction with ``depends_on_past`` (boolean) -that, when set to ``True``, keeps a task from getting triggered if the -previous schedule for the task hasn't succeeded. - -One must be aware of the interaction between trigger rules and skipped tasks -in schedule level. Skipped tasks will cascade through trigger rules -``all_success`` and ``all_failed`` but not ``all_done``, ``one_failed``, ``one_success``, -``none_failed``, ``none_failed_or_skipped``, ``none_skipped`` and ``dummy``. - -For example, consider the following DAG: - -.. code-block:: python - - #dags/branch_without_trigger.py - import datetime as dt - - from airflow.models import DAG - from airflow.operators.dummy import DummyOperator - from airflow.operators.python import BranchPythonOperator - - dag = DAG( - dag_id='branch_without_trigger', - schedule_interval='@once', - start_date=dt.datetime(2019, 2, 28) - ) - - run_this_first = DummyOperator(task_id='run_this_first', dag=dag) - branching = BranchPythonOperator( - task_id='branching', dag=dag, - python_callable=lambda: 'branch_a' - ) - - branch_a = DummyOperator(task_id='branch_a', dag=dag) - follow_branch_a = DummyOperator(task_id='follow_branch_a', dag=dag) - - branch_false = DummyOperator(task_id='branch_false', dag=dag) - - join = DummyOperator(task_id='join', dag=dag) - - run_this_first >> branching - branching >> branch_a >> follow_branch_a >> join - branching >> branch_false >> join - -In the case of this DAG, ``join`` is downstream of ``follow_branch_a`` -and ``branch_false``. The ``join`` task will show up as skipped -because its ``trigger_rule`` is set to ``all_success`` by default and -skipped tasks will cascade through ``all_success``. - -.. image:: img/branch_without_trigger.png - -By setting ``trigger_rule`` to ``none_failed_or_skipped`` in ``join`` task, - -.. code-block:: python - - #dags/branch_with_trigger.py - ... - join = DummyOperator(task_id='join', dag=dag, trigger_rule='none_failed_or_skipped') - ... - -The ``join`` task will be triggered as soon as -``branch_false`` has been skipped (a valid completion state) and -``follow_branch_a`` has succeeded. Because skipped tasks **will not** -cascade through ``none_failed_or_skipped``. - -.. image:: img/branch_with_trigger.png - -Latest Run Only -=============== - -Standard workflow behavior involves running a series of tasks for a -particular date/time range. Some workflows, however, perform tasks that -are independent of run time but need to be run on a schedule, much like a -standard cron job. In these cases, backfills or running jobs missed during -a pause just wastes CPU cycles. - -For situations like this, you can use the ``LatestOnlyOperator`` to skip -tasks that are not being run during the most recent scheduled run for a -DAG. The ``LatestOnlyOperator`` skips all direct downstream tasks, if the time -right now is not between its ``execution_time`` and the next scheduled -``execution_time`` or the DagRun has been externally triggered. - -For example, consider the following DAG: - -.. exampleinclude:: /../../airflow/example_dags/example_latest_only_with_trigger.py - :language: python - :start-after: [START example] - :end-before: [END example] - -In the case of this DAG, the task ``task1`` is directly downstream of -``latest_only`` and will be skipped for all runs except the latest. 
-``task2`` is entirely independent of ``latest_only`` and will run in all -scheduled periods. ``task3`` is downstream of ``task1`` and ``task2`` and -because of the default ``trigger_rule`` being ``all_success`` will receive -a cascaded skip from ``task1``. ``task4`` is downstream of ``task1`` and -``task2``, but it will not be skipped, since its ``trigger_rule`` is set to -``all_done``. - -.. image:: img/latest_only_with_trigger.png - - -Zombies & Undeads -================= - -Task instances die all the time, usually as part of their normal life cycle, -but sometimes unexpectedly. - -Zombie tasks are characterized by the absence -of a heartbeat (emitted by the job periodically) and a ``running`` status -in the database. They can occur when a worker node can't reach the database, -when Airflow processes are killed externally, or when a node gets rebooted -for instance. Zombie killing is performed periodically by the scheduler's -process. - -Undead processes are characterized by the existence of a process and a matching -heartbeat, but Airflow isn't aware of this task as ``running`` in the database. -This mismatch typically occurs as the state of the database is altered, -most likely by deleting rows in the "Task Instances" view in the UI. -Tasks are instructed to verify their state as part of the heartbeat routine, -and terminate themselves upon figuring out that they are in this "undead" -state. - - -Cluster Policy -============== - -Cluster policies provide an interface for taking action on every Airflow task -or DAG either at DAG load time or just before task execution. In this way users -are able to do the following: - -- set default arguments on each DAG/task -- checks that DAG/task meets required standards -- perform custom logic of routing task to a queue - -And many other options. To use cluster-wide policies users can define in their -``airflow_local_settings`` the following functions - -- ``dag_policy`` - which as an input takes ``dag`` argument of :class:`~airflow.models.dag.DAG` type. - This function allows users to define dag-level policy which is executed for every DAG at loading time. -- ``task_policy`` - which as an input takes ``task`` argument of :class:`~airflow.models.baseoperator.BaseOperator` - type. This function allows users to define task-level policy which is executed for every task at DAG loading time. -- ``task_instance_mutation_hook`` - which as an input takes ``task_instance`` argument of - :class:`~airflow.models.taskinstance.TaskInstance` type. This function allows users to define task-level - policy that is executed right before the task execution. - -In case of DAG and task policies users may raise :class:`~airflow.exceptions.AirflowClusterPolicyViolation` -to prevent a DAG from being imported or prevent a task from being executed if the task is not compliant with -users' check. - -Please note, cluster policy will have precedence over task attributes defined in DAG meaning that -if ``task.sla`` is defined in dag and also mutated via cluster policy then the latter will have precedence. - -In next sections we show examples of each type of cluster policy. - -Where to put ``airflow_local_settings.py``? -------------------------------------------- -Add a ``airflow_local_settings.py`` file to your ``$PYTHONPATH`` or to ``$AIRFLOW_HOME/config`` folder. - -See :doc:`modules_management` for details on how Python and Airflow manage modules. 
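Pulling this together, a rough skeleton of ``airflow_local_settings.py`` might define the three callables side by side; the specific rules below are illustrative, not recommendations:

.. code-block:: python

    # airflow_local_settings.py -- illustrative skeleton
    from airflow.exceptions import AirflowClusterPolicyViolation
    from airflow.models import DAG
    from airflow.models.baseoperator import BaseOperator
    from airflow.models.taskinstance import TaskInstance


    def dag_policy(dag: DAG):
        """Runs for every DAG at load time."""
        if not dag.default_args.get('owner'):
            raise AirflowClusterPolicyViolation(
                f"DAG {dag.dag_id} does not declare an owner in default_args"
            )


    def task_policy(task: BaseOperator):
        """Runs for every task at DAG load time."""
        # Example mutation: cap retries cluster-wide
        if task.retries is None or task.retries > 3:
            task.retries = 3


    def task_instance_mutation_hook(task_instance: TaskInstance):
        """Runs right before each task instance is executed."""
        # Example mutation: route retries to a dedicated queue
        if task_instance.try_number >= 2:
            task_instance.queue = 'retry_queue'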
- - -DAG level cluster policy ------------------------------------ -In this example we check if each DAG has at least one tag defined. -Here is what it may look like: - -.. literalinclude:: /../../tests/cluster_policies/__init__.py - :language: python - :start-after: [START example_dag_cluster_policy] - :end-before: [END example_dag_cluster_policy] - - -.. note:: - - To avoid import cycles, if using ``DAG`` in type annotations in your cluster policy, be sure to import from ``airflow.models`` and not from ``airflow``. - - -Task level cluster policy ------------------------------ -For example, this function could apply a specific queue property when -using a specific operator, or enforce a task timeout policy, making sure -that no tasks run for more than 48 hours. Here's an example of what this -may look like: - -.. literalinclude:: /../../tests/cluster_policies/__init__.py - :language: python - :start-after: [START example_task_cluster_policy] - :end-before: [END example_task_cluster_policy] - -As a more advanced example we may consider implementing checks that are intended to help -teams using Airflow to protect against common beginner errors that may get past a code -reviewer, rather than as technical security controls. - -For example, don't run tasks without airflow owners: - -.. literalinclude:: /../../tests/cluster_policies/__init__.py - :language: python - :start-after: [START example_cluster_policy_rule] - :end-before: [END example_cluster_policy_rule] - -If you have multiple checks to apply, it is best practice to curate these rules -in a separate python module and have a single policy / task mutation hook that -performs multiple of these custom checks and aggregates the various error -messages so that a single ``AirflowClusterPolicyViolation`` can be reported in -the UI (and import errors table in the database). - -For Example in ``airflow_local_settings.py``: - -.. literalinclude:: /../../tests/cluster_policies/__init__.py - :language: python - :start-after: [START example_list_of_cluster_policy_rules] - :end-before: [END example_list_of_cluster_policy_rules] - -Task instance mutation hook -------------------------------------------- -Task instance mutation hook can be used for example to re-routes the task to -execute in a different queue during retries: - -.. literalinclude:: /../../tests/cluster_policies/__init__.py - :language: python - :start-after: [START example_task_mutation_hook] - :end-before: [END example_task_mutation_hook] - -Documentation & Notes -===================== - -It's possible to add documentation or notes to your DAGs & task objects that -become visible in the web interface ("Graph View" & "Tree View" for DAGs, "Task Instance Details" -for tasks). There are a set of special task attributes that get rendered as rich -content if defined: - -========== ================ -attribute rendered to -========== ================ -doc monospace -doc_json json -doc_yaml yaml -doc_md markdown -doc_rst reStructuredText -========== ================ - -Please note that for DAGs, doc_md is the only attribute interpreted. - -This is especially useful if your tasks are built dynamically from -configuration files, it allows you to expose the configuration that led -to the related tasks in Airflow. - -.. 
code-block:: python - - """ - ### My great DAG - """ - - dag = DAG('my_dag', default_args=default_args) - dag.doc_md = __doc__ - - t = BashOperator("foo", dag=dag) - t.doc_md = """\ - #Title" - Here's a [url](www.airbnb.com) - """ - -This content will get rendered as markdown respectively in the "Graph View" and -"Task Instance Details" pages. - -.. _jinja-templating: - -Jinja Templating -================ - -Airflow leverages the power of -`Jinja Templating `_ and this can be a -powerful tool to use in combination with macros (see the :doc:`macros-ref` section). - -For example, say you want to pass the execution date as an environment variable -to a Bash script using the ``BashOperator``. - -.. code-block:: python - - # The execution date as YYYY-MM-DD - date = "{{ ds }}" - t = BashOperator( - task_id='test_env', - bash_command='/tmp/test.sh ', - dag=dag, - env={'EXECUTION_DATE': date}) - -Here, ``{{ ds }}`` is a macro, and because the ``env`` parameter of the -``BashOperator`` is templated with Jinja, the execution date will be available -as an environment variable named ``EXECUTION_DATE`` in your Bash script. - -You can use Jinja templating with every parameter that is marked as "templated" -in the documentation. Template substitution occurs just before the pre_execute -function of your operator is called. - -You can also use Jinja templating with nested fields, as long as these nested fields -are marked as templated in the structure they belong to: fields registered in -``template_fields`` property will be submitted to template substitution, like the -``path`` field in the example below: - -.. code-block:: python - - class MyDataReader: - template_fields = ['path'] - - def __init__(self, my_path): - self.path = my_path - - # [additional code here...] - - t = PythonOperator( - task_id='transform_data', - python_callable=transform_data - op_args=[ - MyDataReader('/tmp/{{ ds }}/my_file') - ], - dag=dag) - -.. note:: ``template_fields`` property can equally be a class variable or an - instance variable. - -Deep nested fields can also be substituted, as long as all intermediate fields are -marked as template fields: - -.. code-block:: python - - class MyDataTransformer: - template_fields = ['reader'] - - def __init__(self, my_reader): - self.reader = my_reader - - # [additional code here...] - - class MyDataReader: - template_fields = ['path'] - - def __init__(self, my_path): - self.path = my_path - - # [additional code here...] - - t = PythonOperator( - task_id='transform_data', - python_callable=transform_data - op_args=[ - MyDataTransformer(MyDataReader('/tmp/{{ ds }}/my_file')) - ], - dag=dag) - -You can pass custom options to the Jinja ``Environment`` when creating your DAG. -One common usage is to avoid Jinja from dropping a trailing newline from a -template string: - -.. code-block:: python - - my_dag = DAG(dag_id='my-dag', - jinja_environment_kwargs={ - 'keep_trailing_newline': True, - # some other jinja2 Environment options here - }) - -See `Jinja documentation `_ -to find all available options. - -.. _exceptions: - -Exceptions -========== - -Airflow defines a number of exceptions; most of these are used internally, but a few -are relevant to authors of custom operators or Python callables called from ``PythonOperator`` -tasks. Normally any exception raised from an ``execute`` method or Python callable will either -cause a task instance to fail if it is not configured to retry or has reached its limit on -retry attempts, or to be marked as "up for retry". 
A few exceptions can be used when different -behavior is desired: - -* ``AirflowSkipException`` can be raised to set the state of the current task instance to "skipped" -* ``AirflowFailException`` can be raised to set the state of the current task to "failed" regardless - of whether there are any retry attempts remaining. - -This example illustrates some possibilities - -.. code-block:: python - - from airflow.exceptions import AirflowFailException, AirflowSkipException - - def fetch_data(): - try: - data = get_some_data(get_api_key()) - if not data: - # Set state to skipped and do not retry - # Downstream task behavior will be determined by trigger rules - raise AirflowSkipException("No data available.") - except Unauthorized: - # If we retry, our api key will still be bad, so don't waste time retrying! - # Set state to failed and move on - raise AirflowFailException("Our api key is bad!") - except TransientError: - print("Looks like there was a blip.") - # Raise the exception and let the task retry unless max attempts were reached - raise - handle(data) - - task = PythonOperator(task_id="fetch_data", python_callable=fetch_data, retries=10) - -.. seealso:: - - :ref:`List of Airflow exceptions ` - - -Packaged DAGs -============= - -While often you will specify DAGs in a single ``.py`` file it might sometimes -be required to combine a DAG and its dependencies. For example, you might want -to combine several DAGs together to version them together or you might want -to manage them together or you might need an extra module that is not available -by default on the system you are running Airflow on. To allow this you can create -a zip file that contains the DAG(s) in the root of the zip file and have the extra -modules unpacked in directories. - -For instance you can create a zip file that looks like this: - -.. code-block:: bash - - my_dag1.py - my_dag2.py - package1/__init__.py - package1/functions.py - -Airflow will scan the zip file and try to load ``my_dag1.py`` and ``my_dag2.py``. -It will not go into subdirectories as these are considered to be potential -packages. - -In case you would like to add module dependencies to your DAG you basically would -do the same, but then it is more suitable to use a virtualenv and pip. - -.. code-block:: bash - - virtualenv zip_dag - source zip_dag/bin/activate - - mkdir zip_dag_contents - cd zip_dag_contents - - pip install --install-option="--install-lib=$PWD" my_useful_package - cp ~/my_dag.py . - - zip -r zip_dag.zip * - -.. note:: the zip file will be inserted at the beginning of module search list - (sys.path) and as such it will be available to any other code that resides - within the same interpreter. - -.. note:: packaged dags cannot be used with pickling turned on. - -.. note:: packaged dags cannot contain dynamic libraries (eg. libz.so) these need - to be available on the system if a module needs those. In other words only - pure Python modules can be packaged. - - -``.airflowignore`` -================== - -A ``.airflowignore`` file specifies the directories or files in ``DAG_FOLDER`` -or ``PLUGINS_FOLDER`` that Airflow should intentionally ignore. -Each line in ``.airflowignore`` specifies a regular expression pattern, -and directories or files whose names (not DAG id) match any of the patterns -would be ignored (under the hood, ``Pattern.search()`` is used to match the pattern). -Overall it works like a ``.gitignore`` file. -Use the ``#`` character to indicate a comment; all characters -on a line following a ``#`` will be ignored. 
- -``.airflowignore`` file should be put in your ``DAG_FOLDER``. -For example, you can prepare a ``.airflowignore`` file with contents - -.. code-block:: - - project_a - tenant_[\d] - - -Then files like ``project_a_dag_1.py``, ``TESTING_project_a.py``, ``tenant_1.py``, -``project_a/dag_1.py``, and ``tenant_1/dag_1.py`` in your ``DAG_FOLDER`` would be ignored -(If a directory's name matches any of the patterns, this directory and all its subfolders -would not be scanned by Airflow at all. This improves efficiency of DAG finding). - -The scope of a ``.airflowignore`` file is the directory it is in plus all its subfolders. -You can also prepare ``.airflowignore`` file for a subfolder in ``DAG_FOLDER`` and it -would only be applicable for that subfolder. diff --git a/docs/apache-airflow/concepts/cluster-policies.rst b/docs/apache-airflow/concepts/cluster-policies.rst new file mode 100644 index 0000000000000..e1c664660a4d6 --- /dev/null +++ b/docs/apache-airflow/concepts/cluster-policies.rst @@ -0,0 +1,92 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Cluster Policies +================ + +If you want to check or mutate DAGs or Tasks on a cluster-wide level, then a Cluster Policy will let you do that. They have three main purposes: + +* Checking that DAGs/Tasks meet a certain standard +* Setting default arguments on DAGs/Tasks +* Performing custom routing logic + +There are three types of cluster policy: + +* ``dag_policy``: Takes a :class:`~airflow.models.dag.DAG` parameter called ``dag``. Runs at load time. +* ``task_policy``: Takes a :class:`~airflow.models.baseoperator.BaseOperator` parameter called ``task``. Runs at load time. +* ``task_instance_mutation_hook``: Takes a :class:`~airflow.models.taskinstance.TaskInstance` parameter called ``task_instance``. Called right before task execution. + +The DAG and Task cluster policies can raise the :class:`~airflow.exceptions.AirflowClusterPolicyViolation` exception to indicate that the dag/task they were passed is not compliant and should not be loaded. + +Any extra attributes set by a cluster policy take priority over those defined in your DAG file; for example, if you set an ``sla`` on your Task in the DAG file, and then your cluster policy also sets an ``sla``, the cluster policy's value will take precedence. + +To configure cluster policies, you should create an ``airflow_local_settings.py`` file in either the ``config`` folder under your ``$AIRFLOW_HOME``, or place it on the ``$PYTHONPATH``, and then add callables to the file matching one or more of the cluster policy names above (e.g. ``dag_policy``) + + +Examples +-------- + +DAG policies +~~~~~~~~~~~~ + +This policy checks if each DAG has at least one tag defined: + +.. 
literalinclude:: /../../tests/cluster_policies/__init__.py
+    :language: python
+    :start-after: [START example_dag_cluster_policy]
+    :end-before: [END example_dag_cluster_policy]
+
+.. note::
+
+    To avoid import cycles, if you use ``DAG`` in type annotations in your cluster policy, be sure to import from ``airflow.models`` and not from ``airflow``.
+
+Task policies
+~~~~~~~~~~~~~
+
+Here's an example of enforcing a maximum timeout policy on every task:
+
+.. literalinclude:: /../../tests/cluster_policies/__init__.py
+    :language: python
+    :start-after: [START example_task_cluster_policy]
+    :end-before: [END example_task_cluster_policy]
+
+You can also implement checks that guard against common errors, rather than acting as technical security controls. For example, don't run tasks without airflow owners:
+
+.. literalinclude:: /../../tests/cluster_policies/__init__.py
+    :language: python
+    :start-after: [START example_cluster_policy_rule]
+    :end-before: [END example_cluster_policy_rule]
+
+If you have multiple checks to apply, it is best practice to curate these rules in a separate Python module and have a single policy / task mutation hook that performs multiple of these custom checks and aggregates the various error messages so that a single ``AirflowClusterPolicyViolation`` can be reported in the UI (and import errors table in the database).
+
+For example, your ``airflow_local_settings.py`` might follow this pattern:
+
+.. literalinclude:: /../../tests/cluster_policies/__init__.py
+    :language: python
+    :start-after: [START example_list_of_cluster_policy_rules]
+    :end-before: [END example_list_of_cluster_policy_rules]
+
+
+Task instance mutation
+~~~~~~~~~~~~~~~~~~~~~~
+
+Here's an example of re-routing tasks that are on their second (or greater) retry to a different queue:
+
+.. literalinclude:: /../../tests/cluster_policies/__init__.py
+    :language: python
+    :start-after: [START example_task_mutation_hook]
+    :end-before: [END example_task_mutation_hook]
diff --git a/docs/apache-airflow/concepts/connections.rst b/docs/apache-airflow/concepts/connections.rst
new file mode 100644
index 0000000000000..3f1bf6b233cc2
--- /dev/null
+++ b/docs/apache-airflow/concepts/connections.rst
@@ -0,0 +1,37 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+ .. http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Connections & Hooks
+===================
+
+Airflow is often used to pull and push data into other systems, and so it has a first-class *Connection* concept for storing credentials that are used to talk to external systems.
+
+A Connection is essentially a set of parameters - such as username, password and hostname - along with the type of system that it connects to, and a unique name, called the ``conn_id``.
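+
+As an illustration, here is a short sketch of that set of parameters built up in code - the connection id, host and credentials below are made up for the example:
+
+.. code-block:: python
+
+    from airflow.models.connection import Connection
+
+    # A hypothetical Postgres connection; the host and credentials are placeholders.
+    conn = Connection(
+        conn_id="my_postgres_conn",
+        conn_type="postgres",
+        host="db.example.com",
+        login="analytics",
+        password="not-a-real-password",
+        schema="reporting",
+        port=5432,
+    )
+
+    # The same parameters can be collapsed into a single URI, which is one of the
+    # formats Airflow accepts when connections are supplied via environment variables.
+    print(conn.get_uri())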
+ +They can be managed via the UI or via the CLI; see :doc:`/howto/connection` for more information on creating, editing and managing connections. There are customizable connection storage and backend options. + +You can use Connections directly from your own code, or you can use them via Hooks. + + +Hooks +----- + +A Hook is a high-level interface to an external platform that lets you quickly and easily talk to them without having to write low-level code that hits their API or uses special libraries. They're also often the building blocks that Operators are built out of. + +They integrate with Connections to gather credentials, and many have a default ``conn_id``; for example, the :class:`~airflow.providers.postgres.hooks.postgres.PostgresHook` automatically looks for the Connection with a ``conn_id`` of ``postgres_default`` if you don't pass one in. + +You can view a :ref:`full list of airflow hooks ` in our API documentation. diff --git a/docs/apache-airflow/concepts/dags.rst b/docs/apache-airflow/concepts/dags.rst new file mode 100644 index 0000000000000..b82b55f5c1fc1 --- /dev/null +++ b/docs/apache-airflow/concepts/dags.rst @@ -0,0 +1,587 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +DAGs +==== + +A *DAG* (Directed Acyclic Graph) is the core concept of Airflow, collecting :doc:`tasks` together, organized with dependencies and relationships to say how they should run. + +Here's a basic example DAG: + +.. image:: /img/basic-dag.png + +It defines four Tasks - A, B, C, and D - and dictates the order in which they have to run, and which tasks depend on what others. It will also say how often to run the DAG - maybe "every 5 minutes starting tomorrow", or "every day since January 1st, 2020". + +The DAG itself doesn't care about *what* is happening inside the tasks; it is merely concerned with *how* to execute them - the order to run them in, how many times to retry them, if they have timeouts, and so on. + + +Declaring a DAG +--------------- + +There are three ways to declare a DAG - either you can use a context manager, +which will add the DAG to anything inside it implicitly:: + + with DAG("my_dag_name") as dag: + op = DummyOperator(task_id="task") + +Or, you can use a standard constructor, passing the dag into any +operators you use:: + + my_dag = DAG("my_dag_name") + op = DummyOperator(task_id="task", dag=my_dag) + +Or, you can use the ``@dag`` decorator to :ref:`turn a function into a DAG generator `:: + + @dag(start_date=days_ago(2)) + def generate_dag(): + op = DummyOperator(task_id="task") + + dag = generate_dag() + +DAGs are nothing without :doc:`tasks` to run, and those will usually either come in the form of either :doc:`operators`, :doc:`sensors` or :doc:`taskflow`. 
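+
+Putting these pieces together, a complete (if trivial) DAG file might look like the following sketch - the DAG id, schedule and task names here are made up for the example:
+
+.. code-block:: python
+
+    from airflow import DAG
+    from airflow.operators.dummy import DummyOperator
+    from airflow.utils.dates import days_ago
+
+    # A minimal DAG with two no-op tasks that runs once a day.
+    with DAG(
+        "example_minimal_dag",
+        start_date=days_ago(2),
+        schedule_interval="@daily",
+    ) as dag:
+        extract = DummyOperator(task_id="extract")
+        load = DummyOperator(task_id="load")
+
+        # "extract" must complete before "load"; dependencies are covered in detail below.
+        extract >> load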
+
+
+Task Dependencies
+~~~~~~~~~~~~~~~~~
+
+A Task/Operator does not usually live alone; it has dependencies on other tasks (those *upstream* of it), and other tasks depend on it (those *downstream* of it). Declaring these dependencies between tasks is what makes up the DAG structure (the *edges* of the *directed acyclic graph*).
+
+There are two main ways to declare individual task dependencies. The recommended one is to use the ``>>`` and ``<<`` operators::
+
+    first_task >> [second_task, third_task]
+    third_task << fourth_task
+
+Or, you can use the more explicit ``set_upstream`` and ``set_downstream`` methods::
+
+    first_task.set_downstream([second_task, third_task])
+    third_task.set_upstream(fourth_task)
+
+There are also shortcuts to declaring more complex dependencies. If you want to make two lists of tasks depend on all parts of each other, you can't use either of the approaches above, so you need to use ``cross_downstream``::
+
+    from airflow.models.baseoperator import cross_downstream
+
+    # Replaces
+    # [op1, op2] >> op3
+    # [op1, op2] >> op4
+    cross_downstream([op1, op2], [op3, op4])
+
+And if you want to chain together dependencies, you can use ``chain``::
+
+    from airflow.models.baseoperator import chain
+
+    # Replaces op1 >> op2 >> op3 >> op4
+    chain(op1, op2, op3, op4)
+
+    # You can also do it dynamically
+    chain(*[DummyOperator(task_id='op' + str(i)) for i in range(1, 6)])
+
+Chain can also do *pairwise* dependencies for lists of the same size (this is different from the *cross dependencies* done by ``cross_downstream``!)::
+
+    from airflow.models.baseoperator import chain
+
+    # Replaces
+    # op1 >> op2 >> op4 >> op6
+    # op1 >> op3 >> op5 >> op6
+    chain(op1, [op2, op3], [op4, op5], op6)
+
+
+.. _concepts:dag-loading:
+
+Loading DAGs
+------------
+
+Airflow loads DAGs from Python source files, which it looks for inside its configured ``DAG_FOLDER``. It will take each file, execute it, and then load any DAG objects from that file.
+
+This means you can define multiple DAGs per Python file, or even spread one very complex DAG across multiple Python files using imports.
+
+Note, though, that when Airflow comes to load DAGs from a Python file, it will only load objects at the *top level* that are a DAG instance. For example, take this DAG file::
+
+    dag_1 = DAG('this_dag_will_be_discovered')
+
+    def my_function():
+        dag_2 = DAG('but_this_dag_will_not')
+
+    my_function()
+
+While both DAG constructors get called when the file is accessed, only ``dag_1`` is at the top level (in the ``globals()``), and so only it is added to Airflow. ``dag_2`` is not loaded.
+
+.. note::
+
+    When searching for DAGs inside the ``DAG_FOLDER``, Airflow only considers Python files that contain the strings ``airflow`` and ``dag`` (case-insensitively) as an optimization.
+
+    To consider all Python files instead, disable the ``DAG_DISCOVERY_SAFE_MODE`` configuration flag.
+
+You can also provide an ``.airflowignore`` file inside your ``DAG_FOLDER``, or any of its subfolders, which describes files for the loader to ignore. It covers the directory it's in plus all subfolders underneath it, and should be one regular expression per line, with ``#`` indicating comments.
+
+
+.. _concepts:dag-run:
+
+Running DAGs
+------------
+
+DAGs will run in one of two ways:
+
+ - When they are *triggered* either manually or via the API
+ - On a defined *schedule*, which is specified as part of the DAG
+
+DAGs do not *require* a schedule, but it's very common to define one.
You define it via the ``schedule_interval`` argument, like this:: + + with DAG("my_daily_dag", schedule_interval="@daily"): + ... + +The ``schedule_interval`` argument takes any value that is a valid `Crontab `_ schedule value, so you could also do:: + + with DAG("my_daily_dag", schedule_interval="0 * * * *"): + ... + +Every time you run a DAG, you are creating a new instance of that DAG which Airflow calls a :doc:`DAG Run `. DAG Runs can run in parallel for the same DAG, and each has a defined ``execution_date``, which identifies the *logical* date and time it is running for - not the *actual* time when it was started. + +As an example of why this is useful, consider writing a DAG that processes a daily set of experimental data. It's been rewritten, and you want to run it on the previous 3 months of data - no problem, since Airflow can *backfill* the DAG and run copies of it for every day in those previous 3 months, all at once. + +Those DAG Runs will all have been started on the same actual day, but their ``execution_date`` values will cover those last 3 months, and that's what all the tasks, operators and sensors inside the DAG look at when they run. + +In much the same way a DAG instantiates into a DAG Run every time it's run, Tasks specified inside a DAG also instantiate into :ref:`Task Instances ` along with it. + + +DAG Assignment +-------------- + +Note that every single Operator/Task must be assigned to a DAG in order to run. Airflow has several ways of calculating the DAG without you passing it explicitly: + +* If you declare your Operator inside a ``with DAG`` block +* If you declare your Operator inside a ``@dag`` decorator, +* If you put your Operator upstream or downstream of a Operator that has a DAG + +Otherwise, you must pass it into each Operator with ``dag=``. + + +.. _concepts:default-arguments: + +Default Arguments +----------------- + +Often, many Operators inside a DAG need the same set of default arguments (such as their ``start_date``). Rather than having to specify this individually for every Operator, you can instead pass ``default_args`` to the DAG when you create it, and it will auto-apply them to any operator tied to it:: + + default_args = { + 'start_date': datetime(2016, 1, 1), + 'owner': 'airflow' + } + + with DAG('my_dag', default_args=default_args) as dag: + op = DummyOperator(task_id='dummy') + print(op.owner) # "airflow" + + +.. _concepts:dag-decorator: + +The DAG decorator +----------------- + +.. versionadded:: 2.0 + +As well as the more traditional ways of declaring a single DAG using a context manager or the ``DAG()`` constructor, you can also decorate a function with ``@dag`` to turn it into a DAG generator function: + +.. exampleinclude:: /../../airflow/example_dags/example_dag_decorator.py + :language: python + :start-after: [START dag_decorator_usage] + :end-before: [END dag_decorator_usage] + +As well as being a new way of making DAGs cleanly, the decorator also sets up any parameters you have in your function as DAG parameters, letting you :ref:`set those parameters when triggering the DAG `. You can then access the parameters from Python code, or from ``{{ context.params }}`` inside a :ref:`Jinja template `. + +.. note:: + + Airflow will only load DAGs that :ref:`appear in the top level ` of a DAG file. This means you cannot just declare a function with ``@dag`` - you must also call it at least once in your DAG file and assign it to a top-level object, as you can see in the example above. + + +.. 
_concepts:control-flow: + +Control Flow +------------ + +By default, a DAG will only run a Task when all the Tasks it depends on are successful. There are several ways of modifying this, however: + +* :ref:`concepts:branching`, where you can select which Task to move onto based on a condition +* :ref:`concepts:latest-only`, a special form of branching that only runs on DAGs running against the present +* :ref:`concepts:depends-on-past`, where tasks can depend on themselves *from a previous run* +* :ref:`concepts:trigger-rules`, which let you set the conditions under which a DAG will run a task. + + +.. _concepts:branching: + +Branching +~~~~~~~~~ + +You can make use of branching in order to tell the DAG *not* to run all dependent tasks, but instead to pick and choose one or more paths to go down. This is where the branching Operators come in. + +The ``BranchPythonOperator`` is much like the PythonOperator except that it expects a ``python_callable`` that returns a task_id (or list of task_ids). The task_id returned is followed, and all of the other paths are skipped. + +The task_id returned by the Python function has to reference a task directly downstream from the BranchPythonOperator task. + +.. note:: + When a Task is downstream of both the branching operator *and* downstream of one of more of the selected tasks, it will not be skipped: + + .. image:: /img/branch_note.png + + The paths of the branching task are ``branch_a``, ``join`` and ``branch_b``. Since ``join`` is a downstream task of ``branch_a``, it will be still be run, even though it was not returned as part of the branch decision. + +The ``BranchPythonOperator`` can also be used with XComs allowing branching context to dynamically decide what branch to follow based on upstream tasks. For example: + +.. code-block:: python + + def branch_func(ti): + xcom_value = int(ti.xcom_pull(task_ids='start_task')) + if xcom_value >= 5: + return 'continue_task' + else: + return 'stop_task' + + start_op = BashOperator( + task_id='start_task', + bash_command="echo 5", + xcom_push=True, + dag=dag, + ) + + branch_op = BranchPythonOperator( + task_id='branch_task', + python_callable=branch_func, + dag=dag, + ) + + continue_op = DummyOperator(task_id='continue_task', dag=dag) + stop_op = DummyOperator(task_id='stop_task', dag=dag) + + start_op >> branch_op >> [continue_op, stop_op] + +If you wish to implement your own operators with branching functionality, you can inherit from :class:`~airflow.operators.branch.BaseBranchOperator`, which behaves similarly to ``BranchPythonOperator`` but expects you to provide an implementation of the method ``choose_branch``. + +As with the callable for ``BranchPythonOperator``, this method should return the ID of a downstream task, or a list of task IDs, which will be run, and all others will be skipped:: + + class MyBranchOperator(BaseBranchOperator): + def choose_branch(self, context): + """ + Run an extra branch on the first day of the month + """ + if context['execution_date'].day == 1: + return ['daily_task_id', 'monthly_task_id'] + else: + return 'daily_task_id' + + +.. _concepts:latest-only: + +Latest Only +~~~~~~~~~~~ + +Airflow's DAG Runs are often run for a date that is not the same as the current date - for example, running one copy of a DAG for every day in the last month to backfill some data. + +There are situations, though, where you *don't* want to let some (or all) parts of a DAG run for a previous date; in this case, you can use the ``LatestOnlyOperator``. 
+ +This special Operator skips all tasks downstream of itself if you are not on the "latest" DAG run (if the wall-clock time right now is between its execution_time and the next scheduled execution_time, and it was not an externally-triggered run). + +Here's an example: + +.. exampleinclude:: /../../airflow/example_dags/example_latest_only_with_trigger.py + :language: python + :start-after: [START example] + :end-before: [END example] + +In the case of this DAG: + +* ``task1`` is directly downstream of ``latest_only`` and will be skipped for all runs except the latest. +* ``task2`` is entirely independent of ``latest_only`` and will run in all scheduled periods +* ``task3`` is downstream of ``task1`` and ``task2`` and because of the default :ref:`trigger rule ` being ``all_success`` will receive a cascaded skip from ``task1``. +* ``task4`` is downstream of ``task1`` and ``task2``, but it will not be skipped, since its ``trigger_rule`` is set to ``all_done``. + +.. image:: /img/latest_only_with_trigger.png + +.. _concepts:depends-on-past: + +Depends On Past +~~~~~~~~~~~~~~~ + +You can also say a task can only run if the *previous* run of the task in the previous DAG Run succeeded. To use this, you just need to set the ``depends_on_past`` argument on your Task to ``True``. + +Note that if you are running the DAG at the very start of its life - specifically, that the ``execution_date`` matches the ``start_date`` - then the Task will still run, as there is no previous run to depend on. + + +.. _concepts:trigger-rules: + +Trigger Rules +~~~~~~~~~~~~~ + +By default, Airflow will wait for all upstream tasks for a task to be :ref:`successful ` before it runs that task. + +However, this is just the default behaviour, and you can control it using the ``trigger_rule`` argument to a Task. The options for ``trigger_rule`` are: + +* ``all_success`` (default): All upstream tasks have succeeded +* ``all_failed``: All upstream tasks are in a ``failed`` or ``upstream_failed`` state +* ``all_done``: All upstream tasks are done with their execution +* ``one_failed``: At least one upstream task has failed (does not wait for all upstream tasks to be done) +* ``one_success``: At least one upstream task has succeeded (does not wait for all upstream tasks to be done) +* ``none_failed``: All upstream tasks have not ``failed`` or ``upstream_failed`` - that is, all upstream tasks have succeeded or been skipped +* ``none_failed_or_skipped``: All upstream tasks have not ``failed`` or ``upstream_failed``, and at least one upstream task has succeeded. +* ``none_skipped``: No upstream task is in a ``skipped`` state - that is, all upstream tasks are in a ``success``, ``failed``, or ``upstream_failed`` state +* ``dummy``: No dependencies at all, run this task at any time + +You can also combine this with the :ref:`concepts:depends-on-past` functionality if you wish. + +.. note:: + + It's important to be aware of the interaction between trigger rules and skipped tasks, especially tasks that are skipped as part of a branching operation. *You almost never want to use all_success or all_failed downstream of a branching operation*. + + Skipped tasks will cascade through trigger rules ``all_success`` and ``all_failed``, and cause them to skip as well. Consider the following DAG: + + .. 
code-block:: python + + #dags/branch_without_trigger.py + import datetime as dt + + from airflow.models import DAG + from airflow.operators.dummy import DummyOperator + from airflow.operators.python import BranchPythonOperator + + dag = DAG( + dag_id='branch_without_trigger', + schedule_interval='@once', + start_date=dt.datetime(2019, 2, 28) + ) + + run_this_first = DummyOperator(task_id='run_this_first', dag=dag) + branching = BranchPythonOperator( + task_id='branching', dag=dag, + python_callable=lambda: 'branch_a' + ) + + branch_a = DummyOperator(task_id='branch_a', dag=dag) + follow_branch_a = DummyOperator(task_id='follow_branch_a', dag=dag) + + branch_false = DummyOperator(task_id='branch_false', dag=dag) + + join = DummyOperator(task_id='join', dag=dag) + + run_this_first >> branching + branching >> branch_a >> follow_branch_a >> join + branching >> branch_false >> join + + ``join`` is downstream of ``follow_branch_a`` and ``branch_false``. The ``join`` task will show up as skipped because its ``trigger_rule`` is set to ``all_success`` by default, and the skip caused by the branching operation cascades down to skip a task marked as ``all_success``. + + .. image:: /img/branch_without_trigger.png + + By setting ``trigger_rule`` to ``none_failed_or_skipped`` in the ``join`` task, we can instead get the intended behaviour: + + .. image:: /img/branch_with_trigger.png + + +Dynamic DAGs +------------ + +Since a DAG is defined by Python code, there is no need for it to be purely declarative; you are free to use loops, functions, and more to define your DAG. + +For example, here is a DAG that uses a ``for`` loop to define some Tasks:: + + with DAG("loop_example") as dag: + + first = DummyOperator(task_id="first") + last = DummyOperator( task_id="last") + + options = ["branch_a", "branch_b", "branch_c", "branch_d"] + for option in options: + t = DummyOperator(task_id=option) + first >> t >> last + +In general, we advise you to try and keep the *topology* (the layout) of your DAG tasks relatively stable; dynamic DAGs are usually better used for dynamically loading configuration options or changing operator options. + + +DAG Visualization +----------------- + +If you want to see a visual representation of a DAG, you have two options: + +* You can load up the Airflow UI, navigate to your DAG, and select "Graph View" +* You can run ``airflow dags show``, which renders it out as an image file + +We generally recommend you use the Graph View, as it will also show you the state of all the :ref:`Task Instances ` within any DAG Run you select. + +Of course, as you develop out your DAGs they are going to get increasingly complex, so we provide a few ways to modify these DAG views to make them easier to understand. + + +.. _concepts:taskgroups: + +TaskGroups +~~~~~~~~~~ + +A TaskGroup can be used to organize tasks into hierarchical groups in Graph View. It is useful for creating repeating patterns and cutting down visual clutter. + +Unlike :ref:`concepts:subdags`, TaskGroups are purely a UI grouping concept. Tasks in TaskGroups live on the same original DAG, and honor all the DAG settings and pool configurations. + +.. image:: /img/task_group.gif + +Dependency relationships can be applied across all tasks in a TaskGroup with the ``>>`` and ``<<`` operators. 
For example, the following code puts ``task1`` and ``task2`` in TaskGroup ``group1`` and then puts both tasks upstream of ``task3``:: + + with TaskGroup("group1") as group1: + task1 = DummyOperator(task_id="task1") + task2 = DummyOperator(task_id="task2") + + task3 = DummyOperator(task_id="task3") + + group1 >> task3 + +If you want to see a more advanced use of TaskGroup, you can look at the ``example_task_group.py`` example DAG that comes with Airflow. + +.. note:: + + By default, child tasks/TaskGroups have their IDs prefixed with the group_id of their parent TaskGroup. This helps to ensure uniqueness of group_id and task_id throughout the DAG. + + To disable the prefixing, pass ``prefix_group_id=False`` when creating the TaskGroup, but note that you will now be responsible for ensuring every single task and group has a unique ID of its own. + + +.. _concepts:edge-labels: + +Edge Labels +~~~~~~~~~~~ + +As well as grouping tasks into groups, you can also label the *dependency edges* between different tasks in the Graph View - this can be especially useful for branching areas of your DAG, so you can label the conditions under which certain branches might run. + +To add labels, you can use them directly inline with the ``>>`` and ``<<`` operators: + +.. code-block:: python + + from airflow.utils.edgemodifier import Label + my_task >> Label("When empty") >> other_task + +Or, you can pass a Label object to ``set_upstream``/``set_downstream``: + +.. code-block:: python + + from airflow.utils.edgemodifier import Label + my_task.set_downstream(other_task, Label("When empty")) + +Here's an example DAG which illustrates labeling different branches: + +.. image:: /img/edge_label_example.png + +.. exampleinclude:: /../../airflow/example_dags/example_branch_labels.py + :language: python + :start-after: from airflow.utils.edgemodifier import Label + + +DAG & Task Documentation +------------------------ + +It's possible to add documentation or notes to your DAGs & task objects that are visible in the web interface ("Graph View" & "Tree View" for DAGs, "Task Instance Details" for tasks). + +There are a set of special task attributes that get rendered as rich content if defined: + +========== ================ +attribute rendered to +========== ================ +doc monospace +doc_json json +doc_yaml yaml +doc_md markdown +doc_rst reStructuredText +========== ================ + +Please note that for DAGs, ``doc_md`` is the only attribute interpreted. + +This is especially useful if your tasks are built dynamically from configuration files, as it allows you to expose the configuration that led to the related tasks in Airflow: + +.. code-block:: python + + """ + ### My great DAG + """ + + dag = DAG('my_dag', default_args=default_args) + dag.doc_md = __doc__ + + t = BashOperator("foo", dag=dag) + t.doc_md = """\ + #Title" + Here's a [url](www.airbnb.com) + """ + + +.. _concepts:subdags: + +SubDAGs +------- + +Sometimes, you will find that you are regularly adding exactly the same set of tasks to every DAG, or you want to group a lot of tasks into a single, logical unit. This is what SubDAGs are for. + +For example, here's a DAG that has a lot of parallel tasks in two sections: + +.. image:: /img/subdag_before.png + +We can combine all of the parallel ``task-*`` operators into a single SubDAG, so that the resulting DAG resembles the following: + +.. image:: /img/subdag_after.png + +Note that SubDAG operators should contain a factory method that returns a DAG object. 
This will prevent the SubDAG from being treated like a separate DAG in the main UI - remember, if Airflow sees a DAG at the top level of a Python file, it will :ref:`load it as its own DAG `. For example:
+
+.. exampleinclude:: /../../airflow/example_dags/subdags/subdag.py
+    :language: python
+    :start-after: [START subdag]
+    :end-before: [END subdag]
+
+This SubDAG can then be referenced in your main DAG file:
+
+.. exampleinclude:: /../../airflow/example_dags/example_subdag_operator.py
+    :language: python
+    :start-after: [START example_subdag_operator]
+    :end-before: [END example_subdag_operator]
+
+You can zoom into a :class:`~airflow.operators.subdag.SubDagOperator` from the graph view of the main DAG to show the tasks contained within the SubDAG:
+
+.. image:: /img/subdag_zoom.png
+
+Some other tips when using SubDAGs:
+
+- By convention, a SubDAG's ``dag_id`` should be prefixed by the name of its parent DAG and a dot (``parent.child``)
+- You should share arguments between the main DAG and the SubDAG by passing arguments to the SubDAG operator (as demonstrated above)
+- SubDAGs must have a schedule and be enabled. If the SubDAG's schedule is set to ``None`` or ``@once``, the SubDAG will succeed without having done anything.
+- Clearing a :class:`~airflow.operators.subdag.SubDagOperator` also clears the state of the tasks within it.
+- Marking success on a :class:`~airflow.operators.subdag.SubDagOperator` does not affect the state of the tasks within it.
+- Refrain from using :ref:`concepts:depends-on-past` in tasks within the SubDAG as this can be confusing.
+- You can specify an executor for the SubDAG. It is common to use the SequentialExecutor if you want to run the SubDAG in-process and effectively limit its parallelism to one. Using LocalExecutor can be problematic as it may over-subscribe your worker, running multiple tasks in a single slot.
+
+See ``airflow/example_dags`` for a demonstration.
+
+Note that :doc:`pools` are *not honored* by :class:`~airflow.operators.subdag.SubDagOperator`, and so
+resources could be consumed by SubDagOperators beyond any limits you may have set.
+
+
+Packaging DAGs
+--------------
+
+While simpler DAGs are usually only in a single Python file, it is not uncommon that more complex DAGs might be spread across multiple files and have dependencies that should be shipped with them ("vendored").
+
+You can either do this all inside of the ``DAG_FOLDER``, with a standard filesystem layout, or you can package the DAG and all of its Python files up as a single zip file. For instance, you could ship two DAGs along with a dependency they need as a zip file with the following contents::
+
+    my_dag1.py
+    my_dag2.py
+    package1/__init__.py
+    package1/functions.py
+
+Note that packaged DAGs come with some caveats:
+
+* They cannot be used if you have pickling enabled for serialization
+* They cannot contain compiled libraries (e.g. ``libz.so``), only pure Python
+* They will be inserted into Python's ``sys.path`` and importable by any other code in the Airflow process, so ensure the package names don't clash with other packages already installed on your system.
+
+In general, if you have a complex set of compiled dependencies and modules, you are likely better off using the Python ``virtualenv`` system and installing the necessary packages on your target systems with ``pip``.
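+
+If you do go the packaged route, the DAG files inside the archive can import the vendored modules as normal, since the zip itself is placed on ``sys.path``. A quick sketch, assuming the layout above and a hypothetical ``some_helper`` function inside ``package1/functions.py``:
+
+.. code-block:: python
+
+    # my_dag1.py (at the root of the zip file)
+    from airflow import DAG
+    from airflow.operators.python import PythonOperator
+    from airflow.utils.dates import days_ago
+
+    # Resolved from the zip file, which Airflow adds to sys.path
+    from package1.functions import some_helper
+
+    with DAG("my_dag1", start_date=days_ago(1), schedule_interval=None) as dag:
+        run_helper = PythonOperator(task_id="run_helper", python_callable=some_helper)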
diff --git a/docs/apache-airflow/concepts/index.rst b/docs/apache-airflow/concepts/index.rst new file mode 100644 index 0000000000000..c635f872bdb04 --- /dev/null +++ b/docs/apache-airflow/concepts/index.rst @@ -0,0 +1,54 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Concepts +======== + +Here you can find detailed documentation about each one of Airflow's core concepts and how to use them, as well as a high-level :doc:`architectural overview `. + +**Architecture** + +.. toctree:: + :maxdepth: 2 + + overview + + +**Workloads** + +.. toctree:: + :maxdepth: 2 + + dags + tasks + operators + sensors + smart-sensors + taskflow + ../executor/index + scheduler + pools + cluster-policies + +**Communication** + +.. toctree:: + :maxdepth: 2 + + xcoms + variables + connections diff --git a/docs/apache-airflow/concepts/operators.rst b/docs/apache-airflow/concepts/operators.rst new file mode 100644 index 0000000000000..f7eb6cf293deb --- /dev/null +++ b/docs/apache-airflow/concepts/operators.rst @@ -0,0 +1,143 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+ +Operators +========= + +An Operator is conceptually a template for a predefined :doc:`Task `, that you can just define declaratively inside your DAG:: + + with DAG("my-dag") as dag: + ping = SimpleHttpOperator(endpoint="http://example.com/update/") + email = EmailOperator(to="admin@example.com", subject="Update complete") + + ping >> email + +Airflow has a very extensive set of operators available, with some built-in to the core or pre-installed providers, like: + +- :class:`~airflow.operators.bash.BashOperator` - executes a bash command +- :class:`~airflow.operators.python.PythonOperator` - calls an arbitrary Python function +- :class:`~airflow.operators.email.EmailOperator` - sends an email +- :class:`~airflow.providers.http.operators.http.SimpleHttpOperator` - sends an HTTP request +- :class:`~airflow.providers.sqlite.operators.sqlite.SqliteOperator` - SQLite DB operator + +If the operator you need isn't installed with Airflow by default, you can probably find it as part of our huge set of community :doc:`apache-airflow-providers:index`. Some popular operators from here include: + +- :class:`~airflow.providers.mysql.operators.mysql.MySqlOperator` +- :class:`~airflow.providers.postgres.operators.postgres.PostgresOperator` +- :class:`~airflow.providers.microsoft.mssql.operators.mssql.MsSqlOperator` +- :class:`~airflow.providers.oracle.operators.oracle.OracleOperator` +- :class:`~airflow.providers.jdbc.operators.jdbc.JdbcOperator` +- :class:`~airflow.providers.docker.operators.docker.DockerOperator` +- :class:`~airflow.providers.apache.hive.operators.hive.HiveOperator` +- :class:`~airflow.providers.amazon.aws.operators.s3_file_transform.S3FileTransformOperator` +- :class:`~airflow.providers.mysql.transfers.presto_to_mysql.PrestoToMySqlOperator` +- :class:`~airflow.providers.slack.operators.slack.SlackAPIOperator` + +But there are many, many more - you can see the list of those in our :doc:`apache-airflow-providers:index` documentation. + +.. note:: + + Inside Airflow's code, we often mix the concepts of :doc:`tasks` and Operators, and they are mostly interchangeable. However, when we talk about a *Task*, we mean the generic "unit of execution" of a DAG; when we talk about an *Operator*, we mean a reusable, pre-made Task template whose logic is all done for you and that just needs some arguments. + + +.. _concepts:jinja-templating: + +Jinja Templating +---------------- +Airflow leverages the power of `Jinja Templating `_ and this can be a powerful tool to use in combination with :doc:`macros `. + +For example, say you want to pass the execution date as an environment variable to a Bash script using the ``BashOperator``: + +.. code-block:: python + + # The execution date as YYYY-MM-DD + date = "{{ ds }}" + t = BashOperator( + task_id='test_env', + bash_command='/tmp/test.sh ', + dag=dag, + env={'EXECUTION_DATE': date}) + +Here, ``{{ ds }}`` is a macro, and because the ``env`` parameter of the ``BashOperator`` is templated with Jinja, the execution date will be available as an environment variable named ``EXECUTION_DATE`` in your Bash script. + +You can use Jinja templating with every parameter that is marked as "templated" in the documentation. Template substitution occurs just before the pre_execute function of your operator is called. 
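+
+If you are not sure whether a parameter is templated, you can also inspect the operator's ``template_fields`` attribute, which lists its templated parameters - a small sketch (the exact contents can vary between operators and Airflow versions):
+
+.. code-block:: python
+
+    from airflow.operators.bash import BashOperator
+
+    # The templated parameters of an operator are listed in its `template_fields`.
+    print(BashOperator.template_fields)  # e.g. ('bash_command', 'env')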
+
+You can also use Jinja templating with nested fields, as long as these nested fields are marked as templated in the structure they belong to: fields registered in the ``template_fields`` property will be submitted to template substitution, like the ``path`` field in the example below:
+
+.. code-block:: python
+
+    class MyDataReader:
+        template_fields = ['path']
+
+        def __init__(self, my_path):
+            self.path = my_path
+
+        # [additional code here...]
+
+    t = PythonOperator(
+        task_id='transform_data',
+        python_callable=transform_data,
+        op_args=[
+            MyDataReader('/tmp/{{ ds }}/my_file')
+        ],
+        dag=dag,
+    )
+
+.. note:: The ``template_fields`` property can equally be a class variable or an instance variable.
+
+Deep nested fields can also be substituted, as long as all intermediate fields are marked as template fields:
+
+.. code-block:: python
+
+    class MyDataTransformer:
+        template_fields = ['reader']
+
+        def __init__(self, my_reader):
+            self.reader = my_reader
+
+        # [additional code here...]
+
+    class MyDataReader:
+        template_fields = ['path']
+
+        def __init__(self, my_path):
+            self.path = my_path
+
+        # [additional code here...]
+
+    t = PythonOperator(
+        task_id='transform_data',
+        python_callable=transform_data,
+        op_args=[
+            MyDataTransformer(MyDataReader('/tmp/{{ ds }}/my_file'))
+        ],
+        dag=dag,
+    )
+
+You can pass custom options to the Jinja ``Environment`` when creating your DAG. One common usage is to keep Jinja from dropping a trailing newline from a template string:
+
+.. code-block:: python
+
+    my_dag = DAG(
+        dag_id='my-dag',
+        jinja_environment_kwargs={
+            'keep_trailing_newline': True,
+            # some other jinja2 Environment options here
+        },
+    )
+
+See the `Jinja documentation `_ to find all available options.
diff --git a/docs/apache-airflow/concepts/overview.rst b/docs/apache-airflow/concepts/overview.rst
new file mode 100644
index 0000000000000..8b0a050fc3546
--- /dev/null
+++ b/docs/apache-airflow/concepts/overview.rst
@@ -0,0 +1,96 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+ .. http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Architecture Overview
+=====================
+
+Airflow is a platform that lets you build and run *workflows*. A workflow is represented as a :doc:`DAG ` (a Directed Acyclic Graph), and contains individual pieces of work called :doc:`tasks`, arranged with dependencies and data flows taken into account.
+
+.. image:: /img/edge_label_example.png
+    :alt: An example Airflow DAG, rendered in Graph View
+
+A DAG specifies the dependencies between Tasks, and the order in which to execute them and run retries; the Tasks themselves describe what to do, be it fetching data, running analysis, triggering other systems, or more.
+ +An Airflow installation generally consists of the following components: + +* A :doc:`scheduler `, which handles both triggering scheduled workflows, and submitting :doc:`tasks` to the executor to run. + +* An :doc:`executor `, which handles running tasks. In the default Airflow installation, this runs everything *inside* the scheduler, but most production-suitable executors actually push task execution out to *workers*. + +* A *webserver*, which presents a handy user interface to inspect, trigger and debug the behaviour of DAGs and tasks. + +* A folder of *DAG files*, read by the scheduler and executor (and any workers the executor has) + +* A *metadata database*, used by the scheduler, executor and webserver to store state. + +.. image:: /img/arch-diag-basic.png + +Most executors will generally also introduce other components to let them talk to their workers - like a task queue - but you can still think of the executor and its workers as a single logical component in Airflow overall, handling the actual task execution. + +Airflow itself is agnostic to what you're running - it will happily orchestrate and run anything, either with high-level support from one of our providers, or directly as a command using the shell or Python :doc:`operators`. + +Workloads +--------- + +A DAG runs though a series of :doc:`tasks`, and there are three common types of task you will see: + +* :doc:`operators`, predefined tasks that you can string together quickly to build most parts of your DAGs. + +* :doc:`sensors`, a special subclass of Operators which are entirely about waiting for an external event to happen. + +* A :doc:`taskflow`-decorated ``@task``, which is a custom Python function packaged up as a Task. + +Internally, these are all actually subclasses of Airflow's ``BaseOperator``, and the concepts of Task and Operator are somewhat interchangeable, but it's useful to think of them as separate concepts - essentially, Operators and Sensors are *templates*, and when you call one in a DAG file, you're making a Task. + + +Control Flow +------------ + +:doc:`dags` are designed to be run many times, and multiple runs of them can happen in parallel. DAGs are parameterized, always including a date they are "running for" (the ``execution_date``), but with other optional parameters as well. + +:doc:`tasks` have dependencies declared on each other. You'll see this in a DAG either using the ``>>`` and ``<<`` operators:: + + first_task >> [second_task, third_task] + third_task << fourth_task + +Or, with the ``set_upstream`` and ``set_downstream`` methods:: + + first_task.set_downstream([second_task, third_task]) + third_task.set_upstream(fourth_task) + +These dependencies are what make up the "edges" of the graph, and how Airflow works out which order to run your tasks in. By default, a task will wait for all of its upstream tasks to succeed before it runs, but this can be customized using features like :ref:`Branching `, :ref:`LatestOnly `, and :ref:`Trigger Rules `. + +To pass data between tasks you have two options: + +* :doc:`xcoms` ("Cross-communications"), a system where you can have tasks push and pull small bits of metadata. + +* Uploading and downloading large files from a storage service (either one you run, or part of a public cloud) + +Airflow sends out Tasks to run on Workers as space becomes available, so there's no guarantee all the tasks in your DAG will run on the same worker or the same machine. 
+ +As you build out your DAGs, they are likely to get very complex, so Airflow provides several mechanisms for making this more sustainable - :ref:`SubDAGs ` let you make "reusable" DAGs you can embed into other ones, and :ref:`concepts:taskgroups` let you visually group tasks in the UI. + +There are also features for letting you easily pre-configure access to a central resource, like a datastore, in the form of :doc:`connections`, and for limiting concurrency, via :doc:`pools`. + +User interface +-------------- + +Airflow comes with a user interface that lets you see what DAGs and their tasks are doing, trigger runs of DAGs, view logs, and do some limited debugging and resolution of problems with your DAGs. + +.. image:: /img/dags.png + +It's generally the best way to see the status of your Airflow installation as a whole, as well as diving into individual DAGs to see their layout, the status of each task, and the logs from each task. diff --git a/docs/apache-airflow/concepts/pools.rst b/docs/apache-airflow/concepts/pools.rst new file mode 100644 index 0000000000000..482f82b78c6f2 --- /dev/null +++ b/docs/apache-airflow/concepts/pools.rst @@ -0,0 +1,46 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Pools +===== + +Some systems can get overwhelmed when too many processes hit them at the same time. Airflow pools can be used to **limit the execution parallelism** on arbitrary sets of tasks. The list of pools is managed in the UI (``Menu -> Admin -> Pools``) by giving the pools a name and assigning it a number of worker slots. + +Tasks can then be associated with one of the existing pools by using the ``pool`` parameter when creating tasks: + +.. code-block:: python + + aggregate_db_message_job = BashOperator( + task_id='aggregate_db_message_job', + execution_timeout=timedelta(hours=3), + pool='ep_data_pipeline_db_msg_agg', + bash_command=aggregate_db_message_job_cmd, + dag=dag, + ) + aggregate_db_message_job.set_upstream(wait_for_empty_queue) + +The ``pool`` parameter can be used in conjunction with the ``priority_weight`` parameter to define priorities in the queue, and which tasks get executed first as slots open up in the pool. + +The default ``priority_weight`` is ``1``, and can be bumped to any number. When sorting the queue to evaluate which task should be executed next, we use the ``priority_weight``, summed up with all of the ``priority_weight`` values from tasks downstream from this task; the highest summed value wins. Thus, you can bump a specific important task, and the whole path to that task gets prioritized accordingly. + +Tasks will be scheduled as usual while the slots fill up. Once capacity is reached, runnable tasks get queued and their state will show as such in the UI. 
As slots free up, queued tasks start running based on the ``priority_weight`` (of the task and its descendants). + +Note that if tasks are not given a pool, they are assigned to a default pool ``default_pool``. ``default_pool`` is initialized with 128 slots and can be modified through the UI or CLI (but cannot be removed). + +.. warning:: + + Pools and SubDAGs do not interact as you might first expect. SubDAGs will *not* honor any pool you set on them at the top level; pools must be set on the tasks *inside* the SubDAG directly. diff --git a/docs/apache-airflow/scheduler.rst b/docs/apache-airflow/concepts/scheduler.rst similarity index 99% rename from docs/apache-airflow/scheduler.rst rename to docs/apache-airflow/concepts/scheduler.rst index 54c8f66cb7644..9febac3c2feba 100644 --- a/docs/apache-airflow/scheduler.rst +++ b/docs/apache-airflow/concepts/scheduler.rst @@ -56,7 +56,7 @@ In the UI, it appears as if Airflow is running your tasks a day **late** **Let’s Repeat That**, the scheduler runs your job one ``schedule_interval`` AFTER the start date, at the END of the period. - You should refer to :doc:`dag-run` for details on scheduling a DAG. + You should refer to :doc:`/dag-run` for details on scheduling a DAG. Triggering DAG with Future Date ------------------------------- diff --git a/docs/apache-airflow/concepts/sensors.rst b/docs/apache-airflow/concepts/sensors.rst new file mode 100644 index 0000000000000..c91a397af88eb --- /dev/null +++ b/docs/apache-airflow/concepts/sensors.rst @@ -0,0 +1,33 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Sensors +======= + +Sensors are a special type of :doc:`Operator ` that are designed to do exactly one thing - wait for something to occur. It can be time-based, or waiting for a file, or an external event, but all they do is wait until something happens, and then *succeed* so their downstream tasks can run. + +Because they are primarily idle, Sensors have three different modes of running so you can be a bit more efficient about using them: + +* ``poke`` (default): The Sensor takes up a worker slot for its entire runtime +* ``reschedule``: The Sensor takes up a worker slot only when it is checking, and sleeps for a set duration between checks +* ``smart sensor``: There is a single centralized version of this Sensor that batches all executions of it + +The ``poke`` and ``reschedule`` modes can be configured directly when you instantiate the sensor; generally, the trade-off between them is latency. Something that is checking every second should be in ``poke`` mode, while something that is checking every minute should be in ``reschedule`` mode. + +Smart Sensors take a bit more setup; for more information on them, see :doc:`smart-sensors`. 
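+
+As a concrete illustration of the ``poke`` vs ``reschedule`` trade-off, here is a sketch of a sensor that waits for a (made up) file, checking once a minute and handing its worker slot back between checks:
+
+.. code-block:: python
+
+    from airflow.sensors.filesystem import FileSensor
+
+    # Hypothetical example: wait up to 12 hours for a file to appear.
+    wait_for_data = FileSensor(
+        task_id="wait_for_data",
+        filepath="/tmp/data_ready.csv",
+        poke_interval=60,            # check once a minute
+        timeout=60 * 60 * 12,        # give up after 12 hours
+        mode="reschedule",           # free the worker slot between checks
+    )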
+ +Much like Operators, Airflow has a large set of pre-built Sensors you can use, both in core Airflow as well as via our *providers* system. diff --git a/docs/apache-airflow/smart-sensor.rst b/docs/apache-airflow/concepts/smart-sensors.rst similarity index 93% rename from docs/apache-airflow/smart-sensor.rst rename to docs/apache-airflow/concepts/smart-sensors.rst index 2944ece99963c..8a40859239342 100644 --- a/docs/apache-airflow/smart-sensor.rst +++ b/docs/apache-airflow/concepts/smart-sensors.rst @@ -18,19 +18,19 @@ -Smart Sensor -============ +Smart Sensors +============= .. warning:: This is an **early-access** feature and might change in incompatible ways in future Airflow versions. - However this feature can be considered bug-free, and Airbnb has been using this feature in Production + However this feature can be considered bug-free, and Airbnb has been using this feature in production since early 2020 and has significantly reduced their costs for heavy use of sensors. -The smart sensor is a service (run by a builtin DAG) which greatly reduces airflow’s infrastructure -cost by consolidating some of the airflow long running light weight tasks. +The smart sensor is a service (run by a builtin DAG) which greatly reduces Airflow’s infrastructure +cost by consolidating multiple instances of small, light-weight Sensors into a single process. -.. image:: img/smart_sensor_architecture.png +.. image:: /img/smart_sensor_architecture.png Instead of using one process for each task, the main idea of the smart sensor service is to improve the efficiency of these long running tasks by using centralized processes to execute those tasks in batches. @@ -41,7 +41,7 @@ tasks in batches. In this way, we only need a handful of running processes. -.. image:: img/smart_sensor_single_task_execute_flow.png +.. image:: /img/smart_sensor_single_task_execute_flow.png The smart sensor service is supported in a new mode called “smart sensor mode”. In smart sensor mode, instead of holding a long running process for each sensor and poking periodically, a sensor will only diff --git a/docs/apache-airflow/concepts/taskflow.rst b/docs/apache-airflow/concepts/taskflow.rst new file mode 100644 index 0000000000000..cb1487d93a4ab --- /dev/null +++ b/docs/apache-airflow/concepts/taskflow.rst @@ -0,0 +1,72 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +TaskFlow +======== + +.. versionadded:: 2.0 + +If you write most of your DAGs using plain Python code rather than Operators, then the TaskFlow API will make it much easier to author clean DAGs without extra boilerplate, all using the ``@task`` decorator. 
+ +TaskFlow takes care of moving inputs and outputs between your Tasks using XComs for you, as well as automatically calculating dependencies - when you call a TaskFlow function in your DAG file, rather than executing it, you will get an object representing the XCom for the result (an ``XComArg``), that you can then use as inputs to downstream tasks or operators. For example:: + + from airflow.decorators import task + from airflow.operators.email import EmailOperator + + @task + def get_ip(): + return my_ip_service.get_main_ip() + + @task + def compose_email(external_ip): + return { + 'subject': f'Server connected from {external_ip}', + 'body': f'Your server executing Airflow is connected from the external IP {external_ip}
' + } + + email_info = compose_email(get_ip()) + + EmailOperator( + task_id='send_email', + to='example@example.com', + subject=email_info['subject'], + html_content=email_info['body'] + ) + +Here, there are three tasks - ``get_ip``, ``compose_email``, and ``send_email``. + +The first two are declared using TaskFlow, and automatically pass the return value of ``get_ip`` into ``compose_email``, not only linking the XCom across, but automatically declaring that ``compose_email`` is *downstream* of ``get_ip``. + +``send_email`` is a more traditional Operator, but even it can use the return value of ``compose_email`` to set its parameters, and again, automatically work out that it must be *downstream* of ``compose_email``. + +You can also use a plain value or variable to call a TaskFlow function - for example, this will work as you expect (but, of course, won't run the code inside the task until the DAG is executed - the ``name`` value is persisted as a task parameter until that time):: + + @task + def hello_name(name: str): + print(f'Hello {name}!') + + hello_name('Airflow users') + +If you want to learn more about using TaskFlow, you should consult :doc:`the TaskFlow tutorial `. + + +History +------- + +The TaskFlow API is new as of Airflow 2.0, and you are likely to encounter DAGs written for previous versions of Airflow that instead use ``PythonOperator`` to achieve similar goals, albeit with a lot more code. + +More context around the addition and design of the TaskFlow API can be found as part of its Airflow Improvement Proposal +`AIP-31: "Taskflow API" for clearer/simpler DAG definition `_ diff --git a/docs/apache-airflow/concepts/tasks.rst b/docs/apache-airflow/concepts/tasks.rst new file mode 100644 index 0000000000000..3988368a267e4 --- /dev/null +++ b/docs/apache-airflow/concepts/tasks.rst @@ -0,0 +1,167 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Tasks +===== + +A Task is the basic unit of execution in Airflow. Tasks are arranged into :doc:`dags`, and then have upstream and downstream dependencies set between them into order to express the order they should run in. + +There are three basic kinds of Task: + +* :doc:`operators`, predefined task templates that you can string together quickly to build most parts of your DAGs. + +* :doc:`sensors`, a special subclass of Operators which are entirely about waiting for an external event to happen. + +* A :doc:`taskflow`-decorated ``@task``, which is a custom Python function packaged up as a Task. + +Internally, these are all actually subclasses of Airflow's ``BaseOperator``, and the concepts of Task and Operator are somewhat interchangeable, but it's useful to think of them as separate concepts - essentially, Operators and Sensors are *templates*, and when you call one in a DAG file, you're making a Task. 
+ + +Relationships +------------- + +The key part of using Tasks is defining how they relate to each other - their *dependencies*, or as we say in Airflow, their *upstream* and *downstream* tasks. You declare your Tasks first, and then you declare their dependencies second. + +There are two ways of declaring dependencies - using the ``>>`` and ``<<`` (bitshift) operators:: + + first_task >> second_task >> [third_task, fourth_task] + +Or the more explicit ``set_upstream`` and ``set_downstream`` methods:: + + first_task.set_downstream(second_task) + third_task.set_upstream(second_task) + +These both do exactly the same thing, but in general we recommend you use the bitshift operators, as they are easier to read in most cases. + +By default, a Task will run when all of its upstream (parent) tasks have succeeded, but there are many ways of modifying this behaviour to add branching, only wait for some upstream tasks, or change behaviour based on where the current run is in history. For more, see :ref:`concepts:control-flow`. + +Tasks don't pass information to each other by default, and run entirely independently. If you want to pass information from one Task to another, you should use :doc:`xcoms`. + + +.. _concepts:task-instances: + +Task Instances +-------------- + +Much in the same way that a DAG is instantiated into a :ref:`DAG Run ` each time it runs, the tasks under a DAG are instantiated into *Task Instances*. + +An instance of a Task is a specific run of that task for a given DAG (and thus for a given ``execution_date``). They are also the representation of a Task that has *state*, representing what stage of the lifecycle it is in. + +.. _concepts:task-states: + +The possible states for a Task Instance are: + +* ``none``: The Task has not yet been queued for execution (its dependencies are not yet met) +* ``scheduled``: The scheduler has determined the Task's dependencies are met and it should run +* ``queued``: The task has been assigned to an Executor and is awaiting a worker +* ``running``: The task is running on a worker (or on a local/synchronous executor) +* ``success``: The task finished running without errors +* ``failed``: The task had an error during execution and failed to run +* ``skipped``: The task was skipped due to branching, LatestOnly, or similar. +* ``upstream_failed``: An upstream task failed and the :ref:`Trigger Rule ` says we needed it +* ``up_for_retry``: The task failed, but has retry attempts left and will be rescheduled. +* ``up_for_reschedule``: The task is a :doc:`Sensor ` that is in ``reschedule`` mode +* ``sensing``: The task is a :doc:`Smart Sensor ` +* ``removed``: The task has vanished from the DAG since the run started + +.. image:: /img/task_lifecycle_diagram.png + +Ideally, a task should flow from ``none``, to ``scheduled``, to ``queued``, to ``running``, and finally to ``success``. + +When any custom Task (Operator) is running, it will get a copy of the task instance passed to it; as well as being able to inspect task metadata, it also contains methods for things like :doc:`xcoms`. + + +Relationship Terminology +~~~~~~~~~~~~~~~~~~~~~~~~ + +For any given Task Instance, there are two types of relationships it has with other instances. + +Firstly, it can have *upstream* and *downstream* tasks:: + + task1 >> task2 >> task3 + +When a DAG runs, it will create instances for each of these tasks that are upstream/downstream of each other, but which all have the same ``execution_date``. 
+ +There may also be instances of the *same task*, but for different values of ``execution_date`` - from other runs of the same DAG. We call these *previous* and *next* - it is a different relationship to *upstream* and *downstream*! + +.. note:: + + Some older Airflow documentation may still use "previous" to mean "upstream". If you find an occurrence of this, please help us fix it! + + +.. _concepts:timeouts: + +Timeouts +-------- + +If you want a task to have a maximum runtime, set its ``execution_timeout`` attribute to a ``datetime.timedelta`` value that is the maximum permissible runtime. If it runs longer than this, Airflow will kick in and fail the task with a timeout exception. + +If you merely want to be notified if a task runs over but still let it run to completion, you want :ref:`concepts:slas` instead. + + +.. _concepts:slas: + +SLAs +---- + +An SLA, or a Service Level Agreement, is an expectation for the maximum time a Task should take. If a task takes longer than this to run, then it is visible in the "SLA Misses" part of the user interface, as well as going out in an email of all tasks that missed their SLA. + +Tasks over their SLA are not cancelled, though - they are allowed to run to completion. If you want to cancel a task after a certain runtime is reached, you want :ref:`concepts:timeouts` instead. + +To set an SLA for a task, pass a ``datetime.timedelta`` object to the Task/Operator's ``sla`` parameter. You can also supply an ``sla_miss_callback`` that will be called when the SLA is missed if you want to run your own logic. + +If you want to disable SLA checking entirely, you can set ``check_slas = False`` in Airflow's ``[core]`` configuration. + +To read more about configuring the emails, see :doc:`/howto/email-config`. + + +Special Exceptions +------------------ + +If you want to control your task's state from within custom Task/Operator code, Airflow provides two special exceptions you can raise: + +* ``AirflowSkipException`` will mark the current task as skipped +* ``AirflowFailException`` will mark the current task as failed *ignoring any remaining retry attempts* + +These can be useful if your code has extra knowledge about its environment and wants to fail/skip faster - e.g., skipping when it knows there's no data available, or fast-failing when it detects its API key is invalid (as that will not be fixed by a retry). + + +Zombie/Undead Tasks +------------------- + +No system runs perfectly, and task instances are expected to die once in a while. Airflow detects two kinds of task/process mismatch: + +* *Zombie tasks* are tasks that are supposed to be running but suddenly died (e.g. their process was killed, or the machine died). Airflow will find these periodically, clean them up, and either fail or retry the task depending on its settings. + +* *Undead tasks* are tasks that are *not* supposed to be running but are, often caused when you manually edit Task Instances via the UI. Airflow will find them periodically and terminate them. + + +Executor Configuration +---------------------- + +Some :doc:`Executors ` allow optional per-task configuration - such as the ``KubernetesExecutor``, which lets you set an image to run the task on. + +This is achieved via the ``executor_config`` argument to a Task or Operator.
Here's an example of setting the Docker image for a task that will run on the ``KubernetesExecutor``:: + + MyOperator(..., + executor_config={ + "KubernetesExecutor": + {"image": "myCustomDockerImage"} + } + ) + +The settings you can pass into ``executor_config`` vary by executor, so read the :doc:`individual executor documentation ` in order to see what you can set. diff --git a/docs/apache-airflow/concepts/variables.rst b/docs/apache-airflow/concepts/variables.rst new file mode 100644 index 0000000000000..c05d900b5b920 --- /dev/null +++ b/docs/apache-airflow/concepts/variables.rst @@ -0,0 +1,48 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Variables +========= + +Variables are Airflow's runtime configuration concept - a general key/value store that is global and can be queried from your tasks, and easily set via Airflow's user interface, or bulk-uploaded as a JSON file. + +To use them, just import and call ``get`` on the Variable model:: + + from airflow.models import Variable + + # Normal call style + foo = Variable.get("foo") + + # Auto-deserializes a JSON value + bar = Variable.get("bar", deserialize_json=True) + + # Returns the value of default_var (None) if the variable is not set + baz = Variable.get("baz", default_var=None) + +You can also use them from :ref:`templates `:: + + # Raw value + echo {{ var.value. }} + + # Auto-deserialize JSON value + echo {{ var.json. }} + +Variables are **global**, and should only be used for overall configuration that covers the entire installation; to pass data from one Task/Operator to another, you should use :doc:`xcoms` instead. + +We also recommend that you try to keep most of your settings and configuration in your DAG files, so it can be versioned using source control; Variables are really only for values that are truly runtime-dependent. + +For more information on setting and managing variables, see :doc:`/howto/variable`. diff --git a/docs/apache-airflow/concepts/xcoms.rst b/docs/apache-airflow/concepts/xcoms.rst new file mode 100644 index 0000000000000..9bd20dbaef533 --- /dev/null +++ b/docs/apache-airflow/concepts/xcoms.rst @@ -0,0 +1,49 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +.. _concepts:xcom: + +XComs +===== + +XComs (short for "cross-communications") are a mechanism that let :doc:`tasks` talk to each other, as by default Tasks are entirely isolated and may be running on entirely different machines. + +An XCom is identified by a ``key`` (essentially its name), as well as the ``task_id`` and ``dag_id`` it came from. They can have any (serializable) value, but they are only designed for small amounts of data; do not use them to pass around large values, like dataframes. + +XComs are explicitly "pushed" and "pulled" to/from their storage using the ``xcom_push`` and ``xcom_pull`` methods on Task Instances. Many operators will auto-push their results into an XCom key called ``return_value`` if the ``do_xcom_push`` argument is set to ``True`` (as it is by default), and ``@task`` functions do this as well. + +``xcom_pull`` defaults to using this key if no key is passed to it, meaning it's possible to write code like this:: + + # Pulls the return_value XCOM from "pushing_task" + value = task_instance.xcom_pull(task_ids='pushing_task') + +You can also use XComs in :ref:`templates `:: + + SELECT * FROM {{ task_instance.xcom_pull(task_ids='foo', key='table_name') }} + +XComs are a relative of :doc:`variables`, with the main difference being that XComs are per-task-instance and designed for communication within a DAG run, while Variables are global and designed for overall configuration and value sharing. + + +Custom Backends +--------------- + +The XCom system has interchangeable backends, and you can set which backend is being used via the ``xcom_backend`` configuration option. + +If you want to implement your own backend, you should subclass :class:`~airflow.models.xcom.BaseXCom`, and override the ``serialize_value`` and ``deserialize_value`` methods. + +There is also an ``orm_deserialize_value`` method that is called whenever the XCom objects are rendered for UI or reporting purposes; if you have large or expensive-to-retrieve values in your XComs, you should override this method to avoid calling that code (and instead return a lighter, incomplete representation) so the UI remains responsive. diff --git a/docs/apache-airflow/executor/index.rst b/docs/apache-airflow/executor/index.rst index 6dcf71ac234d5..831b2f2e8751c 100644 --- a/docs/apache-airflow/executor/index.rst +++ b/docs/apache-airflow/executor/index.rst @@ -18,34 +18,62 @@ Executor ======== -Executors are the mechanism by which task instances get run. +Executors are the mechanism by which :doc:`task instances ` get run. They have a common API and are "pluggable", meaning you can swap executors based on your installation needs. -Airflow has support for various executors. Current used is determined by the ``executor`` option in the ``[core]`` -section of the configuration file. This option should contain the name executor e.g. ``KubernetesExecutor`` -if it is a core executor. If it is to load your own executor, then you should specify the -full path to the module e.g. ``my_acme_company.executors.MyCustomExecutor``. +Airflow can only have one executor configured at a time; this is set by the ``executor`` option in the ``[core]`` +section of :doc:`the configuration file
`. + +Built-in executors are referred to by name, for example: + +.. code-block:: ini + + [core] + executor = KubernetesExecutor + +You can also write your own custom executors, and refer to them by their full path: + +.. code-block:: ini + + [core] + executor = my_company.executors.MyCustomExecutor .. note:: - For more information on setting the configuration, see :doc:`../howto/set-config`. + For more information on Airflow's configuration, see :doc:`/howto/set-config`. -If you want to check which executor is currently set, you can use ``airflow config get-value core executor`` command as in -the example below. +If you want to check which executor is currently set, you can use the ``airflow config get-value core executor`` command: .. code-block:: bash $ airflow config get-value core executor SequentialExecutor -Supported Backends -^^^^^^^^^^^^^^^^^^ + +Executor Types +-------------- + +There are two types of executor - those that run tasks *locally* (inside the ``scheduler`` process), and those that run their tasks *remotely* (usually via a pool of *workers*). Airflow comes configured with the ``SequentialExecutor`` by default, which is a local executor, and the safest option for execution, but we *strongly recommend* you change this to ``LocalExecutor`` for small, single-machine installations, or one of the remote executors for a multi-machine/cloud installation. + + +**Local Executors** .. toctree:: :maxdepth: 1 - sequential debug local - dask + sequential + +**Remote Executors** + +.. toctree:: + :maxdepth: 1 + celery - kubernetes celery_kubernetes + dask + kubernetes + + +.. note:: + + Something that often confuses new users of Airflow is that they don't need to run a separate ``executor`` process. This is because the executor's logic runs *inside* the ``scheduler`` process - if you're running a scheduler, you're running the executor. diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst index e5cdfd2050b6e..b6bb2cff3287a 100644 --- a/docs/apache-airflow/faq.rst +++ b/docs/apache-airflow/faq.rst @@ -85,7 +85,7 @@ sure you fully understand how it proceeds. How do I trigger tasks based on another task's failure? ------------------------------------------------------- -Check out the :ref:`concepts/trigger_rule`. +You can achieve this with :ref:`concepts:trigger-rules`. What's the deal with ``start_date``? ------------------------------------ diff --git a/docs/apache-airflow/howto/connection.rst b/docs/apache-airflow/howto/connection.rst index 2d15324278531..bb7450457e4cf 100644 --- a/docs/apache-airflow/howto/connection.rst +++ b/docs/apache-airflow/howto/connection.rst @@ -28,7 +28,7 @@ will author will reference the 'conn_id' of the Connection objects. Connections can be created and managed using either the UI or environment variables. -See the :ref:`Connections Concepts ` documentation for +See the :doc:`Connections Concepts ` documentation for more information. Creating a Connection with the UI diff --git a/docs/apache-airflow/howto/custom-operator.rst b/docs/apache-airflow/howto/custom-operator.rst index a804f6e0f3d11..bca1fc76dbd78 100644 --- a/docs/apache-airflow/howto/custom-operator.rst +++ b/docs/apache-airflow/howto/custom-operator.rst @@ -29,7 +29,7 @@ There are two methods that you need to override in a derived class: * Constructor - Define the parameters required for the operator. You only need to specify the arguments specific to your operator. Use ``@apply_defaults`` decorator function to fill unspecified arguments with ``default_args``. 
You can specify the ``default_args`` - in the dag file. See :ref:`Default args ` for more details. + in the dag file. See :ref:`Default args ` for more details. * Execute - The code to execute when the runner calls the operator. The method contains the airflow context as a parameter that can be used to read config values. @@ -152,7 +152,7 @@ Override ``ui_fgcolor`` to change the color of the label. Templating ^^^^^^^^^^^ -You can use :ref:`Jinja templates ` to parameterize your operator. +You can use :ref:`Jinja templates ` to parameterize your operator. Airflow considers the field names present in ``template_fields`` for templating while rendering the operator. diff --git a/docs/apache-airflow/howto/operator/bash.rst b/docs/apache-airflow/howto/operator/bash.rst index 736be0b8e04d4..debdd269d41a4 100644 --- a/docs/apache-airflow/howto/operator/bash.rst +++ b/docs/apache-airflow/howto/operator/bash.rst @@ -34,7 +34,7 @@ commands in a `Bash `__ shell. Templating ---------- -You can use :ref:`Jinja templates ` to parameterize the +You can use :ref:`Jinja templates ` to parameterize the ``bash_command`` argument. .. exampleinclude:: /../../airflow/example_dags/example_bash_operator.py diff --git a/docs/apache-airflow/howto/operator/index.rst b/docs/apache-airflow/howto/operator/index.rst index 062e1febe8181..0252ea5088b50 100644 --- a/docs/apache-airflow/howto/operator/index.rst +++ b/docs/apache-airflow/howto/operator/index.rst @@ -24,7 +24,7 @@ An operator represents a single, ideally idempotent, task. Operators determine what actually executes when your DAG runs. .. note:: - See the :ref:`Operators Concepts ` documentation and the + See the :doc:`Operators Concepts ` documentation and the :doc:`Operators API Reference ` for more information. diff --git a/docs/apache-airflow/howto/operator/python.rst b/docs/apache-airflow/howto/operator/python.rst index 4a59df61aa196..2f5c49c77c59c 100644 --- a/docs/apache-airflow/howto/operator/python.rst +++ b/docs/apache-airflow/howto/operator/python.rst @@ -51,7 +51,7 @@ Airflow passes in an additional set of keyword arguments: one for each of the argument. The ``templates_dict`` argument is templated, so each value in the dictionary -is evaluated as a :ref:`Jinja template `. +is evaluated as a :ref:`Jinja template `. diff --git a/docs/apache-airflow/howto/variable.rst b/docs/apache-airflow/howto/variable.rst index 70bb0c5b704c5..3ea613b6750dc 100644 --- a/docs/apache-airflow/howto/variable.rst +++ b/docs/apache-airflow/howto/variable.rst @@ -27,7 +27,7 @@ code or CLI. .. image:: ../img/variable_hidden.png -See the :ref:`Variables Concepts ` documentation for +See the :doc:`Variables Concepts ` documentation for more information. 
Storing Variables in Environment Variables diff --git a/docs/apache-airflow/img/arch-diag-basic.png b/docs/apache-airflow/img/arch-diag-basic.png old mode 100644 new mode 100755 index c0dd9648f62ce97e0be82843e77d1b13b6fa1633..1adde8f079e085975cc97301e5d3c4188a182884 GIT binary patch literal 16357 zcmcJWcU)83w(mnxWTW5#6$`}zA_9rhLI<$`2@nJXLJKHGx^zMZQML^Pflw1vx=8OO zplm^EkSYX-2qg3t0)`NHEADgdeSf_B-sjx+o}UjNla;mRoNKK)$NY^kzT=sZq0X^C z&i(-cfsX0v-ZBP(*ylkYw%H?xfg|pZ#sh%=*nEw3v_U0(LJPnbPUjm2H$b5Bc%D7` zL%?_LN4iK~5Qw*p^~cuXRp0;uX$I@vx?vh*yD}NF?&N<%bG_14Mf}%b8N0l~$?sX$ zrLr?pc&6m+w%zWsOS*E%8(_}Zb@>~gAEdD-9hhp(R4GJb9L%#{{H%MO_e#dEeBzK$ zWzvhM$jCRAGyB_NK^mw&5$u}B4kHYYqo|dR_M}lJ2xvTkj-`ZY5GbL2Ps11rAduEc zk?X<05U-xk<^XBk=JW%B3jU1b1%Xcfqn{0weUV)S1Twv)#RZCdegwE@qkpgo11Den z_fJ+_CVCC=J?h_Hoh0H{$13x~Y;@DSGQZ@}JSOVAhjw1?*BN9ezSUvn32Z$H1&t&|@>ZEiMM6&5V*aR^fi(#Zl1TV!eB@V#p82-bBQ86I zBlFj8HeA>t*^Z1`QesteLT4pZ}EsgvbNx|6JirS zY*%Q6ckE8tR)~7tdG$gw=Z^PV8w;*%xK^xWjnOs1uS+Sfv4j%Qkwg925oAlZ?y0IUx zvI$+AnoXO2q7mzPWvy$EU8U7spx?f$Sgh4?$~d^u$Y}ZMjpmp;pO{+ob6{xXPO=t$ z=+NB}iENG@MOJHXIqUc@3&_F`d^nt5@}x|9&y25QQ(lvp#1m&~6I&1oJJwS>u!y%` zw>kYH$1Z2LB_UscMvvulor9U*x3*h}J14j$ICyM$MtVD(m)sn{5mfA?@OC*qlJ^>s z(=S!_&Cy59^>Mz>GoM#JaZZgFJAzQpBY9a5(P^B0qOKJ`ej}oMy~(r;oJ)K2#}7C# z?~eoHKk}%JOg+^4e*gdJD)=|omz`eVq`$fE(#P7GQXXWVn%l}Y(aWn(b**XpBq%Q# zSR@Ni&v`!VQ{R~&cS)5By+YT)!Sy*=64a7g>`dAyO7pCW{0=E1!na$dwyIPrVWpJv zl#1>aKV;4V9^hqfO3)8{bIKvLMw!y}-P(Mymi zVCCvZg{(E#(tmYmPHhuVYBYRMok5y^Gx@*pOHf^*AFt~qqh=eTfRV`bkkjm_D zHuB9iC%ZT@?vB)_jLc9EWrJb;QI4uJFXhHt$IZ39QS855^0P*kr5yQRO*<-#HtZ3(dYJK{q@if63GI81 z+}+T~rU9$YkVT9H?$ar?H#8U+{tX}dgwX-qijZ&5bt)rku8RSRtiGJh(Ve1|VKi~9 zi@Wj6XlZz2S?2KxzST2^*@z%p-RMUxw={;0jo%5+z01}_L|CO^ACV?&ji2O-k{e+^ z))redK^9WLz%O!tT0;{;?%d=XIl0C)qbnJd+CHUDmbn$pJAhMFO@$&C(C&GZNM+#@zf#3C-=5{^XFG$$D!s4;9);Dst6{pd*E zVqMrrfdLbLF}AR`k8Fz)9rlN0)&hC%osqnU@{x0PQ4s&sXccXnvNG}{KHTIPax~2u zi`L>kxPQ;>;*vf_g)%v}J1q(t0akmlg*LjVCTHpJmOREnEGGX%6KN419p>2}&Hq_6 z=ttDI{+kU^O3>uIlPMQ-*N}lVD2M&kN8gTZNYd77w~UiklGjPc7;~f1^QaS`R#$=k zsFUzthIWR!t7i8WVn?g6iHjEsIj|~|lD&rCS5x$zZMa+0Qr0ugY%5kx<@8P_OIsCv z%Q?|3pOAW0g1Z+9rgh1oQdYXu>b1TG%OYG7#Fq%c#_3olq%BQ?O?^r4+`MCb5}DF` zi!5a?3>x|Tl!*%EQq($X>ajM}+zXid@%0(bg*$m$!4C%g+Z43wu+@@WKl>k)zON%Q)c=1!eUp^Kf~tvJroZr_{si%76~t4 zL=sxdXhF8gcC##dv-Lda^i^P@n2Pkgk$98>Ml7ps{3Sq?aNic4Y2i1wMb8y@(LEbi zYHBzlF8~kOu~R^G&;~Gpy}20MaSeO9_*R4wVNl)tB4S=G6;LUaKN1)%b`TL7hslqr zkTnmD{f+oqrmwH>%97>P_%6f(dNyGP5i&kF-a(ScL9wh8w1wVFRwkegRn_@hHP@r~ zPABkhGy;k<>bnvgdbVc#SkI_!+)@2*CqmCJXG_)p z`=voSA>`^yxjrSkdQKYWdP!2TGj_lc|FZkZjWiJ?cu5aKuh_4MPlGZtT@};90V~$y z1(}&igo9n;l*y@dbVVGMpxIKtx7kghutin_$}%>GbNQk@)V@aQ0-lrdMC`otSP3T? z|8mK*atlLqR?4O1j_dn<=)83XEO;zou7Z@7GO%fUe=2!8hU#-21aiBf#l@>9nGAjl zy+QxX193OhpwHIis7?BK9_bCPutr|CN%vnqCpaFjx&N!?DlB0|TpvU8`%rSLh@4ZU zA$VwvppUSCh|d6#$el30Gf=SjO5q&=7jJR@sY--+ zRp8A)^Spe8bW;z#^}c;)6ihNKFMkoJpC~fS*ZA*3J z{Is^X66X9Q3M{_pQk^SB_2Y;r9BKxAu!Z zl&GAaaBj>MQ2`YvA7gBv0~U_me^`_%3H%8EUxVURc~{}}XXK2BFXhgO+v%{ov-`07 za{pE6nP_%b7JLsTt3IuzDcg*M6@1>|ou8;(tMCq0kKzp(DAp1laJ(SIe>%Zy8 z{O=XqHAh9y38h62&{JKOEAXHr?yl&~a%Teet|+L|wGGd_!)#5`y+*3{btdBYY4!EW z;!7rA+`?3I2oYT4J~j|u0_qU~o`ESeCpKFntE=? 
zZM0ab*LYPYMp+EGoq*Z6YDpcug4skE?wz;q#>Y#yS7 z=^24un?38=xW`7kt=hn4W&9)X17nyG_(2vYYjKtvkF+9P+sJSoCXHw`szbq$t~f?x4>57$~^<>xyUA5FjB|wH*5_$ ztpa==xZivADw50YE6^@XTM1MN)8Zp`rk zF14!l^gyxMeNFoJz2JY%9#b!1)p>A%Dz$(fe@_!q6ccpKlwHN>Bj;w8YGB;c^cs{d zm)-pDo;B~z!D|?E-G1YB-VYcmP$-ntl43k;Pr#}dsU5YZht|4}$&zq0A2Q`Gdl!Fl z?c8W^8NA#fQzpH{-_OXsf3#!dqt)mZPeI7?uXpEaJhR7YJgii~9T*<@yt_Nma!<~H zUZe$S5%h{e$H*^gL%HQ9`7^r(`z@br@%M#&>cx6$#k$N19hM&biz68SP+*q-N9Y(Y zobEpQvpe7VJN!Cmld-WuDMye<0gT@a`k;c8!bVV`dJgI?V~0^|^p% z^yRbjPgI#cwhJ1mC^DuGAF>;tvnDqlx9Dj|&gQWKfz}_5-}pD84GzE8lp7dfNCa+) z*b>Ir-lo|te=mO@X7fU)OAa{~bq~EqyU$m8wWAk!2t;$I7X;h|Z#52hS_A)943*P& zI`p*9H#6{r+U{A9T^Fpsu{*y?VKUDkW86N^9NYj?LOdI&OY{>rtn8%_UMpdva`o?N z5h{>6?3n_@vLr%4cZ!UMKSl%f@IOzOGk7<{E;RTtDg8qOC#a{Pkb7RzMrQc@X_0%$ zAdrZBviSVW;uQ{1>+y&Gjv-3;ZL>@3)wunD+x0luULg`k=Twp$8v|GYjNqKTEh)a@ z^o)#*u~>6uY;4=ca*zmrN97D!cVs3ATNsIrICXHPt;&CCB#puUNR1COlLkBsIk7h? zSm(>xff$0Q;=k_PFq-cEAO+7UL};503Q*&FHZ!)NCiX^yb)xfF@ui6VJ^M3pn`541 z6}e2h%PA#nZ1;b5cNwilm@s%(=@XqQK6f?XRD_|Q8ejasdQ-zXU(BYsN(i^fyJ-rm zU}JkXdVnW;CQ(m>AZH$JTM}+}M2K+o+3)jxuT(w${}7P+*HZkiRIJ=$ij}8J;}dCx ztm+HEO7AI8+<=iF?zc`}U0B%}xm@#@eUzdGzjNAgMV|Grk`>^qnS( zdhbF$PDAwbTJz2ndAKy(pV|(^m(v%`TgKG@DeYyu(=b|nNY#dJh$b%-D*fwg!&oG00W4#R0{X0z)wS#&IkNleyer&`s1D4rfTxv^ zNn>0^oOQOtfuN;e>v3VgFZbAq;tREt&vU`gsN&xr`#Gn2;ht^ymQF+!db<)g$*Z$V z&D#7HAHMbJAD8koePGn2d^Jb;TR=4MWwL;Cv@3TZ;EYh&>>H#8;n{t9Di^j3-p9E< zb~lI33KYm8VEVAa5Qj-5SX>tH%fuWg-RHYcnA`Z@YxHtdA9Lbfe{qPsOt@dMvKsPL z!$Gu?kID0@DZVEVkmxVlK|N1V2Q-wI2(3V;`jA3t&=;xn+NL>~Uef*wDK1aZV_FZ^ zjsP4ojs`-+KJ~q_$iT4;Azdx5c9@F}_Xk$Q;@lmR&l4?r@}E@1yOoiFj$?i{lW2Y) zmgo+cOyFNoq79i4M=uH<$vvT2I_4Q+#hmuu3_taT=ZU*2<6h2Lgm%bS?K{*8mz||L z`Fgwye*FpL&xpa|o82e9LjJHnSc(lvMAYq~zBBliA6$RDip9rW*YmF~f;d_>}d z{<@ToJD}sdc$9W+@CXY)C|MY3dr0Q?9eamXXT^Jr=Qt@>az}vhkUOIx)tj1>jk09> zdPu8wDAzqdW^4ZGyzZYGC+09wN$&z{vplVrIJ?oMpur!Vh;Se393+As>Fb{0owl7L zq3$A2zCb|t`HtIR7iI3U@0#FthsXlBU$?AUua`VP1F~j}Uti$6$uaR^je+Dc%joU5 zXMuo+XStCY_|}e0O@#P*{$fv^2P8j6#O`eKuP+c!6p6VSC~EZTI`fPmBPtg*_-I04j&HqbB za)Tedmth~GEXehOshR9g@t>$kPDS8^LYNOKzE*i2U$Atq$M;}-mwhO)XPH0v_U(vQ z;o5%b@V$B*qrL_n(e7wTeuin974Pjv?QfDX5p+xjv}yX7hXUrKRc*~6Z0hTlsi6Mg za~Id^P@40;An``mP_-;kAEl6_AS$sUMTKS0+kBQ|{EHc?9O>T<(qX-*JBX;?>1m3epQ&H`knxuLmiH z8E-;MKJE(%&`&;`s0!Uqs~IXccCe{mUlMV?_9{_%e+zL?)hne*mD3yySRmAe7lbpH zP$VoVVy`2j2|YfD9ckLGF7;r=A-@fA+4z0C-Mhw|T2mk$Fi9l$siyj@7Skt^NqG=M zEz|=S*DIQ|(A`0t$SZWe=KhSP;sK-OfcSJHhF0?Og>?5gvHU?tL3~N3TD;okSHOqD6XR8M3i~(Qq`9TR>VBBE19-7^s{v}_uxVu!k zNdEc@<9p(iorxc5U}o+v4Aqwm2^s4ir#{*r#jeLlyv$P*gTBr?p+Q;eAtgJ)I)dUS z4h}W!dG+5+%7@QwlwF5HNAcj%V40AD0gZ>SPG5mLFxvB^A@PYE`val6`oec#Pr9uZ z_4NwgUi4``V6J)XlfCxy=)0pWQ)5Yiz(v-3DUxfhDt>sY%J< z_>i4d4=<-Nz_SXG2!1LNW-atwPnbZL_FWb$o~`VCrqbc=$=~a8NnGM~-008)>5e-v zed8`;_E1=JC05otXG?8K(qB+6ljbn-J4S>g1dp`UB%>P`NjUS$$%(S0^I2h?cxQD- zY|O4cA=DAOuiz7)fAUZD5%7QOCl`-D(R1em3-zMPI+@j2HGPb}1`&r!bdV;;sLhZKfPtDxDcY zm+{i525en`e!X*(mQj5`Ldta#%(-{ZhLGiOV>MJH?8}RY-TVmGn>~EYem>!AGHl8c z{d;~VluvJdo!C>`dgT@Req{jXoo41>ZCFvLXu^xdSHaU4Ip{yHK%a=+%Eek3erIhO zQ%%&YHTUoZOt=fac|b>V_uEPXbuNM4Wi##-LL`oFBY^$ppjz1Q5dmZRQ4=X1L%9{P zJ?0Drw5wYiy1l$AG$B7p>;lw=eNl;s6CLb7-8T zotO;k+H)i_JyOtJiValhD#V<+A4ckw<r-WTFBeMK+^=}cZIQC(WVo*TrWT)TpOR{I_yb=vcvYlYvwx~bs< z2J?}-lvQu24fpR>V0+%Qe7e8({%b>5L*;U?`9fEHLuRRO$y20QHbN_$tas#~%N#$z zoBxl4n03+pY0+>CEdr(^f;k$|Ywo3|rw1D|36z3b<@`3D=7wdG-}c{V(%%5)F7#KM zsHbQLBcp|$;SuyaX5}Ts|I4O81YU{1j};>izEP5&p!RicVg$B8Hk?M%ed|XLjqJ7?e3ll{X9Mk+3 z2#hN&ai<)u$!S&LR|4@)Bi!=>ugkJjP!8+Lj7r7J$fl~ytJD3={X{7(WG}Ld~K(XYg_l(v@eb^ zQPaD0g-x9?#!R~&_<(W?%k$)CuU9ygZP;g|?bFlEp2kGO1=S(Qh3a#5zP9kfV7j>2 
zCk30dQ@Nb#ZQSFNsJmSd1*RQQJ@!tKT#E6$JH)tIjKPmYz&><02ozMttwOZYQdV{# zhZ~4pcn9Vr8c3t$T(z7HbHF!IE;5l32VprmVTmz57T=$S(Rh6H~LmAi9$ z3bA-KeCxfaM@y*j^Q?ij%{l%#iJmH_1eJBkl~rucV2c0Zhxj*(;Cj%npMovl-2o@c z<6_~mS5Dlj7jwj`df3J1lKmpg>3|wj-l4wg{l5P4`vba@k_58}Jlg*1oK6^AYN=P2 zNpR$MRl*`wE4S$-G=_mL} z%23FD-PMUy-(y`NzTqO+Y7LaXk7DTg#j1#5qF zgD+5h7lWlfDIchzZTdRZsWP?Q!M@)Hm^z0RlUk9mR5+7*zuZN#5mqM>7%nt=YeuOM zAyN<0)kRFM70XR4`}e^mL!`G}7w^|JOsn}sCGb3}D2bsN2aAlwa&F8OhkTsNR5pNy zer|wq@8&FzRXUSc{!xupAKr65h8@*_pJT zt$OxssWisJZ%!>^gQru&Ay`?|YpMxNx-Dokv<-16_jl6Vi9IM^yS{d5^8Tq^UgR;J zF#H*u=UZQ&+KkIf^n;8UDDpw`oKnM&XdW`P|0sjSrgUS9OZUb7mW@-eIZk?U)s3 z()$*y?m|ckAz+YX6F8-V4{TThuf5+3E)(gZ*G3qUf6OWA-}{)nC`yTaCuF4@PCfYY zNuXci%01*JqW|L*ze|3-KD_O~rhbEZ2oiLI_?P`+Za!elS6ZUa^;WCVmE}-pU3$f3 z4$tPdPYk8ea)V(aV;PV{`MK0-z@V+v3)1<4x!Qx98~hAktONr5kCl~+V>iDTH9StH z%{p6U8*+c(u4P`8PH<6{aI)myydCZF+4J+Ah7*=a*^@Zvw8o%MoI!!1lH{-saMx)` z3rIv!twWmeN%2z2%+3H=M1w41jKbg0acFDVttRcY#%frO9?5&{eI7`>xD;o7tHewT zA{bpycwo7y^>IQ><;G-0gAeYlh~iJnCo+YX#_>KPC-Hyn3qD7Sdn>=+t2(ZVFQ z^w#2R+w&DLA*=QI%~6tkPxW}Ni@%OovLd;{x8JuA>Z9bL*NMrE>=Zh3H!e+jR9&OOolGcy^I}4uAj`d{ zjIZqDAMkb0?+$ket-<+kCwtrpL_N>lcKK-Vdvf2Hi&0(Wv}}*(q1+Wo%P2Y5_uwrp z?i}XS{oR>}S)K44nY((O2-s$0?p%F>-$d|5nYP)xt&+)lgD;o<(Z`mO=xfqvKZE}& zm~o1mO&NOmq2EmphcsXe-D+up6ZO}+oq`*@r`0M-Dn9Nj*Sfo17~3}<#85R{--|JS zh#$rDV@JY$Hc}~Nvx4cKw7jIj?QT~p9Pa6Am<=SO4yUZ`@?$S88MGaDQNHp?p)0sy z32q1JzMM(9DBp2EP_!cJWp}_u>a2N=C)=zxBWd~qKR|FDzWPaN$8Pqg^jnn+^%FV? z-e>y492W>*9JRgkB!)0WSCJYw>PW^`Y0#zK+qU(;dfZZ8^t^Gkp0l?>dVx>FO|lbQ zkP%}9;24T~uKc@(Bk`sf5DhzOdLWp=x%mZ_Wl}MDaQkvgiGzCK{ekLClr2BLyOQg9 zvF1I8ON_)tC^90@hI=YE>>!3Zw>oIVqR2QDO1kc?s#@t5VRCHIu;E4lOyW>I4P^%3c(=xLH?c z45=cPu9c@V17^x63<#FsX z%w}^nwElv^$C|%fR|l(7gSdB7WUR)`#eq9olJXz&ed7eH7r>fa%rY|ux9n{OSflIi zX@@R`mEIajPNWuJvTu+tweURKTbERkpNvU*=d3#BWNUKrGBmZF)u6@bRj|vldg*5s zQ0u4e$6*b$a(LE?=WwF#6+?3|PKwKq$;FGgXWEM8KG8FM`_jDha=$jnHM>nH0#1vw zb#zcVO#c~r1SdgvVoJIY9>IDl`DSWrGsDg%KGV>v`d+B8`_>tM-jn+iVM7&2Tq5L)ZD3s~6YQpG-n){o*&{066p(5va zYCISzqj0x2BxUaa#F1ou;g%gd`7-)T+w4aQ#j1t~5w?L(?wp&` z4jjxgcz>O55w!>2jxd{PD(~M@=s6d=x!>NKfjZqV<2)UjytA~YW(}wxAc*Xdi1Rwh z7N*QQ7ynb#>H7$kDnqm>jl}jtJ3T75H^<0wali2N%+vw2P2rySyj6|63o1F=A1W| zECGu9NEK>bP9MJtp5y7koL-X5&8&YkQSVE+b#pgZWjaRS)bdXjD8{kD+#!*81)-CE zCcZ#~IR2?fk}ZoT`X6`gp?j-X5Rx#?dlVHBMih0# zyN65!%X$I80eW|k&F-W7_gYdx#8oj)@uR}U0Duy|d-}Q0r&CKbdO&=)9*~C<%zG2s z!d<=)ur{TBR?b#?8%@a?6$;z>_M6~{gFn=y_Q++h@PM#aJ-$SbT7`UUu&Zk3wM<82 z{)k`Gv4r@>*+h7HZw3_ln^Rb)TEdnp-5We<>6{D2jXiHhhvErpI>P>@suAE*;HLG@ zS))QXzIYrl6g*@26+8|7qL8j2{IJp3 zGOSl&w2GReIj#1V&H~3cd?gq9n#%3lo^JbJkQ_#U;-yjS0kFR+HIb-x3@yK^+3GoN zhxXjO>0Oo=|FjV3{vxcO;iFKQG+}B9gW7!KjptAA)YUbpea$)HcbjbRU4s?FOk9gu zG?vBEMF21<_8wZ#$j~KV%&Y+j=l4QJG3%2g&st7(w&vpeq&>`noAGNx=Kl&fzZBP zx}bc|8ny&sg86^EAM$_z9M>7lI?0QkYsIc%$UV=T>$-hwY(F^t>EBIod;t4*n_$$S zv6t8LM1t8vxWCjR3xsQL@6y^EZL~eL{VZ?l32zyfsxrA-xEH=1Tj8EVt1lH-jvu#y zm6;6KSO)546GG7jOH#SFlbf7E9D^H*Yd=gJNvGw@Yi_0oQ<30a8L~oKeRe_fA4frL zP#O@Oi)}25^*Ck4eTDCKx`X;$QV<}}C*Qx_FqRcgE9G2yCruK@J<3YlehKkN+5zCM zL}zTwN;r4m$lW4BrcaloM{8eQkNM|-6a>awyo_@H{7hHVLf4_&4xwAMwId@-R2xVi zp-u4x#kqs}2oNkM`_qE)pws*vayNelIq6=e(~Eu+S!wl=*bkzo|M=%|A%f8Jv?KGA zl_IUfO*N`n$1f!#a6 z)|2Ga+uQrb7gxb!5#tY#! zg}!r+`}-SB-)O}bJ?S-8VYjpyXJ8Q=Fx=lHAE`pG^`mor)XFB8P`kACM!tv30^!|4OKveoD} zEfI9c$B=?4an(OXXb$hRh3!Ubm0LG0)YnH<-6Sop@I7WkhZg9~dvtb#$6+W*b! 
zg1?jD9H#bwE8Pnq{W>>iJSH24XAam-i;2%``IE`yJ7GKmeQYe|uwVetY5;mA0d(%b zu5!&k#pE+DXz*{~Wd-YuG${i!`H1Ty>ejYnjM(nyu`( z`24kNb8L4$qqDjU@VWHcpm~#M0Glu8-&(Co>dPB^t{bM-dL;4-P<7#Wif2C8R56Oc z@p92jLyGSjdp6>v;OX;GrQx|l*7w9fy8v(WXHt8LtH2Oq_!8*U#q2gv;owowApC!N z{U7UElaK``F;Ch9(UEtXVbx+lLt5M_JxBPi6?0y~NcE37Zl1-@#jGOClf5B8ik>XI z7S!>uC&&v3sbvfzG@DsKIkm{Vxd^>a1@h>u+*SBaU_>qbvmpLjIuB@39Owd*y)PKC zA=ukEMJpjIl%fr&1v=_Q^@~Lx(ouU~P}XICF&00RdfNW_EU1WME<0yH!Z%*LFXsxL zT(TW)S_@lqGgMuv+EyUytrKo*vF}ezSX1i>VV?3((2$-M*T|Q^h@C(vA$+@%XuO&f zj01LxUXR+zk-bl%d!HW0#fHw`6m@0gfE8H{XXLBRN%k^G2m9ZWV#QuZ%w7_8u|3!w zg_Z(@ZAQK(b7AEdP*(6(I2?#ZjPETI_T7DFUO{VS*s{%l<@&@Ft;dbl4nVQ-=R`$jV&L-gw3<(yv!!$iXHg z!vvvTUBB5SmGL`Yq?Q{D9|wz4=HJJA0y#ftA*0?o1Fv8cV&+dPAQnn?goGf^{j3a) z4ry=R9WM2Ra02h^-1Z7|kt?qifF?6oI~3D}(P**oi}w1Q8iSR+tjZnFokC9lu17CW z7&Ev{xp9+6atSU4L4_L~Iu4&Ii-2b8Xh*c?H|WC#2JyS{F}|Q9|C0gzFW0`=Y)Rv)>puyh7s_i* zRNf#lO;DKVg+_?XXmXpQ!m1;ATZe!*e$!{`n+}v! zhH|bC!YXVzFOej;0C}rCY%iNR>Q+*p$KtYy8wp0Jc7M!*)Y2N^&9rI#f|q7j@L+CB z2-|f}oUynBGYkMsrT~!X{dG_oV7l#wb8ZO92Z^1KI4ZGx#}*!B*u0(8E@3Kh95b-k zW5VvE9Zc?QlH35wOd@hLLKhH{K`D^#oNh(g5&(`412asEW?7igidzCtyosT5uJYb` zn+`CuX~4`bV)@5_E?A14;3UQTT&*jX0P-PX6dKEoWVtc{O8kGB@M#RYw|7?U%Z}Xv zxCis1g`^dV0^=8Urzu}J&YDx~yR!z!0uE5DMX*jZ)O{y6srkx117jZ`)#Hgt+?Hk9 zEryYN3F;5pLTVxaM;1sA3{B>LJ#>?ME`cY;`Wg@gD^>vJ!9DKkLY*m@YFmBoP04b@ z>XdNvqKurOklYGU@gISX)_F2{Nk1@(6L3k*zb{ab^w9WzJ|i1zG_eNjN9g_H;8W}0 z7OX#AM<-`=JYCm4f{+YFrC9a7!a8Sq-(iHq^n$?`foBWc`UQBT!O7&RxI`@*&jITa zP7;623(lWat;R*EVq5twM|Fy_X4?rRo))6D`n!WdGd|#pKm`(JpVhe&K;M0jb#U5b=3Q1H@~BqMS^r%O`P< zQp`&3S$#|A&MnRj&SmL0)2qRixNh!wq*n@}IoKblA4)pLXn1v)V;HVF$;u8hYQ0EP z_;y%_XmOHH{0GIv^XlMKiHSTFJs8g9Y}OAztXKkTxD+-_@e&Ffu5G*f|f zXwfYL($D;_ie;~T8E{erU6W?@`05-qZZ~~9N>Mdn`SdY1(9QFJbKE6x8EyoS;Q6n4 zK^%Y_N*ZtQ|G@ez2K)%uB~>`WiKQAUprelg-R24W)5zff4I;E+*^Upe*(tH~)bEN} zP^1g6v-~dJJ$)YflD%~cs9ik|2s6;r8^FmMpi7|CaCViWF9A*0>Ic+jMzVg(1E4dG zTn|4`DzLpAO$GWL)B%(cm31~4DB=uXT;wo%@waZ}Ncxc$@a;YEbGNjV3^6A;11pK<8P7aRW?kang(<`#c^0@S zOgmdotW8neK2|+syv2%6q={!!yy`7)*l2NSX~muyIJUr@RJbYYz_i2eShLeUU5RKc zt`+@mE>-Y%TMruE`d4c^dYrm?9kqfw#-GQqXBr*k0kz8PDm~fZk-}ZQ@5QA9IjCYq z)M6(i!QT~~cl;mQ>4!9L?ceSQZo&;8S^WF-vheK8_SYKV)Q(_$y!VZ;`Uoq)xU&gA zZ z#v8DAx`@soznhFY5H4}kuTrR|aT99HC2Us%DClH8U4%>UD}>J>yOrO9Izi{9EgL8j rfWZE5O2_}p9rnLfl>RD@egM08N78yfRt_l52kG55yj7xY7yUl~lUn=t literal 16303 zcmdVBXH*kh(CWdkz3p#1fvH6X3o-vQ*Mi0|5M<0sz4w0Kh2@6ub%m zc=7-MYi0m|XbJ#8>5|d#LL4`MX8}@_1N;t;?QO+ zRt~q>H-+B*QVqrem*(KKf;;8Gc+My_fF2j+>GcwgM@&vy_on{DXZKNeY45#ViqatKn`Hbf+vzs{-(mZ+%#O4K7pWNy2 zk7?lq_vleFqOlg&rr&@83)-%SY!v0xIH)AKZMMa=^#4Y{iX34M>!MHOK>h)_p4}$5xbo z$B^^Ub=-0^0sC*;pXvOTDrO$ieV{D%5ljlLb?m=$@mg`uCH8yTz%x+eCTVICuG>=ITWK zfJR;SehG!2w+={IKSD{WSYud{JTP9Hyu;~nxa?rZq3%so#xjo8 zrlABs)QX@91lr<5L>S|Fp=3f{zQ!3PFK!GQ5%_6>`a`sq)7%x4Q#{W;*iY)@@s&)8 zF3kFbDhUuE?`7h_mqL{IyMA_Xl|@!jDGozydl9hF2D7Hs-ol^JAp(|udcEuoSys=S z+&kR%Q5jeGcOkhI4!(}_e#RN2*4FtA3PV!+d9a5Rn+-^srF?Lep%~Ypv7z^3$7#0B z@NYhntK~)Ct}kA)NaiMAk$E+p=qyPH`GD7o8HT>?Jf_FA{=0rRIl z$6qfX`&t^eGA8C{e3sIQ^5l=n2q&U6AZ%Cr+2xL&_u^~Ht4+D13|thj`EI6X8gR)a z*^c*YF&Uh=ey{(_8l(Gsdy&Csn;hta%GWu+^>_a4=JsI~4ZB6DBgsy@XZWalqZ=+X zQujc%CB*wPXLN)UcQL~wK#wTb1!fO?^-JG77ZOKVe=t)TJzKQ}C-8LP4Fe>5#Mu#5 z8Wq8P(m)Rlw9uVxJyG}hUqi<_8G#W54mTjwHh^ErGwP5)&VPlDRrhB4>z}CXC8c_% z-E)kJM$lKHQT(pj75cGa~YYiv3e(V1~;JAQd$4PQIzyHmO zAvNUMwI2y!qXqx}Ik%;p(P0E6BeF_o+*Ywsom-d|-v0a3IUS1WY#ldmdsuy zn0StSJIux$S;3w_vB$7-c65#68tOPZsg=w$bAu`QsnFKFr4c{nmy|yazmyXX6UGog zcBD9hfGv=l^&Ywq2-DIuHf;TCb)3(_-s0HbcA{LYa+{pXwddgM9==&62s_a9Tx6VY zB`Lr4NA%I-C?nGPrTaH!Lz!O+j=0#bafxSlbscTvbK?EsE+Tox+8wZ|^V6}S_$y!T 
zv%r=~=8r8w3}*K=BYzv7tnUcc*EeZ%w!W;ck_79U*AJ^mVwZL<&H$0Y;PZhV;vefh zR-wj;So4MU(>v$`d=}%be^F1W1w`9@=gHFTOW%wS%<>ER82K-=cu~xo5tULRZ0wng zGpDEe!swyYpNznNB+thDnB{aYrF|VSdY1QO>5Qb9z_dI`XeIQO3U5pwWgd?=6)}dd z>WAS7%U(I=Ipxo@u8Gam-j%7FZ^@lw7l(YQ?RvzmHI$b6`SJB= zAs_JJOH`=x`nB8hhekf!5ot)6lB@d@_u_Qrn*o|E^(0t=Y&;aF9w?o1*vqb_AudTG z8enHN7Il-i%%80XS*8UYlxy{C+o)XFzP^~ZHqG}Sv{|WSnyj8WeT>3OthFCaSQ7<* zshZxH`hKM1rMi8(7+Al4t!1(KCF`pq-T=`)1&qvj%JP$!7 z&|%uzXAcW)9=Z$1-^a2PjT);d7T%C<&oY*Hi`!@!>cC-D(#j3>=}7RMX8BHP^eV-y zYV9ilzs!t@&&BeG4^XRW0$5L3cj2V;IZfCBpWH{28h0NPz(g{Car?|BUw!Ov?8>vK z>CkUFJiT`TJs!||?P4gVqlZ&B?xPFpZ^f6RkrosE3%=LPO#6$W2;;0ECrBDQn77vp z;X#c?fjf9P4jRFrz>~p9@O95|uU?P2y9^w21rKqX*v$f!Qa?DAoc*#wb-f6U(2&=H z<&7RtArA8NyT|VQBt0($-)V@~x%*HZ^R-(@a3siHZ_UyjS)0)2MLa+VI3rB}hlv=P z?k!%p4DYr}aL}n4#ZNEfilFV2>@P&zw&!kW>Ad;*E-pa) zCCD|%PV4F^Jp+Ir7Zd-Q^ad{ag2}_Guz$&lPlQ(o18@hgiX*oi10qyXjM5~YV|@M& zApMsz5NVS-Ek%cX0{AQ}=!pV_uXhhb6k9uU_XItqSt^tFboeU@V)<}X=T*{zbr(lQ3 zTbqn7UNa@kl^k+4@*#)!gvGW-NyqD;bJgz$>G6s>o54S5qNg9~=i#Q0^+$d4BzfVC z^et^}A&~a)$$_S91w1Q2tlt8-#E?KfylSD4HEf6h09Jf*DUPaoJ(sLr3ui0!=+3H} z7v)nVtd1^i)-MKnJ8C&(REunEFY=#jMlRO7PM;?@V_)GSk`NB9G1|mfua|CP-G1EE z@YcoIF_{J~35&Xnx~NL0zChF@j`zqydNLxlDLh0QbdpVdXP3Z^MHNgLGlgOL&3XSjD}OJ@8XR7h4# zdA^il#2DS2Zo|(fQA@GTTust9{u6H80IlFT&;+gin;8rw{?*|0c=_v2{r>>}|DMG2 z)JugOL+baptlmE3*l9Mz|0@$q*VyJKW1W{9~$iGwYgd#wL8&i1;&Yb1(?d>); z6pTyTz9qz1vW`oz$T<0Y3em@U(lcO4H7MRQ)8UU=PyD23Fo{VhB1505j%f_++WEB- z|4o0l3;lfq<_}+C7~Hi=0cmkY;z3T1e<4R|wY74gKEu3^^jTKg=_jqpF}twi9H>vF zG$)oS<$WGcDYbIdS}mXX2kcEdzAt-DiNR(zc!A9`6d5bgz9kg0mrlrmzH~UOTZFr} z^dS0P%@-!^j>?8`u$OSt`u=93&XxFgRWZXbzxt)7b^8vOKtexzzgvI&VKJ25ynX}J zA?HND#X-g3F8`%?$3P!7$Vu;quH5Eu|5Uxn0adxsyNTLx`Nw+VSgN?ieTfJN9h#mHHSg)rNy>J^?7yWQ1B1s&R{E8ASN5(a!VoECk=hqd`*kFUN^Fm`uQ3~ zT%FBUw>vKv1hIjCpTBSott~-w7HkH$j{S#TMx(QBDt%p|;Xp=>u{}QbBgUx3h2zb> zmObw_Wy6a-1>k`k@MOsVq2;Crtdnrv{Kq$PixyE}zNd(bVH3KKgIYytS=#hYlN00d zcZnVg>Y|O_!E1@~LTL2A(wEpYF$13i{b}c}+_9J+1&2L&=e!~Yv4erDmruO1+mEz> zFsDl;NDUJvC{J$gKm@4#bTcmLYcZ_mg69wtpK@Bz$Q2VVD8?S2WY}W}8y((vlnJ`f zLYElnnU3qa_3;p?oVr1UNn6Y4)W}$q8!}^ zwr9->x;OrrRwy6pD&ShFtUFxdPo`cArI$A*_XR~CJ7s(94t)HRfSz%(v(4M5hb zPI~ux9g&ZG_52N6;p{Fj^OxN64nnOCvsx9RzyDP17eg6sGZq)**h@r280j;srWDC~ z^C{nHATx$F4Yyaa9Idw+r!p;vH`UqP_Qa|UZ4=)VY}<~ntm^!6nX*T6{wR#jK11uBdi0(2A4S9-v){fVLn2-5o>ZM z92FI57lkax3P2%Sleqv9I^FFU&``rq2H#((3UMY>|A~wxS$J`~Nl~w%y4qn3U+|@! 
z73X>{^Kjx0=%N1P%#e^r7gpizMe~JxE&~NkX}?>QOrf(>pf{?z*Gr;0;*!nd9Q8th zBLzXf>KE+9p=s#g$*lTO)q%K>xg^JxQ28B}^7SVWsye}@?OBWt$dQIGwBHoC#Eo1W zHDoDOB}VMuhRW7z`X?9Z3?RnSTArG3X|U>J*ak`{Hp#`lnccpPPIf%LbhqVshE1q0 z%=X=l;>rkhIxMg;qvVe74vn;bG!pHxxae&|MA!34iJN2YB5TXkzS~f=q5MGuf#Rl6 zyOznS_S_2^zSI|zV{anfvcy6bydwLV9vK@jx`+xPXJ%riY3Cy(R4u7mc&Kgp#bM0ec;&QSqYiZgi2S?5EXx8-a2fyO>Jtdc!}x&6Dh%&{603VZ)ZGe z2aSp1ud5S1wuFX@GpnVKoj-!VmE`B?4XA@vc1 zSWT>cE1xxzYK(-{I6U{G{zaR@vKyP=DZl$z8HZ%7^Qc!^*w!y=Wo2sL)#ECCN>=zV zZMZ_~bdCCD4)kl2M*hgp2z%0$q2FvjcT|ENP}iQIvyOY}rol}$L@Q_Fx_9RXVas)S z+HAzAY8AEwD>09k(c46Abz&FJw*D1PO?f9T51?sVg=IB2Yrm6?kcb$g6F>2`VgufD zF}!}}RCbC=s*e|$RXRB23q_QhIL80yNQ{k%F6d z;Y(~kT`Lv3(-^}C8+)RbD{3`~gk0xW*w%COey2@-V0wD6g0EH0b)rZgAO}l6?R2p@ zy>vTE?W@FZZ9Irr?vy9VT*=0GC1?ByNo7nY;s`&p*!0<3%Y#mX6Y7uoX1fApzMiEw z+b2V?SQehv{*MB>kJ5DvdKnDqtAD|%j8FUOOvMgpNxIYHqx!FngpS!+)HDb`tWMwC-OL zS~2^=hNNbgYA9Sp<*WxqDc33{Nnb35}bb0^}PVu+M!?dsjY=Ql1s-Y9cqN6HZ z+eAwhXk3#9hvnsZ7-#8uROGGeT8KJ@Dkss_3~tvNg2i+}+H5ErvoncywK>uT<;Nzp zTLvgr=-uY08N2#wvJb#LIP;JN+Rw^n@g*%k%e|!1o)ys*a!TcD2+_ob)W{}xW4N;5 z^3GOB?qsiwQm0?PqN2wE{I8n_pX@jKLVjd9SxG%_)rfc8yt7a|CgVNBR3DSL%+}?> zX_S3b0%j#T1&Y;V6>*|;_n8*keRd2}GKc$f!R!f&KJ}2IkI=C7i1gmFlHr3A^@DfY zj6WP!r)%F)Zn?d$K2rV=E-q*ipl-AW)9JM z%cwpUYNA(7Xi*zESr*Uk?)BH78ffp$`^D%J+Jasiu-kM#tZbB7iw4uW4J(ruVv_8N z1ypL^Ziuy6pY1F+71K^B0M$G>uv$OM+pK`i#R$MN5suOFO16@P8}xfhAESr1t2%G= ziw_39z-#y>DmO#SIO6pNtX>91e)n)36)@l3iLnwvoOlo4Wrp;<_lD|VhU@z!yp1^w z@~8cp=$9{rI8F=%pNah(aNge20oK;CEEhFMxW2&ac%HeIe~n#HVKuI*gn7K?m1-lH z$K>vbtQcdfi7hCdi{-hMs(|iSCybkMiFdM^IZ44r&s*|LzOr88gS|8P0A2KE!5#b8 zA2f0)dRV-=XHs^pzjDP_6fB-heId=Ta@jOQAR0cVhG{`|Rf~f%RR`WR=%PJKn{E1! z<$yN3Oi#$FWRIF0PK%+_h@7#RSo$U~)TQ|W%sP8cp=N>hx-pZba_0yt+L39+My1ED z%sW#Xq&Ng~r;&#aGb`{vSc(QC?ZXwrqN-M>IOIy%tzTzW?4C#%&lOwq+iIdw!xmf{ zL5)+SAc|XJbQ4vbiYLk(813knixBz!8d~|8#wd8a43&p98-1zKtO( z_~^;gq9h8+zlq{B&TcC_QC(Se-gJ1K=$+Qz?=U(DB*SF!1_GD4GSaC1qwE<*Et>fq z?|nGfaRin5ztu;dU7zVc*|~s#I1L{`J!YPyzM6jP_uT2}u=;vTM=Z?dSxk)}CEz+* z%s&)vR+&F-;XFQ^wJ*d_xKy|^4>X=eegdcI?&&ck&N<(21y42JPckj5`4&%Lq1-89 z?8J2gF`awT_KjG;044Y^MKNb+1CQBJ!I)E74K$9@M^!J@QMf0nI1f>wx+FR!znunh zN;Kg_Dk+#1_BpG&Md$8?8f0p#5!6y!uX40w5Ofw|-AI{g;R>5l*2gk8w$+Wb@C9+O zZORxYs?8&c1*-;`(JQ@Te9iNWekk#0!DFfs$I-Wp@ll`T9QsBXG;-FTHoxvzF#*PI zW=}iKE`DGp{1zV*J6Q}}UbOo?TQELw+~Tsbx1R35;^)|E~m{yWME)5>#&W+KMLHR{Ish!0GW-^(xgg za$P+81abP~=r7kYR}6|FD;2|m->-~V22}j*asPnP*PGIiyODo;#=mOVr{CpaOX>dw zN}}<3Z>X+pW#ZE0kLdrD!QMXM${>^Z%8y(Aik*?}_mr<(`kx6J6Prt*0h?bKaaJfF zx$m{VqBPXd)(D!M?oUiM=Jn5dr*!4W|ICb=qiU#b`%J4v_sSL_ng1%94e=<9uAOTA zmzni%8@OFt1^pLEjhW4c%9FNrUK)wp`Vou{Q$NMs0IU#Hm`DvMUX}87TA+GX7Ayqh z>k|w%n{OoF?fUgqy;HwsZS&`PxJ<}JU_cZHiZdgQxyUw%p>OE)GRkWw7FXnVQO$eD z*BaX$`|X8ZA(af(Ikv2R;-1-%6hFC6Bh-=>pXHu(heVmJvRx_}ciqtveR=v$!29zya`es4Y10YhVK_YW?LbG7M)N#| z$5dqc=nIbq@4)hYf^Mrn4!^%p%&2N$W}&57&*Ag~A?_w4P8P$b5L6VYBRRW2>7F&p z_k2^Dd44y2zG>d3&kJNQOKTDr^YQnBv1o(x{&1>>11g5EJHW41Ob_dE)mGRRt|DkfyNh{XPX?bv1G-*e|qZ8C=1j##ef-mGVb+@H5WXTix?4o?R$I=lw=)%H0*#hn<}K6Vl3wM zPradui4~AardB;QPRc_BH;&D1OYX9Ru=p6?I~_9i1I>igkosczp|>K8GQVGpw@|o> zC?`7^Y5!hjlLZCFBu%br*<;Vlm5CHHxdKaSpIW1DSaHqv^Ah><+px!Zh(C|q6VrAJ z;I@KWgY9`CCnSeyS-K!#JeIj79ZXnSRJvi=92p5#buPb5kI~0t0hu<4I~JEju1$by z-J>s~1^S64==S1-+tv;O82B)5x`gEZ#-*3xU|2qQAKly1G+W)O3z|NnEHxU9FAu15 z^D;h71NR;m5dcGQ3)n$*DIdw2=+P_tBr8sbFL!m%zFyeQ-=&672flDGb|EBnm2VA- z_~aHSGK+E11<8x4H@GUIltg7%_&uxk=}^@*1cx=DFB|dm3EsPu%RL0=Xu50F!qSqwkeY{5Cnu>EYH{6 zgF3CGed{=>mM6pcVz=Xd$GZ)PW;qYt zNM!7*+-q)8wuL43MyF74*xAd<<{EcZ{)~enV4n2wPRPPoW*k^u%t8Y(3px#dVB#aLhB~eE%AE!;Sy3 zMXD+rxeCl)x$8|5ledP(-3VB!V4P~8BZ-9FeFB^dPbshAQKwX~TEUqW3g^%1r$rRh 
zzh*f88SAmzH^19!NXCaD1%g0^j|{(WN-X}pD`^cPa-d7nK*sxK`O{&T| zY{+lvV(;*k6~%`Jq$)WuWY2+0t{GQx{Fc^r)T4ISLfrl56Q%o@w`85ExF8<;NL~|d zryb>FG%qc*^=~#|-~fDxVYt?{xller3@Pa~1rk&SIh>gZFNz)_!l4OLx=tm5anpOO z9U1TJ1R-Xk+mZXdtUmWyE5vC=kWEBCqD@_KVn{LWG2EJf@GRd_+4aw9awSE^v-VOy z{#esT*!L`_LX_J0p!*oNDYE>B<3fYcjk^!NiZZS!^J0EH{7=3966mJFqTiiABZhCl zGLJv6=0dATQ^78wT|zFQtDocwpb;iZmR)-QxzVXh1`#8xj+iT9 zGRy~P4Q6Uf({M)Cf9m(+K(@8u3WUN)uI1pYJ+eZUjtI@#`&{ z$iCc$rC)kbwGJ{bC-1LN`ZJv9BQ<_OsLrrUr#YfAVL#l?S+0+r?Xxo@pG-tTlaaEI zE|-$`{m;=-bun%D@h<+o*|^nt1YJwcH5vZs&l7}+&Xsbd3t@n_a4%L8;vAE-1GY|e z`nz{xzAokqCAdRQ)eTnxsU)Ae2=0+YU2u={9?SGCva{p7-`C&&hfpy1={H@S82!k5 z&u345z4d8uCHmtJS1a9=`FZ!DH1NR>>Z0f0bipw=kk2*To;2kGLM~f`E9#!Ib_BnK zrCx5RC(Y|^Z>`PZVJf9GLDjUT;KHh1RMZm5`m`G4qt`jE6my9T^maMB~I!sx>70-2eUSG>s(&Gkiui}^hSfL z{_C#?0iNP)snixG-3!jl23*nL;^DNM5(mMe_{{i2PwRCFm9i9gJ8j;=7!U{|6q8l# zBi(yr3^A$YL(adw!2JrAc}abojePg(Cfs_7#c1Ux!eFQ(1p0|_-}Jc}St*OvD&_vD)puH% zVNTnXAZ#C}Vn%IXZBw)7Q>~*8MI`|H_I(Q;qZyh{_f0gdG& z9XyD1QUDg$26Zm0!AxIlzGD`68EU3$Npt{*M%C~KiT3wV7)zN*^F6>{b}phb_Cs2% zbvX9+)o`(`9xAPMMB6qUvaDuf9vAQZN*W$!PE9Ocw9!&jH)bEe3{M;ebvul68Ee%j zS&xhFqh9F>E*HLVUyU$Hh)+y4Q4I1(I}+D4cm7Gi><_& zXnF1Crov3uVQWs1@ zH~M5uZ+wO!%|LiQtfLsV|A>qP+LV%Muh^0Y)w7Y&=^l(%J(v{8tOxq*fSgXja9*qV zMVq2R>NO4`|E6P3JC12erOf&W*XdYLxO1Q9`dej7deOv7mdS}AMC_5j!iea|0lV0j zDwE>guO$2+gbDi=^b*Ydp%d1@^}!W;vsnihl5E(TIWJHqq_LoAW6}{=59b9B=f!dD z_}+p8f}sOtwI%D?l33!L_|18@c?;i~TTI*89{Ib7Ry~W=u&|*7>@ivhaerBnd*l&V z03lc`g{l`ZT&z=qy_6C=Ykp#aQUB7i%dMdg*F`tq7d(^T@>4b*wK!EUn2Ul`lp)sgLh~i!=ZN-&JSlli)?cZe zA*%5&)Rx-CYZrIe<&yVa3kL16NCj>>WD>xc!7T>%^>-s2Um@7Ung`R*?uzZj1e%Vs zZIsX0%$P*i($rLLac!qfKL}5!OIj-m-xAa9@q@!X4un&@RT`H>=LdKue{4wuhe$^b zFuK?|9?=gK^Nqw}qB+o4Z@^<4_^1tB=N?cKofen_ZGHpJTlxf>q=j1t;pjx{?Iztow11LFYR{`?27vCt#$~@geYZYg`NKQ8Y{3>Od+nx+ z3`!L&w~i!TxHHsZ)DqU}Mt55{n7Bfw3@QVOGhIm$i3^Oxf1Q=|UD$>+4mS&K^<5WCvy@XuT4bb%zT z?sNpgA@0TP)$hj!iUD5BrbDBo+$ql?Ehby+fJ3Oy{e}rpovI+FR z^}vAs^ZW&D{nqE_a!6>+_g+FD*Td($DhL%7!&?^1FBfd&bA=H5g~)sYh6gAx&B7F` z_A_zWM*%dO2?2V{&eoiANT7R10>#Z2CCsp?x4U@75-{XIr;uF|GKYzu`~m;E?}<;C zN|i;SO2TS!l&F6ux3q;Gs4c~Vu8rI`?YW<$uIm2h?x&+YHy6AL515>YwRL`hl)im} z&Xn1q5{P8&-sZFMzbb5TB{2Orp!-3Z+CFjZ~FsHE^)qX#q6q9-$MN%XEC?Bb%am!dA+0H0LuGF~W zh3ih5He8-Lo5Ng+bm{clhH58uS<6K(d2EZmxQlu&Gz7AayxB*D-xhzIC9rApEygq- z`WfF=Gt%OrjizvvXag5z|Le^kmO*`&cSk8=Mc#pVJhlv|&YZa<5A}ycM7jENC^}Te zb5+@>BiF~F^^kkdfs*dTC{`Q@D;~LG=|R0>n(W=3OVRoUJkw527Umy{~depBj(&6B^UA?2#l6LUpkCAZK&Yzyr!m?pK)BRuQEej`3xO5F)5u( zQ6`cECJkaI)Ct^Oi$A_;)bGqc5z0K-zxNfTVzXGmy~zb{x9=~YxSJ^!W+yYnuEFit zqNdHN%T+odklimp*>^iCfgi!VSCSQ1-s^em%*FJZXB#u9jHuNr<%?UW)s*>V zPj|Da-QnQiTX55|D#W}uVJr@2!5OnIwE?s}^m>9N0R_IS z|NNjIMzJJXz&3O@(?6AXw*y)fnqnK8$jzWchCYcTgSQ6;UG1i2 zs4;wNc&t}vmnmWU8Sg;SK!M0mkEFGvegdla<{TbMeA+N2$gM-Inb{KJ6HdaM$5+-^ z2|oU{x3Ux3uLJyGth7B0?dqJ}!tmAJUX;1z6bkF0JXl=8BY6YP+H{Z~Cals%YZ)7@ zdg~g~C|uVyKQ_zm!;#7XKLC$`59hTDxrT`-uw2dhYC^@D?Z}zc8cJ)NeyXQDdQ(r9Db%{UC9jIPg@34;jr=|ncuS8D83=Jl#Uu}9n*_{;w7X7bbtEFUzaBPWsyj`FJmCL$_U-aO>G^WJq_klaEAwv;RqICs0?|r{3&VbQ3N}y zn!-VA$j3NOV+|pmrJv1>YGI2+rg)jZE6SGK`nb}ryc=DWSw!^MoLhdxf4KVKB^wMI z`hwaxZ%)tg15UirVHRqUI%d>rLl|-@>GTsK_X$jweU!uDx30}R?OfxumGE4_8oXm0 zFk5F1z^&lqGQ?QE%wsm>H4m1hYP7X$(YctGSieY>OKM9M=LY#und~QAz1{V9CVleJ zlbYiXmdWC)%eH47acXMfD$M7yrl9sR&orgKeX6Ej)RIl`pj3nec89v>TA4S^4sEdCJ1mbUh=-3jB))taM)3jqb}4V+OVwrKq7a( zsl~gt+eO?yA%bh$bo4QUB&E+S%&v#+%T#AJ+gMpPKAM`L0xxo16R=a*dkZ6UITK>% ztNS$g6Z}E({iYglm%&_Z=pmnHbgh7yzWN`#(N7kPIVWBeJjX}|aQ(UmU%D-;oAqim zB{dvY2p#n?R7$Fo@yLhv&s;NSf10YtZ)vBf|4vSKUA_YZjCp2mq<^|UHh#4Aj5^jq z@ko2qcB7-ceMLGLJkv=jI_`1HP5QZKlS-V+OSa1RVh@$~#bx|2ssq_Lz`&j`Ck>oF 
 [... remainder of the base85 GIT binary patch data for docs/apache-airflow/img/arch-diag-basic.png omitted ...]

diff --git a/docs/apache-airflow/img/basic-dag.png b/docs/apache-airflow/img/basic-dag.png
new file mode 100755
index 0000000000000000000000000000000000000000..ee14ac94ac24edbf046feaeeaebce3ca27fa857d
GIT binary patch
literal 5393
 [... base85 binary image data omitted ...]

diff --git a/docs/apache-airflow/img/task_stages.png b/docs/apache-airflow/img/task_stages.png
deleted file mode 100644
index fde65882b06a4d13b96d05963a74b8bda00de972..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 12919
 [... base85 binary image data omitted ...]
zJ2pay(N>pluQ|)(g8$Yu5X~>i8zsAckdJF}Yy@8K`aYd%jG_|D-3Y6z^M3xSRAcSJQE^n8&P*}vv!9Coc0Ps4>V3{yQX^CVc= zo=?`mohR3cO6@}xYEO~}+c?#Lb^KtA*Q)#mabpzk-p@Z^Tat}YoI!foaSI3vR-?KC zL5pkOkAFaC(`LE{QXn&z3JusKWEn(zrXb^WcUiwJlzktc+cf-&5xt7Nu#(- z?RC~-k*@!tk}rl}3P?tR2WZzZoRBPA@M462a5xL=b^bU}61Qk_|%o#bkASWA~_cdhVmB zvzqklu0iZ~R!MQw>A3^y(OeT~G^6R*T`A|tO&p$8fflcZrr>)vm_WP9Ak`kU@2I!dmrr$w&K@Z;IO4-W0k?(yhYu9q84MkRD7glh@vn7DDwhk^t(H?Q)Q+! z&ji;RPLuK{`6-NA>h;b5EyoIGms}utp`M;JGO^(jmasN{rm~6fY&)7_og)wC+9nUgdCc3BZRn5eN#x9}lKCvc(jy}(R?qdd; zx+dm7<#elmeJBd=9wcT$ZTeg5`}CV^Dr?mFaP*%m8AY`>Tmq~&hb!T)MZ{fCzkX$(RV}}c7ysaLER^pd!||T2GVftNBD)N8TKFBaWU^Z5d&9xnnf;Od zbFWjLt`d%J<{r&pLZ1~;xu)NZp23DK1B1Dqpm)Z~$)haYo|!j~e8fYMT$igy=YsZ? zHGz-0Zi}9ow+ipfoihWdxm0J1cC0fOtmUl*M5Q@gGYT%wM#J{dEpYaBS04r z7E|mhd87a0m0BOa_yNz0aeKuv2NoaIRfmnNUM9AN)*FEI#ALPl+i7F&=5W;mka%H$ zSskvB#!QSP0a$N!DWK6?&s5mTUgT=1NCgYCR$xLdJ9hwWXYB@mc5&^b2VLc%>pM0l zbS;pxm7LNW&zC3yD;sewjrJI1@^LM{zOw;vW4q^XNn`!`N6*IT&Hc<8RydqA z?ntK){ZG#8$iJV5Btm4f*qj$CiIfdG(9UW()pucdm8|Sr<~UM$#jOgNrO{c0e-Ym^ zBlG|yX}rQ4>|I!i`CQi1xpB^o*ZWCrbdwL2lkoxYkACRVa&C_iGP z3F3$wa_ePuxSJ=Vtpv^By=JH1+TjoPS;VPX_q=l==0oJwCMhe+`n$`X(@qxEV7(_v zn#iF&CRbAy|7UWd@IP{S>ZRU^F}Ip# zqyLjy@y~xSlJC7RXKOav|B+VX_5NzxShd2h|7l|XU+{nV?my{4|F@<7^9E7*`-oIp UokaDW^#vg#p&(u changelog best-practices diff --git a/docs/apache-airflow/installation.rst b/docs/apache-airflow/installation.rst index a348334be83c0..fd1b5d125894e 100644 --- a/docs/apache-airflow/installation.rst +++ b/docs/apache-airflow/installation.rst @@ -45,7 +45,7 @@ Airflow is tested with: * Kubernetes: 1.18.15 1.19.7 1.20.2 **Note:** MySQL 5.x versions are unable to or have limitations with -running multiple schedulers -- please see: :doc:`/scheduler`. MariaDB is not tested/recommended. +running multiple schedulers -- please see: :doc:`/concepts/scheduler`. MariaDB is not tested/recommended. **Note:** SQLite is used in Airflow tests. Do not use it in production. We recommend using the latest stable version of SQLite for local development. diff --git a/docs/apache-airflow/macros-ref.rst b/docs/apache-airflow/macros-ref.rst index 832dad74109a3..8fb4cf6031feb 100644 --- a/docs/apache-airflow/macros-ref.rst +++ b/docs/apache-airflow/macros-ref.rst @@ -20,7 +20,7 @@ Macros reference ================ -Variables and macros can be used in templates (see the :ref:`jinja-templating` section) +Variables and macros can be used in templates (see the :ref:`concepts:jinja-templating` section) The following come for free out of the box with Airflow. Additional custom macros can be added globally through :doc:`plugins`, or at a DAG level through the ``DAG.user_defined_macros`` argument. 
diff --git a/docs/apache-airflow/operators-and-hooks-ref.rst b/docs/apache-airflow/operators-and-hooks-ref.rst
index 9ba35272cb2e2..91ad4d5c243b0 100644
--- a/docs/apache-airflow/operators-and-hooks-ref.rst
+++ b/docs/apache-airflow/operators-and-hooks-ref.rst
@@ -116,7 +116,7 @@ Airflow has many more integrations available for separate installation as a prov
      -
    * - :mod:`airflow.sensors.smart_sensor_operator`
      -
-     - :doc:`smart-sensor`
+     - :doc:`concepts/smart-sensors`
 
 **Hooks:**
diff --git a/docs/apache-airflow/redirects.txt b/docs/apache-airflow/redirects.txt
index fac6825e29fff..ea7712723a182 100644
--- a/docs/apache-airflow/redirects.txt
+++ b/docs/apache-airflow/redirects.txt
@@ -43,3 +43,8 @@ start.rst start/index.rst
 # References
 cli-ref.rst cli-and-env-variables-ref.rst
 _api/index.rst python-api-ref.rst
+
+# Concepts
+concepts.rst concepts/index.rst
+smart-sensor.rst concepts/smart-sensors.rst
+scheduler.rst concepts/scheduler.rst
diff --git a/docs/apache-airflow/security/secrets/index.rst b/docs/apache-airflow/security/secrets/index.rst
index 6b0be12e4c035..5cb0594fb76b6 100644
--- a/docs/apache-airflow/security/secrets/index.rst
+++ b/docs/apache-airflow/security/secrets/index.rst
@@ -23,8 +23,8 @@ This guide provides ways to protect this data.
 
 The following are particularly protected:
 
-* Variables. See the :ref:`Variables Concepts ` documentation for more information.
-* Connections. See the :ref:`Connections Concepts ` documentation for more information.
+* Variables. See the :doc:`Variables Concepts </concepts/variables>` documentation for more information.
+* Connections. See the :doc:`Connections Concepts </concepts/connections>` documentation for more information.
 
 .. toctree::
diff --git a/docs/apache-airflow/start/docker.rst b/docs/apache-airflow/start/docker.rst
index 20f5846e926c9..fe96d9cb664f0 100644
--- a/docs/apache-airflow/start/docker.rst
+++ b/docs/apache-airflow/start/docker.rst
@@ -43,7 +43,7 @@ To deploy Airflow on Docker Compose, you should fetch `docker-compose.yaml <../d
 
 This file contains several service definitions:
 
-- ``airflow-scheduler`` - The :doc:`scheduler ` monitors all tasks and DAGs, then triggers the
+- ``airflow-scheduler`` - The :doc:`scheduler </concepts/scheduler>` monitors all tasks and DAGs, then triggers the
   task instances once their dependencies are complete.
 - ``airflow-webserver`` - The webserver available at ``http://localhost:8080``.
 - ``airflow-worker`` - The worker that executes the tasks given by the scheduler.
@@ -52,7 +52,7 @@ This file contains several service definitions:
 - ``postgres`` - The database.
 - ``redis`` - `The redis `__ - broker that forwards messages from scheduler to worker.
 
-All these services allow you to run Airflow with :doc:`CeleryExecutor `. For more information, see :ref:`architecture`.
+All these services allow you to run Airflow with :doc:`CeleryExecutor `. For more information, see :doc:`/concepts/overview`.
 
 Some directories in the container are mounted, which means that their contents are synchronized between your computer and the container.
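The security/secrets hunk above now routes readers to the new Variables and Connections pages; the two objects it protects are the ones tasks fetch at runtime. A rough sketch, not taken from the patch; ``my_api_key`` and ``my_postgres`` are hypothetical IDs that would have to exist in your metadata database or a configured secrets backend:

.. code-block:: python

    from airflow.hooks.base import BaseHook
    from airflow.models import Variable

    # Variable.get walks the configured secrets backends before the metastore.
    api_key = Variable.get("my_api_key")

    # Connections carry host/login/password and are resolved the same way.
    conn = BaseHook.get_connection("my_postgres")
    print(conn.host, conn.login)  # the password field is what the secrets guide protects

The values themselves never need to appear in DAG files, which is the point of the secrets documentation these links lead to.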
diff --git a/docs/apache-airflow/tutorial.rst b/docs/apache-airflow/tutorial.rst
index 3a6b7ce9341ab..b7db57a4ba913 100644
--- a/docs/apache-airflow/tutorial.rst
+++ b/docs/apache-airflow/tutorial.rst
@@ -45,7 +45,7 @@ The actual tasks defined here will run in a different context from the context
 of this script. Different tasks run on different workers
 at different points in time, which means that this script cannot be used
 to cross communicate between tasks. Note that for this
-purpose we have a more advanced feature called :ref:`XComs <concepts:xcom>`.
+purpose we have a more advanced feature called :doc:`/concepts/xcoms`.
 
 People sometimes think of the DAG definition file as a place where they
 can do some actual data processing - that is not the case at all!
@@ -362,8 +362,7 @@ running against it should get it to get triggered and run every day.
 Here's a few things you might want to do next:
 
 .. seealso::
-    - Read the :ref:`Concepts page` for detailed explanation
-      of Airflow concepts such as DAGs, Tasks, Operators, etc.
+    - Read the :doc:`/concepts/index` section for a detailed explanation of Airflow concepts such as DAGs, Tasks, Operators, and more.
     - Take an in-depth tour of the UI - click all the things!
     - Keep reading the docs!
diff --git a/docs/apache-airflow/tutorial_taskflow_api.rst b/docs/apache-airflow/tutorial_taskflow_api.rst
index fffe3a553ad67..39820b21d73d8 100644
--- a/docs/apache-airflow/tutorial_taskflow_api.rst
+++ b/docs/apache-airflow/tutorial_taskflow_api.rst
@@ -235,9 +235,5 @@ What's Next?
 ------------
 
 You have seen how simple it is to write DAGs using the Taskflow API paradigm within Airflow 2.0. Please do
-read the :ref:`Concepts page` for detailed explanation of Airflow concepts such as DAGs, Tasks,
-Operators, etc, and the :ref:`concepts:task_decorator` in particular.
-
-More details about the Taskflow API, can be found as part of the Airflow Improvement Proposal
-`AIP-31: "Taskflow API" for clearer/simpler DAG definition `__
-and specifically within the Concepts guide at :ref:`Concepts - Taskflow API`.
+read the :doc:`Concepts section </concepts/index>` for a detailed explanation of Airflow concepts such as DAGs, Tasks,
+Operators, and more. There's also a whole section on the :doc:`TaskFlow API </concepts/taskflow>` and the ``@task`` decorator.
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index e01b7b8e6d4e3..627ddd3875609 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -367,6 +367,8 @@ TCP
 TLS
 TTY
 TZ
+TaskGroup
+TaskGroups
 TaskInstance
 Taskfail
 Templated
@@ -636,6 +638,7 @@ datacenter
 datadog
 dataflow
 dataframe
+dataframes
 datapipe
 datapoint
 dataproc
@@ -657,6 +660,7 @@ dbs
 dbtapquerycmd
 dbutils
 de
+declaratively
 decomissioning
 decrypt
 decrypted
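The tutorial hunks above point readers at the new TaskFlow and XComs pages. As a minimal sketch of what those pages cover, not taken from the patch; it assumes Airflow 2.0's ``airflow.decorators`` module, and the DAG and task names are illustrative only:

.. code-block:: python

    from datetime import datetime

    from airflow.decorators import dag, task


    @dag(schedule_interval=None, start_date=datetime(2021, 1, 1), catchup=False)
    def taskflow_sketch():
        @task
        def extract():
            return {"a": 1, "b": 2}  # the return value is stored as an XCom

        @task
        def total(data: dict):
            print(sum(data.values()))  # the argument is pulled from XCom automatically

        total(extract())  # passing the output also sets the task dependency


    demo = taskflow_sketch()

Return values move between the two tasks as XComs, the same mechanism the tutorial.rst hunk now links to at :doc:`/concepts/xcoms`, while the ``@task`` decorator is the entry point described in the new TaskFlow page.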