From 8069558c810a0e8892c5c52f88762c3bf1bb4e72 Mon Sep 17 00:00:00 2001 From: Ash Berlin-Taylor Date: Tue, 4 Mar 2025 17:39:47 +0000 Subject: [PATCH 1/6] Start working on the docs for the Task SDK In order to have a nicer experience (i.e. rather than just being given an alphabetical list of classes/functions) I have chosen to not use the auto generate feature of the AutoAPI extension, but instead to precisely control the order and grouping of the classes. For this to be complete we will likely need some testing that compares the items on the generated `objects.inv` with the things we re-export from `airflow/sdk/__init__.py`. The `airflow/sdk/__init__.pyi` type stub is sadly needed, as without it (and with the imports only inside the `if TYPE_CHECKING` block) we weren't getting the re-exported classes showing up where we want them, specifically we want users to import things directly from airflow.sdk, not the submodule. It was generated with `stubgen -o task_sdk/src -m airflow.sdk` Test this with `sphinx-build -b html -Tv task_sdk/docs/ task_sdk/docs/_build` --- task-sdk/docs/.gitignore | 1 + task-sdk/docs/api.rst | 77 +++++++++++++++ task-sdk/docs/conf.py | 88 ++++++++++++++++++ task-sdk/docs/index.rst | 26 ++++++ task-sdk/src/airflow/sdk/__init__.pyi | 93 +++++++++++++++++++ task-sdk/src/airflow/sdk/bases/operator.py | 4 +- .../src/airflow/sdk/definitions/context.py | 2 + task-sdk/src/airflow/sdk/definitions/dag.py | 4 +- 8 files changed, 290 insertions(+), 5 deletions(-) create mode 100644 task-sdk/docs/.gitignore create mode 100644 task-sdk/docs/api.rst create mode 100644 task-sdk/docs/conf.py create mode 100644 task-sdk/docs/index.rst create mode 100644 task-sdk/src/airflow/sdk/__init__.pyi diff --git a/task-sdk/docs/.gitignore b/task-sdk/docs/.gitignore new file mode 100644 index 0000000000000..69fa449dd96e2 --- /dev/null +++ b/task-sdk/docs/.gitignore @@ -0,0 +1 @@ +_build/ diff --git a/task-sdk/docs/api.rst b/task-sdk/docs/api.rst new file mode 100644 index 
0000000000000..79f089bda630a --- /dev/null +++ b/task-sdk/docs/api.rst @@ -0,0 +1,77 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +airflow.sdk API Reference +========================= + +This page documents the full public API exposed in Airflow 3.0+ via the Task SDK python module. + +If something is not on this page it is best to assume that it is not part of the public API and use of it is entirely at your own risk +-- we won't go out of our way to break usage of them, but we make no promises either. + +.. :py:module: airflow.sdk + +Defining DAGs +------------- + +.. autoapiclass:: airflow.sdk.DAG + +.. autoapifunction:: airflow.sdk.dag + +.. autoapiclass:: airflow.sdk.TaskGroup + +.. autoapifunction:: airflow.sdk.get_parsing_context + +.. autoapiclass:: airflow.sdk.definitions.context.AirflowParsingContext + :undoc-members: + :members: + + +Tasks and Operators +------------------- + +.. autoapiclass:: airflow.sdk.BaseOperator + +.. autoapiclass:: airflow.sdk.XComArg + +.. autoapifunction:: airflow.sdk.get_current_context + +Assets +------ + +.. autoapiclass:: airflow.sdk.Asset + +.. autoapiclass:: airflow.sdk.AssetAlias + +.. autoapiclass:: airflow.sdk.AssetAll + +.. autoapiclass:: airflow.sdk.AssetAny + +..
autoapiclass:: airflow.sdk.AssetWatcher + + +.. Asset, AssetAlias, AssetAll, AssetAny, AssetWatcher + +Everything else +--------------- + +.. autoapimodule:: airflow.sdk + :members: + :exclude-members: BaseOperator, DAG, dag, asset, Asset, AssetAlias, AssetAll, AssetAny, AssetWatcher, TaskGroup, XComArg, get_current_context, get_parsing_context + :undoc-members: + :imported-members: + :no-index: diff --git a/task-sdk/docs/conf.py b/task-sdk/docs/conf.py new file mode 100644 index 0000000000000..47695f360e375 --- /dev/null +++ b/task-sdk/docs/conf.py @@ -0,0 +1,88 @@ +# Disable Flake8 because of all the sphinx imports +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from pathlib import Path + +CONF_DIR = Path(__file__).parent.absolute() + +project = "Apache Airflow Task SDK" + +language = "en" + +extensions = [ + "sphinx.ext.autodoc", + "autoapi.extension", + "sphinx.ext.intersphinx", +] + +autoapi_dirs = [CONF_DIR.joinpath("..", "src").resolve()] +autoapi_root = "api" +autoapi_ignore = [ + "*/airflow/sdk/execution_time", + "*/airflow/sdk/api", + "*/_internal*", +] +autoapi_options = [ + "undoc-members", + "members", +] +autoapi_add_toctree_entry = False +autoapi_generate_api_docs = False + +autodoc_typehints = "description" + +# Prefer pyi over py files if both are found +autoapi_file_patterns = ["*.pyi", "*.py"] +# autoapi_generate_api_docs = False + +html_theme = "sphinx_airflow_theme" + +global_substitutions = { + "experimental": "This is an :ref:`experimental feature `.", +} + +rst_epilog = "\n".join(f".. |{key}| replace:: {replace}" for key, replace in global_substitutions.items()) + + +intersphinx_resolve_self = "airflow" +intersphinx_mapping = { + "airflow": ( + "https://airflow.apache.org/docs/apache-airflow/stable/", + ( + "../../docs/_inventory_cache/apache-airflow/objects.inv", + "../../docs/_build/apache-airflow/objects.inv", + None, + ), + ) +} + + +def skip_util_classes(app, objtype, name, obj, skip, options): + if "definitions" in name: + if name == "DAG": + obj.id = "airflow.sdk.DAG" + return skip + skip = True + return skip + + +def setup(sphinx): + # sphinx.connect("autoapi-skip-member", skip_util_classes) + ... diff --git a/task-sdk/docs/index.rst b/task-sdk/docs/index.rst new file mode 100644 index 0000000000000..06e1e80802658 --- /dev/null +++ b/task-sdk/docs/index.rst @@ -0,0 +1,26 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Apache Airflow Task Execution SDK +================================= + +:any:`DAG` is where to start. :any:`dag` + +.. toctree:: + :hidden: + + api diff --git a/task-sdk/src/airflow/sdk/__init__.pyi b/task-sdk/src/airflow/sdk/__init__.pyi new file mode 100644 index 0000000000000..ddd720ea6e986 --- /dev/null +++ b/task-sdk/src/airflow/sdk/__init__.pyi @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from airflow.sdk.bases.notifier import BaseNotifier as BaseNotifier +from airflow.sdk.bases.operator import ( + BaseOperator as BaseOperator, + chain as chain, + chain_linear as chain_linear, + cross_downstream as cross_downstream, +) +from airflow.sdk.bases.operatorlink import BaseOperatorLink as BaseOperatorLink +from airflow.sdk.bases.sensor import ( + BaseSensorOperator as BaseSensorOperator, + PokeReturnValue as PokeReturnValue, +) +from airflow.sdk.definitions.asset import ( + Asset as Asset, + AssetAlias as AssetAlias, + AssetAll as AssetAll, + AssetAny as AssetAny, + AssetWatcher as AssetWatcher, +) +from airflow.sdk.definitions.asset.decorators import asset as asset +from airflow.sdk.definitions.asset.metadata import Metadata as Metadata +from airflow.sdk.definitions.connection import Connection as Connection +from airflow.sdk.definitions.context import ( + Context as Context, + get_current_context as get_current_context, + get_parsing_context as get_parsing_context, +) +from airflow.sdk.definitions.dag import DAG as DAG, dag as dag +from airflow.sdk.definitions.decorators import setup as setup, task as task, teardown as teardown +from airflow.sdk.definitions.decorators.task_group import task_group as task_group +from airflow.sdk.definitions.edges import EdgeModifier as EdgeModifier, Label as Label +from airflow.sdk.definitions.param import Param as Param +from airflow.sdk.definitions.taskgroup import TaskGroup as TaskGroup +from airflow.sdk.definitions.template import literal as literal +from airflow.sdk.definitions.variable import Variable as Variable +from airflow.sdk.definitions.xcom_arg import XComArg as XComArg +from airflow.sdk.io.path import ObjectStoragePath as ObjectStoragePath + +__all__ = [ + "__version__", + "Asset", + "AssetAlias", + "AssetAll", + "AssetAny", + "AssetWatcher", + "BaseNotifier", + "BaseOperator", + "BaseOperatorLink", + "BaseSensorOperator", + "Connection", + "Context", + "DAG", + "EdgeModifier", + "Label", + "Metadata", + 
"ObjectStoragePath", + "Param", + "PokeReturnValue", + "TaskGroup", + "Variable", + "XComArg", + "asset", + "chain", + "chain_linear", + "cross_downstream", + "dag", + "get_current_context", + "get_parsing_context", + "literal", + "setup", + "task", + "task_group", + "teardown", +] + +__version__: str diff --git a/task-sdk/src/airflow/sdk/bases/operator.py b/task-sdk/src/airflow/sdk/bases/operator.py index 3f74694aaa7c4..06937a4fb9f1b 100644 --- a/task-sdk/src/airflow/sdk/bases/operator.py +++ b/task-sdk/src/airflow/sdk/bases/operator.py @@ -77,9 +77,7 @@ import jinja2 - from airflow.sdk.definitions.context import Context - from airflow.sdk.definitions.dag import DAG - from airflow.sdk.definitions.taskgroup import TaskGroup + from airflow.sdk import DAG, Context, TaskGroup from airflow.sdk.definitions.xcom_arg import XComArg from airflow.serialization.enums import DagAttributeTypes from airflow.task.priority_strategy import PriorityWeightStrategy diff --git a/task-sdk/src/airflow/sdk/definitions/context.py b/task-sdk/src/airflow/sdk/definitions/context.py index 6580b8bcf5e81..082ad36202ec2 100644 --- a/task-sdk/src/airflow/sdk/definitions/context.py +++ b/task-sdk/src/airflow/sdk/definitions/context.py @@ -118,6 +118,8 @@ class AirflowParsingContext(NamedTuple): If these values are not None, they will contain the specific DAG and Task ID that Airflow is requesting to execute. You can use these for optimizing dynamically generated DAG files. + + You can obtain the current values via :py:func:`.get_parsing_context`. """ dag_id: str | None diff --git a/task-sdk/src/airflow/sdk/definitions/dag.py b/task-sdk/src/airflow/sdk/definitions/dag.py index 8b25e6a8b3919..11c419cef04d4 100644 --- a/task-sdk/src/airflow/sdk/definitions/dag.py +++ b/task-sdk/src/airflow/sdk/definitions/dag.py @@ -278,7 +278,7 @@ class DAG: :param schedule: If provided, this defines the rules according to which DAG runs are scheduled. 
Possible values include a cron expression string, timedelta object, Timetable, or list of Asset objects. - See also :doc:`/howto/timetable`. + See also :external:doc:`howto/timetable`. :param start_date: The timestamp from which the scheduler will attempt to backfill. If this is not provided, backfilling must be done manually with an explicit time range. @@ -352,7 +352,7 @@ class DAG: :param tags: List of tags to help filtering DAGs in the UI. :param owner_links: Dict of owners and their links, that will be clickable on the DAGs view UI. Can be used as an HTTP link (for example the link to your Slack channel), or a mailto link. - e.g: {"dag_owner": "https://airflow.apache.org/"} + e.g: ``{"dag_owner": "https://airflow.apache.org/"}`` :param auto_register: Automatically register this DAG when it is used in a ``with`` block :param fail_fast: Fails currently running tasks when task in DAG fails. **Warning**: A fail stop dag can only have tasks with the default trigger rule ("all_success"). From 8639cdb3f7ca497374ee0c5c574340c11cc87841 Mon Sep 17 00:00:00 2001 From: Ankit Chaurasia <8670962+sunank200@users.noreply.github.com> Date: Wed, 28 May 2025 17:52:20 +0545 Subject: [PATCH 2/6] Add Breeze build-task-sdk-docs command and use the top-level SDK path (e.g. airflow.sdk.DAG) fix static checks Fix the static checks Fix the tests Fix autoapimodule explicitly import all your public API into airflow/sdk/__init__.py (including literal, MappedOperator, SecretCache, etc.), and we wired up __lazy_imports and __all__ so AutoAPI only sees those names. Add SecretCache in the SDK module for both runtime and static typing use the top-level SDK path (e.g. 
airflow.sdk.DAG) Add Breeze build-task-sdk-docs command; tests for Task-SDK API vs docs; clean up Sphinx config --- airflow-core/docs/index.rst | 9 ++ .../example_dynamic_task_mapping.py | 3 + .../example_dags/example_simplest_dag.py | 3 + dev/breeze/doc/03_developer_tasks.rst | 17 +++ dev/breeze/doc/images/output-commands.svg | 102 ++++++------- .../doc/images/output_build-task-sdk-docs.svg | 95 ++++++++++++ .../doc/images/output_build-task-sdk-docs.txt | 1 + dev/breeze/doc/images/output_setup.svg | 2 +- dev/breeze/doc/images/output_setup.txt | 2 +- ...utput_setup_check-all-params-in-groups.svg | 10 +- ...utput_setup_check-all-params-in-groups.txt | 2 +- ...output_setup_regenerate-command-images.svg | 16 +-- ...output_setup_regenerate-command-images.txt | 2 +- .../commands/developer_commands.py | 29 ++++ .../commands/developer_commands_config.py | 6 + task-sdk/docs/api.rst | 102 +++++++++++-- task-sdk/docs/conf.py | 21 ++- task-sdk/docs/index.rst | 135 ++++++++++++++++++ task-sdk/docs/requirements.txt | 3 + task-sdk/src/airflow/sdk/__init__.py | 11 ++ task-sdk/src/airflow/sdk/__init__.pyi | 4 + task-sdk/tests/test_docs_inventory.py | 82 +++++++++++ task-sdk/tests/test_public_api.py | 57 ++++++++ 23 files changed, 627 insertions(+), 87 deletions(-) create mode 100644 dev/breeze/doc/images/output_build-task-sdk-docs.svg create mode 100644 dev/breeze/doc/images/output_build-task-sdk-docs.txt create mode 100644 task-sdk/docs/requirements.txt create mode 100644 task-sdk/tests/test_docs_inventory.py create mode 100644 task-sdk/tests/test_public_api.py diff --git a/airflow-core/docs/index.rst b/airflow-core/docs/index.rst index d6773d3055793..ebde796e28b98 100644 --- a/airflow-core/docs/index.rst +++ b/airflow-core/docs/index.rst @@ -32,6 +32,15 @@ Airflow workflows are defined entirely in Python. This "workflows as code" appro - **Extensible**: The Airflow framework includes a wide range of built-in operators and can be extended to fit your needs. 
- **Flexible**: Airflow leverages the `Jinja <https://jinja.palletsprojects.com>`_ templating engine, allowing rich customizations. +.. _task-sdk-docs: + +Task SDK +======== + +For Airflow Task SDK (Airflow 3.x+), see the standalone reference & tutorial site: + + https://airflow.apache.org/docs/task-sdk/stable/ + Dags ----------------------------------------- diff --git a/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py b/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py index 9f4f45511cf04..654b6bd5af2ab 100644 --- a/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py +++ b/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py @@ -23,6 +23,7 @@ from airflow.sdk import DAG, task +# [START example_dynamic_task_mapping] with DAG(dag_id="example_dynamic_task_mapping", schedule=None, start_date=datetime(2022, 3, 4)) as dag: @task @@ -56,3 +57,5 @@ def add_10(num): _get_nums = get_nums() _times_2 = times_2.expand(num=_get_nums) add_10.expand(num=_times_2) + +# [END example_dynamic_task_mapping] diff --git a/airflow-core/src/airflow/example_dags/example_simplest_dag.py b/airflow-core/src/airflow/example_dags/example_simplest_dag.py index fad6f57950a9e..7d0fe0ffbda76 100644 --- a/airflow-core/src/airflow/example_dags/example_simplest_dag.py +++ b/airflow-core/src/airflow/example_dags/example_simplest_dag.py @@ -21,6 +21,7 @@ from airflow.sdk import dag, task +# [START simplest_dag] @dag def example_simplest_dag(): @task @@ -30,4 +31,6 @@ def my_task(): my_task() +# [END simplest_dag] + example_simplest_dag() diff --git a/dev/breeze/doc/03_developer_tasks.rst b/dev/breeze/doc/03_developer_tasks.rst index 86097163f2715..596513d08c0e1 100644 --- a/dev/breeze/doc/03_developer_tasks.rst +++ b/dev/breeze/doc/03_developer_tasks.rst @@ -242,6 +242,23 @@ These are all available flags of ``build-docs`` command: :width: 100% :alt: Breeze build documentation +Building Task SDK documentation +------------------------------- +To build
documentation for the Apache Airflow Task SDK, use the ``build-task-sdk-docs`` command: + +.. code-block:: bash + + breeze build-task-sdk-docs + +Results of the build can be found in the ``task-sdk/docs/_build`` folder. + +These are all available flags of the ``build-task-sdk-docs`` command: + +.. image:: ./images/output_build-task-sdk-docs.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_build-task-sdk-docs.svg + :width: 100% + :alt: Breeze build-task-sdk-docs + While you can use full name of doc package starting with ``apache-airflow-providers-`` in package filter, You can use shorthand version - just take the remaining part and replace every ``dash("-")`` with a ``dot(".")``. diff --git a/dev/breeze/doc/images/output-commands.svg b/dev/breeze/doc/images/output-commands.svg index 234bfa5ee81a3..7452012431243 100644 --- a/dev/breeze/doc/images/output-commands.svg +++ b/dev/breeze/doc/images/output-commands.svg @@ -1,4 +1,4 @@ - +