diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 72672361a5cbe..ee246e1c0156e 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -1828,6 +1828,21 @@ webserver: type: boolean example: ~ default: "False" + allow_raw_html_descriptions: + description: | + A DAG author is able to provide any raw HTML into ``doc_md`` or params description in + ``description_md`` for text formatting. This includes potentially unsafe javascript. + Displaying the DAG or trigger form in web UI provides the DAG author the potential to + inject malicious code into clients' browsers. To ensure the web UI is safe by default, + raw HTML is disabled by default. If you trust your DAG authors, you can enable HTML + support in markdown by setting this option to True. + + This parameter also enables the deprecated fields ``description_html`` and + ``custom_html_form`` in DAG params until the feature is removed in a future version. + version_added: 2.8.0 + type: boolean + example: "False" + default: "False" email: description: | Configuration email backend and whether to diff --git a/airflow/example_dags/example_params_ui_tutorial.py b/airflow/example_dags/example_params_ui_tutorial.py index 12992c545c25a..489a4681c91b7 100644 --- a/airflow/example_dags/example_params_ui_tutorial.py +++ b/airflow/example_dags/example_params_ui_tutorial.py @@ -47,18 +47,17 @@ "flag": False, "a_simple_list": ["one", "two", "three", "actually one value is made per line"], # But of course you might want to have it nicer! Let's add some description to parameters. - # Note if you can add any HTML formatting to the description, you need to use the description_html + # Note if you want to add any Markdown formatting to the description, you need to use the description_md # attribute. "most_loved_number": Param( 42, type="integer", title="Your favorite number", - description_html="""Everybody should have a favorite number. 
Not only math teachers. - If you can not think of any at the moment please think of the 42 which is very famous because - of the book - - The Hitchhiker's Guide to the Galaxy""", + description_md="Everybody should have a **favorite** number. Not only _math teachers_. " + "If you can not think of any at the moment please think of the 42 which is very famous because " + "of the book [The Hitchhiker's Guide to the Galaxy]" + "(https://en.wikipedia.org/wiki/Phrases_from_The_Hitchhiker%27s_Guide_to_the_Galaxy#" + "The_Answer_to_the_Ultimate_Question_of_Life,_the_Universe,_and_Everything_is_42).", ), # If you want to have a selection list box then you can use the enum feature of JSON schema "pick_one": Param( @@ -177,8 +176,8 @@ "optional text, you can trigger also w/o text", type=["null", "string"], title="Optional text field", - description_html="This field is optional. As field content is JSON schema validated you must " - "allow the null type.", + description_md="This field is optional. As field content is JSON schema validated you must " + "allow the `null` type.", ), # You can arrange the entry fields in sections so that you can have a better overview for the user # Therefore you can add the "section" attribute. @@ -188,10 +187,10 @@ "length-checked-field", type="string", title="Text field with length check", - description_html="""This field is required. And you need to provide something between 10 and 30 - characters. See the - - JSON schema description (string) in for more details""", + description_md="""This field is required. And you need to provide something between 10 and 20 + characters. See the JSON + [schema description (string)](https://json-schema.org/understanding-json-schema/reference/string.html) + for more details""", minLength=10, maxLength=20, section="JSON Schema validation options", @@ -200,9 +199,10 @@ 100, type="number", title="Number field with value check", - description_html="""This field is required. 
You need to provide any number between 64 and 128. - See the - JSON schema description (numbers) in for more details""", + description_md="""This field is required. You need to provide any number between 64 and 128. + See the JSON + [schema description (numbers)](https://json-schema.org/understanding-json-schema/reference/numeric.html) + for more details""", minimum=64, maximum=128, section="JSON Schema validation options", @@ -217,9 +217,9 @@ ), "array_of_objects": Param( [{"name": "account_name", "country": "country_name"}], - "Array with complex objects and validation rules. " - "See JSON Schema validation options in specs.", + description_md="Array with complex objects and validation rules. " + "See [JSON Schema validation options in specs]" + "(https://json-schema.org/understanding-json-schema/reference/array.html#items).", type="array", title="JSON array field", items={ @@ -233,69 +233,6 @@ # then you can use the JSON schema option of passing constant values. These parameters # will not be displayed but passed to the DAG "hidden_secret_field": Param("constant value", const="constant value"), - # Finally besides the standard provided field generator you can have you own HTML form code - # injected - but be careful, you can also mess-up the layout! - "color_picker": Param( - "#FF8800", - type="string", - title="Pick a color", - description_html="""This is a special HTML widget as custom implementation in the DAG code. - It is templated with the following parameter to render proper HTML form fields: - - Example: <input name='{name}' value='{value}' onchange='updateJSONconf()' /> - """, - custom_html_form=""" -
- - - - -
 
- -
- - - -
- - - -
- """, - section="Special advanced stuff with form fields", - ), }, ) as dag: diff --git a/airflow/www/utils.py b/airflow/www/utils.py index 8c2282a31ca97..5fe8a7ccb769e 100644 --- a/airflow/www/utils.py +++ b/airflow/www/utils.py @@ -38,6 +38,7 @@ from sqlalchemy import delete, func, select, types from sqlalchemy.ext.associationproxy import AssociationProxy +from airflow.configuration import conf from airflow.exceptions import RemovedInAirflow3Warning from airflow.models import errors from airflow.models.dagrun import DagRun @@ -154,16 +155,16 @@ def get_mapped_summary(parent_instance, task_instances): def get_dag_run_conf( dag_run_conf: Any, *, json_encoder: type[json.JSONEncoder] = json.JSONEncoder ) -> tuple[str | None, bool]: - conf: str | None = None + result: str | None = None conf_is_json: bool = False if isinstance(dag_run_conf, str): - conf = dag_run_conf + result = dag_run_conf elif isinstance(dag_run_conf, (dict, list)) and any(dag_run_conf): - conf = json.dumps(dag_run_conf, sort_keys=True, cls=json_encoder, ensure_ascii=False) + result = json.dumps(dag_run_conf, sort_keys=True, cls=json_encoder, ensure_ascii=False) conf_is_json = True - return conf, conf_is_json + return result, conf_is_json def encode_dag_run( @@ -172,7 +173,7 @@ def encode_dag_run( if not dag_run: return None - conf, conf_is_json = get_dag_run_conf(dag_run.conf, json_encoder=json_encoder) + dag_run_conf, conf_is_json = get_dag_run_conf(dag_run.conf, json_encoder=json_encoder) return { "run_id": dag_run.run_id, @@ -186,7 +187,7 @@ def encode_dag_run( "run_type": dag_run.run_type, "last_scheduling_decision": datetime_to_string(dag_run.last_scheduling_decision), "external_trigger": dag_run.external_trigger, - "conf": conf, + "conf": dag_run_conf, "conf_is_json": conf_is_json, "note": dag_run.note, } @@ -613,7 +614,7 @@ def json_render(obj, lexer): def wrapped_markdown(s, css_class="rich_doc"): """Convert a Markdown string to HTML.""" - md = MarkdownIt("gfm-like") + md = 
MarkdownIt("gfm-like", {"html": conf.getboolean("webserver", "allow_raw_html_descriptions")}) if s is None: return None s = textwrap.dedent(s) diff --git a/airflow/www/views.py b/airflow/www/views.py index 691a11f2d5cec..3e4ed75a5953c 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -1956,30 +1956,69 @@ def trigger(self, dag_id: str, session: Session = NEW_SESSION): # Prepare form fields with param struct details to render a proper form with schema information form_fields = {} + allow_raw_html_descriptions = conf.getboolean("webserver", "allow_raw_html_descriptions") + form_trust_problems = [] for k, v in dag.params.items(): form_fields[k] = v.dump() + form_field: dict = form_fields[k] # If no schema is provided, auto-detect on default values - if "schema" not in form_fields[k]: - form_fields[k]["schema"] = {} - if "type" not in form_fields[k]["schema"]: - if isinstance(form_fields[k]["value"], bool): - form_fields[k]["schema"]["type"] = "boolean" - elif isinstance(form_fields[k]["value"], int): - form_fields[k]["schema"]["type"] = ["integer", "null"] - elif isinstance(form_fields[k]["value"], list): - form_fields[k]["schema"]["type"] = ["array", "null"] - elif isinstance(form_fields[k]["value"], dict): - form_fields[k]["schema"]["type"] = ["object", "null"] - # Mark markup fields as safe - if ( - "description_html" in form_fields[k]["schema"] - and form_fields[k]["schema"]["description_html"] - ): - form_fields[k]["description"] = Markup(form_fields[k]["schema"]["description_html"]) - if "custom_html_form" in form_fields[k]["schema"]: - form_fields[k]["schema"]["custom_html_form"] = Markup( - form_fields[k]["schema"]["custom_html_form"] - ) + if "schema" not in form_field: + form_field["schema"] = {} + form_field_schema: dict = form_field["schema"] + if "type" not in form_field_schema: + form_field_value = form_field["value"] + if isinstance(form_field_value, bool): + form_field_schema["type"] = "boolean" + elif isinstance(form_field_value, int): + 
form_field_schema["type"] = ["integer", "null"] + elif isinstance(form_field_value, list): + form_field_schema["type"] = ["array", "null"] + elif isinstance(form_field_value, dict): + form_field_schema["type"] = ["object", "null"] + # Mark HTML fields as safe if allowed + if allow_raw_html_descriptions: + if "description_html" in form_field_schema: + form_field["description"] = Markup(form_field_schema["description_html"]) + if "custom_html_form" in form_field_schema: + form_field_schema["custom_html_form"] = Markup(form_field_schema["custom_html_form"]) + else: + if "description_html" in form_field_schema and "description_md" not in form_field_schema: + form_trust_problems.append(f"Field {k} uses HTML description") + form_field["description"] = form_field_schema.pop("description_html") + if "custom_html_form" in form_field_schema: + form_trust_problems.append(f"Field {k} uses custom HTML form definition") + form_field_schema.pop("custom_html_form") + if "description_md" in form_field_schema: + form_field["description"] = wwwutils.wrapped_markdown(form_field_schema["description_md"]) + if form_trust_problems: + flash( + Markup( + "At least one field in the trigger form uses a raw HTML form definition. This is not allowed for " + "security. Please switch to markdown description via description_md. " + "Raw HTML is deprecated and must be enabled via " + "webserver.allow_raw_html_descriptions configuration parameter. Using plain text " + "as fallback for these fields. " + f"" + ), + "warning", + ) + if allow_raw_html_descriptions and any("description_html" in p.schema for p in dag.params.values()): + flash( + Markup( + "The form params use raw HTML in description_html which is deprecated. " + "Please migrate to description_md." + ), + "warning", + ) + if allow_raw_html_descriptions and any("custom_html_form" in p.schema for p in dag.params.values()): + flash( + Markup( + "The form params use custom_html_form definition. 
" + "This is deprecated with Airflow 2.8.0 and will be removed in a future release." + ), + "warning", + ) + ui_fields_defined = any("const" not in f["schema"] for f in form_fields.values()) show_trigger_form_if_no_params = conf.getboolean("webserver", "show_trigger_form_if_no_params") diff --git a/docs/apache-airflow/core-concepts/params.rst b/docs/apache-airflow/core-concepts/params.rst index b2b95252ec719..72eb058d4b74c 100644 --- a/docs/apache-airflow/core-concepts/params.rst +++ b/docs/apache-airflow/core-concepts/params.rst @@ -173,6 +173,8 @@ JSON Schema Validation Use Params to Provide a Trigger UI Form --------------------------------------- +.. versionadded:: 2.6.0 + :class:`~airflow.models.dag.DAG` level params are used to render a user friendly trigger form. This form is provided when a user clicks on the "Trigger DAG" button. @@ -189,8 +191,8 @@ The following features are supported in the Trigger UI Form: - The :class:`~airflow.models.param.Param` attribute ``title`` is used to render the form field label of the entry box. If no ``title`` is defined the parameter name/key is used instead. - The :class:`~airflow.models.param.Param` attribute ``description`` is rendered below an entry field as help text in gray color. - If you want to provide HTML tags for special formatting or links you need to use the Param attribute - ``description_html``, see tutorial DAG ``example_params_ui_tutorial`` for an example. + If you want to provide special formatting or links you need to use the Param attribute + ``description_md``. See tutorial DAG ``example_params_ui_tutorial`` for an example. - The :class:`~airflow.models.param.Param` attribute ``type`` influences how a field is rendered. The following types are supported: .. list-table:: @@ -313,7 +315,6 @@ The following features are supported in the Trigger UI Form: The ``const`` value must match the default value to pass `JSON Schema validation `_. 
- On the bottom of the form the generated JSON configuration can be expanded. If you want to change values manually, the JSON configuration can be adjusted. Changes are overridden when form fields change. -- If you want to render custom HTML as form on top of the provided features, you can use the ``custom_html_form`` attribute. .. note:: If the field is required the default value must be valid according to the schema as well. If the DAG is defined with @@ -324,9 +325,17 @@ For examples also please take a look to two example DAGs provided: ``example_par .. image:: ../img/trigger-dag-tutorial-form.png .. versionadded:: 2.7.0 - -The trigger form can also be forced to be displayed also if no params are defined using the configuration switch -``webserver.show_trigger_form_if_no_params``. + The trigger form can also be forced to be displayed even if no params are defined using the configuration switch + ``webserver.show_trigger_form_if_no_params``. + +.. versionchanged:: 2.8.0 + By default custom HTML is not allowed to prevent injection of scripts or other malicious HTML code. If you trust your DAG authors + you can change the trust level of parameter descriptions to allow raw HTML by setting the configuration entry + ``webserver.allow_raw_html_descriptions`` to ``True``. With the default setting all HTML will be displayed as plain text. + This relates to the previous feature to enable rich formatting with the attribute ``description_html`` which is now superseded + with the attribute ``description_md``. + Custom form elements using the attribute ``custom_html_form`` allow a DAG author to specify raw HTML form templates. These + custom HTML form elements are deprecated as of version 2.8.0. 
Disabling Runtime Param Modification ------------------------------------ diff --git a/newsfragments/35460.significant.rst b/newsfragments/35460.significant.rst new file mode 100644 index 0000000000000..d29481d219a6f --- /dev/null +++ b/newsfragments/35460.significant.rst @@ -0,0 +1,10 @@ +Raw HTML code in DAG docs and DAG params descriptions is disabled by default + +To ensure that no malicious javascript can be injected with DAG descriptions or trigger UI forms by DAG authors +a new parameter ``webserver.allow_raw_html_descriptions`` was added with default value of ``False``. +If you trust your DAG authors' code and want to allow using raw HTML in DAG descriptions and params, you can restore the previous +behavior by setting the configuration value to ``True``. + +To ensure Airflow is secure by default, the raw HTML support in trigger UI has been superseded by markdown support via +the ``description_md`` attribute. If you have been using ``description_html`` please migrate to ``description_md``. +The ``custom_html_form`` is now deprecated. diff --git a/tests/www/test_utils.py b/tests/www/test_utils.py index 19941c15e0d2d..dfd8b563dc415 100644 --- a/tests/www/test_utils.py +++ b/tests/www/test_utils.py @@ -32,6 +32,7 @@ from airflow.utils import json as utils_json from airflow.www import utils from airflow.www.utils import DagRunCustomSQLAInterface, json_f, wrapped_markdown +from tests.test_utils.config import conf_vars class TestUtils: @@ -386,8 +387,9 @@ def test_wrapped_markdown_with_nested_list(self): ) def test_wrapped_markdown_with_collapsible_section(self): - rendered = wrapped_markdown( - """ + with conf_vars({("webserver", "allow_raw_html_descriptions"): "true"}): + rendered = wrapped_markdown( + """ # A collapsible section with markdown
Click to expand! @@ -399,10 +401,10 @@ def test_wrapped_markdown_with_collapsible_section(self): * Sub bullets
""" - ) + ) - assert ( - """

A collapsible section with markdown

+ assert ( + """

A collapsible section with markdown

Click to expand!

Heading

@@ -417,8 +419,20 @@ def test_wrapped_markdown_with_collapsible_section(self):
""" - == rendered - ) + == rendered + ) + + @pytest.mark.parametrize("allow_html", [False, True]) + def test_wrapped_markdown_with_raw_html(self, allow_html): + with conf_vars({("webserver", "allow_raw_html_descriptions"): str(allow_html)}): + HTML = "test raw HTML" + rendered = wrapped_markdown(HTML) + if allow_html: + assert HTML in rendered + else: + from markupsafe import escape + + assert escape(HTML) in rendered @pytest.mark.db_test diff --git a/tests/www/views/test_views_trigger_dag.py b/tests/www/views/test_views_trigger_dag.py index c48e053639fd7..65ad8734d5140 100644 --- a/tests/www/views/test_views_trigger_dag.py +++ b/tests/www/views/test_views_trigger_dag.py @@ -31,7 +31,7 @@ from airflow.utils.types import DagRunType from tests.test_utils.api_connexion_utils import create_test_client from tests.test_utils.config import conf_vars -from tests.test_utils.www import check_content_in_response +from tests.test_utils.www import check_content_in_response, check_content_not_in_response pytestmark = pytest.mark.db_test @@ -236,6 +236,55 @@ def test_trigger_dag_params_render(admin_client, dag_maker, session, app, monkey ) +@pytest.mark.parametrize("allow_html", [False, True]) +def test_trigger_dag_html_allow(admin_client, dag_maker, session, app, monkeypatch, allow_html): + """ + Test that HTML is escaped per default in description. + """ + from markupsafe import escape + + DAG_ID = "params_dag" + HTML_DESCRIPTION1 = "HTML raw code." + HTML_DESCRIPTION2 = "HTML in md text." 
+ expect_escape = not allow_html + with conf_vars({("webserver", "allow_raw_html_descriptions"): str(allow_html)}): + param1 = Param( + 42, + description_html=HTML_DESCRIPTION1, + type="integer", + minimum=1, + maximum=100, + ) + param2 = Param( + 42, + description_md=HTML_DESCRIPTION2, + type="integer", + minimum=1, + maximum=100, + ) + with monkeypatch.context() as m: + with dag_maker( + dag_id=DAG_ID, serialized=True, session=session, params={"param1": param1, "param2": param2} + ): + EmptyOperator(task_id="task1") + + m.setattr(app, "dag_bag", dag_maker.dagbag) + resp = admin_client.get(f"dags/{DAG_ID}/trigger") + + if expect_escape: + check_content_in_response(escape(HTML_DESCRIPTION1), resp) + check_content_in_response(escape(HTML_DESCRIPTION2), resp) + check_content_in_response( + "At least one field in the trigger form uses a raw HTML form definition.", resp + ) + else: + check_content_in_response(HTML_DESCRIPTION1, resp) + check_content_in_response(HTML_DESCRIPTION2, resp) + check_content_not_in_response( + "At least one field in the trigger form uses a raw HTML form definition.", resp + ) + + def test_trigger_endpoint_uses_existing_dagbag(admin_client): """ Test that Trigger Endpoint uses the DagBag already created in views.py