From 8d68708e4b8d4a8ce7641448a6cea39f51a911b6 Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Thu, 15 Dec 2022 14:41:06 -0600 Subject: [PATCH 1/9] Updated serialized_dag to pass `usedforsecurity=False` Updated app to support configuring caching hash method Added tests --- airflow/models/serialized_dag.py | 2 +- airflow/www/app.py | 23 +++++++++++++++++++++++ tests/www/test_app.py | 25 +++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 53e5e2ccbd223..ddba2faee340d 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -102,7 +102,7 @@ def __init__(self, dag: DAG, processor_subdir: str | None = None): dag_data = SerializedDAG.to_dict(dag) dag_data_json = json.dumps(dag_data, sort_keys=True).encode("utf-8") - self.dag_hash = hashlib.md5(dag_data_json).hexdigest() + self.dag_hash = hashlib.new("md5", data=dag_data_json, usedforsecurity=False).hexdigest() if COMPRESS_SERIALIZED_DAGS: self._data = None diff --git a/airflow/www/app.py b/airflow/www/app.py index 8d8bc9b069787..e2476f9290469 100644 --- a/airflow/www/app.py +++ b/airflow/www/app.py @@ -17,6 +17,7 @@ # under the License. from __future__ import annotations +import hashlib import warnings from datetime import timedelta from tempfile import gettempdir @@ -134,7 +135,29 @@ def create_app(config=None, testing=False): init_robots(flask_app) + # Configure caching + webserver_caching_hash_method = conf.get(section="webserver", key="CACHING_HASH_METHOD", fallback=None) cache_config = {"CACHE_TYPE": "flask_caching.backends.filesystem", "CACHE_DIR": gettempdir()} + + if ( + webserver_caching_hash_method is not None + and webserver_caching_hash_method.casefold() != "md5".casefold() + ): + if webserver_caching_hash_method.casefold() == "sha512".casefold(): + cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha512} + elif webserver_caching_hash_method.casefold() == "sha384".casefold(): + cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha384} + elif webserver_caching_hash_method.casefold() == "sha256".casefold(): + cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha256} + elif webserver_caching_hash_method.casefold() == "sha224".casefold(): + cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha224} + elif webserver_caching_hash_method.casefold() == "sha1".casefold(): + cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha1} + else: + raise AirflowConfigException( + f"Unsupported webserver caching hash method: `{webserver_caching_hash_method}`." + ) + Cache(app=flask_app, config=cache_config) init_flash_views(flask_app) diff --git a/tests/www/test_app.py b/tests/www/test_app.py index ae6c0e265575c..bf25a30e119f1 100644 --- a/tests/www/test_app.py +++ b/tests/www/test_app.py @@ -17,6 +17,7 @@ # under the License. from __future__ import annotations +import hashlib import runpy import sys from datetime import timedelta @@ -27,6 +28,7 @@ from werkzeug.test import create_environ from werkzeug.wrappers import Response +from airflow.exceptions import AirflowConfigException from airflow.www import app as application from tests.test_utils.config import conf_vars from tests.test_utils.decorators import dont_initialize_flask_app_submodules @@ -228,6 +230,29 @@ def test_correct_default_is_set_for_cookie_samesite(self): app = application.cached_app(testing=True) assert app.config["SESSION_COOKIE_SAMESITE"] == "Lax" + @pytest.mark.parametrize( + "hash_method, result, exception", + [ + ("sha512", hashlib.sha512, None), + ("sha384", hashlib.sha384, None), + ("sha256", hashlib.sha256, None), + ("sha224", hashlib.sha224, None), + ("sha1", hashlib.sha1, None), + ("md5", hashlib.md5, None), + (None, hashlib.md5, None), + ("invalid", None, AirflowConfigException), + ], + ) + @dont_initialize_flask_app_submodules + def test_should_respect_caching_hash_method(self, hash_method, result, exception): + with conf_vars({("webserver", "caching_hash_method"): hash_method}): + if exception: + with pytest.raises(expected_exception=exception): + app = application.cached_app(testing=True) + else: + app = application.cached_app(testing=True) + assert next(iter(app.extensions["cache"])).cache._hash_method == result + class TestFlaskCli: @dont_initialize_flask_app_submodules(skip_all_except=["init_appbuilder"]) From 7d98b5b7cfb9c662bc717913428192b977cc0fb9 Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Fri, 20 Jan 2023 10:17:07 -0600 Subject: [PATCH 2/9] - Added caching_hash_method to config - Cleaned up app cache code --- airflow/config_templates/config.yml | 26 ++++++---- airflow/config_templates/default_airflow.cfg | 4 ++ airflow/www/app.py | 29 +---------- airflow/www/extensions/init_cache.py | 52 ++++++++++++++++++++ 4 files changed, 75 insertions(+), 36 deletions(-) create mode 100644 airflow/www/extensions/init_cache.py diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index eed20e351c9a0..dbbf26d8645c4 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -16,7 +16,6 @@ # under the License. --- - core: description: ~ options: @@ -271,8 +270,8 @@ core: default: "0" default_task_retry_delay: description: | - The number of seconds each task is going to wait by default between retries. Can be overridden at - dag or task level. + The number of seconds each task is going to wait by default between retries. Can be overridden at + dag or task level. version_added: 2.4.0 type: integer example: ~ @@ -419,7 +418,7 @@ core: version_added: 2.4.0 type: string default: ~ - example: 'airflow.datasets.manager.DatasetManager' + example: "airflow.datasets.manager.DatasetManager" dataset_manager_kwargs: description: Kwargs to supply to dataset manager. version_added: 2.4.0 @@ -438,7 +437,7 @@ core: version_added: 2.6.0 type: string default: ~ - example: 'http://localhost:8080' + example: "http://localhost:8080" database: description: ~ options: @@ -744,9 +743,10 @@ logging: version_added: 2.0.0 type: string example: ~ - default: "dag_id={{{{ ti.dag_id }}}}/run_id={{{{ ti.run_id }}}}/task_id={{{{ ti.task_id }}}}/\ - {{%% if ti.map_index >= 0 %%}}map_index={{{{ ti.map_index }}}}/{{%% endif %%}}\ - attempt={{{{ try_number }}}}.log" + default: | + "dag_id={{{{ ti.dag_id }}}}/run_id={{{{ ti.run_id }}}}/task_id={{{{ ti.task_id }}}}/\ + {{%% if ti.map_index >= 0 %%}}map_index={{{{ ti.map_index }}}}/{{%% endif %%}}\ + attempt={{{{ try_number }}}}.log" log_processor_filename_template: description: | Formatting for how airflow generates file names for log @@ -1020,7 +1020,8 @@ api: version_added: 2.0.0 default: "100" google_oauth2_audience: - description: The intended audience for JWT token credentials used for authorization. + description: | + The intended audience for JWT token credentials used for authorization. This value must match on the client and server sides. If empty, audience will not be tested. type: string @@ -1591,6 +1592,13 @@ webserver: type: boolean example: ~ default: "False" + caching_hash_method: + description: | + The caching algorithm used by the webserver. + version_added: + type: string + example: "sha256" + default: "md5" email: description: | diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index a4b03c8764dfe..92f3b127556b8 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -818,6 +818,10 @@ enable_swagger_ui = True # Boolean for running Internal API in the webserver. run_internal_api = False +# The caching algorithm used by the webserver. +# Example: caching_hash_method = sha256 +caching_hash_method = md5 + [email] # Configuration email backend and whether to diff --git a/airflow/www/app.py b/airflow/www/app.py index e2476f9290469..3ccba00a59649 100644 --- a/airflow/www/app.py +++ b/airflow/www/app.py @@ -17,14 +17,11 @@ # under the License. from __future__ import annotations -import hashlib import warnings from datetime import timedelta -from tempfile import gettempdir from flask import Flask from flask_appbuilder import SQLA -from flask_caching import Cache from flask_wtf.csrf import CSRFProtect from sqlalchemy.engine.url import make_url @@ -37,6 +34,7 @@ from airflow.utils.json import AirflowJsonProvider from airflow.www.extensions.init_appbuilder import init_appbuilder from airflow.www.extensions.init_appbuilder_links import init_appbuilder_links +from airflow.www.extensions.init_cache import init_cache from airflow.www.extensions.init_dagbag import init_dagbag from airflow.www.extensions.init_jinja_globals import init_jinja_globals from airflow.www.extensions.init_manifest_files import configure_manifest_files @@ -135,30 +133,7 @@ def create_app(config=None, testing=False): init_robots(flask_app) - # Configure caching - webserver_caching_hash_method = conf.get(section="webserver", key="CACHING_HASH_METHOD", fallback=None) - cache_config = {"CACHE_TYPE": "flask_caching.backends.filesystem", "CACHE_DIR": gettempdir()} - - if ( - webserver_caching_hash_method is not None - and webserver_caching_hash_method.casefold() != "md5".casefold() - ): - if webserver_caching_hash_method.casefold() == "sha512".casefold(): - cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha512} - elif webserver_caching_hash_method.casefold() == "sha384".casefold(): - cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha384} - elif webserver_caching_hash_method.casefold() == "sha256".casefold(): - cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha256} - elif webserver_caching_hash_method.casefold() == "sha224".casefold(): - cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha224} - elif webserver_caching_hash_method.casefold() == "sha1".casefold(): - cache_config["CACHE_OPTIONS"] = {"hash_method": hashlib.sha1} - else: - raise AirflowConfigException( - f"Unsupported webserver caching hash method: `{webserver_caching_hash_method}`." - ) - - Cache(app=flask_app, config=cache_config) + init_cache(flask_app) init_flash_views(flask_app) diff --git a/airflow/www/extensions/init_cache.py b/airflow/www/extensions/init_cache.py new file mode 100644 index 0000000000000..84d952dd7120e --- /dev/null +++ b/airflow/www/extensions/init_cache.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import hashlib +from tempfile import gettempdir + +from flask_caching import Cache + +from airflow.configuration import conf +from airflow.exceptions import AirflowConfigException + +HASH_METHOD_MAPPING = { + "md5": hashlib.md5, + "sha1": hashlib.sha1, + "sha224": hashlib.sha224, + "sha256": hashlib.sha256, + "sha384": hashlib.sha384, + "sha512": hashlib.sha512, +} + + +def init_cache(app): + webserver_caching_hash_method = conf.get( + section="webserver", key="CACHING_HASH_METHOD", fallback="md5" + ).casefold() + cache_config = {"CACHE_TYPE": "flask_caching.backends.filesystem", "CACHE_DIR": gettempdir()} + + mapped_hash_method = HASH_METHOD_MAPPING.get(webserver_caching_hash_method) + + if mapped_hash_method is None: + raise AirflowConfigException( + f"Unsupported webserver caching hash method: `{webserver_caching_hash_method}`." + ) + + cache_config["CACHE_OPTIONS"] = {"hash_method": mapped_hash_method} + + Cache(app=app, config=cache_config) From e5e2d2b4c3c6574518fbf9e649007ba3da6cd896 Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Fri, 20 Jan 2023 17:12:30 -0600 Subject: [PATCH 3/9] - Added hashlib_wrapper and switched serialized dag to use it - Fixed unintended change to config.yml --- airflow/compat/hashlib_wrapper.py | 41 ++++++++++++++++++++ airflow/config_templates/config.yml | 2 +- airflow/config_templates/default_airflow.cfg | 4 +- airflow/models/serialized_dag.py | 4 +- 4 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 airflow/compat/hashlib_wrapper.py diff --git a/airflow/compat/hashlib_wrapper.py b/airflow/compat/hashlib_wrapper.py new file mode 100644 index 0000000000000..06a7c073ef9b7 --- /dev/null +++ b/airflow/compat/hashlib_wrapper.py @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import hashlib +import inspect +from typing import Any + +# Check if "usedforsecurity" is available for hashlib +sig = inspect.getfullargspec(hashlib.new) +HAS_USEDFORSECURITY = "usedforsecurity" in sig.kwonlyargs + + +def md5(data: Any, used_for_security: bool = None): + """Safely allows calling the hashlib.md5 function with the "usedforsecurity" param. + + Args: + data (Any): The data to hash. + used_for_security (bool, optional): The value to pass to the md5 function's "usedforsecurity" param. Defaults to None. + + Returns: + _Hash: The hashed value. + """ + if HAS_USEDFORSECURITY and used_for_security is not None: + return hashlib.md5(data, usedforsecurity=used_for_security) + else: + return hashlib.md5(data) diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index dbbf26d8645c4..11742499062ae 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -743,7 +743,7 @@ logging: version_added: 2.0.0 type: string example: ~ - default: | + default: "dag_id={{{{ ti.dag_id }}}}/run_id={{{{ ti.run_id }}}}/task_id={{{{ ti.task_id }}}}/\ {{%% if ti.map_index >= 0 %%}}map_index={{{{ ti.map_index }}}}/{{%% endif %%}}\ attempt={{{{ try_number }}}}.log" diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index 92f3b127556b8..ef57cb1d11000 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -559,7 +559,9 @@ maximum_page_limit = 100 # If no limit is supplied, the OpenApi spec default is used. fallback_page_limit = 100 -# The intended audience for JWT token credentials used for authorization. This value must match on the client and server sides. If empty, audience will not be tested. +# The intended audience for JWT token credentials used for authorization. +# This value must match on the client and server sides. +# If empty, audience will not be tested. # Example: google_oauth2_audience = project-id-random-value.apps.googleusercontent.com google_oauth2_audience = diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index ddba2faee340d..3592008e98ac3 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -18,7 +18,6 @@ """Serialized DAG table in database.""" from __future__ import annotations -import hashlib import logging import zlib from datetime import datetime, timedelta @@ -28,6 +27,7 @@ from sqlalchemy.orm import Session, backref, foreign, relationship from sqlalchemy.sql.expression import func, literal +import airflow.compat.hashlib_wrapper as hashlib_wrapper from airflow.models.base import ID_LEN, Base from airflow.models.dag import DAG, DagModel from airflow.models.dagcode import DagCode @@ -102,7 +102,7 @@ def __init__(self, dag: DAG, processor_subdir: str | None = None): dag_data = SerializedDAG.to_dict(dag) dag_data_json = json.dumps(dag_data, sort_keys=True).encode("utf-8") - self.dag_hash = hashlib.new("md5", data=dag_data_json, usedforsecurity=False).hexdigest() + self.dag_hash = hashlib_wrapper.md5(dag_data_json, False).hexdigest() if COMPRESS_SERIALIZED_DAGS: self._data = None From 593a976f4c443a37c994e897c069c76c01832746 Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Mon, 23 Jan 2023 15:33:43 -0600 Subject: [PATCH 4/9] - Switched check for usedforsecurity, which only worked in certain cases - Added ignore for mypy --- airflow/compat/hashlib_wrapper.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/airflow/compat/hashlib_wrapper.py b/airflow/compat/hashlib_wrapper.py index 06a7c073ef9b7..015716a741c17 100644 --- a/airflow/compat/hashlib_wrapper.py +++ b/airflow/compat/hashlib_wrapper.py @@ -15,27 +15,31 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations import hashlib -import inspect +import sys from typing import Any # Check if "usedforsecurity" is available for hashlib -sig = inspect.getfullargspec(hashlib.new) -HAS_USEDFORSECURITY = "usedforsecurity" in sig.kwonlyargs +if sys.version_info >= (3, 9): + HAS_USEDFORSECURITY = True +else: + HAS_USEDFORSECURITY = False -def md5(data: Any, used_for_security: bool = None): +def md5(data: Any, used_for_security: bool | None = None): """Safely allows calling the hashlib.md5 function with the "usedforsecurity" param. Args: data (Any): The data to hash. - used_for_security (bool, optional): The value to pass to the md5 function's "usedforsecurity" param. Defaults to None. + used_for_security (bool, optional): The value to pass to the md5 function's "usedforsecurity" param. + Defaults to None. Returns: _Hash: The hashed value. """ if HAS_USEDFORSECURITY and used_for_security is not None: - return hashlib.md5(data, usedforsecurity=used_for_security) + return hashlib.md5(data, usedforsecurity=used_for_security) # type: ignore else: return hashlib.md5(data) From 2815baf31ae809514b9579028dda3208e2912c9c Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Sun, 29 Jan 2023 13:50:56 -0600 Subject: [PATCH 5/9] Revert unrelated config formatting --- airflow/config_templates/config.yml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 11742499062ae..3502d6429aa00 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -16,6 +16,7 @@ # under the License. --- + core: description: ~ options: @@ -270,8 +271,8 @@ core: default: "0" default_task_retry_delay: description: | - The number of seconds each task is going to wait by default between retries. Can be overridden at - dag or task level. + The number of seconds each task is going to wait by default between retries. Can be overridden at + dag or task level. version_added: 2.4.0 type: integer example: ~ @@ -418,7 +419,7 @@ core: version_added: 2.4.0 type: string default: ~ - example: "airflow.datasets.manager.DatasetManager" + example: 'airflow.datasets.manager.DatasetManager' dataset_manager_kwargs: description: Kwargs to supply to dataset manager. version_added: 2.4.0 @@ -437,7 +438,7 @@ core: version_added: 2.6.0 type: string default: ~ - example: "http://localhost:8080" + example: 'http://localhost:8080' database: description: ~ options: @@ -743,10 +744,9 @@ logging: version_added: 2.0.0 type: string example: ~ - default: - "dag_id={{{{ ti.dag_id }}}}/run_id={{{{ ti.run_id }}}}/task_id={{{{ ti.task_id }}}}/\ - {{%% if ti.map_index >= 0 %%}}map_index={{{{ ti.map_index }}}}/{{%% endif %%}}\ - attempt={{{{ try_number }}}}.log" + default: "dag_id={{{{ ti.dag_id }}}}/run_id={{{{ ti.run_id }}}}/task_id={{{{ ti.task_id }}}}/\ + {{%% if ti.map_index >= 0 %%}}map_index={{{{ ti.map_index }}}}/{{%% endif %%}}\ + attempt={{{{ try_number }}}}.log" log_processor_filename_template: description: | Formatting for how airflow generates file names for log @@ -1020,8 +1020,7 @@ api: version_added: 2.0.0 default: "100" google_oauth2_audience: - description: | - The intended audience for JWT token credentials used for authorization. + description: The intended audience for JWT token credentials used for authorization. This value must match on the client and server sides. If empty, audience will not be tested. type: string @@ -2663,4 +2662,4 @@ sensors: version_added: 2.3.0 type: float example: ~ - default: "604800" + default: "604800" \ No newline at end of file From 294bbf85cb1864a2d7fcf3a621b4b612dba77856 Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Sun, 29 Jan 2023 13:51:14 -0600 Subject: [PATCH 6/9] Include newsfragment for PR --- newsfragments/28846.misc.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/28846.misc.rst diff --git a/newsfragments/28846.misc.rst b/newsfragments/28846.misc.rst new file mode 100644 index 0000000000000..4a8cc49e87a77 --- /dev/null +++ b/newsfragments/28846.misc.rst @@ -0,0 +1 @@ +Various updates for FIPS-compliance when running Airflow in Python 3.9+. This includes a new webserver option, ``caching_hash_method``, for changing the default flask caching method. \ No newline at end of file From b611fe830708c716a2646d7c1753b5ffb0aa55cc Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Mon, 30 Jan 2023 14:10:03 -0600 Subject: [PATCH 7/9] Update hashlib_wrapper params and docs --- airflow/compat/hashlib_wrapper.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/airflow/compat/hashlib_wrapper.py b/airflow/compat/hashlib_wrapper.py index 015716a741c17..4e5600f2962f9 100644 --- a/airflow/compat/hashlib_wrapper.py +++ b/airflow/compat/hashlib_wrapper.py @@ -19,7 +19,6 @@ import hashlib import sys -from typing import Any # Check if "usedforsecurity" is available for hashlib if sys.version_info >= (3, 9): @@ -28,18 +27,17 @@ HAS_USEDFORSECURITY = False -def md5(data: Any, used_for_security: bool | None = None): - """Safely allows calling the hashlib.md5 function with the "usedforsecurity" param. - - Args: - data (Any): The data to hash. - used_for_security (bool, optional): The value to pass to the md5 function's "usedforsecurity" param. - Defaults to None. +def md5(data: bytes, usedforsecurity: bool | None = None): + """ + Safely allows calling the hashlib.md5 function with the "usedforsecurity" param. - Returns: - _Hash: The hashed value. + :param data: The data to hash. + :param used_for_security: The value to pass to the md5 function's "usedforsecurity" param. + Defaults to None. + :return: The hashed value. + :rtype: _Hash """ - if HAS_USEDFORSECURITY and used_for_security is not None: - return hashlib.md5(data, usedforsecurity=used_for_security) # type: ignore + if HAS_USEDFORSECURITY and usedforsecurity is not None: + return hashlib.md5(data, usedforsecurity=usedforsecurity) # type: ignore else: return hashlib.md5(data) From 00cc3d16f35ccf4aeb242c9a344af9581f588576 Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Mon, 30 Jan 2023 14:18:37 -0600 Subject: [PATCH 8/9] Update config documentation and run pre-commit --- airflow/config_templates/config.yml | 4 ++-- airflow/config_templates/default_airflow.cfg | 6 ++---- newsfragments/28846.misc.rst | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 3502d6429aa00..105a8f8a2a5b2 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -1593,7 +1593,7 @@ webserver: default: "False" caching_hash_method: description: | - The caching algorithm used by the webserver. + The caching algorithm used by the webserver. Must be a valid hashlib function name. version_added: type: string example: "sha256" @@ -2662,4 +2662,4 @@ sensors: version_added: 2.3.0 type: float example: ~ - default: "604800" \ No newline at end of file + default: "604800" diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index ef57cb1d11000..a0c1cfbc3011c 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -559,9 +559,7 @@ maximum_page_limit = 100 # If no limit is supplied, the OpenApi spec default is used. fallback_page_limit = 100 -# The intended audience for JWT token credentials used for authorization. -# This value must match on the client and server sides. -# If empty, audience will not be tested. +# The intended audience for JWT token credentials used for authorization. This value must match on the client and server sides. If empty, audience will not be tested. # Example: google_oauth2_audience = project-id-random-value.apps.googleusercontent.com google_oauth2_audience = @@ -820,7 +818,7 @@ enable_swagger_ui = True # Boolean for running Internal API in the webserver. run_internal_api = False -# The caching algorithm used by the webserver. +# The caching algorithm used by the webserver. Must be a valid hashlib function name. # Example: caching_hash_method = sha256 caching_hash_method = md5 diff --git a/newsfragments/28846.misc.rst b/newsfragments/28846.misc.rst index 4a8cc49e87a77..14d072c8779d4 100644 --- a/newsfragments/28846.misc.rst +++ b/newsfragments/28846.misc.rst @@ -1 +1 @@ -Various updates for FIPS-compliance when running Airflow in Python 3.9+. This includes a new webserver option, ``caching_hash_method``, for changing the default flask caching method. \ No newline at end of file +Various updates for FIPS-compliance when running Airflow in Python 3.9+. This includes a new webserver option, ``caching_hash_method``, for changing the default flask caching method. From fc0b3e426ad0215b99aa3c2cd7eb08dd17693158 Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Sat, 4 Feb 2023 17:52:39 -0600 Subject: [PATCH 9/9] Move and update hashlib_wrapper Moved hashlib_wrapper from compat to utils. Used PY39 variable. --- airflow/models/serialized_dag.py | 4 ++-- airflow/{compat => utils}/hashlib_wrapper.py | 13 ++++--------- 2 files changed, 6 insertions(+), 11 deletions(-) rename airflow/{compat => utils}/hashlib_wrapper.py (75%) diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 3592008e98ac3..3c3a299b2cfbd 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -27,7 +27,6 @@ from sqlalchemy.orm import Session, backref, foreign, relationship from sqlalchemy.sql.expression import func, literal -import airflow.compat.hashlib_wrapper as hashlib_wrapper from airflow.models.base import ID_LEN, Base from airflow.models.dag import DAG, DagModel from airflow.models.dagcode import DagCode @@ -35,6 +34,7 @@ from airflow.serialization.serialized_objects import DagDependency, SerializedDAG from airflow.settings import COMPRESS_SERIALIZED_DAGS, MIN_SERIALIZED_DAG_UPDATE_INTERVAL, json from airflow.utils import timezone +from airflow.utils.hashlib_wrapper import md5 from airflow.utils.session import provide_session from airflow.utils.sqlalchemy import UtcDateTime @@ -102,7 +102,7 @@ def __init__(self, dag: DAG, processor_subdir: str | None = None): dag_data = SerializedDAG.to_dict(dag) dag_data_json = json.dumps(dag_data, sort_keys=True).encode("utf-8") - self.dag_hash = hashlib_wrapper.md5(dag_data_json, False).hexdigest() + self.dag_hash = md5(dag_data_json, usedforsecurity=False).hexdigest() if COMPRESS_SERIALIZED_DAGS: self._data = None diff --git a/airflow/compat/hashlib_wrapper.py b/airflow/utils/hashlib_wrapper.py similarity index 75% rename from airflow/compat/hashlib_wrapper.py rename to airflow/utils/hashlib_wrapper.py index 4e5600f2962f9..0f756c0046261 100644 --- a/airflow/compat/hashlib_wrapper.py +++ b/airflow/utils/hashlib_wrapper.py @@ -18,26 +18,21 @@ from __future__ import annotations import hashlib -import sys -# Check if "usedforsecurity" is available for hashlib -if sys.version_info >= (3, 9): - HAS_USEDFORSECURITY = True -else: - HAS_USEDFORSECURITY = False +from airflow import PY39 -def md5(data: bytes, usedforsecurity: bool | None = None): +def md5(data: bytes, *, usedforsecurity: bool | None = None): """ Safely allows calling the hashlib.md5 function with the "usedforsecurity" param. :param data: The data to hash. - :param used_for_security: The value to pass to the md5 function's "usedforsecurity" param. + :param usedforsecurity: The value to pass to the md5 function's "usedforsecurity" param. Defaults to None. :return: The hashed value. :rtype: _Hash """ - if HAS_USEDFORSECURITY and usedforsecurity is not None: + if PY39 and usedforsecurity is not None: return hashlib.md5(data, usedforsecurity=usedforsecurity) # type: ignore else: return hashlib.md5(data)