From 1d6ce26c004e2c79ed399209e1f071c095338edf Mon Sep 17 00:00:00 2001 From: pierrejeambrun Date: Wed, 21 May 2025 11:05:04 +0200 Subject: [PATCH 1/3] Move secret_key config to api section --- .../logging-monitoring/logging-tasks.rst | 2 +- .../docs/cli-and-env-variables-ref.rst | 2 +- airflow-core/docs/configurations-ref.rst | 2 +- airflow-core/docs/howto/set-config.rst | 4 +-- .../src/airflow/api_fastapi/core_api/app.py | 2 +- .../api_fastapi/core_api/datamodels/dags.py | 2 +- .../airflow/cli/commands/config_command.py | 4 +++ .../src/airflow/config_templates/config.yml | 28 +++++++++---------- airflow-core/src/airflow/configuration.py | 1 + .../airflow/utils/log/file_task_handler.py | 2 +- airflow-core/src/airflow/utils/serve_logs.py | 2 +- .../tests/unit/core/test_configuration.py | 13 +++++---- contributing-docs/testing/k8s_tests.rst | 2 +- .../src/airflow_breeze/params/shell_params.py | 2 +- .../src/tests_common/pytest_plugin.py | 4 +-- .../fab/src/airflow/providers/fab/www/app.py | 2 +- 16 files changed, 41 insertions(+), 33 deletions(-) diff --git a/airflow-core/docs/administration-and-deployment/logging-monitoring/logging-tasks.rst b/airflow-core/docs/administration-and-deployment/logging-monitoring/logging-tasks.rst index 81def9b50cdd9..d506082aad996 100644 --- a/airflow-core/docs/administration-and-deployment/logging-monitoring/logging-tasks.rst +++ b/airflow-core/docs/administration-and-deployment/logging-monitoring/logging-tasks.rst @@ -178,7 +178,7 @@ Most task handlers send logs upon completion of a task. In order to view logs in In triggerer, logs are served unless the service is started with option ``--skip-serve-logs``. The server is running on the port specified by ``worker_log_server_port`` option in ``[logging]`` section, and option ``triggerer_log_server_port`` for triggerer. Defaults are 8793 and 8794, respectively. -Communication between the webserver and the worker is signed with the key specified by ``secret_key`` option in ``[webserver]`` section. You must ensure that the key matches so that communication can take place without problems. +Communication between the webserver and the worker is signed with the key specified by ``secret_key`` option in ``[api]`` section. You must ensure that the key matches so that communication can take place without problems. We are using `Gunicorn `__ as a WSGI server. Its configuration options can be overridden with the ``GUNICORN_CMD_ARGS`` env variable. For details, see `Gunicorn settings `__. diff --git a/airflow-core/docs/cli-and-env-variables-ref.rst b/airflow-core/docs/cli-and-env-variables-ref.rst index b679bc7b7986c..6336396911baf 100644 --- a/airflow-core/docs/cli-and-env-variables-ref.rst +++ b/airflow-core/docs/cli-and-env-variables-ref.rst @@ -77,7 +77,7 @@ Environment Variables * ``result_backend`` in ``[celery]`` section * ``password`` in ``[atlas]`` section * ``smtp_password`` in ``[smtp]`` section -* ``secret_key`` in ``[webserver]`` section +* ``secret_key`` in ``[api]`` section .. envvar:: AIRFLOW__{SECTION}__{KEY}_SECRET diff --git a/airflow-core/docs/configurations-ref.rst b/airflow-core/docs/configurations-ref.rst index e67b8e39ce2a5..83c5d8a8ed51a 100644 --- a/airflow-core/docs/configurations-ref.rst +++ b/airflow-core/docs/configurations-ref.rst @@ -24,7 +24,7 @@ can set in ``airflow.cfg`` file or using environment variables. Use the same configuration across all the Airflow components. While each component does not require all, some configurations need to be same otherwise they would not -work as expected. A good example for that is :ref:`secret_key` which +work as expected. A good example for that is :ref:`secret_key` which should be same on the Webserver and Worker to allow Webserver to fetch logs from Worker. The webserver key is also used to authorize requests to Celery workers when logs are retrieved. The token diff --git a/airflow-core/docs/howto/set-config.rst b/airflow-core/docs/howto/set-config.rst index 60b92f29c08ba..13a46e15f57e7 100644 --- a/airflow-core/docs/howto/set-config.rst +++ b/airflow-core/docs/howto/set-config.rst @@ -105,7 +105,7 @@ The following config options support this ``_cmd`` and ``_secret`` version: * ``result_backend`` in ``[celery]`` section * ``password`` in ``[atlas]`` section * ``smtp_password`` in ``[smtp]`` section -* ``secret_key`` in ``[webserver]`` section +* ``secret_key`` in ``[api]`` section The ``_cmd`` config options can also be set using a corresponding environment variable the same way the usual config options can. For example: @@ -159,7 +159,7 @@ the example below. .. note:: Use the same configuration across all the Airflow components. While each component does not require all, some configurations need to be same otherwise they would not - work as expected. A good example for that is :ref:`secret_key` which + work as expected. A good example for that is :ref:`secret_key` which should be same on the Webserver and Worker to allow Webserver to fetch logs from Worker. The webserver key is also used to authorize requests to Celery workers when logs are retrieved. The token diff --git a/airflow-core/src/airflow/api_fastapi/core_api/app.py b/airflow-core/src/airflow/api_fastapi/core_api/app.py index 90327ecc26677..d378e009bc0c9 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/app.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/app.py @@ -155,7 +155,7 @@ def init_config(app: FastAPI) -> None: # and 9 (slowest, most compression) app.add_middleware(GZipMiddleware, minimum_size=1024, compresslevel=5) - app.state.secret_key = get_signing_key("webserver", "secret_key") + app.state.secret_key = get_signing_key("api", "secret_key") def init_error_handlers(app: FastAPI) -> None: diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dags.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dags.py index f17b3d943f99c..c7d3c5b4b9004 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dags.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dags.py @@ -107,7 +107,7 @@ def get_timetable_summary(cls, tts: str | None) -> str | None: @property def file_token(self) -> str: """Return file token.""" - serializer = URLSafeSerializer(conf.get_mandatory_value("webserver", "secret_key")) + serializer = URLSafeSerializer(conf.get_mandatory_value("api", "secret_key")) payload = { "bundle_name": self.bundle_name, "relative_fileloc": self.relative_fileloc, diff --git a/airflow-core/src/airflow/cli/commands/config_command.py b/airflow-core/src/airflow/cli/commands/config_command.py index e69411111aff0..77ef371ff1212 100644 --- a/airflow-core/src/airflow/cli/commands/config_command.py +++ b/airflow-core/src/airflow/cli/commands/config_command.py @@ -410,6 +410,10 @@ def message(self) -> str | None: config=ConfigParameter("webserver", "base_url"), renamed_to=ConfigParameter("api", "base_url"), ), + ConfigChange( + config=ConfigParameter("webserver", "secret_key"), + renamed_to=ConfigParameter("api", "secret_key"), + ), ConfigChange( config=ConfigParameter("webserver", "web_server_host"), renamed_to=ConfigParameter("api", "host"), diff --git a/airflow-core/src/airflow/config_templates/config.yml b/airflow-core/src/airflow/config_templates/config.yml index 67a2b8a99b411..02137117bb658 100644 --- a/airflow-core/src/airflow/config_templates/config.yml +++ b/airflow-core/src/airflow/config_templates/config.yml @@ -1284,6 +1284,20 @@ secrets: api: description: ~ options: + secret_key: + description: | + Secret key used to run your api server. It should be as random as possible. However, when running + more than 1 instances of the api, make sure all of them use the same ``secret_key`` otherwise + one of them will error with "CSRF session token is missing". + The api key is also used to authorize requests to Celery workers when logs are retrieved. + The token generated using the secret key has a short expiry time though - make sure that time on + ALL the machines that you run airflow components on is synchronized (for example using ntpd) + otherwise you might get "forbidden" errors when the logs are accessed. + version_added: ~ + type: string + sensitive: true + example: ~ + default: "{SECRET_KEY}" expose_config: description: | Expose the configuration file in the web server. Set to ``non-sensitive-only`` to show all values @@ -1748,20 +1762,6 @@ operators: webserver: description: ~ options: - secret_key: - description: | - Secret key used to run your api server. It should be as random as possible. However, when running - more than 1 instances of webserver, make sure all of them use the same ``secret_key`` otherwise - one of them will error with "CSRF session token is missing". - The webserver key is also used to authorize requests to Celery workers when logs are retrieved. - The token generated using the secret key has a short expiry time though - make sure that time on - ALL the machines that you run airflow components on is synchronized (for example using ntpd) - otherwise you might get "forbidden" errors when the logs are accessed. - version_added: ~ - type: string - sensitive: true - example: ~ - default: "{SECRET_KEY}" instance_name: description: | Sets a custom page title for the DAGs overview page and site title for all pages diff --git a/airflow-core/src/airflow/configuration.py b/airflow-core/src/airflow/configuration.py index 9e896884a6a6c..5ec5bf770a18e 100644 --- a/airflow-core/src/airflow/configuration.py +++ b/airflow-core/src/airflow/configuration.py @@ -362,6 +362,7 @@ def sensitive_config_values(self) -> set[tuple[str, str]]: ("fab", "navbar_text_color"): ("webserver", "navbar_text_color", "3.0.2"), ("fab", "navbar_hover_color"): ("webserver", "navbar_hover_color", "3.0.2"), ("fab", "navbar_text_hover_color"): ("webserver", "navbar_text_hover_color", "3.0.2"), + ("api", "secret_key"): ("webserver", "secret_key", "3.0.2"), ("api", "grid_view_sorting_order"): ("webserver", "grid_view_sorting_order", "3.1.0"), ("api", "log_fetch_timeout_sec"): ("webserver", "log_fetch_timeout_sec", "3.1.0"), ("api", "hide_paused_dags_by_default"): ("webserver", "hide_paused_dags_by_default", "3.1.0"), diff --git a/airflow-core/src/airflow/utils/log/file_task_handler.py b/airflow-core/src/airflow/utils/log/file_task_handler.py index 4c87b9bf07fd4..8d6b47953c424 100644 --- a/airflow-core/src/airflow/utils/log/file_task_handler.py +++ b/airflow-core/src/airflow/utils/log/file_task_handler.py @@ -109,7 +109,7 @@ def _fetch_logs_from_service(url, log_relative_path): timeout = conf.getint("api", "log_fetch_timeout_sec", fallback=None) generator = JWTGenerator( - secret_key=get_signing_key("webserver", "secret_key"), + secret_key=get_signing_key("api", "secret_key"), # Since we are using a secret key, we need to be explicit about the algorithm here too algorithm="HS512", private_key=None, diff --git a/airflow-core/src/airflow/utils/serve_logs.py b/airflow-core/src/airflow/utils/serve_logs.py index 635bd0f345d22..899547346b65c 100644 --- a/airflow-core/src/airflow/utils/serve_logs.py +++ b/airflow-core/src/airflow/utils/serve_logs.py @@ -72,7 +72,7 @@ def create_app(): raise ImportError(f"Unable to load {log_config_class} due to error: {e}") signer = JWTValidator( issuer=None, - secret_key=get_signing_key("webserver", "secret_key"), + secret_key=get_signing_key("api", "secret_key"), algorithm="HS512", leeway=leeway, audience="task-instance-logs", diff --git a/airflow-core/tests/unit/core/test_configuration.py b/airflow-core/tests/unit/core/test_configuration.py index 52d0b88b6ae7a..8b64640f93532 100644 --- a/airflow-core/tests/unit/core/test_configuration.py +++ b/airflow-core/tests/unit/core/test_configuration.py @@ -609,8 +609,8 @@ def test_get_section_should_respect_cmd_env_variable(self, tmp_path, monkeypatch cmd_file.write_text("#!/usr/bin/env bash\necho -n difficult_unpredictable_cat_password\n") cmd_file.chmod(0o0555) - monkeypatch.setenv("AIRFLOW__WEBSERVER__SECRET_KEY_CMD", str(cmd_file)) - content = conf.getsection("webserver") + monkeypatch.setenv("AIRFLOW__API__SECRET_KEY_CMD", str(cmd_file)) + content = conf.getsection("api") assert content["secret_key"] == "difficult_unpredictable_cat_password" @pytest.mark.parametrize( @@ -628,11 +628,13 @@ def test_config_value_types(self, key, type): assert isinstance(section_dict[key], type) def test_command_from_env(self): - test_cmdenv_config = textwrap.dedent("""\ + test_cmdenv_config = textwrap.dedent( + """\ [testcmdenv] itsacommand=NOT OK notacommand=OK - """) + """ + ) test_cmdenv_conf = AirflowConfigParser() test_cmdenv_conf.read_string(test_cmdenv_config) test_cmdenv_conf.sensitive_config_values.add(("testcmdenv", "itsacommand")) @@ -1712,7 +1714,7 @@ def test_sensitive_values(): ("database", "sql_alchemy_conn"), ("core", "fernet_key"), ("api_auth", "jwt_secret"), - ("webserver", "secret_key"), + ("api", "secret_key"), ("secrets", "backend_kwargs"), ("sentry", "sentry_dsn"), ("database", "sql_alchemy_engine_args"), @@ -1723,6 +1725,7 @@ def test_sensitive_values(): ("celery", "result_backend"), ("opensearch", "username"), ("opensearch", "password"), + ("webserver", "secret_key"), } all_keys = {(s, k) for s, v in conf.configuration_description.items() for k in v.get("options")} suspected_sensitive = {(s, k) for (s, k) in all_keys if k.endswith(("password", "kwargs"))} diff --git a/contributing-docs/testing/k8s_tests.rst b/contributing-docs/testing/k8s_tests.rst index f05b025c59a68..49fb8e83bd44d 100644 --- a/contributing-docs/testing/k8s_tests.rst +++ b/contributing-docs/testing/k8s_tests.rst @@ -469,7 +469,7 @@ Should show the status of current KinD cluster. unnecessary restarts of your Airflow components. Information on how to set a static webserver secret key can be found here: - https://airflow.apache.org/docs/helm-chart/stable/production-guide.html#webserver-secret-key + https://airflow.apache.org/docs/helm-chart/stable/production-guide.html#api-secret-key Deployed kind-airflow-python-3.9-v1.24.2 with Airflow Helm Chart. Airflow for Python 3.9 and K8S version v1.24.2 has been successfully deployed. diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index 57cb299e1b289..9894ae2cc4349 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -528,7 +528,7 @@ def env_variables_for_docker_commands(self) -> dict[str, str]: "AIRFLOW__CORE__SIMPLE_AUTH_MANAGER_PASSWORDS_FILE", "/opt/airflow/dev/breeze/src/airflow_breeze/files/simple_auth_manager_passwords.json", ) - _set_var(_env, "AIRFLOW__WEBSERVER__SECRET_KEY", b64encode(os.urandom(16)).decode("utf-8")) + _set_var(_env, "AIRFLOW__API__SECRET_KEY", b64encode(os.urandom(16)).decode("utf-8")) if self.executor == EDGE_EXECUTOR: _set_var( _env, diff --git a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py index 145e287930803..eb2ecc26485ea 100644 --- a/devel-common/src/tests_common/pytest_plugin.py +++ b/devel-common/src/tests_common/pytest_plugin.py @@ -1765,10 +1765,10 @@ def secret_key() -> str: """Return secret key configured.""" from airflow.configuration import conf - the_key = conf.get("webserver", "SECRET_KEY") + the_key = conf.get("api", "SECRET_KEY") if the_key is None: raise RuntimeError( - "The secret key SHOULD be configured as `[webserver] secret_key` in the " + "The secret key SHOULD be configured as `[api] secret_key` in the " "configuration/environment at this stage! " ) return the_key diff --git a/providers/fab/src/airflow/providers/fab/www/app.py b/providers/fab/src/airflow/providers/fab/www/app.py index e2a82a40f3e94..ea7a59b9e4ee6 100644 --- a/providers/fab/src/airflow/providers/fab/www/app.py +++ b/providers/fab/src/airflow/providers/fab/www/app.py @@ -56,7 +56,7 @@ def create_app(enable_plugins: bool): from airflow.providers.fab.auth_manager.fab_auth_manager import FabAuthManager flask_app = Flask(__name__) - flask_app.secret_key = conf.get("webserver", "SECRET_KEY") + flask_app.secret_key = conf.get("api", "SECRET_KEY") flask_app.config["SQLALCHEMY_DATABASE_URI"] = conf.get("database", "SQL_ALCHEMY_CONN") flask_app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False flask_app.config["PERMANENT_SESSION_LIFETIME"] = timedelta(minutes=get_session_lifetime_config()) From bdf92b10df1f830d3e583e3f8b5a39aef59fde8e Mon Sep 17 00:00:00 2001 From: pierrejeambrun Date: Wed, 21 May 2025 13:20:34 +0200 Subject: [PATCH 2/3] Fix CI --- providers/fab/www-hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/providers/fab/www-hash.txt b/providers/fab/www-hash.txt index 6e6567d548505..a7c8f31c1fd19 100644 --- a/providers/fab/www-hash.txt +++ b/providers/fab/www-hash.txt @@ -1 +1 @@ -f962d6f103d24518ac6d3b902b0b30652e44f24293142f94f28684ab72e10d64 +ddf8a182739267a948ae36962905b6abe0df984ed041f4fbc0778e24d4a52884 From f0a80df4b53ced29069ff03f79a14a04f7b0dbbb Mon Sep 17 00:00:00 2001 From: pierrejeambrun Date: Wed, 21 May 2025 14:09:50 +0200 Subject: [PATCH 3/3] Fix CI --- providers/fab/www-hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/providers/fab/www-hash.txt b/providers/fab/www-hash.txt index a7c8f31c1fd19..5ba1e80f4b528 100644 --- a/providers/fab/www-hash.txt +++ b/providers/fab/www-hash.txt @@ -1 +1 @@ -ddf8a182739267a948ae36962905b6abe0df984ed041f4fbc0778e24d4a52884 +e6d566ed6f8c2b19eda800f6bb53b2bf7b3ede110978d3628e55f1e63f201704