From 34120fea2f83bbbe3f184469bb7edae1ac422448 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 20:55:13 +0100 Subject: [PATCH 01/33] Refactor bundle view_url to not instantiate bundle on server components This refactor introduces a view_url_template in bundle configuration which is saved in the db and rendered when needed. The url is signed with [api]secret_key for security. The view_url_template is also checked for safety before saving to the DB. --- airflow-core/docs/img/airflow_erd.sha256 | 2 +- airflow-core/docs/img/airflow_erd.svg | 3304 ++++++++--------- airflow-core/docs/migrations-ref.rst | 8 +- .../core_api/datamodels/dag_versions.py | 16 +- .../airflow/dag_processing/bundles/base.py | 32 +- .../airflow/dag_processing/bundles/manager.py | 119 +- ..._and_template_params_to_dagbundle_model.py | 53 + airflow-core/src/airflow/models/dagbundle.py | 59 + airflow-core/src/airflow/utils/db.py | 2 +- .../bundles/test_dag_bundle_manager.py | 182 +- .../src/airflow/providers/git/bundles/git.py | 43 +- 11 files changed, 2113 insertions(+), 1707 deletions(-) create mode 100644 airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py diff --git a/airflow-core/docs/img/airflow_erd.sha256 b/airflow-core/docs/img/airflow_erd.sha256 index bb0a75b55cd66..1f03bd8c42947 100644 --- a/airflow-core/docs/img/airflow_erd.sha256 +++ b/airflow-core/docs/img/airflow_erd.sha256 @@ -1 +1 @@ -d2e81695973bf8b6b30e1f4543627547330ef531e50be633cf589fbdf639b0e8 \ No newline at end of file +c5baad84b54b522b8a5a84509ef49580f34585f1289e4e1da6e6c9ee3898d25e \ No newline at end of file diff --git a/airflow-core/docs/img/airflow_erd.svg b/airflow-core/docs/img/airflow_erd.svg index d2ce6af2ab97e..d92180d1a4da9 100644 --- a/airflow-core/docs/img/airflow_erd.svg +++ b/airflow-core/docs/img/airflow_erd.svg @@ -4,11 +4,11 @@ - - + + %3 - + dag_priority_parsing_request @@ -305,2046 +305,1976 @@ asset_alias - -asset_alias - -id - - 
[INTEGER] - NOT NULL - -group - - [VARCHAR(1500)] - NOT NULL - -name - - [VARCHAR(1500)] - NOT NULL + +asset_alias + +id + + [INTEGER] + NOT NULL + +group + + [VARCHAR(1500)] + NOT NULL + +name + + [VARCHAR(1500)] + NOT NULL asset_alias_asset - -asset_alias_asset - -alias_id - - [INTEGER] - NOT NULL - -asset_id - - [INTEGER] - NOT NULL + +asset_alias_asset + +alias_id + + [INTEGER] + NOT NULL + +asset_id + + [INTEGER] + NOT NULL asset_alias--asset_alias_asset - -0..N -1 + +0..N +1 asset_alias_asset_event - -asset_alias_asset_event - -alias_id - - [INTEGER] - NOT NULL - -event_id - - [INTEGER] - NOT NULL + +asset_alias_asset_event + +alias_id + + [INTEGER] + NOT NULL + +event_id + + [INTEGER] + NOT NULL asset_alias--asset_alias_asset_event - -0..N -1 + +0..N +1 dag_schedule_asset_alias_reference - -dag_schedule_asset_alias_reference - -alias_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_alias_reference + +alias_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset_alias--dag_schedule_asset_alias_reference - -0..N -1 + +0..N +1 asset - -asset - -id - - [INTEGER] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -extra - - [JSON] - NOT NULL - -group - - [VARCHAR(1500)] - NOT NULL - -name - - [VARCHAR(1500)] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -uri - - [VARCHAR(1500)] - NOT NULL + +asset + +id + + [INTEGER] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +extra + + [JSON] + NOT NULL + +group + + [VARCHAR(1500)] + NOT NULL + +name + + [VARCHAR(1500)] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +uri + + [VARCHAR(1500)] + NOT NULL asset--asset_alias_asset - -0..N -1 + +0..N +1 asset_trigger - -asset_trigger - -asset_id - - [INTEGER] - NOT NULL - -trigger_id - - [INTEGER] - NOT NULL + +asset_trigger + +asset_id 
+ + [INTEGER] + NOT NULL + +trigger_id + + [INTEGER] + NOT NULL asset--asset_trigger - -0..N -1 + +0..N +1 asset_active - -asset_active - -name - - [VARCHAR(1500)] - NOT NULL - -uri - - [VARCHAR(1500)] - NOT NULL + +asset_active + +name + + [VARCHAR(1500)] + NOT NULL + +uri + + [VARCHAR(1500)] + NOT NULL asset--asset_active - -1 -1 + +1 +1 asset--asset_active - -1 -1 + +1 +1 dag_schedule_asset_reference - -dag_schedule_asset_reference - -asset_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_reference + +asset_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset--dag_schedule_asset_reference - -0..N -1 + +0..N +1 task_outlet_asset_reference - -task_outlet_asset_reference - -asset_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +task_outlet_asset_reference + +asset_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset--task_outlet_asset_reference - -0..N -1 + +0..N +1 task_inlet_asset_reference - -task_inlet_asset_reference - -asset_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +task_inlet_asset_reference + +asset_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset--task_inlet_asset_reference - -0..N -1 + +0..N +1 asset_dag_run_queue - -asset_dag_run_queue - -asset_id - - 
[INTEGER] - NOT NULL - -target_dag_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL + +asset_dag_run_queue + +asset_id + + [INTEGER] + NOT NULL + +target_dag_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL asset--asset_dag_run_queue - -0..N -1 + +0..N +1 asset_event - -asset_event - -id - - [INTEGER] - NOT NULL - -asset_id - - [INTEGER] - NOT NULL - -extra - - [JSON] - NOT NULL - -source_dag_id - - [VARCHAR(250)] - -source_map_index - - [INTEGER] - -source_run_id - - [VARCHAR(250)] - -source_task_id - - [VARCHAR(250)] - -timestamp - - [TIMESTAMP] - NOT NULL + +asset_event + +id + + [INTEGER] + NOT NULL + +asset_id + + [INTEGER] + NOT NULL + +extra + + [JSON] + NOT NULL + +source_dag_id + + [VARCHAR(250)] + +source_map_index + + [INTEGER] + +source_run_id + + [VARCHAR(250)] + +source_task_id + + [VARCHAR(250)] + +timestamp + + [TIMESTAMP] + NOT NULL asset_event--asset_alias_asset_event - -0..N -1 + +0..N +1 dagrun_asset_event - -dagrun_asset_event - -dag_run_id - - [INTEGER] - NOT NULL - -event_id - - [INTEGER] - NOT NULL + +dagrun_asset_event + +dag_run_id + + [INTEGER] + NOT NULL + +event_id + + [INTEGER] + NOT NULL asset_event--dagrun_asset_event - -0..N -1 + +0..N +1 trigger - -trigger - -id - - [INTEGER] - NOT NULL - -classpath - - [VARCHAR(1000)] - NOT NULL - -created_date - - [TIMESTAMP] - NOT NULL - -kwargs - - [TEXT] - NOT NULL - -triggerer_id - - [INTEGER] + +trigger + +id + + [INTEGER] + NOT NULL + +classpath + + [VARCHAR(1000)] + NOT NULL + +created_date + + [TIMESTAMP] + NOT NULL + +kwargs + + [TEXT] + NOT NULL + +triggerer_id + + [INTEGER] trigger--asset_trigger - -0..N -1 + +0..N +1 task_instance - -task_instance - -id - - [UUID] - NOT NULL - -context_carrier - - [JSONB] - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - NOT NULL - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - 
-executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -last_heartbeat_at - - [TIMESTAMP] - -map_index - - [INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSONB] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -scheduled_dttm - - [TIMESTAMP] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] + +task_instance + +id + + [UUID] + NOT NULL + +context_carrier + + [JSONB] + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +last_heartbeat_at + + [TIMESTAMP] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSONB] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +scheduled_dttm + + [TIMESTAMP] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + 
[VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number + + [INTEGER] + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] trigger--task_instance - -0..N -{0,1} - - - -deadline - -deadline - -id - - [UUID] - NOT NULL - -callback - - [VARCHAR(500)] - NOT NULL - -callback_kwargs - - [JSON] - -callback_state - - [VARCHAR(20)] - -dag_id - - [VARCHAR(250)] - -dagrun_id - - [INTEGER] - -deadline_time - - [TIMESTAMP] - NOT NULL - -trigger_id - - [INTEGER] - - - -trigger--deadline - -0..N -{0,1} - - - -hitl_detail - -hitl_detail - -ti_id - - [UUID] - NOT NULL - -body - - [TEXT] - -chosen_options - - [JSON] - -defaults - - [JSON] - -multiple - - [BOOLEAN] - -options - - [JSON] - NOT NULL - -params - - [JSON] - NOT NULL - -params_input - - [JSON] - NOT NULL - -response_at - - [TIMESTAMP] - -subject - - [TEXT] - NOT NULL - -user_id - - [VARCHAR(128)] - - - -task_instance--hitl_detail - -1 -1 + +0..N +{0,1} - + task_map - -task_map - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -keys - - [JSONB] - -length - - [INTEGER] - NOT NULL + +task_map + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +keys + + [JSONB] + +length + + [INTEGER] + NOT NULL - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_reschedule - -task_reschedule - -id - - [INTEGER] - NOT NULL - -duration - - [INTEGER] - NOT NULL - -end_date - - [TIMESTAMP] - NOT NULL - -reschedule_date - - [TIMESTAMP] - NOT NULL - -start_date - - [TIMESTAMP] - NOT NULL - -ti_id - - [UUID] - NOT NULL + +task_reschedule + +id + + 
[INTEGER] + NOT NULL + +duration + + [INTEGER] + NOT NULL + +end_date + + [TIMESTAMP] + NOT NULL + +reschedule_date + + [TIMESTAMP] + NOT NULL + +start_date + + [TIMESTAMP] + NOT NULL + +ti_id + + [UUID] + NOT NULL - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + xcom - -xcom - -dag_run_id - - [INTEGER] - NOT NULL - -key - - [VARCHAR(512)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -timestamp - - [TIMESTAMP] - NOT NULL - -value - - [JSONB] + +xcom + +dag_run_id + + [INTEGER] + NOT NULL + +key + + [VARCHAR(512)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +timestamp + + [TIMESTAMP] + NOT NULL + +value + + [JSONB] - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance_note - -task_instance_note - -ti_id - - [UUID] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +task_instance_note + +ti_id + + [UUID] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] - + task_instance--task_instance_note - -1 -1 + +1 +1 - + task_instance_history - -task_instance_history - -task_instance_id - - [UUID] - NOT NULL - -context_carrier - - [JSONB] - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -map_index - - 
[INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSONB] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -scheduled_dttm - - [TIMESTAMP] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - NOT NULL - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] + +task_instance_history + +task_instance_id + + [UUID] + NOT NULL + +context_carrier + + [JSONB] + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSONB] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +scheduled_dttm + + [TIMESTAMP] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number 
+ + [INTEGER] + NOT NULL + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + rendered_task_instance_fields - -rendered_task_instance_fields - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -k8s_pod_yaml - - [JSON] - -rendered_fields - - [JSON] - NOT NULL + +rendered_task_instance_fields + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +k8s_pod_yaml + + [JSON] + +rendered_fields + + [JSON] + NOT NULL - + task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 - + task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 - + task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 - + task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 - + dag_bundle - -dag_bundle + +dag_bundle + +name + + [VARCHAR(250)] + NOT NULL + +active + + [BOOLEAN] -name - - [VARCHAR(250)] - NOT NULL +last_refreshed + + [TIMESTAMP] -active - - [BOOLEAN] +template_params + + [JSON] -last_refreshed - - [TIMESTAMP] +url + + [VARCHAR(200)] version [VARCHAR(200)] - + dag - -dag + +dag + +dag_id + + [VARCHAR(250)] + NOT NULL -dag_id - - [VARCHAR(250)] - NOT NULL +asset_expression + + [JSON] -asset_expression - - [JSON] +bundle_name + + [VARCHAR(250)] -bundle_name - - [VARCHAR(250)] +bundle_version + + [VARCHAR(200)] -bundle_version - - [VARCHAR(200)] +dag_display_name + + [VARCHAR(2000)] -dag_display_name - - [VARCHAR(2000)] +deadline + + [JSON] -deadline - - [JSON] +description + + [TEXT] -description - - [TEXT] +fileloc + + [VARCHAR(2000)] -fileloc - - [VARCHAR(2000)] 
+has_import_errors + + [BOOLEAN] -has_import_errors - - [BOOLEAN] +has_task_concurrency_limits + + [BOOLEAN] + NOT NULL -has_task_concurrency_limits - - [BOOLEAN] - NOT NULL +is_paused + + [BOOLEAN] -is_paused - - [BOOLEAN] +is_stale + + [BOOLEAN] -is_stale - - [BOOLEAN] +last_expired + + [TIMESTAMP] -last_expired - - [TIMESTAMP] +last_parsed_time + + [TIMESTAMP] -last_parsed_time - - [TIMESTAMP] +max_active_runs + + [INTEGER] -max_active_runs - - [INTEGER] +max_active_tasks + + [INTEGER] + NOT NULL -max_active_tasks - - [INTEGER] - NOT NULL +max_consecutive_failed_dag_runs + + [INTEGER] + NOT NULL -max_consecutive_failed_dag_runs - - [INTEGER] - NOT NULL +next_dagrun + + [TIMESTAMP] -next_dagrun - - [TIMESTAMP] +next_dagrun_create_after + + [TIMESTAMP] -next_dagrun_create_after - - [TIMESTAMP] +next_dagrun_data_interval_end + + [TIMESTAMP] -next_dagrun_data_interval_end - - [TIMESTAMP] +next_dagrun_data_interval_start + + [TIMESTAMP] -next_dagrun_data_interval_start - - [TIMESTAMP] +owners + + [VARCHAR(2000)] -owners - - [VARCHAR(2000)] +relative_fileloc + + [VARCHAR(2000)] -relative_fileloc - - [VARCHAR(2000)] +timetable_description + + [VARCHAR(1000)] -timetable_description - - [VARCHAR(1000)] - -timetable_summary - - [TEXT] +timetable_summary + + [TEXT] - + dag_bundle--dag - -0..N -{0,1} + +0..N +{0,1} - + dag--dag_schedule_asset_alias_reference - -0..N -1 + +0..N +1 - + dag--dag_schedule_asset_reference - -0..N -1 + +0..N +1 - + dag--task_outlet_asset_reference - -0..N -1 + +0..N +1 - + dag--task_inlet_asset_reference - -0..N -1 + +0..N +1 - + dag--asset_dag_run_queue - -0..N -1 - - - -dag--deadline - -0..N -{0,1} + +0..N +1 - + dag_schedule_asset_name_reference - -dag_schedule_asset_name_reference - -dag_id - - [VARCHAR(250)] - NOT NULL - -name - - [VARCHAR(1500)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_name_reference + +dag_id + + [VARCHAR(250)] + NOT NULL + +name + + [VARCHAR(1500)] + NOT NULL + +created_at + + [TIMESTAMP] 
+ NOT NULL - + dag--dag_schedule_asset_name_reference - -0..N -1 + +0..N +1 - + dag_schedule_asset_uri_reference - -dag_schedule_asset_uri_reference - -dag_id - - [VARCHAR(250)] - NOT NULL - -uri - - [VARCHAR(1500)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_uri_reference + +dag_id + + [VARCHAR(250)] + NOT NULL + +uri + + [VARCHAR(1500)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL - + dag--dag_schedule_asset_uri_reference - -0..N -1 + +0..N +1 - + dag_version - -dag_version - -id - - [UUID] - NOT NULL - -bundle_name - - [VARCHAR(250)] - -bundle_version - - [VARCHAR(250)] - -created_at - - [TIMESTAMP] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -last_updated - - [TIMESTAMP] - NOT NULL - -version_number - - [INTEGER] - NOT NULL + +dag_version + +id + + [UUID] + NOT NULL + +bundle_name + + [VARCHAR(250)] + +bundle_version + + [VARCHAR(250)] + +created_at + + [TIMESTAMP] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +last_updated + + [TIMESTAMP] + NOT NULL + +version_number + + [INTEGER] + NOT NULL - + dag--dag_version - -0..N -1 + +0..N +1 - + dag_tag - -dag_tag - -dag_id - - [VARCHAR(250)] - NOT NULL - -name - - [VARCHAR(100)] - NOT NULL + +dag_tag + +dag_id + + [VARCHAR(250)] + NOT NULL + +name + + [VARCHAR(100)] + NOT NULL - + dag--dag_tag - -0..N -1 + +0..N +1 - + dag_owner_attributes - -dag_owner_attributes - -dag_id - - [VARCHAR(250)] - NOT NULL - -owner - - [VARCHAR(500)] - NOT NULL - -link - - [VARCHAR(500)] - NOT NULL + +dag_owner_attributes + +dag_id + + [VARCHAR(250)] + NOT NULL + +owner + + [VARCHAR(500)] + NOT NULL + +link + + [VARCHAR(500)] + NOT NULL - + dag--dag_owner_attributes - -0..N -1 + +0..N +1 - + dag_warning - -dag_warning - -dag_id - - [VARCHAR(250)] - NOT NULL - -warning_type - - [VARCHAR(50)] - NOT NULL - -message - - [TEXT] - NOT NULL - -timestamp - - [TIMESTAMP] - NOT NULL + +dag_warning + +dag_id + + [VARCHAR(250)] + NOT NULL + +warning_type + + [VARCHAR(50)] + NOT NULL + +message + + 
[TEXT] + NOT NULL + +timestamp + + [TIMESTAMP] + NOT NULL - + dag--dag_warning - -0..N -1 + +0..N +1 - + dag_favorite - -dag_favorite - -dag_id - - [VARCHAR(250)] - NOT NULL - -user_id - - [VARCHAR(250)] - NOT NULL + +dag_favorite + +dag_id + + [VARCHAR(250)] + NOT NULL + +user_id + + [VARCHAR(250)] + NOT NULL - + dag--dag_favorite - -0..N -1 + +0..N +1 + + + +deadline + +deadline + +id + + [UUID] + NOT NULL + +callback + + [VARCHAR(500)] + NOT NULL + +callback_kwargs + + [JSON] + +dag_id + + [VARCHAR(250)] + +dagrun_id + + [INTEGER] + +deadline_time + + [TIMESTAMP] + NOT NULL + + + +dag--deadline + +0..N +{0,1} - + dag_version--task_instance - -0..N -1 + +0..N +{0,1} dag_run - -dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - [INTEGER] - -bundle_version - - [VARCHAR(250)] - -clear_number - - [INTEGER] - NOT NULL - -conf - - [JSONB] - -context_carrier - - [JSONB] - -created_dag_version_id - - [UUID] - -creating_job_id - - [INTEGER] - -dag_id - - [VARCHAR(250)] - NOT NULL - -data_interval_end - - [TIMESTAMP] - -data_interval_start - - [TIMESTAMP] - -end_date - - [TIMESTAMP] - -last_scheduling_decision - - [TIMESTAMP] - -log_template_id - - [INTEGER] - -logical_date - - [TIMESTAMP] - -queued_at - - [TIMESTAMP] - -run_after - - [TIMESTAMP] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -run_type - - [VARCHAR(50)] - NOT NULL - -scheduled_by_job_id - - [INTEGER] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(50)] - -triggered_by - - [VARCHAR(50)] - -triggering_user_name - - [VARCHAR(512)] - -updated_at - - [TIMESTAMP] + +dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + +bundle_version + + [VARCHAR(250)] + +clear_number + + [INTEGER] + NOT NULL + +conf + + [JSONB] + +context_carrier + + [JSONB] + +created_dag_version_id + + [UUID] + +creating_job_id + + [INTEGER] + +dag_id + + [VARCHAR(250)] + NOT NULL + +data_interval_end + + [TIMESTAMP] + +data_interval_start + + [TIMESTAMP] + 
+end_date + + [TIMESTAMP] + +last_scheduling_decision + + [TIMESTAMP] + +log_template_id + + [INTEGER] + +logical_date + + [TIMESTAMP] + +queued_at + + [TIMESTAMP] + +run_after + + [TIMESTAMP] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +run_type + + [VARCHAR(50)] + NOT NULL + +scheduled_by_job_id + + [INTEGER] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(50)] + +triggered_by + + [VARCHAR(50)] + +triggering_user_name + + [VARCHAR(512)] + +updated_at + + [TIMESTAMP] - + dag_version--dag_run - -0..N -{0,1} + +0..N +{0,1} dag_code - -dag_code - -id - - [UUID] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - NOT NULL - -fileloc - - [VARCHAR(2000)] - NOT NULL - -last_updated - - [TIMESTAMP] - NOT NULL - -source_code - - [TEXT] - NOT NULL - -source_code_hash - - [VARCHAR(32)] - NOT NULL + +dag_code + +id + + [UUID] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + NOT NULL + +fileloc + + [VARCHAR(2000)] + NOT NULL + +last_updated + + [TIMESTAMP] + NOT NULL + +source_code + + [TEXT] + NOT NULL + +source_code_hash + + [VARCHAR(32)] + NOT NULL - + dag_version--dag_code - -0..N -1 + +0..N +1 serialized_dag - -serialized_dag - -id - - [UUID] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -dag_hash - - [VARCHAR(32)] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - NOT NULL - -data - - [JSON] - -data_compressed - - [BYTEA] - -last_updated - - [TIMESTAMP] - NOT NULL + +serialized_dag + +id + + [UUID] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +dag_hash + + [VARCHAR(32)] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + NOT NULL + +data + + [JSON] + +data_compressed + + [BYTEA] + +last_updated + + [TIMESTAMP] + NOT NULL - + dag_version--serialized_dag - -0..N -1 + +0..N +1 - + 
dag_run--dagrun_asset_event - -0..N -1 + +0..N +1 dag_run--task_instance - -0..N -1 + +0..N +1 dag_run--task_instance - -0..N -1 + +0..N +1 - + dag_run--deadline - -0..N -{0,1} + +0..N +{0,1} backfill_dag_run - -backfill_dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - [INTEGER] - NOT NULL - -dag_run_id - - [INTEGER] - -exception_reason - - [VARCHAR(250)] - -logical_date - - [TIMESTAMP] - NOT NULL - -sort_ordinal - - [INTEGER] - NOT NULL + +backfill_dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + NOT NULL + +dag_run_id + + [INTEGER] + +exception_reason + + [VARCHAR(250)] + +logical_date + + [TIMESTAMP] + NOT NULL + +sort_ordinal + + [INTEGER] + NOT NULL - + dag_run--backfill_dag_run - -0..N -{0,1} + +0..N +{0,1} dag_run_note - -dag_run_note - -dag_run_id - - [INTEGER] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +dag_run_note + +dag_run_id + + [INTEGER] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] - + dag_run--dag_run_note - -1 -1 + +1 +1 log_template - -log_template - -id - - [INTEGER] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -elasticsearch_id - - [TEXT] - NOT NULL - -filename - - [TEXT] - NOT NULL + +log_template + +id + + [INTEGER] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +elasticsearch_id + + [TEXT] + NOT NULL + +filename + + [TEXT] + NOT NULL - + log_template--dag_run - -0..N -{0,1} + +0..N +{0,1} backfill - -backfill - -id - - [INTEGER] - NOT NULL - -completed_at - - [TIMESTAMP] - -created_at - - [TIMESTAMP] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_run_conf - - [JSON] - NOT NULL - -from_date - - [TIMESTAMP] - NOT NULL - -is_paused - - [BOOLEAN] - -max_active_runs - - [INTEGER] - NOT NULL - -reprocess_behavior - - [VARCHAR(250)] - NOT NULL - -to_date - - [TIMESTAMP] - NOT 
NULL - -triggering_user_name - - [VARCHAR(512)] - -updated_at - - [TIMESTAMP] - NOT NULL + +backfill + +id + + [INTEGER] + NOT NULL + +completed_at + + [TIMESTAMP] + +created_at + + [TIMESTAMP] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_run_conf + + [JSON] + NOT NULL + +from_date + + [TIMESTAMP] + NOT NULL + +is_paused + + [BOOLEAN] + +max_active_runs + + [INTEGER] + NOT NULL + +reprocess_behavior + + [VARCHAR(250)] + NOT NULL + +to_date + + [TIMESTAMP] + NOT NULL + +triggering_user_name + + [VARCHAR(512)] + +updated_at + + [TIMESTAMP] + NOT NULL - + backfill--dag_run - -0..N -{0,1} + +0..N +{0,1} - + backfill--backfill_dag_run - -0..N -1 + +0..N +1 - + alembic_version - -alembic_version - -version_num - - [VARCHAR(32)] - NOT NULL + +alembic_version + +version_num + + [VARCHAR(32)] + NOT NULL diff --git a/airflow-core/docs/migrations-ref.rst b/airflow-core/docs/migrations-ref.rst index dce45f2cb0aef..650c52b4e3ebe 100644 --- a/airflow-core/docs/migrations-ref.rst +++ b/airflow-core/docs/migrations-ref.rst @@ -39,13 +39,7 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``f56f68b9e02f`` (head) | ``09fa89ba1710`` | ``3.1.0`` | Add callback_state to deadline. | -+-------------------------+------------------+-------------------+--------------------------------------------------------------+ -| ``09fa89ba1710`` | ``40f7c30a228b`` | ``3.1.0`` | Add trigger_id to deadline. | -+-------------------------+------------------+-------------------+--------------------------------------------------------------+ -| ``40f7c30a228b`` | ``5d3072c51bac`` | ``3.1.0`` | Add Human In the Loop Detail table. 
| -+-------------------------+------------------+-------------------+--------------------------------------------------------------+ -| ``5d3072c51bac`` | ``ffdb0566c7c0`` | ``3.1.0`` | Make dag_version_id non-nullable in TaskInstance. | +| ``3bda03debd04`` (head) | ``ffdb0566c7c0`` | ``3.1.0`` | Add url and template params to DagBundleModel. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``ffdb0566c7c0`` | ``66a7743fe20e`` | ``3.1.0`` | Add dag_favorite table. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py index a32e315bbee29..0b104e2ce050a 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py @@ -20,9 +20,9 @@ from uuid import UUID from pydantic import AliasPath, Field, computed_field +from sqlalchemy import select from airflow.api_fastapi.core_api.base import BaseModel -from airflow.dag_processing.bundles.manager import DagBundlesManager class DagVersionResponse(BaseModel): @@ -41,9 +41,17 @@ class DagVersionResponse(BaseModel): @property def bundle_url(self) -> str | None: if self.bundle_name: - try: - return DagBundlesManager().view_url(self.bundle_name, self.bundle_version) - except ValueError: + # Get the bundle model from the database and render the URL + from airflow.models.dagbundle import DagBundleModel + from airflow.utils.session import create_session + + with create_session() as session: + bundle_model = session.scalar( + select(DagBundleModel).where(DagBundleModel.name == self.bundle_name) + ) + + if bundle_model: + return bundle_model.render_url() return None return None diff --git 
a/airflow-core/src/airflow/dag_processing/bundles/base.py b/airflow-core/src/airflow/dag_processing/bundles/base.py index 5d49bf43fd4a4..249e67e3d4308 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/base.py +++ b/airflow-core/src/airflow/dag_processing/bundles/base.py @@ -22,6 +22,7 @@ import os import shutil import tempfile +import warnings from abc import ABC, abstractmethod from contextlib import contextmanager from dataclasses import dataclass, field @@ -35,7 +36,6 @@ from sqlalchemy_utils.types.enriched_datetime.pendulum_datetime import pendulum from airflow.configuration import conf -from airflow.dag_processing.bundles.manager import DagBundlesManager if TYPE_CHECKING: from pendulum import DateTime @@ -218,6 +218,9 @@ def remove_stale_bundle_versions(self): with other processes. """ log.info("checking for stale bundle versions locally") + # Import here to avoid circular imports + from airflow.dag_processing.bundles.manager import DagBundlesManager + bundles = list(DagBundlesManager().get_all_dag_bundles()) for bundle in bundles: if not bundle.supports_versioning: @@ -247,6 +250,7 @@ class BaseDagBundle(ABC): """ supports_versioning: bool = False + template_fields: list[str] = [] _locked: bool = False @@ -256,6 +260,7 @@ def __init__( name: str, refresh_interval: int = conf.getint("dag_processor", "refresh_interval"), version: str | None = None, + view_url_template: str | None = None, ) -> None: self.name = name self.version = version @@ -268,6 +273,8 @@ def __init__( self.versions_dir = get_bundle_versions_base_folder(bundle_name=self.name) """Where bundle versions are stored locally for this bundle.""" + self._view_url_template = view_url_template + def initialize(self) -> None: """ Initialize the bundle. @@ -316,10 +323,31 @@ def view_url(self, version: str | None = None) -> str | None: URL to view the bundle on an external website. 
This is shown to users in the Airflow UI, allowing them to navigate to this url for more details about that version of the bundle. This needs to function without `initialize` being called. - :param version: Version to view :return: URL to view the bundle """ + warnings.warn( + "The 'view_url' method is deprecated and will be removed in a future version. " + "Use 'view_url_template' instead.", + DeprecationWarning, + stacklevel=2, + ) + return None + + def view_url_template(self) -> str | None: + """ + URL template to view the bundle on an external website. This is shown to users in the Airflow UI, allowing them to navigate to this url for more details about that version of the bundle. + + The template should use format string placeholders like {version}, {subdir}, etc. + Common placeholders: + - {version}: The version identifier + - {subdir}: The subdirectory within the bundle (if applicable) + + This needs to function without `initialize` being called. + + :return: URL template string or None if not applicable + """ + return self._view_url_template @contextmanager def lock(self): diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index a3538f1e29191..b4b068681d35d 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -16,8 +16,10 @@ # under the License. from __future__ import annotations +import warnings from typing import TYPE_CHECKING +from itsdangerous import URLSafeSerializer from sqlalchemy import delete from airflow.configuration import conf @@ -81,6 +83,54 @@ def _add_example_dag_bundle(config_list): ) +def _is_safe_bundle_url(url: str) -> bool: + """ + Check if a bundle URL is safe to use. 
+ + This function validates that the URL: + - Uses HTTP or HTTPS schemes (no JavaScript, data, or other schemes) + - Is properly formatted + - Doesn't contain malicious content + """ + from urllib.parse import urlparse + + if not url: + return False + + try: + parsed = urlparse(url) + if parsed.scheme not in {"http", "https"}: + return False + + if not parsed.netloc: + return False + + if ";" in url: + return False + if any(ord(c) < 32 for c in url): + return False + + return True + except Exception: + return False + + +def _sign_bundle_url(url: str, bundle_name: str) -> str: + """ + Sign a bundle URL for integrity verification. + + :param url: The URL to sign + :param bundle_name: The name of the bundle (used in the payload) + :return: The signed URL token + """ + serializer = URLSafeSerializer(conf.get_mandatory_value("api", "secret_key")) + payload = { + "url": url, + "bundle_name": bundle_name, + } + return serializer.dumps(payload) + + class DagBundlesManager(LoggingMixin): """Manager for DAG bundles.""" @@ -125,11 +175,55 @@ def parse_config(self) -> None: def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: self.log.debug("Syncing DAG bundles to the database") stored = {b.name: b for b in session.query(DagBundleModel).all()} + for name in self._bundle_config.keys(): if bundle := stored.pop(name, None): bundle.active = True + # Update URL template and parameters if they've changed + bundle_instance = self.get_bundle(name) + new_template = bundle_instance.view_url_template() + new_params = self._extract_template_params(bundle_instance) + + # Validate and sign the URL before saving + if new_template: + if not _is_safe_bundle_url(new_template): + self.log.warning( + "Bundle %s has unsafe URL template '%s', skipping URL update", name, new_template + ) + new_template = None + else: + # Sign the URL for integrity verification + new_template = _sign_bundle_url(new_template, name) + self.log.debug("Signed URL template for bundle %s", name) + + if 
new_template != bundle.url: + bundle.url = new_template + self.log.debug("Updated URL template for bundle %s", name) + if new_params != bundle.template_params: + bundle.template_params = new_params + self.log.debug("Updated template parameters for bundle %s", name) else: - session.add(DagBundleModel(name=name)) + new_bundle = DagBundleModel(name=name) + # Set URL template and parameters for new bundle + bundle_instance = self.get_bundle(name) + new_template = bundle_instance.view_url_template() + new_params = self._extract_template_params(bundle_instance) + + # Validate and sign the URL before saving + if new_template: + if not _is_safe_bundle_url(new_template): + self.log.warning( + "Bundle %s has unsafe URL template '%s', skipping URL", name, new_template + ) + new_template = None + else: + # Sign the URL for integrity verification + new_template = _sign_bundle_url(new_template, name) + self.log.debug("Signed URL template for bundle %s", name) + + new_bundle.url = new_template + new_bundle.template_params = new_params + session.add(new_bundle) self.log.info("Added new DAG bundle %s to the database", name) for name, bundle in stored.items(): @@ -140,6 +234,23 @@ def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: session.execute(delete(ParseImportError).where(ParseImportError.bundle_name == name)) self.log.info("Deleted import errors for bundle %s which is no longer configured", name) + @staticmethod + def _extract_template_params(bundle_instance: BaseDagBundle) -> dict: + """ + Extract template parameters from a bundle instance using its template_fields. 
+ + :param bundle_instance: The bundle instance to extract parameters from + :return: Dictionary of template parameters + """ + params = {} + + # Extract values for each field specified in template_fields + for field_name in bundle_instance.template_fields: + field_value = getattr(bundle_instance, field_name, None) + if field_value: + params[field_name] = field_value + return params + def get_bundle(self, name: str, version: str | None = None) -> BaseDagBundle: """ Get a DAG bundle by name. @@ -165,5 +276,11 @@ def get_all_dag_bundles(self) -> Iterable[BaseDagBundle]: yield class_(name=name, version=None, **kwargs) def view_url(self, name: str, version: str | None = None) -> str | None: + warnings.warn( + "The 'view_url' method is deprecated and will be removed in a future version. " + "Use DagBundleModel.render_url() instead.", + DeprecationWarning, + stacklevel=2, + ) bundle = self.get_bundle(name, version) return bundle.view_url(version=version) diff --git a/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py b/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py new file mode 100644 index 0000000000000..f909a815a46aa --- /dev/null +++ b/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py @@ -0,0 +1,53 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Add url and template params to DagBundleModel. + +Revision ID: 3bda03debd04 +Revises: f56f68b9e02f +Create Date: 2025-07-04 10:12:12.711292 + +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op +from sqlalchemy_utils import JSONType + +# revision identifiers, used by Alembic. +revision = "3bda03debd04" +down_revision = "f56f68b9e02f" +branch_labels = None +depends_on = None +airflow_version = "3.1.0" + + +def upgrade(): + """Apply Add url and template params to DagBundleModel.""" + with op.batch_alter_table("dag_bundle", schema=None) as batch_op: + batch_op.add_column(sa.Column("url", sa.String(length=200), nullable=True)) + batch_op.add_column(sa.Column("template_params", JSONType(), nullable=True)) + + +def downgrade(): + """Unapply Add url and template params to DagBundleModel.""" + with op.batch_alter_table("dag_bundle", schema=None) as batch_op: + batch_op.drop_column("template_params") + batch_op.drop_column("url") diff --git a/airflow-core/src/airflow/models/dagbundle.py b/airflow-core/src/airflow/models/dagbundle.py index e1f99d5effcc9..31169e7c3a7b9 100644 --- a/airflow-core/src/airflow/models/dagbundle.py +++ b/airflow-core/src/airflow/models/dagbundle.py @@ -17,6 +17,7 @@ from __future__ import annotations from sqlalchemy import Boolean, Column, String +from sqlalchemy_utils import JSONType from airflow.models.base import Base, StringID from airflow.utils.sqlalchemy import UtcDateTime @@ -32,6 +33,8 @@ class DagBundleModel(Base): - active: Is the bundle currently found in configuration? 
- version: The latest version Airflow has seen for the bundle. - last_refreshed: When the bundle was last refreshed. + - url: URL template for viewing the bundle (e.g., "https://github.com/repo/tree/{version}") + - template_params: JSON object containing template parameters (e.g., {"subdir": "dags"}) """ @@ -40,6 +43,62 @@ class DagBundleModel(Base): active = Column(Boolean, default=True) version = Column(String(200), nullable=True) last_refreshed = Column(UtcDateTime, nullable=True) + url = Column(String(200), nullable=True) + template_params = Column(JSONType, nullable=True) def __init__(self, *, name: str): self.name = name + self.template_params = {} + + def _unsign_url(self) -> str | None: + """ + Unsign a URL token to get the original URL template. + + :param signed_url: The signed URL token + :return: The original URL template or None if unsigning fails + """ + try: + from itsdangerous import BadSignature, URLSafeSerializer + + from airflow.configuration import conf + + serializer = URLSafeSerializer(conf.get_mandatory_value("api", "secret_key")) + payload = serializer.loads(self.url) + if isinstance(payload, dict) and "url" in payload and "bundle_name" in payload: + if payload["bundle_name"] == self.name: + return payload["url"] + + return None + except (BadSignature, Exception): + return None + + def render_url(self) -> str | None: + """ + Render the URL template with the given version and stored template parameters. + + First unsigns the URL to get the original template, then formats it with + the provided version and any additional parameters. 
+ + :param version: The version to substitute in the template + :return: The rendered URL or None if no template is available + """ + if not self.url: + return None + + url_template = self._unsign_url() + + if url_template is None: + url_template = self.url + + params = dict(self.template_params or {}) + params["version"] = self.version + + try: + return url_template.format(**params) + except (KeyError, ValueError) as e: + import logging + + logging.getLogger(__name__).warning( + "Failed to render URL template for bundle %s: %s", self.name, e + ) + return url_template diff --git a/airflow-core/src/airflow/utils/db.py b/airflow-core/src/airflow/utils/db.py index d6e157574600a..4cf633360dab6 100644 --- a/airflow-core/src/airflow/utils/db.py +++ b/airflow-core/src/airflow/utils/db.py @@ -93,7 +93,7 @@ class MappedClassProtocol(Protocol): "2.10.3": "5f2621c13b39", "3.0.0": "29ce7909c52b", "3.0.3": "fe199e1abd77", - "3.1.0": "f56f68b9e02f", + "3.1.0": "3bda03debd04", } diff --git a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py index 05c5baf1dd830..8c5a639188219 100644 --- a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py +++ b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py @@ -188,10 +188,190 @@ def test_view_url(version): """Test that view_url calls the bundle's view_url method.""" bundle_manager = DagBundlesManager() with patch.object(BaseDagBundle, "view_url") as view_url_mock: - bundle_manager.view_url("my-test-bundle", version=version) + # Test that deprecation warning is raised + with pytest.warns(DeprecationWarning, match="'view_url' method is deprecated"): + bundle_manager.view_url("my-test-bundle", version=version) view_url_mock.assert_called_once_with(version=version) +class BundleWithTemplate(BaseDagBundle): + """Test bundle that provides a URL template.""" + + template_fields = ["subdir"] + + def 
__init__(self, *, subdir: str | None = None, **kwargs): + super().__init__(**kwargs) + self.subdir = subdir + + def refresh(self): + pass + + def get_current_version(self): + return "v1.0" + + @property + def path(self): + return "/tmp/test" + + +TEMPLATE_BUNDLE_CONFIG = [ + { + "name": "template-bundle", + "classpath": "unit.dag_processing.bundles.test_dag_bundle_manager.BundleWithTemplate", + "kwargs": { + "view_url_template": "https://github.com/example/repo/tree/{version}/{subdir}", + "subdir": "dags", + "refresh_interval": 1, + }, + } +] + + +@pytest.mark.db_test +@conf_vars({("core", "LOAD_EXAMPLES"): "False"}) +def test_sync_bundles_to_db_with_template(clear_db, session): + """Test that URL templates and parameters are stored in the database during sync.""" + with patch.dict( + os.environ, {"AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST": json.dumps(TEMPLATE_BUNDLE_CONFIG)} + ): + manager = DagBundlesManager() + manager.sync_bundles_to_db() + + # Check that the template and parameters were stored + bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() + bundle_model.version = "v1.0" + session.merge(bundle_model) + + assert bundle_model is not None + assert bundle_model.render_url() == "https://github.com/example/repo/tree/v1.0/dags" + assert bundle_model.template_params == {"subdir": "dags"} + assert bundle_model.active is True + + +@pytest.mark.db_test +@conf_vars({("core", "LOAD_EXAMPLES"): "False"}) +def test_bundle_model_render_url(clear_db, session): + """Test the DagBundleModel render_url method.""" + with patch.dict( + os.environ, {"AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST": json.dumps(TEMPLATE_BUNDLE_CONFIG)} + ): + manager = DagBundlesManager() + manager.sync_bundles_to_db() + bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() + bundle_model.version = "main" + session.merge(bundle_model) + assert bundle_model is not None + + url = bundle_model.render_url() + assert url == 
"https://github.com/example/repo/tree/main/dags" + bundle_model.version = None + session.merge(bundle_model) + url = bundle_model.render_url() + assert url == "https://github.com/example/repo/tree/None/dags" + + +@pytest.mark.db_test +@conf_vars({("core", "LOAD_EXAMPLES"): "False"}) +def test_template_params_update_on_sync(clear_db, session): + """Test that template parameters are updated when bundle configuration changes.""" + with patch.dict( + os.environ, {"AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST": json.dumps(TEMPLATE_BUNDLE_CONFIG)} + ): + manager = DagBundlesManager() + manager.sync_bundles_to_db() + + # Verify initial template and parameters + bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() + url = bundle_model._unsign_url() + assert url == "https://github.com/example/repo/tree/{version}/{subdir}" + assert bundle_model.template_params == {"subdir": "dags"} + + # Update the bundle config with different parameters + updated_config = [ + { + "name": "template-bundle", + "classpath": "unit.dag_processing.bundles.test_dag_bundle_manager.BundleWithTemplate", + "kwargs": { + "view_url_template": "https://gitlab.com/example/repo/-/tree/{version}/{subdir}", + "subdir": "workflows", + "refresh_interval": 1, + }, + } + ] + + with patch.dict( + os.environ, {"AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST": json.dumps(updated_config)} + ): + manager = DagBundlesManager() + manager.sync_bundles_to_db() + + # Verify the template and parameters were updated + bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() + url = bundle_model._unsign_url() + assert url == "https://gitlab.com/example/repo/-/tree/{version}/{subdir}" + assert bundle_model.template_params == {"subdir": "workflows"} + assert ( + bundle_model.render_url() + == f"https://gitlab.com/example/repo/-/tree/{bundle_model.version}/workflows" + ) + + +@pytest.mark.db_test +@conf_vars({("core", "LOAD_EXAMPLES"): "False"}) +def 
test_template_update_on_sync(clear_db, session): + """Test that templates are updated when bundle configuration changes.""" + # First, sync with initial template + with patch.dict( + os.environ, {"AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST": json.dumps(TEMPLATE_BUNDLE_CONFIG)} + ): + manager = DagBundlesManager() + manager.sync_bundles_to_db() + + # Verify initial template + bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() + bundle_model.version = "v1.0" + session.merge(bundle_model) + url = bundle_model._unsign_url() + assert url == "https://github.com/example/repo/tree/{version}/{subdir}" + assert bundle_model.render_url() == f"https://github.com/example/repo/tree/{bundle_model.version}/dags" + + # Update the bundle config with a different template + updated_config = [ + { + "name": "template-bundle", + "classpath": "unit.dag_processing.bundles.test_dag_bundle_manager.BundleWithTemplate", + "kwargs": { + "view_url_template": "https://gitlab.com/example/repo/-/tree/{version}/{subdir}", + "subdir": "dags", + "refresh_interval": 1, + }, + } + ] + + with patch.dict( + os.environ, {"AIRFLOW__DAG_PROCESSOR__DAG_BUNDLE_CONFIG_LIST": json.dumps(updated_config)} + ): + manager = DagBundlesManager() + manager.sync_bundles_to_db() + + # Verify the template was updated + bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() + url = bundle_model._unsign_url() + assert url == "https://gitlab.com/example/repo/-/tree/{version}/{subdir}" + assert bundle_model.render_url() == f"https://gitlab.com/example/repo/-/tree/{bundle_model.version}/dags" + + +def test_dag_bundle_model_render_url_with_invalid_template(): + """Test that DagBundleModel.render_url handles invalid templates gracefully.""" + bundle_model = DagBundleModel(name="test-bundle") + bundle_model.url = "https://github.com/example/repo/tree/{invalid_placeholder}" + bundle_model.template_params = {"subdir": "dags"} + + # Should return the original 
template when rendering fails + url = bundle_model.render_url() + assert url == "https://github.com/example/repo/tree/{invalid_placeholder}" + + def test_example_dags_bundle_added(): manager = DagBundlesManager() manager.parse_config() diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index 18a393749163c..0e144382b323f 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -25,9 +25,7 @@ from git import Repo from git.exc import BadName, GitCommandError, NoSuchPathError -from airflow.dag_processing.bundles.base import ( - BaseDagBundle, -) +from airflow.dag_processing.bundles.base import BaseDagBundle from airflow.exceptions import AirflowException from airflow.providers.git.hooks.git import GitHook @@ -48,6 +46,7 @@ class GitDagBundle(BaseDagBundle): """ supports_versioning = True + template_fields = ["subdir"] def __init__( self, @@ -244,3 +243,41 @@ def view_url(self, version: str | None = None) -> str | None: if host == allowed_host or host.endswith(f".{allowed_host}"): return template return None + + def view_url_template(self) -> str | None: + if self._view_url_template: + return self._view_url_template + + if not self.repo_url: + return None + + url = self.repo_url + if url.startswith("git@"): + url = self._convert_git_ssh_url_to_https(url) + if url.endswith(".git"): + url = url[:-4] + + parsed_url = urlparse(url) + host = parsed_url.hostname + if not host: + return None + + if parsed_url.username or parsed_url.password: + new_netloc = host + if parsed_url.port: + new_netloc += f":{parsed_url.port}" + url = parsed_url._replace(netloc=new_netloc).geturl() + + host_patterns = { + "github.com": f"{url}/tree/{{version}}", + "gitlab.com": f"{url}/-/tree/{{version}}", + "bitbucket.org": f"{url}/src/{{version}}", + } + + # Add subdir placeholder if applicable + for allowed_host, template in host_patterns.items(): + 
if host == allowed_host or host.endswith(f".{allowed_host}"): + if self.subdir: + return f"{template}/{self.subdir}" + return template + return None From c622adbf834bb4408c0310bde77debf818241ff2 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Fri, 4 Jul 2025 16:22:53 +0100 Subject: [PATCH 02/33] fixup! Refactor bundle view_url to not instaniate bundle on server components --- airflow-core/docs/img/airflow_erd.svg | 6 +++--- .../api_fastapi/core_api/datamodels/dag_versions.py | 8 ++++++-- .../src/airflow/dag_processing/bundles/manager.py | 2 +- ...1_0_add_url_and_template_params_to_dagbundle_model.py | 6 +++--- airflow-core/src/airflow/models/dagbundle.py | 9 ++++----- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/airflow-core/docs/img/airflow_erd.svg b/airflow-core/docs/img/airflow_erd.svg index d92180d1a4da9..39ec5fa929661 100644 --- a/airflow-core/docs/img/airflow_erd.svg +++ b/airflow-core/docs/img/airflow_erd.svg @@ -1406,9 +1406,9 @@ [JSON] -url - - [VARCHAR(200)] +url_template + + [VARCHAR(200)] version diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py index 0b104e2ce050a..d26f3a7208f0a 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py @@ -23,6 +23,7 @@ from sqlalchemy import select from airflow.api_fastapi.core_api.base import BaseModel +from airflow.dag_processing.bundles.manager import DagBundlesManager class DagVersionResponse(BaseModel): @@ -50,9 +51,12 @@ def bundle_url(self) -> str | None: select(DagBundleModel).where(DagBundleModel.name == self.bundle_name) ) - if bundle_model: + if bundle_model and hasattr(bundle_model, "url_template"): return bundle_model.render_url() - return None + try: + return DagBundlesManager().view_url(self.bundle_name, self.bundle_version) + except ValueError: + return None 
return None diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index b4b068681d35d..5d627f97ce2bb 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -123,7 +123,7 @@ def _sign_bundle_url(url: str, bundle_name: str) -> str: :param bundle_name: The name of the bundle (used in the payload) :return: The signed URL token """ - serializer = URLSafeSerializer(conf.get_mandatory_value("api", "secret_key")) + serializer = URLSafeSerializer(conf.get_mandatory_value("core", "fernet_key")) payload = { "url": url, "bundle_name": bundle_name, diff --git a/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py b/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py index f909a815a46aa..62fb7ce5ba12e 100644 --- a/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py +++ b/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py @@ -17,7 +17,7 @@ # under the License. """ -Add url and template params to DagBundleModel. +Add url template and template params to DagBundleModel. 
Revision ID: 3bda03debd04 Revises: f56f68b9e02f @@ -42,7 +42,7 @@ def upgrade(): """Apply Add url and template params to DagBundleModel.""" with op.batch_alter_table("dag_bundle", schema=None) as batch_op: - batch_op.add_column(sa.Column("url", sa.String(length=200), nullable=True)) + batch_op.add_column(sa.Column("url_template", sa.String(length=200), nullable=True)) batch_op.add_column(sa.Column("template_params", JSONType(), nullable=True)) @@ -50,4 +50,4 @@ def downgrade(): """Unapply Add url and template params to DagBundleModel.""" with op.batch_alter_table("dag_bundle", schema=None) as batch_op: batch_op.drop_column("template_params") - batch_op.drop_column("url") + batch_op.drop_column("url_template") diff --git a/airflow-core/src/airflow/models/dagbundle.py b/airflow-core/src/airflow/models/dagbundle.py index 31169e7c3a7b9..6095e9e3e80ac 100644 --- a/airflow-core/src/airflow/models/dagbundle.py +++ b/airflow-core/src/airflow/models/dagbundle.py @@ -43,12 +43,11 @@ class DagBundleModel(Base): active = Column(Boolean, default=True) version = Column(String(200), nullable=True) last_refreshed = Column(UtcDateTime, nullable=True) - url = Column(String(200), nullable=True) + url_template = Column(String(200), nullable=True) template_params = Column(JSONType, nullable=True) def __init__(self, *, name: str): self.name = name - self.template_params = {} def _unsign_url(self) -> str | None: """ @@ -62,8 +61,8 @@ def _unsign_url(self) -> str | None: from airflow.configuration import conf - serializer = URLSafeSerializer(conf.get_mandatory_value("api", "secret_key")) - payload = serializer.loads(self.url) + serializer = URLSafeSerializer(conf.get_mandatory_value("api", "fernet_key")) + payload = serializer.loads(self.url_template) if isinstance(payload, dict) and "url" in payload and "bundle_name" in payload: if payload["bundle_name"] == self.name: return payload["url"] @@ -88,7 +87,7 @@ def render_url(self) -> str | None: url_template = self._unsign_url() if 
url_template is None: - url_template = self.url + url_template = self.url_template params = dict(self.template_params or {}) params["version"] = self.version From c927764e91692e34727dc51ae6c367b97678976d Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Sun, 6 Jul 2025 21:36:53 +0100 Subject: [PATCH 03/33] Rename bundle url to url_template and fix backcompat --- .../airflow/dag_processing/bundles/base.py | 2 +- .../airflow/dag_processing/bundles/manager.py | 8 +- airflow-core/src/airflow/models/dagbundle.py | 18 ++- .../tests/unit/api_fastapi/conftest.py | 26 ++-- .../routes/public/test_task_instances.py | 129 +++++++++++++++++- .../src/tests_common/pytest_plugin.py | 2 +- 6 files changed, 156 insertions(+), 29 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/base.py b/airflow-core/src/airflow/dag_processing/bundles/base.py index 249e67e3d4308..4a89924d57d4e 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/base.py +++ b/airflow-core/src/airflow/dag_processing/bundles/base.py @@ -218,7 +218,7 @@ def remove_stale_bundle_versions(self): with other processes. 
""" log.info("checking for stale bundle versions locally") - # Import here to avoid circular imports + from airflow.dag_processing.bundles.manager import DagBundlesManager bundles = list(DagBundlesManager().get_all_dag_bundles()) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 5d627f97ce2bb..68fda4ad83069 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -196,8 +196,8 @@ def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: new_template = _sign_bundle_url(new_template, name) self.log.debug("Signed URL template for bundle %s", name) - if new_template != bundle.url: - bundle.url = new_template + if new_template != bundle.url_template: + bundle.url_template = new_template self.log.debug("Updated URL template for bundle %s", name) if new_params != bundle.template_params: bundle.template_params = new_params @@ -221,8 +221,10 @@ def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: new_template = _sign_bundle_url(new_template, name) self.log.debug("Signed URL template for bundle %s", name) - new_bundle.url = new_template + new_bundle.url_template = new_template new_bundle.template_params = new_params + new_bundle.version = bundle_instance.get_current_version() + session.add(new_bundle) self.log.info("Added new DAG bundle %s to the database", name) diff --git a/airflow-core/src/airflow/models/dagbundle.py b/airflow-core/src/airflow/models/dagbundle.py index 6095e9e3e80ac..71316cf186370 100644 --- a/airflow-core/src/airflow/models/dagbundle.py +++ b/airflow-core/src/airflow/models/dagbundle.py @@ -20,10 +20,11 @@ from sqlalchemy_utils import JSONType from airflow.models.base import Base, StringID +from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.sqlalchemy import UtcDateTime -class DagBundleModel(Base): +class DagBundleModel(Base, 
LoggingMixin): """ A table for storing DAG bundle metadata. @@ -33,7 +34,7 @@ class DagBundleModel(Base): - active: Is the bundle currently found in configuration? - version: The latest version Airflow has seen for the bundle. - last_refreshed: When the bundle was last refreshed. - - url: URL template for viewing the bundle (e.g., "https://github.com/repo/tree/{version}") + - url_template: Signed URL template for viewing the bundle - template_params: JSON object containing template parameters (e.g., {"subdir": "dags"}) """ @@ -46,8 +47,9 @@ class DagBundleModel(Base): url_template = Column(String(200), nullable=True) template_params = Column(JSONType, nullable=True) - def __init__(self, *, name: str): + def __init__(self, *, name: str, version: str | None = None): self.name = name + self.version = version def _unsign_url(self) -> str | None: """ @@ -61,7 +63,7 @@ def _unsign_url(self) -> str | None: from airflow.configuration import conf - serializer = URLSafeSerializer(conf.get_mandatory_value("api", "fernet_key")) + serializer = URLSafeSerializer(conf.get_mandatory_value("core", "fernet_key")) payload = serializer.loads(self.url_template) if isinstance(payload, dict) and "url" in payload and "bundle_name" in payload: if payload["bundle_name"] == self.name: @@ -81,7 +83,7 @@ def render_url(self) -> str | None: :param version: The version to substitute in the template :return: The rendered URL or None if no template is available """ - if not self.url: + if not self.url_template: return None url_template = self._unsign_url() @@ -95,9 +97,5 @@ def render_url(self) -> str | None: try: return url_template.format(**params) except (KeyError, ValueError) as e: - import logging - - logging.getLogger(__name__).warning( - "Failed to render URL template for bundle %s: %s", self.name, e - ) + self.log.warning("Failed to render URL template for bundle %s: %s", self.name, e) return url_template diff --git a/airflow-core/tests/unit/api_fastapi/conftest.py 
b/airflow-core/tests/unit/api_fastapi/conftest.py index b2497c194a5b5..1c14cc9783f7e 100644 --- a/airflow-core/tests/unit/api_fastapi/conftest.py +++ b/airflow-core/tests/unit/api_fastapi/conftest.py @@ -19,6 +19,7 @@ import datetime import os from typing import TYPE_CHECKING +from unittest import mock import pytest import time_machine @@ -26,7 +27,9 @@ from airflow.api_fastapi.app import create_app from airflow.api_fastapi.auth.managers.simple.user import SimpleAuthManagerUser +from airflow.dag_processing.bundles.manager import DagBundlesManager from airflow.models import Connection +from airflow.providers.git.bundles.git import GitDagBundle from airflow.providers.standard.operators.empty import EmptyOperator from tests_common.test_utils.config import conf_vars @@ -121,19 +124,26 @@ def configure_git_connection_for_dag_bundle(session): conn_id="git_default", conn_type="git", description="default git connection", - host="fakeprotocol://test_host.github.com", + host="http://test_host.github.com", port=8081, login="", ) session.add(connection) - with conf_vars( - { - ( - "dag_processor", - "dag_bundle_config_list", - ): '[{ "name": "dag_maker", "classpath": "airflow.providers.git.bundles.git.GitDagBundle", "kwargs": {"subdir": "dags", "tracking_ref": "main", "refresh_interval": 0}}, { "name": "another_bundle_name", "classpath": "airflow.providers.git.bundles.git.GitDagBundle", "kwargs": {"subdir": "dags", "tracking_ref": "main", "refresh_interval": 0}}]' - } + with ( + conf_vars( + { + ( + "dag_processor", + "dag_bundle_config_list", + ): '[{ "name": "dag_maker", "classpath": "airflow.providers.git.bundles.git.GitDagBundle", "kwargs": {"subdir": "dags", "tracking_ref": "main", "refresh_interval": 0}}, { "name": "another_bundle_name", "classpath": "airflow.providers.git.bundles.git.GitDagBundle", "kwargs": {"subdir": "dags", "tracking_ref": "main", "refresh_interval": 0}}]' + } + ), + mock.patch("airflow.providers.git.bundles.git.GitHook") as mock_git_hook, + 
mock.patch.object(GitDagBundle, "get_current_version") as mock_get_current_version, ): + mock_get_current_version.return_value = "some_commit_hash" + mock_git_hook.return_value.repo_url = connection.host + DagBundlesManager().sync_bundles_to_db() yield # in case no flush or commit was executed after the "session.add" above, we need to flush the session # manually here to make sure that the added connection will be deleted by query(Connection).delete() diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py index 3cb85f80a70e0..0a93076cd22bd 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py @@ -237,7 +237,11 @@ def test_should_respond_403(self, unauthorized_test_client): ], ) @pytest.mark.usefixtures("make_dag_with_multiple_versions") - def test_should_respond_200_with_versions(self, test_client, run_id, expected_version_number): + @mock.patch("airflow.api_fastapi.core_api.datamodels.dag_versions.hasattr") + def test_should_respond_200_with_versions( + self, mock_hasattr, test_client, run_id, expected_version_number + ): + mock_hasattr.return_value = False response = test_client.get(f"/dags/dag_with_multiple_versions/dagRuns/{run_id}/taskInstances/task1") assert response.status_code == 200 @@ -281,7 +285,7 @@ def test_should_respond_200_with_versions(self, test_client, run_id, expected_ve "dag_display_name": "dag_with_multiple_versions", "bundle_name": "dag_maker", "bundle_version": f"some_commit_hash{expected_version_number}", - "bundle_url": f"fakeprotocol://test_host.github.com/tree/some_commit_hash{expected_version_number}/dags", + "bundle_url": f"http://test_host.github.com/tree/some_commit_hash{expected_version_number}/dags", "created_at": mock.ANY, }, } @@ -1983,7 +1987,64 @@ def 
test_raises_404_for_nonexistent_task_instance(self, test_client, session): ], ) @pytest.mark.usefixtures("make_dag_with_multiple_versions") - def test_should_respond_200_with_versions(self, test_client, run_id, expected_version_number): + @mock.patch("airflow.api_fastapi.core_api.datamodels.dag_versions.hasattr") + def test_should_respond_200_with_versions( + self, mock_hasattr, test_client, run_id, expected_version_number, session + ): + mock_hasattr.return_value = False + response = test_client.get( + f"/dags/dag_with_multiple_versions/dagRuns/{run_id}/taskInstances/task1/tries/0" + ) + assert response.status_code == 200 + assert response.json() == { + "task_id": "task1", + "dag_id": "dag_with_multiple_versions", + "dag_display_name": "dag_with_multiple_versions", + "dag_run_id": run_id, + "map_index": -1, + "start_date": None, + "end_date": mock.ANY, + "duration": None, + "state": None, + "try_number": 0, + "max_tries": 0, + "task_display_name": "task1", + "hostname": "", + "unixname": getuser(), + "pool": "default_pool", + "pool_slots": 1, + "queue": "default", + "priority_weight": 1, + "operator": "EmptyOperator", + "queued_when": None, + "scheduled_when": None, + "pid": None, + "executor": None, + "executor_config": "{}", + "dag_version": { + "id": mock.ANY, + "version_number": expected_version_number, + "dag_id": "dag_with_multiple_versions", + "bundle_name": "dag_maker", + "bundle_version": f"some_commit_hash{expected_version_number}", + "bundle_url": f"http://test_host.github.com/tree/some_commit_hash{expected_version_number}/dags", + "created_at": mock.ANY, + "dag_display_name": "dag_with_multiple_versions", + }, + } + + @pytest.mark.parametrize( + "run_id, expected_version_number", + [ + ("run1", 1), + ("run2", 2), + ("run3", 3), + ], + ) + @pytest.mark.usefixtures("make_dag_with_multiple_versions") + def test_should_respond_200_with_versions_new( + self, test_client, run_id, expected_version_number, session + ): response = test_client.get( 
f"/dags/dag_with_multiple_versions/dagRuns/{run_id}/taskInstances/task1/tries/0" ) @@ -2019,7 +2080,7 @@ def test_should_respond_200_with_versions(self, test_client, run_id, expected_ve "dag_id": "dag_with_multiple_versions", "bundle_name": "dag_maker", "bundle_version": f"some_commit_hash{expected_version_number}", - "bundle_url": f"fakeprotocol://test_host.github.com/tree/some_commit_hash{expected_version_number}/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash/dags", "created_at": mock.ANY, "dag_display_name": "dag_with_multiple_versions", }, @@ -3065,7 +3126,63 @@ def test_raises_404_for_nonexistent_task_instance(self, test_client, session): ], ) @pytest.mark.usefixtures("make_dag_with_multiple_versions") - def test_should_respond_200_with_versions(self, test_client, run_id, expected_version_number): + @mock.patch("airflow.api_fastapi.core_api.datamodels.dag_versions.hasattr") + def test_should_respond_200_with_versions( + self, mock_hasattr, test_client, run_id, expected_version_number + ): + mock_hasattr.return_value = False + response = test_client.get( + f"/dags/dag_with_multiple_versions/dagRuns/{run_id}/taskInstances/task1/tries" + ) + assert response.status_code == 200 + + assert response.json()["task_instances"][0] == { + "task_id": "task1", + "dag_id": "dag_with_multiple_versions", + "dag_display_name": "dag_with_multiple_versions", + "dag_run_id": run_id, + "map_index": -1, + "start_date": None, + "end_date": mock.ANY, + "duration": None, + "state": mock.ANY, + "try_number": 0, + "max_tries": 0, + "task_display_name": "task1", + "hostname": "", + "unixname": getuser(), + "pool": "default_pool", + "pool_slots": 1, + "queue": "default", + "priority_weight": 1, + "operator": "EmptyOperator", + "queued_when": None, + "scheduled_when": None, + "pid": None, + "executor": None, + "executor_config": "{}", + "dag_version": { + "id": mock.ANY, + "version_number": expected_version_number, + "dag_id": "dag_with_multiple_versions", + 
"bundle_name": "dag_maker", + "bundle_version": f"some_commit_hash{expected_version_number}", + "bundle_url": f"http://test_host.github.com/tree/some_commit_hash{expected_version_number}/dags", + "created_at": mock.ANY, + "dag_display_name": "dag_with_multiple_versions", + }, + } + + @pytest.mark.parametrize( + "run_id, expected_version_number", + [ + ("run1", 1), + ("run2", 2), + ("run3", 3), + ], + ) + @pytest.mark.usefixtures("make_dag_with_multiple_versions") + def test_should_respond_200_with_versions_new(self, test_client, run_id, expected_version_number): response = test_client.get( f"/dags/dag_with_multiple_versions/dagRuns/{run_id}/taskInstances/task1/tries" ) @@ -3102,7 +3219,7 @@ def test_should_respond_200_with_versions(self, test_client, run_id, expected_ve "dag_id": "dag_with_multiple_versions", "bundle_name": "dag_maker", "bundle_version": f"some_commit_hash{expected_version_number}", - "bundle_url": f"fakeprotocol://test_host.github.com/tree/some_commit_hash{expected_version_number}/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash/dags", "created_at": mock.ANY, "dag_display_name": "dag_with_multiple_versions", }, diff --git a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py index e10bcd1d5bde9..56c8bf7e26894 100644 --- a/devel-common/src/tests_common/pytest_plugin.py +++ b/devel-common/src/tests_common/pytest_plugin.py @@ -1216,7 +1216,7 @@ def __call__( self.session.query(DagBundleModel).filter(DagBundleModel.name == self.bundle_name).count() == 0 ): - self.session.add(DagBundleModel(name=self.bundle_name)) + self.session.add(DagBundleModel(name=self.bundle_name, version=self.bundle_version)) self.session.commit() return self From bfd723a02852b1124a0533336b61d15110fa5ecf Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 10:19:40 +0100 Subject: [PATCH 04/33] Refactor render_url to not depend on dagbundlemodel version --- 
.../core_api/datamodels/dag_versions.py | 2 +- .../airflow/dag_processing/bundles/manager.py | 1 - airflow-core/src/airflow/models/dagbundle.py | 5 +- .../tests/unit/api_fastapi/conftest.py | 6 +- .../routes/public/test_dag_versions.py | 203 ++++++++++++++++-- .../routes/public/test_task_instances.py | 10 +- .../bundles/test_dag_bundle_manager.py | 25 +-- 7 files changed, 208 insertions(+), 44 deletions(-) diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py index d26f3a7208f0a..536b16e446a61 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py @@ -52,7 +52,7 @@ def bundle_url(self) -> str | None: ) if bundle_model and hasattr(bundle_model, "url_template"): - return bundle_model.render_url() + return bundle_model.render_url(self.bundle_version) try: return DagBundlesManager().view_url(self.bundle_name, self.bundle_version) except ValueError: diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 68fda4ad83069..35e024b94bf01 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -223,7 +223,6 @@ def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: new_bundle.url_template = new_template new_bundle.template_params = new_params - new_bundle.version = bundle_instance.get_current_version() session.add(new_bundle) self.log.info("Added new DAG bundle %s to the database", name) diff --git a/airflow-core/src/airflow/models/dagbundle.py b/airflow-core/src/airflow/models/dagbundle.py index 71316cf186370..4eb2ff569e88f 100644 --- a/airflow-core/src/airflow/models/dagbundle.py +++ b/airflow-core/src/airflow/models/dagbundle.py @@ -48,6 +48,7 @@ class DagBundleModel(Base, 
LoggingMixin): template_params = Column(JSONType, nullable=True) def __init__(self, *, name: str, version: str | None = None): + super().__init__() self.name = name self.version = version @@ -73,7 +74,7 @@ def _unsign_url(self) -> str | None: except (BadSignature, Exception): return None - def render_url(self) -> str | None: + def render_url(self, version: str | None = None) -> str | None: """ Render the URL template with the given version and stored template parameters. @@ -92,7 +93,7 @@ def render_url(self) -> str | None: url_template = self.url_template params = dict(self.template_params or {}) - params["version"] = self.version + params["version"] = version try: return url_template.format(**params) diff --git a/airflow-core/tests/unit/api_fastapi/conftest.py b/airflow-core/tests/unit/api_fastapi/conftest.py index 1c14cc9783f7e..65340e87efc72 100644 --- a/airflow-core/tests/unit/api_fastapi/conftest.py +++ b/airflow-core/tests/unit/api_fastapi/conftest.py @@ -163,11 +163,7 @@ def make_dag_with_multiple_versions(dag_maker, configure_git_connection_for_dag_ """ dag_id = "dag_with_multiple_versions" for version_number in range(1, 4): - with dag_maker( - dag_id, - session=session, - bundle_version=f"some_commit_hash{version_number}", - ): + with dag_maker(dag_id, session=session, bundle_version=f"some_commit_hash{version_number}"): for task_number in range(version_number): EmptyOperator(task_id=f"task{task_number + 1}") dag_maker.create_dagrun( diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_versions.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_versions.py index bb363475b9442..1756209564a35 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_versions.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_versions.py @@ -22,7 +22,7 @@ from airflow.providers.standard.operators.empty import EmptyOperator -from tests_common.test_utils.db import clear_db_dags, 
clear_db_serialized_dags +from tests_common.test_utils.db import clear_db_dag_bundles, clear_db_dags, clear_db_serialized_dags pytestmark = pytest.mark.db_test @@ -32,6 +32,7 @@ class TestDagVersionEndpoint: def setup(request, dag_maker, session): clear_db_dags() clear_db_serialized_dags() + clear_db_dag_bundles() with dag_maker( dag_id="ANOTHER_DAG_ID", bundle_version="some_commit_hash", bundle_name="another_bundle_name" @@ -50,7 +51,7 @@ class TestGetDagVersion(TestDagVersionEndpoint): { "bundle_name": "another_bundle_name", "bundle_version": "some_commit_hash", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash/dags", "created_at": mock.ANY, "dag_id": "ANOTHER_DAG_ID", "id": mock.ANY, @@ -64,7 +65,7 @@ class TestGetDagVersion(TestDagVersionEndpoint): { "bundle_name": "dag_maker", "bundle_version": "some_commit_hash1", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash1/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash1/dags", "created_at": mock.ANY, "dag_id": "dag_with_multiple_versions", "id": mock.ANY, @@ -78,7 +79,7 @@ class TestGetDagVersion(TestDagVersionEndpoint): { "bundle_name": "dag_maker", "bundle_version": "some_commit_hash2", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash2/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash2/dags", "created_at": mock.ANY, "dag_id": "dag_with_multiple_versions", "id": mock.ANY, @@ -92,7 +93,7 @@ class TestGetDagVersion(TestDagVersionEndpoint): { "bundle_name": "dag_maker", "bundle_version": "some_commit_hash3", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash3/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash3/dags", "created_at": mock.ANY, "dag_id": "dag_with_multiple_versions", "id": mock.ANY, @@ -103,15 +104,88 @@ class TestGetDagVersion(TestDagVersionEndpoint): ], ) 
@pytest.mark.usefixtures("make_dag_with_multiple_versions") - def test_get_dag_version(self, test_client, dag_id, dag_version, expected_response): + @mock.patch("airflow.api_fastapi.core_api.datamodels.dag_versions.hasattr") + def test_get_dag_version(self, mock_hasattr, test_client, dag_id, dag_version, expected_response): + mock_hasattr.return_value = False + response = test_client.get(f"/dags/{dag_id}/dagVersions/{dag_version}") + assert response.status_code == 200 + assert response.json() == expected_response + + @pytest.mark.parametrize( + "dag_id, dag_version, expected_response", + [ + [ + "ANOTHER_DAG_ID", + 1, + { + "bundle_name": "another_bundle_name", + "bundle_version": "some_commit_hash", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash/dags", + "created_at": mock.ANY, + "dag_id": "ANOTHER_DAG_ID", + "id": mock.ANY, + "version_number": 1, + "dag_display_name": "ANOTHER_DAG_ID", + }, + ], + [ + "dag_with_multiple_versions", + 1, + { + "bundle_name": "dag_maker", + "bundle_version": "some_commit_hash1", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash1/dags", + "created_at": mock.ANY, + "dag_id": "dag_with_multiple_versions", + "id": mock.ANY, + "version_number": 1, + "dag_display_name": "dag_with_multiple_versions", + }, + ], + [ + "dag_with_multiple_versions", + 2, + { + "bundle_name": "dag_maker", + "bundle_version": "some_commit_hash2", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash2/dags", + "created_at": mock.ANY, + "dag_id": "dag_with_multiple_versions", + "id": mock.ANY, + "version_number": 2, + "dag_display_name": "dag_with_multiple_versions", + }, + ], + [ + "dag_with_multiple_versions", + 3, + { + "bundle_name": "dag_maker", + "bundle_version": "some_commit_hash3", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash3/dags", + "created_at": mock.ANY, + "dag_id": "dag_with_multiple_versions", + "id": mock.ANY, + "version_number": 3, + "dag_display_name": 
"dag_with_multiple_versions", + }, + ], + ], + ) + @pytest.mark.usefixtures("make_dag_with_multiple_versions") + def test_get_dag_version_with_url_template(self, test_client, dag_id, dag_version, expected_response): response = test_client.get(f"/dags/{dag_id}/dagVersions/{dag_version}") assert response.status_code == 200 assert response.json() == expected_response @pytest.mark.usefixtures("make_dag_with_multiple_versions") @mock.patch("airflow.dag_processing.bundles.manager.DagBundlesManager.view_url") - def test_get_dag_version_with_unconfigured_bundle(self, mock_view_url, test_client, dag_maker, session): + @mock.patch("airflow.api_fastapi.core_api.datamodels.dag_versions.hasattr") + def test_get_dag_version_with_unconfigured_bundle( + self, mock_hasattr, mock_view_url, test_client, dag_maker, session + ): """Test that when a bundle is no longer configured, the bundle_url returns an error message.""" + mock_hasattr.return_value = False mock_view_url.side_effect = ValueError("Bundle not configured") response = test_client.get("/dags/dag_with_multiple_versions/dagVersions/1") @@ -149,7 +223,106 @@ class TestGetDagVersions(TestDagVersionEndpoint): { "bundle_name": "another_bundle_name", "bundle_version": "some_commit_hash", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash/dags", + "created_at": mock.ANY, + "dag_id": "ANOTHER_DAG_ID", + "id": mock.ANY, + "version_number": 1, + "dag_display_name": "ANOTHER_DAG_ID", + }, + { + "bundle_name": "dag_maker", + "bundle_version": "some_commit_hash1", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash1/dags", + "created_at": mock.ANY, + "dag_id": "dag_with_multiple_versions", + "id": mock.ANY, + "version_number": 1, + "dag_display_name": "dag_with_multiple_versions", + }, + { + "bundle_name": "dag_maker", + "bundle_version": "some_commit_hash2", + "bundle_url": 
"http://test_host.github.com/tree/some_commit_hash2/dags", + "created_at": mock.ANY, + "dag_id": "dag_with_multiple_versions", + "id": mock.ANY, + "version_number": 2, + "dag_display_name": "dag_with_multiple_versions", + }, + { + "bundle_name": "dag_maker", + "bundle_version": "some_commit_hash3", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash3/dags", + "created_at": mock.ANY, + "dag_id": "dag_with_multiple_versions", + "id": mock.ANY, + "version_number": 3, + "dag_display_name": "dag_with_multiple_versions", + }, + ], + "total_entries": 4, + }, + ], + [ + "dag_with_multiple_versions", + { + "dag_versions": [ + { + "bundle_name": "dag_maker", + "bundle_version": "some_commit_hash1", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash1/dags", + "created_at": mock.ANY, + "dag_id": "dag_with_multiple_versions", + "id": mock.ANY, + "version_number": 1, + "dag_display_name": "dag_with_multiple_versions", + }, + { + "bundle_name": "dag_maker", + "bundle_version": "some_commit_hash2", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash2/dags", + "created_at": mock.ANY, + "dag_id": "dag_with_multiple_versions", + "id": mock.ANY, + "version_number": 2, + "dag_display_name": "dag_with_multiple_versions", + }, + { + "bundle_name": "dag_maker", + "bundle_version": "some_commit_hash3", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash3/dags", + "created_at": mock.ANY, + "dag_id": "dag_with_multiple_versions", + "id": mock.ANY, + "version_number": 3, + "dag_display_name": "dag_with_multiple_versions", + }, + ], + "total_entries": 3, + }, + ], + ], + ) + @pytest.mark.usefixtures("make_dag_with_multiple_versions") + @mock.patch("airflow.api_fastapi.core_api.datamodels.dag_versions.hasattr") + def test_get_dag_versions(self, mock_hasattr, test_client, dag_id, expected_response): + mock_hasattr.return_value = False + response = test_client.get(f"/dags/{dag_id}/dagVersions") + assert response.status_code == 200 + 
assert response.json() == expected_response + + @pytest.mark.parametrize( + "dag_id, expected_response", + [ + [ + "~", + { + "dag_versions": [ + { + "bundle_name": "another_bundle_name", + "bundle_version": "some_commit_hash", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash/dags", "created_at": mock.ANY, "dag_id": "ANOTHER_DAG_ID", "id": mock.ANY, @@ -159,7 +332,7 @@ class TestGetDagVersions(TestDagVersionEndpoint): { "bundle_name": "dag_maker", "bundle_version": "some_commit_hash1", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash1/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash1/dags", "created_at": mock.ANY, "dag_id": "dag_with_multiple_versions", "id": mock.ANY, @@ -169,7 +342,7 @@ class TestGetDagVersions(TestDagVersionEndpoint): { "bundle_name": "dag_maker", "bundle_version": "some_commit_hash2", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash2/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash2/dags", "created_at": mock.ANY, "dag_id": "dag_with_multiple_versions", "id": mock.ANY, @@ -179,7 +352,7 @@ class TestGetDagVersions(TestDagVersionEndpoint): { "bundle_name": "dag_maker", "bundle_version": "some_commit_hash3", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash3/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash3/dags", "created_at": mock.ANY, "dag_id": "dag_with_multiple_versions", "id": mock.ANY, @@ -197,7 +370,7 @@ class TestGetDagVersions(TestDagVersionEndpoint): { "bundle_name": "dag_maker", "bundle_version": "some_commit_hash1", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash1/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash1/dags", "created_at": mock.ANY, "dag_id": "dag_with_multiple_versions", "id": mock.ANY, @@ -207,7 +380,7 @@ class TestGetDagVersions(TestDagVersionEndpoint): { "bundle_name": "dag_maker", "bundle_version": 
"some_commit_hash2", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash2/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash2/dags", "created_at": mock.ANY, "dag_id": "dag_with_multiple_versions", "id": mock.ANY, @@ -217,7 +390,7 @@ class TestGetDagVersions(TestDagVersionEndpoint): { "bundle_name": "dag_maker", "bundle_version": "some_commit_hash3", - "bundle_url": "fakeprotocol://test_host.github.com/tree/some_commit_hash3/dags", + "bundle_url": "http://test_host.github.com/tree/some_commit_hash3/dags", "created_at": mock.ANY, "dag_id": "dag_with_multiple_versions", "id": mock.ANY, @@ -231,7 +404,7 @@ class TestGetDagVersions(TestDagVersionEndpoint): ], ) @pytest.mark.usefixtures("make_dag_with_multiple_versions") - def test_get_dag_versions(self, test_client, dag_id, expected_response): + def test_get_dag_versions_with_url_template(self, test_client, dag_id, expected_response): response = test_client.get(f"/dags/{dag_id}/dagVersions") assert response.status_code == 200 assert response.json() == expected_response diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py index 0a93076cd22bd..8cd42df4299ee 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py @@ -2042,7 +2042,7 @@ def test_should_respond_200_with_versions( ], ) @pytest.mark.usefixtures("make_dag_with_multiple_versions") - def test_should_respond_200_with_versions_new( + def test_should_respond_200_with_versions_using_url_template( self, test_client, run_id, expected_version_number, session ): response = test_client.get( @@ -2080,7 +2080,7 @@ def test_should_respond_200_with_versions_new( "dag_id": "dag_with_multiple_versions", "bundle_name": "dag_maker", "bundle_version": 
f"some_commit_hash{expected_version_number}", - "bundle_url": "http://test_host.github.com/tree/some_commit_hash/dags", + "bundle_url": f"http://test_host.github.com/tree/some_commit_hash{expected_version_number}/dags", "created_at": mock.ANY, "dag_display_name": "dag_with_multiple_versions", }, @@ -3182,7 +3182,9 @@ def test_should_respond_200_with_versions( ], ) @pytest.mark.usefixtures("make_dag_with_multiple_versions") - def test_should_respond_200_with_versions_new(self, test_client, run_id, expected_version_number): + def test_should_respond_200_with_versions_using_url_template( + self, test_client, run_id, expected_version_number + ): response = test_client.get( f"/dags/dag_with_multiple_versions/dagRuns/{run_id}/taskInstances/task1/tries" ) @@ -3219,7 +3221,7 @@ def test_should_respond_200_with_versions_new(self, test_client, run_id, expecte "dag_id": "dag_with_multiple_versions", "bundle_name": "dag_maker", "bundle_version": f"some_commit_hash{expected_version_number}", - "bundle_url": "http://test_host.github.com/tree/some_commit_hash/dags", + "bundle_url": f"http://test_host.github.com/tree/some_commit_hash{expected_version_number}/dags", "created_at": mock.ANY, "dag_display_name": "dag_with_multiple_versions", }, diff --git a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py index 8c5a639188219..dff8e546ec280 100644 --- a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py +++ b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py @@ -239,11 +239,11 @@ def test_sync_bundles_to_db_with_template(clear_db, session): # Check that the template and parameters were stored bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() - bundle_model.version = "v1.0" + session.merge(bundle_model) assert bundle_model is not None - assert bundle_model.render_url() == 
"https://github.com/example/repo/tree/v1.0/dags" + assert bundle_model.render_url(version="v1.0") == "https://github.com/example/repo/tree/v1.0/dags" assert bundle_model.template_params == {"subdir": "dags"} assert bundle_model.active is True @@ -258,14 +258,12 @@ def test_bundle_model_render_url(clear_db, session): manager = DagBundlesManager() manager.sync_bundles_to_db() bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() - bundle_model.version = "main" + session.merge(bundle_model) assert bundle_model is not None - url = bundle_model.render_url() + url = bundle_model.render_url(version="main") assert url == "https://github.com/example/repo/tree/main/dags" - bundle_model.version = None - session.merge(bundle_model) url = bundle_model.render_url() assert url == "https://github.com/example/repo/tree/None/dags" @@ -310,10 +308,7 @@ def test_template_params_update_on_sync(clear_db, session): url = bundle_model._unsign_url() assert url == "https://gitlab.com/example/repo/-/tree/{version}/{subdir}" assert bundle_model.template_params == {"subdir": "workflows"} - assert ( - bundle_model.render_url() - == f"https://gitlab.com/example/repo/-/tree/{bundle_model.version}/workflows" - ) + assert bundle_model.render_url(version="v1") == "https://gitlab.com/example/repo/-/tree/v1/workflows" @pytest.mark.db_test @@ -329,11 +324,9 @@ def test_template_update_on_sync(clear_db, session): # Verify initial template bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() - bundle_model.version = "v1.0" - session.merge(bundle_model) url = bundle_model._unsign_url() assert url == "https://github.com/example/repo/tree/{version}/{subdir}" - assert bundle_model.render_url() == f"https://github.com/example/repo/tree/{bundle_model.version}/dags" + assert bundle_model.render_url(version="v1") == "https://github.com/example/repo/tree/v1/dags" # Update the bundle config with a different template updated_config = [ @@ -358,17 
+351,17 @@ def test_template_update_on_sync(clear_db, session): bundle_model = session.query(DagBundleModel).filter_by(name="template-bundle").first() url = bundle_model._unsign_url() assert url == "https://gitlab.com/example/repo/-/tree/{version}/{subdir}" - assert bundle_model.render_url() == f"https://gitlab.com/example/repo/-/tree/{bundle_model.version}/dags" + assert bundle_model.render_url("v1") == "https://gitlab.com/example/repo/-/tree/v1/dags" def test_dag_bundle_model_render_url_with_invalid_template(): """Test that DagBundleModel.render_url handles invalid templates gracefully.""" bundle_model = DagBundleModel(name="test-bundle") - bundle_model.url = "https://github.com/example/repo/tree/{invalid_placeholder}" + bundle_model.url_template = "https://github.com/example/repo/tree/{invalid_placeholder}" bundle_model.template_params = {"subdir": "dags"} # Should return the original template when rendering fails - url = bundle_model.render_url() + url = bundle_model.render_url("v1") assert url == "https://github.com/example/repo/tree/{invalid_placeholder}" From c22d3d111f5c6e5745014f736adcbfdbb5be2ce7 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 12:30:54 +0100 Subject: [PATCH 05/33] Render view_url_template in view_url --- .../src/airflow/providers/git/bundles/git.py | 29 +----- .../git/tests/unit/git/bundles/test_git.py | 93 +++++++++++++++++++ 2 files changed, 96 insertions(+), 26 deletions(-) diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index 0e144382b323f..dbf3c0ec635ca 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -216,33 +216,10 @@ def _convert_git_ssh_url_to_https(url: str) -> str: def view_url(self, version: str | None = None) -> str | None: if not version: return None - url = self.repo_url - if not url: + template = self.view_url_template() + if not template: 
return None - if url.startswith("git@"): - url = self._convert_git_ssh_url_to_https(url) - if url.endswith(".git"): - url = url[:-4] - parsed_url = urlparse(url) - host = parsed_url.hostname - if not host: - return None - if parsed_url.username or parsed_url.password: - new_netloc = host - if parsed_url.port: - new_netloc += f":{parsed_url.port}" - url = parsed_url._replace(netloc=new_netloc).geturl() - host_patterns = { - "github.com": f"{url}/tree/{version}", - "gitlab.com": f"{url}/-/tree/{version}", - "bitbucket.org": f"{url}/src/{version}", - } - if self.subdir: - host_patterns = {k: f"{v}/{self.subdir}" for k, v in host_patterns.items()} - for allowed_host, template in host_patterns.items(): - if host == allowed_host or host.endswith(f".{allowed_host}"): - return template - return None + return template.format(version=version) def view_url_template(self) -> str | None: if self._view_url_template: diff --git a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py index 1a97c726c8773..491f23cce4a0b 100644 --- a/providers/git/tests/unit/git/bundles/test_git.py +++ b/providers/git/tests/unit/git/bundles/test_git.py @@ -522,6 +522,99 @@ def test_view_url_subdir( assert view_url == expected_url bundle.initialize.assert_not_called() + @pytest.mark.parametrize( + "repo_url, extra_conn_kwargs, expected_url", + [ + ( + "git@github.com:apache/airflow.git", + None, + "https://github.com/apache/airflow/tree/{version}/subdir", + ), + ( + "git@github.com:apache/airflow", + None, + "https://github.com/apache/airflow/tree/{version}/subdir", + ), + ( + "https://github.com/apache/airflow", + None, + "https://github.com/apache/airflow/tree/{version}/subdir", + ), + ( + "https://github.com/apache/airflow.git", + None, + "https://github.com/apache/airflow/tree/{version}/subdir", + ), + ( + "git@gitlab.com:apache/airflow.git", + None, + "https://gitlab.com/apache/airflow/-/tree/{version}/subdir", + ), + ( + 
"git@bitbucket.org:apache/airflow.git", + None, + "https://bitbucket.org/apache/airflow/src/{version}/subdir", + ), + ( + "git@myorg.github.com:apache/airflow.git", + None, + "https://myorg.github.com/apache/airflow/tree/{version}/subdir", + ), + ( + "https://myorg.github.com/apache/airflow.git", + None, + "https://myorg.github.com/apache/airflow/tree/{version}/subdir", + ), + ( + "https://github.com/apache/airflow", + {"password": "abc123"}, + "https://github.com/apache/airflow/tree/{version}/subdir", + ), + ( + "https://github.com/apache/airflow", + {"login": "abc123"}, + "https://github.com/apache/airflow/tree/{version}/subdir", + ), + ( + "https://github.com/apache/airflow", + {"login": "abc123", "password": "def456"}, + "https://github.com/apache/airflow/tree/{version}/subdir", + ), + ( + "https://github.com:443/apache/airflow", + None, + "https://github.com:443/apache/airflow/tree/{version}/subdir", + ), + ( + "https://github.com:443/apache/airflow", + {"password": "abc123"}, + "https://github.com:443/apache/airflow/tree/{version}/subdir", + ), + ], + ) + @mock.patch("airflow.providers.git.bundles.git.Repo") + def test_view_url_template_subdir( + self, mock_gitrepo, repo_url, extra_conn_kwargs, expected_url, create_connection_without_db + ): + create_connection_without_db( + Connection( + conn_id="git_default", + host=repo_url, + conn_type="git", + **(extra_conn_kwargs or {}), + ) + ) + bundle = GitDagBundle( + name="test", + tracking_ref="main", + subdir="subdir", + git_conn_id="git_default", + ) + bundle.initialize = mock.MagicMock() + view_url_template = bundle.view_url_template() + assert view_url_template == expected_url + bundle.initialize.assert_not_called() + @mock.patch("airflow.providers.git.bundles.git.Repo") def test_view_url_returns_none_when_no_version_in_view_url(self, mock_gitrepo): bundle = GitDagBundle( From ad13791baef273b26ef2f8b9accf2fb8d2a916d5 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 13:42:40 +0100 Subject: 
[PATCH 06/33] fixup! Render view_url_template in view_url --- .../core_api/routes/public/test_dags.py | 97 +++++++++++++++++++ .../unit/cli/commands/test_asset_command.py | 43 +++++++- 2 files changed, 139 insertions(+), 1 deletion(-) diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py index 60302878dba90..55adcb3e0e4c2 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py @@ -518,8 +518,10 @@ class TestDagDetails(TestDagEndpoint): ], ) @pytest.mark.usefixtures("configure_git_connection_for_dag_bundle") + @mock.patch("airflow.api_fastapi.core_api.datamodels.dag_versions.hasattr") def test_dag_details( self, + mock_hasattr, test_client, query_params, dag_id, @@ -528,6 +530,7 @@ def test_dag_details( start_date, owner_links, ): + mock_hasattr.return_value = False response = test_client.get(f"/dags/{dag_id}/details", params=query_params) assert response.status_code == expected_status_code if expected_status_code != 200: @@ -554,6 +557,7 @@ def test_dag_details( }, "description": None, "doc_md": "details", + "deadline": None, "end_date": None, "fileloc": __file__, "file_token": file_token, @@ -603,6 +607,99 @@ def test_dag_details( } assert res_json == expected + @pytest.mark.parametrize( + "query_params, dag_id, expected_status_code, dag_display_name, start_date, owner_links", + [ + ({}, "fake_dag_id", 404, "fake_dag", "2023-12-31T00:00:00Z", {}), + ({}, DAG2_ID, 200, DAG2_ID, "2021-06-15T00:00:00Z", {}), + ], + ) + @pytest.mark.usefixtures("configure_git_connection_for_dag_bundle") + def test_dag_details_with_view_url_template( + self, + test_client, + query_params, + dag_id, + expected_status_code, + dag_display_name, + start_date, + owner_links, + ): + response = test_client.get(f"/dags/{dag_id}/details", params=query_params) + assert 
response.status_code == expected_status_code + if expected_status_code != 200: + return + + # Match expected and actual responses below. + res_json = response.json() + last_parsed = res_json["last_parsed"] + last_parsed_time = res_json["last_parsed_time"] + file_token = res_json["file_token"] + expected = { + "bundle_name": "dag_maker", + "bundle_version": None, + "asset_expression": None, + "catchup": False, + "concurrency": 16, + "dag_id": dag_id, + "dag_display_name": dag_display_name, + "dag_run_timeout": None, + "default_args": { + "depends_on_past": False, + "retries": 1, + "retry_delay": "PT5M", + }, + "description": None, + "doc_md": "details", + "end_date": None, + "fileloc": __file__, + "file_token": file_token, + "has_import_errors": False, + "has_task_concurrency_limits": True, + "is_stale": False, + "is_paused": False, + "is_paused_upon_creation": None, + "latest_dag_version": { + "bundle_name": "dag_maker", + "bundle_url": "http://test_host.github.com/tree/None/dags", + "bundle_version": None, + "created_at": mock.ANY, + "dag_id": "test_dag2", + "id": mock.ANY, + "version_number": 1, + "dag_display_name": dag_display_name, + }, + "last_expired": None, + "last_parsed": last_parsed, + "last_parsed_time": last_parsed_time, + "max_active_runs": 16, + "max_active_tasks": 16, + "max_consecutive_failed_dag_runs": 0, + "next_dagrun_data_interval_end": None, + "next_dagrun_data_interval_start": None, + "next_dagrun_logical_date": None, + "next_dagrun_run_after": None, + "owners": ["airflow"], + "owner_links": {}, + "params": { + "foo": { + "__class": "airflow.sdk.definitions.param.Param", + "description": None, + "schema": {}, + "value": 1, + } + }, + "relative_fileloc": "test_dags.py", + "render_template_as_native_obj": False, + "timetable_summary": None, + "start_date": start_date, + "tags": [], + "template_search_path": None, + "timetable_description": "Never, external triggers only", + "timezone": UTC_JSON_REPR, + } + assert res_json == expected + def 
test_dag_details_should_response_401(self, unauthenticated_test_client): response = unauthenticated_test_client.get(f"/dags/{DAG1_ID}/details") assert response.status_code == 401 diff --git a/airflow-core/tests/unit/cli/commands/test_asset_command.py b/airflow-core/tests/unit/cli/commands/test_asset_command.py index 47de83d6c3e5a..88f2131575115 100644 --- a/airflow-core/tests/unit/cli/commands/test_asset_command.py +++ b/airflow-core/tests/unit/cli/commands/test_asset_command.py @@ -23,6 +23,7 @@ import json import os import typing +from unittest import mock import pytest @@ -119,7 +120,9 @@ def test_cli_assets_alias_details(parser: ArgumentParser) -> None: assert alias_detail_list[0] | undeterministic == undeterministic | {"name": "example-alias", "group": ""} -def test_cli_assets_materialize(parser: ArgumentParser) -> None: +@mock.patch("airflow.api_fastapi.core_api.datamodels.dag_versions.hasattr") +def test_cli_assets_materialize(mock_hasattr, parser: ArgumentParser) -> None: + mock_hasattr.return_value = False args = parser.parse_args(["assets", "materialize", "--name=asset1_producer", "--output=json"]) with contextlib.redirect_stdout(io.StringIO()) as temp_stdout: asset_command.asset_materialize(args) @@ -157,3 +160,41 @@ def test_cli_assets_materialize(parser: ArgumentParser) -> None: "triggering_user_name": "root", "run_after": "2025-02-12T19:27:59.066046Z", } + + +def test_cli_assets_materialize_with_view_url_template(parser: ArgumentParser) -> None: + args = parser.parse_args(["assets", "materialize", "--name=asset1_producer", "--output=json"]) + with contextlib.redirect_stdout(io.StringIO()) as temp_stdout: + asset_command.asset_materialize(args) + + output = temp_stdout.getvalue() + run_list = json.loads(output) + assert len(run_list) == 1 + + # No good way to statically compare these. 
+ undeterministic: dict = { + "dag_run_id": None, + "dag_versions": [], + "data_interval_end": None, + "data_interval_start": None, + "logical_date": None, + "queued_at": None, + "run_after": "2025-02-12T19:27:59.066046Z", + } + + assert run_list[0] | undeterministic == undeterministic | { + "conf": {}, + "bundle_version": None, + "dag_display_name": "asset1_producer", + "dag_id": "asset1_producer", + "end_date": None, + "duration": None, + "last_scheduling_decision": None, + "note": None, + "run_type": "manual", + "start_date": None, + "state": "queued", + "triggered_by": "cli", + "triggering_user_name": "root", + "run_after": "2025-02-12T19:27:59.066046Z", + } From b07a89837a466ba5c07f2b3bc1b1a4eb482947f3 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 14:13:39 +0100 Subject: [PATCH 07/33] Add deprecation warning and update s3 bundle with view_url_template --- .../airflow/providers/amazon/aws/bundles/s3.py | 16 ++++++++++++++-- .../git/src/airflow/providers/git/bundles/git.py | 8 +++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py index 3752705c7aac2..f509da1e846a0 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py @@ -17,12 +17,13 @@ from __future__ import annotations import os +import warnings from pathlib import Path import structlog from airflow.dag_processing.bundles.base import BaseDagBundle -from airflow.exceptions import AirflowException +from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook from airflow.providers.amazon.aws.hooks.s3 import S3Hook @@ -137,10 +138,21 @@ def refresh(self) -> None: ) def view_url(self, version: str | None = None) -> str | None: + """Return a URL for viewing the DAGs 
in S3. Currently, versioning is not supported.""" + warnings.warn( + message="The method 'view_url' is deprecated and will be removed in a future release. Use 'view_url_template' instead.", + category=AirflowProviderDeprecationWarning, + stacklevel=2, + ) + + return self.view_url_template() + + def view_url_template(self) -> str | None: """Return a URL for viewing the DAGs in S3. Currently, versioning is not supported.""" if self.version: raise AirflowException("S3 url with version is not supported") - + if self._view_url_template: + return self._view_url_template # https://<bucket-name>.s3.<region>.amazonaws.com/<object-key> url = f"https://{self.bucket_name}.s3" if self.s3_hook.region_name: diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index dbf3c0ec635ca..f10973c46d280 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -17,6 +17,7 @@ from __future__ import annotations import os +import warnings from contextlib import nullcontext from pathlib import Path from urllib.parse import urlparse @@ -26,7 +27,7 @@ from git.exc import BadName, GitCommandError, NoSuchPathError from airflow.dag_processing.bundles.base import BaseDagBundle -from airflow.exceptions import AirflowException +from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning from airflow.providers.git.hooks.git import GitHook log = structlog.get_logger(__name__) @@ -214,6 +215,11 @@ def _convert_git_ssh_url_to_https(url: str) -> str: return f"{domain}/{repo_path}" def view_url(self, version: str | None = None) -> str | None: + warnings.warn( + message="The method 'view_url' is deprecated and will be removed in a future release.
Use 'view_url_template' instead.", + category=AirflowProviderDeprecationWarning, + stacklevel=2, + ) if not version: return None template = self.view_url_template() From 21f5b77027b355627b87f0a0e39ceb46f95444a3 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 14:36:54 +0100 Subject: [PATCH 08/33] fixup! Add deprecation warning and update s3 bundle with view_url_template --- .../airflow/providers/amazon/aws/bundles/s3.py | 6 +++++- .../tests/unit/amazon/aws/bundles/test_s3.py | 18 +++++++++++++++--- .../src/airflow/providers/git/bundles/git.py | 5 +++++ .../git/tests/unit/git/bundles/test_git.py | 11 +++++++---- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py index f509da1e846a0..7801f6997b957 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py @@ -138,7 +138,11 @@ def refresh(self) -> None: ) def view_url(self, version: str | None = None) -> str | None: - """Return a URL for viewing the DAGs in S3. Currently, versioning is not supported.""" + """ + Return a URL for viewing the DAGs in S3. Currently, versioning is not supported. + + This method is deprecated and will be removed in a future release. Use `view_url_template` instead. + """ warnings.warn( message="The method 'view_url' is deprecated and will be removed in a future release. 
Use 'view_url_template' instead.", category=AirflowProviderDeprecationWarning, diff --git a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py index c22c585b24380..d34c1a30f17a0 100644 --- a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py +++ b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py @@ -24,7 +24,7 @@ from moto import mock_aws import airflow.version -from airflow.exceptions import AirflowException +from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning from airflow.models import Connection from airflow.providers.amazon.aws.hooks.s3 import S3Hook from airflow.utils import db @@ -109,7 +109,16 @@ def test_view_url_generates_presigned_url(self): bundle = S3DagBundle( name="test", aws_conn_id=AWS_CONN_ID_DEFAULT, prefix="project1/dags", bucket_name=S3_BUCKET_NAME ) - url: str = bundle.view_url("test_version") + with pytest.warns(AirflowProviderDeprecationWarning): + url: str = bundle.view_url("test_version") + assert url.startswith("https://my-airflow-dags-bucket.s3.amazonaws.com/project1/dags") + + @pytest.mark.db_test + def test_view_url_template_generates_presigned_url(self): + bundle = S3DagBundle( + name="test", aws_conn_id=AWS_CONN_ID_DEFAULT, prefix="project1/dags", bucket_name=S3_BUCKET_NAME + ) + url: str = bundle.view_url_template() assert url.startswith("https://my-airflow-dags-bucket.s3.amazonaws.com/project1/dags") @pytest.mark.db_test @@ -124,7 +133,10 @@ def test_supports_versioning(self): with pytest.raises(AirflowException, match="Refreshing a specific version is not supported"): bundle.refresh() - with pytest.raises(AirflowException, match="S3 url with version is not supported"): + with ( + pytest.raises(AirflowException, match="S3 url with version is not supported"), + pytest.warns(AirflowProviderDeprecationWarning), + ): bundle.view_url("test_version") @pytest.mark.db_test diff --git 
a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index f10973c46d280..ea78fbad01784 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -215,6 +215,11 @@ def _convert_git_ssh_url_to_https(url: str) -> str: return f"{domain}/{repo_path}" def view_url(self, version: str | None = None) -> str | None: + """ + Return a URL for viewing the DAGs in the repository. + + This method is deprecated and will be removed in a future release. Use `view_url_template` instead. + """ warnings.warn( message="The method 'view_url' is deprecated and will be removed in a future release. Use 'view_url_template' instead.", category=AirflowProviderDeprecationWarning, diff --git a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py index 491f23cce4a0b..416bc1fc6fd33 100644 --- a/providers/git/tests/unit/git/bundles/test_git.py +++ b/providers/git/tests/unit/git/bundles/test_git.py @@ -29,7 +29,7 @@ from git.exc import GitCommandError, NoSuchPathError from airflow.dag_processing.bundles.base import get_bundle_storage_root_path -from airflow.exceptions import AirflowException +from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning from airflow.models import Connection from airflow.providers.git.bundles.git import GitDagBundle from airflow.providers.git.hooks.git import GitHook @@ -429,7 +429,8 @@ def test_view_url( tracking_ref="main", ) bundle.initialize = mock.MagicMock() - view_url = bundle.view_url("0f0f0f") + with pytest.warns(AirflowProviderDeprecationWarning): + view_url = bundle.view_url("0f0f0f") assert view_url == expected_url bundle.initialize.assert_not_called() @@ -518,7 +519,8 @@ def test_view_url_subdir( git_conn_id="git_default", ) bundle.initialize = mock.MagicMock() - view_url = bundle.view_url("0f0f0f") + with pytest.warns(AirflowProviderDeprecationWarning): 
+ view_url = bundle.view_url("0f0f0f") assert view_url == expected_url bundle.initialize.assert_not_called() @@ -621,7 +623,8 @@ def test_view_url_returns_none_when_no_version_in_view_url(self, mock_gitrepo): name="test", tracking_ref="main", ) - view_url = bundle.view_url(None) + with pytest.warns(AirflowProviderDeprecationWarning): + view_url = bundle.view_url(None) assert view_url is None @mock.patch("airflow.providers.git.bundles.git.GitHook") From d0371149b665e1563e81112ab6728acd454e61d4 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 15:25:32 +0100 Subject: [PATCH 09/33] Remove deprecation warning in provider's view_url --- .../src/airflow/providers/amazon/aws/bundles/s3.py | 9 +-------- .../amazon/tests/unit/amazon/aws/bundles/test_s3.py | 4 ++-- providers/git/src/airflow/providers/git/bundles/git.py | 8 +------- providers/git/tests/unit/git/bundles/test_git.py | 6 ++---- 4 files changed, 6 insertions(+), 21 deletions(-) diff --git a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py index 7801f6997b957..8798c0471b033 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py @@ -17,13 +17,12 @@ from __future__ import annotations import os -import warnings from pathlib import Path import structlog from airflow.dag_processing.bundles.base import BaseDagBundle -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook from airflow.providers.amazon.aws.hooks.s3 import S3Hook @@ -143,12 +142,6 @@ def view_url(self, version: str | None = None) -> str | None: This method is deprecated and will be removed in a future release. Use `view_url_template` instead. 
""" - warnings.warn( - message="The method 'view_url' is deprecated and will be removed in a future release. Use 'view_url_template' instead.", - category=AirflowProviderDeprecationWarning, - stacklevel=2, - ) - return self.view_url_template() def view_url_template(self) -> str | None: diff --git a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py index d34c1a30f17a0..bd42d6a5e63c3 100644 --- a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py +++ b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py @@ -109,8 +109,8 @@ def test_view_url_generates_presigned_url(self): bundle = S3DagBundle( name="test", aws_conn_id=AWS_CONN_ID_DEFAULT, prefix="project1/dags", bucket_name=S3_BUCKET_NAME ) - with pytest.warns(AirflowProviderDeprecationWarning): - url: str = bundle.view_url("test_version") + + url: str = bundle.view_url("test_version") assert url.startswith("https://my-airflow-dags-bucket.s3.amazonaws.com/project1/dags") @pytest.mark.db_test diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index ea78fbad01784..171b37f6b7122 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -17,7 +17,6 @@ from __future__ import annotations import os -import warnings from contextlib import nullcontext from pathlib import Path from urllib.parse import urlparse @@ -27,7 +26,7 @@ from git.exc import BadName, GitCommandError, NoSuchPathError from airflow.dag_processing.bundles.base import BaseDagBundle -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.providers.git.hooks.git import GitHook log = structlog.get_logger(__name__) @@ -220,11 +219,6 @@ def view_url(self, version: str | None = None) -> str | None: This method is deprecated and will be removed in a 
future release. Use `view_url_template` instead. """ - warnings.warn( - message="The method 'view_url' is deprecated and will be removed in a future release. Use 'view_url_template' instead.", - category=AirflowProviderDeprecationWarning, - stacklevel=2, - ) if not version: return None template = self.view_url_template() diff --git a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py index 416bc1fc6fd33..25233624349e5 100644 --- a/providers/git/tests/unit/git/bundles/test_git.py +++ b/providers/git/tests/unit/git/bundles/test_git.py @@ -429,8 +429,7 @@ def test_view_url( tracking_ref="main", ) bundle.initialize = mock.MagicMock() - with pytest.warns(AirflowProviderDeprecationWarning): - view_url = bundle.view_url("0f0f0f") + view_url = bundle.view_url("0f0f0f") assert view_url == expected_url bundle.initialize.assert_not_called() @@ -519,8 +518,7 @@ def test_view_url_subdir( git_conn_id="git_default", ) bundle.initialize = mock.MagicMock() - with pytest.warns(AirflowProviderDeprecationWarning): - view_url = bundle.view_url("0f0f0f") + view_url = bundle.view_url("0f0f0f") assert view_url == expected_url bundle.initialize.assert_not_called() From d1495ac7b3c001a2737f605157ac7747d5ef1457 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 15:55:16 +0100 Subject: [PATCH 10/33] fixup! 
Remove deprecation warning in provider's view_url --- providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py | 7 ++----- providers/git/tests/unit/git/bundles/test_git.py | 5 ++--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py index bd42d6a5e63c3..e07755eed783a 100644 --- a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py +++ b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py @@ -24,7 +24,7 @@ from moto import mock_aws import airflow.version -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.models import Connection from airflow.providers.amazon.aws.hooks.s3 import S3Hook from airflow.utils import db @@ -133,10 +133,7 @@ def test_supports_versioning(self): with pytest.raises(AirflowException, match="Refreshing a specific version is not supported"): bundle.refresh() - with ( - pytest.raises(AirflowException, match="S3 url with version is not supported"), - pytest.warns(AirflowProviderDeprecationWarning), - ): + with pytest.raises(AirflowException, match="S3 url with version is not supported"): bundle.view_url("test_version") @pytest.mark.db_test diff --git a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py index 25233624349e5..491f23cce4a0b 100644 --- a/providers/git/tests/unit/git/bundles/test_git.py +++ b/providers/git/tests/unit/git/bundles/test_git.py @@ -29,7 +29,7 @@ from git.exc import GitCommandError, NoSuchPathError from airflow.dag_processing.bundles.base import get_bundle_storage_root_path -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.models import Connection from airflow.providers.git.bundles.git import GitDagBundle from airflow.providers.git.hooks.git import 
GitHook @@ -621,8 +621,7 @@ def test_view_url_returns_none_when_no_version_in_view_url(self, mock_gitrepo): name="test", tracking_ref="main", ) - with pytest.warns(AirflowProviderDeprecationWarning): - view_url = bundle.view_url(None) + view_url = bundle.view_url(None) assert view_url is None @mock.patch("airflow.providers.git.bundles.git.GitHook") From 1395fe3416a5227dcf8457dc8b21fef1a5741765 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 16:50:37 +0100 Subject: [PATCH 11/33] fixup! fixup! Remove deprecation warning in provider's view_url --- devel-common/src/tests_common/pytest_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py index 56c8bf7e26894..e10bcd1d5bde9 100644 --- a/devel-common/src/tests_common/pytest_plugin.py +++ b/devel-common/src/tests_common/pytest_plugin.py @@ -1216,7 +1216,7 @@ def __call__( self.session.query(DagBundleModel).filter(DagBundleModel.name == self.bundle_name).count() == 0 ): - self.session.add(DagBundleModel(name=self.bundle_name, version=self.bundle_version)) + self.session.add(DagBundleModel(name=self.bundle_name)) self.session.commit() return self From 5922d8f8dd1e88c1efe5fb25970eac3c44f27b7b Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 20:46:04 +0100 Subject: [PATCH 12/33] Add backcompat for bundles vs airflow releases --- .../amazon/src/airflow/providers/amazon/aws/bundles/s3.py | 3 ++- providers/git/src/airflow/providers/git/bundles/git.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py index 8798c0471b033..de7912d27e0a5 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py @@ -148,7 +148,8 @@ def view_url_template(self) 
-> str | None: """Return a URL for viewing the DAGs in S3. Currently, versioning is not supported.""" if self.version: raise AirflowException("S3 url with version is not supported") - if self._view_url_template: + if hasattr(self, "_view_url_template") and self._view_url_template: + # Backward compatibility for released Airflow versions return self._view_url_template # https://<bucket-name>.s3.<region>.amazonaws.com/<object-key> url = f"https://{self.bucket_name}.s3" diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index 171b37f6b7122..b2195c1a3e322 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -227,7 +227,8 @@ def view_url(self, version: str | None = None) -> str | None: return template.format(version=version) def view_url_template(self) -> str | None: - if self._view_url_template: + if hasattr(self, "_view_url_template") and self._view_url_template: + # Backward compatibility for released Airflow versions return self._view_url_template if not self.repo_url: From 83a481cfbe40bce157edc855be0b6f89d8796423 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 7 Jul 2025 21:03:20 +0100 Subject: [PATCH 13/33] fixup!
Add backcompat for bundles vs airflow releases --- airflow-core/docs/img/airflow_erd.sha256 | 2 +- airflow-core/docs/migrations-ref.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow-core/docs/img/airflow_erd.sha256 b/airflow-core/docs/img/airflow_erd.sha256 index 1f03bd8c42947..c469465b3862a 100644 --- a/airflow-core/docs/img/airflow_erd.sha256 +++ b/airflow-core/docs/img/airflow_erd.sha256 @@ -1 +1 @@ -c5baad84b54b522b8a5a84509ef49580f34585f1289e4e1da6e6c9ee3898d25e \ No newline at end of file +bce80e41c4a877775704f1c4c6f8cd2bfdf0e8381f8cdd1af4ef4de112eb783c \ No newline at end of file diff --git a/airflow-core/docs/migrations-ref.rst b/airflow-core/docs/migrations-ref.rst index 650c52b4e3ebe..eaecf3df2d9ec 100644 --- a/airflow-core/docs/migrations-ref.rst +++ b/airflow-core/docs/migrations-ref.rst @@ -39,7 +39,7 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``3bda03debd04`` (head) | ``ffdb0566c7c0`` | ``3.1.0`` | Add url and template params to DagBundleModel. | +| ``3bda03debd04`` (head) | ``ffdb0566c7c0`` | ``3.1.0`` | Add url template and template params to DagBundleModel. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``ffdb0566c7c0`` | ``66a7743fe20e`` | ``3.1.0`` | Add dag_favorite table. 
| +-------------------------+------------------+-------------------+--------------------------------------------------------------+ From 0cbddc77affdfff3fc34e53eb41b4120d4d9730a Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Tue, 8 Jul 2025 12:32:43 +0100 Subject: [PATCH 14/33] rename url_template to signed_url_template. Also return None when we can't unsign a url --- airflow-core/docs/img/airflow_erd.sha256 | 2 +- airflow-core/docs/img/airflow_erd.svg | 60 +++++++++---------- .../core_api/datamodels/dag_versions.py | 2 +- .../airflow/dag_processing/bundles/manager.py | 6 +- ..._and_template_params_to_dagbundle_model.py | 4 +- airflow-core/src/airflow/models/dagbundle.py | 12 ++-- .../bundles/test_dag_bundle_manager.py | 2 +- 7 files changed, 44 insertions(+), 44 deletions(-) diff --git a/airflow-core/docs/img/airflow_erd.sha256 b/airflow-core/docs/img/airflow_erd.sha256 index c469465b3862a..8a1c4f105a803 100644 --- a/airflow-core/docs/img/airflow_erd.sha256 +++ b/airflow-core/docs/img/airflow_erd.sha256 @@ -1 +1 @@ -bce80e41c4a877775704f1c4c6f8cd2bfdf0e8381f8cdd1af4ef4de112eb783c \ No newline at end of file +7f06e1a640dcd5093095333ad30c17ca7cbf842965f54a9cda1240ce2f68455f \ No newline at end of file diff --git a/airflow-core/docs/img/airflow_erd.svg b/airflow-core/docs/img/airflow_erd.svg index 39ec5fa929661..4a8eea1dbce18 100644 --- a/airflow-core/docs/img/airflow_erd.svg +++ b/airflow-core/docs/img/airflow_erd.svg @@ -1386,33 +1386,33 @@ dag_bundle - -dag_bundle - -name - - [VARCHAR(250)] - NOT NULL - -active - - [BOOLEAN] - -last_refreshed - - [TIMESTAMP] - -template_params - - [JSON] - -url_template - - [VARCHAR(200)] - -version - - [VARCHAR(200)] + +dag_bundle + +name + + [VARCHAR(250)] + NOT NULL + +active + + [BOOLEAN] + +last_refreshed + + [TIMESTAMP] + +signed_url_template + + [VARCHAR(200)] + +template_params + + [JSON] + +version + + [VARCHAR(200)] @@ -1527,9 +1527,9 @@ dag_bundle--dag - -0..N -{0,1} + +0..N +{0,1} diff --git 
a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py index 536b16e446a61..77f420a734f72 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py @@ -51,7 +51,7 @@ def bundle_url(self) -> str | None: select(DagBundleModel).where(DagBundleModel.name == self.bundle_name) ) - if bundle_model and hasattr(bundle_model, "url_template"): + if bundle_model and hasattr(bundle_model, "signed_url_template"): return bundle_model.render_url(self.bundle_version) try: return DagBundlesManager().view_url(self.bundle_name, self.bundle_version) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 35e024b94bf01..2ba700813243e 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -196,8 +196,8 @@ def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: new_template = _sign_bundle_url(new_template, name) self.log.debug("Signed URL template for bundle %s", name) - if new_template != bundle.url_template: - bundle.url_template = new_template + if new_template != bundle.signed_url_template: + bundle.signed_url_template = new_template self.log.debug("Updated URL template for bundle %s", name) if new_params != bundle.template_params: bundle.template_params = new_params @@ -221,7 +221,7 @@ def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: new_template = _sign_bundle_url(new_template, name) self.log.debug("Signed URL template for bundle %s", name) - new_bundle.url_template = new_template + new_bundle.signed_url_template = new_template new_bundle.template_params = new_params session.add(new_bundle) diff --git 
a/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py b/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py index 62fb7ce5ba12e..a5b3787c8758a 100644 --- a/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py +++ b/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py @@ -42,7 +42,7 @@ def upgrade(): """Apply Add url and template params to DagBundleModel.""" with op.batch_alter_table("dag_bundle", schema=None) as batch_op: - batch_op.add_column(sa.Column("url_template", sa.String(length=200), nullable=True)) + batch_op.add_column(sa.Column("signed_url_template", sa.String(length=200), nullable=True)) batch_op.add_column(sa.Column("template_params", JSONType(), nullable=True)) @@ -50,4 +50,4 @@ def downgrade(): """Unapply Add url and template params to DagBundleModel.""" with op.batch_alter_table("dag_bundle", schema=None) as batch_op: batch_op.drop_column("template_params") - batch_op.drop_column("url_template") + batch_op.drop_column("signed_url_template") diff --git a/airflow-core/src/airflow/models/dagbundle.py b/airflow-core/src/airflow/models/dagbundle.py index 4eb2ff569e88f..d83a90e765cb2 100644 --- a/airflow-core/src/airflow/models/dagbundle.py +++ b/airflow-core/src/airflow/models/dagbundle.py @@ -34,7 +34,7 @@ class DagBundleModel(Base, LoggingMixin): - active: Is the bundle currently found in configuration? - version: The latest version Airflow has seen for the bundle. - last_refreshed: When the bundle was last refreshed. 
- - url_template: Signed URL template for viewing the bundle + - signed_url_template: Signed URL template for viewing the bundle - template_params: JSON object containing template parameters (e.g., {"subdir": "dags"}) """ @@ -44,7 +44,7 @@ class DagBundleModel(Base, LoggingMixin): active = Column(Boolean, default=True) version = Column(String(200), nullable=True) last_refreshed = Column(UtcDateTime, nullable=True) - url_template = Column(String(200), nullable=True) + signed_url_template = Column(String(200), nullable=True) template_params = Column(JSONType, nullable=True) def __init__(self, *, name: str, version: str | None = None): @@ -65,7 +65,7 @@ def _unsign_url(self) -> str | None: from airflow.configuration import conf serializer = URLSafeSerializer(conf.get_mandatory_value("core", "fernet_key")) - payload = serializer.loads(self.url_template) + payload = serializer.loads(self.signed_url_template) if isinstance(payload, dict) and "url" in payload and "bundle_name" in payload: if payload["bundle_name"] == self.name: return payload["url"] @@ -84,13 +84,13 @@ def render_url(self, version: str | None = None) -> str | None: :param version: The version to substitute in the template :return: The rendered URL or None if no template is available """ - if not self.url_template: + if not self.signed_url_template: return None url_template = self._unsign_url() if url_template is None: - url_template = self.url_template + return None params = dict(self.template_params or {}) params["version"] = version @@ -99,4 +99,4 @@ def render_url(self, version: str | None = None) -> str | None: return url_template.format(**params) except (KeyError, ValueError) as e: self.log.warning("Failed to render URL template for bundle %s: %s", self.name, e) - return url_template + return None diff --git a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py index dff8e546ec280..fe9c1a5fb3128 100644 
--- a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py +++ b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py @@ -357,7 +357,7 @@ def test_template_update_on_sync(clear_db, session): def test_dag_bundle_model_render_url_with_invalid_template(): """Test that DagBundleModel.render_url handles invalid templates gracefully.""" bundle_model = DagBundleModel(name="test-bundle") - bundle_model.url_template = "https://github.com/example/repo/tree/{invalid_placeholder}" + bundle_model.signed_url_template = "https://github.com/example/repo/tree/{invalid_placeholder}" bundle_model.template_params = {"subdir": "dags"} # Should return the original template when rendering fails From 07bf1abe299bc7f9aa2ff597c8f13ea4a2dfb0e4 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Tue, 8 Jul 2025 13:06:01 +0100 Subject: [PATCH 15/33] refactor template signing --- .../airflow/dag_processing/bundles/manager.py | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 2ba700813243e..3f7ddb1a24af3 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -174,6 +174,22 @@ def parse_config(self) -> None: @provide_session def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: self.log.debug("Syncing DAG bundles to the database") + + def _signed_template(new_template_: str | None, bundle_name: str) -> str | None: + if new_template_: + if not _is_safe_bundle_url(new_template_): + self.log.warning( + "Bundle %s has unsafe URL template '%s', skipping URL update", + bundle_name, + new_template_, + ) + new_template_ = None + else: + # Sign the URL for integrity verification + new_template_ = _sign_bundle_url(new_template_, bundle_name) + self.log.debug("Signed URL template for bundle %s", 
bundle_name) + return new_template_ + stored = {b.name: b for b in session.query(DagBundleModel).all()} for name in self._bundle_config.keys(): @@ -185,16 +201,7 @@ def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: new_params = self._extract_template_params(bundle_instance) # Validate and sign the URL before saving - if new_template: - if not _is_safe_bundle_url(new_template): - self.log.warning( - "Bundle %s has unsafe URL template '%s', skipping URL update", name, new_template - ) - new_template = None - else: - # Sign the URL for integrity verification - new_template = _sign_bundle_url(new_template, name) - self.log.debug("Signed URL template for bundle %s", name) + new_template = _signed_template(new_template, name) if new_template != bundle.signed_url_template: bundle.signed_url_template = new_template @@ -210,16 +217,7 @@ def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: new_params = self._extract_template_params(bundle_instance) # Validate and sign the URL before saving - if new_template: - if not _is_safe_bundle_url(new_template): - self.log.warning( - "Bundle %s has unsafe URL template '%s', skipping URL", name, new_template - ) - new_template = None - else: - # Sign the URL for integrity verification - new_template = _sign_bundle_url(new_template, name) - self.log.debug("Signed URL template for bundle %s", name) + new_template = _signed_template(new_template, name) new_bundle.signed_url_template = new_template new_bundle.template_params = new_params From 815863200dbe3621f130a5b7704c9ca227a73f8c Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Tue, 8 Jul 2025 14:58:23 +0100 Subject: [PATCH 16/33] fixup! 
refactor template signing --- .../unit/dag_processing/bundles/test_dag_bundle_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py index fe9c1a5fb3128..fe54830f63ef5 100644 --- a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py +++ b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py @@ -360,9 +360,9 @@ def test_dag_bundle_model_render_url_with_invalid_template(): bundle_model.signed_url_template = "https://github.com/example/repo/tree/{invalid_placeholder}" bundle_model.template_params = {"subdir": "dags"} - # Should return the original template when rendering fails + # Should return None if rendering fails url = bundle_model.render_url("v1") - assert url == "https://github.com/example/repo/tree/{invalid_placeholder}" + assert url is None def test_example_dags_bundle_added(): From cfc3c276502864a847712b3ecfa1e04e43ae0807 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Wed, 9 Jul 2025 10:58:33 +0100 Subject: [PATCH 17/33] Fix templating and conflict --- airflow-core/docs/img/airflow_erd.sha256 | 2 +- airflow-core/docs/img/airflow_erd.svg | 3 ++- airflow-core/docs/migrations-ref.rst | 4 +++- .../airflow/dag_processing/bundles/base.py | 3 +++ .../airflow/dag_processing/bundles/manager.py | 22 +++++-------------- ...and_template_params_to_dagbundle_model.py} | 0 .../src/airflow/providers/git/bundles/git.py | 4 +--- 7 files changed, 16 insertions(+), 22 deletions(-) rename airflow-core/src/airflow/migrations/versions/{0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py => 0077_3_1_0_add_url_and_template_params_to_dagbundle_model.py} (100%) diff --git a/airflow-core/docs/img/airflow_erd.sha256 b/airflow-core/docs/img/airflow_erd.sha256 index 8a1c4f105a803..2e8d8e048e006 100644 --- a/airflow-core/docs/img/airflow_erd.sha256 +++ 
b/airflow-core/docs/img/airflow_erd.sha256 @@ -1 +1 @@ -7f06e1a640dcd5093095333ad30c17ca7cbf842965f54a9cda1240ce2f68455f \ No newline at end of file +8333e4b99aa5a6db3ad844dfe34bfdb31a4829089b7dc2cdb32773a2e956d201 \ No newline at end of file diff --git a/airflow-core/docs/img/airflow_erd.svg b/airflow-core/docs/img/airflow_erd.svg index 4a8eea1dbce18..991d8ddcc5196 100644 --- a/airflow-core/docs/img/airflow_erd.svg +++ b/airflow-core/docs/img/airflow_erd.svg @@ -776,6 +776,7 @@ dag_version_id [UUID] + NOT NULL duration @@ -1820,7 +1821,7 @@ dag_version--task_instance 0..N -{0,1} +1 diff --git a/airflow-core/docs/migrations-ref.rst b/airflow-core/docs/migrations-ref.rst index eaecf3df2d9ec..bf77a5573bb94 100644 --- a/airflow-core/docs/migrations-ref.rst +++ b/airflow-core/docs/migrations-ref.rst @@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``3bda03debd04`` (head) | ``ffdb0566c7c0`` | ``3.1.0`` | Add url template and template params to DagBundleModel. | +| ``3bda03debd04`` (head) | ``5d3072c51bac`` | ``3.1.0`` | Add url template and template params to DagBundleModel. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``5d3072c51bac`` | ``ffdb0566c7c0`` | ``3.1.0`` | Make dag_version_id non-nullable in TaskInstance. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``ffdb0566c7c0`` | ``66a7743fe20e`` | ``3.1.0`` | Add dag_favorite table. 
| +-------------------------+------------------+-------------------+--------------------------------------------------------------+ diff --git a/airflow-core/src/airflow/dag_processing/bundles/base.py b/airflow-core/src/airflow/dag_processing/bundles/base.py index 4a89924d57d4e..1d3836859afc6 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/base.py +++ b/airflow-core/src/airflow/dag_processing/bundles/base.py @@ -334,6 +334,9 @@ def view_url(self, version: str | None = None) -> str | None: ) return None + def _templated_url_fragment(self): + return "".join(f"/{{{item}}}" for item in self.template_fields) + def view_url_template(self) -> str | None: """ URL template to view the bundle on an external website. This is shown to users in the Airflow UI, allowing them to navigate to this url for more details about that version of the bundle. diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 3f7ddb1a24af3..3c2f5e271574b 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -193,16 +193,14 @@ def _signed_template(new_template_: str | None, bundle_name: str) -> str | None: stored = {b.name: b for b in session.query(DagBundleModel).all()} for name in self._bundle_config.keys(): + # Update URL template and parameters if they've changed + bundle_instance = self.get_bundle(name) + new_template = bundle_instance.view_url_template() + new_params = self._extract_template_params(bundle_instance) + new_template = _signed_template(new_template, name) + if bundle := stored.pop(name, None): bundle.active = True - # Update URL template and parameters if they've changed - bundle_instance = self.get_bundle(name) - new_template = bundle_instance.view_url_template() - new_params = self._extract_template_params(bundle_instance) - - # Validate and sign the URL before saving - new_template = 
_signed_template(new_template, name) - if new_template != bundle.signed_url_template: bundle.signed_url_template = new_template self.log.debug("Updated URL template for bundle %s", name) @@ -211,14 +209,6 @@ def _signed_template(new_template_: str | None, bundle_name: str) -> str | None: self.log.debug("Updated template parameters for bundle %s", name) else: new_bundle = DagBundleModel(name=name) - # Set URL template and parameters for new bundle - bundle_instance = self.get_bundle(name) - new_template = bundle_instance.view_url_template() - new_params = self._extract_template_params(bundle_instance) - - # Validate and sign the URL before saving - new_template = _signed_template(new_template, name) - new_bundle.signed_url_template = new_template new_bundle.template_params = new_params diff --git a/airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py b/airflow-core/src/airflow/migrations/versions/0077_3_1_0_add_url_and_template_params_to_dagbundle_model.py similarity index 100% rename from airflow-core/src/airflow/migrations/versions/0076_3_1_0_add_url_and_template_params_to_dagbundle_model.py rename to airflow-core/src/airflow/migrations/versions/0077_3_1_0_add_url_and_template_params_to_dagbundle_model.py diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index b2195c1a3e322..73d62e20b0850 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -260,7 +260,5 @@ def view_url_template(self) -> str | None: # Add subdir placeholder if applicable for allowed_host, template in host_patterns.items(): if host == allowed_host or host.endswith(f".{allowed_host}"): - if self.subdir: - return f"{template}/{self.subdir}" - return template + return template + self._templated_url_fragment() return None From dec98c4388f89a9899922b702e9d78006352a5e6 Mon Sep 17 00:00:00 2001 From: 
Ephraim Anierobi Date: Wed, 9 Jul 2025 14:21:52 +0100 Subject: [PATCH 18/33] Fix backcompat & refactor template signing --- .../src/airflow/dag_processing/bundles/base.py | 4 ++-- .../airflow/dag_processing/bundles/manager.py | 15 +++++++-------- .../airflow/providers/amazon/aws/bundles/s3.py | 4 +++- .../git/src/airflow/providers/git/bundles/git.py | 16 ++++++++++++---- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/base.py b/airflow-core/src/airflow/dag_processing/bundles/base.py index 1d3836859afc6..2ea94a6ad3e46 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/base.py +++ b/airflow-core/src/airflow/dag_processing/bundles/base.py @@ -217,10 +217,10 @@ def remove_stale_bundle_versions(self): This isn't really necessary on worker types that don't share storage with other processes. """ - log.info("checking for stale bundle versions locally") - from airflow.dag_processing.bundles.manager import DagBundlesManager + log.info("checking for stale bundle versions locally") + bundles = list(DagBundlesManager().get_all_dag_bundles()) for bundle in bundles: if not bundle.supports_versioning: diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 3c2f5e271574b..c72490ab043e2 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -175,7 +175,10 @@ def parse_config(self) -> None: def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: self.log.debug("Syncing DAG bundles to the database") - def _signed_template(new_template_: str | None, bundle_name: str) -> str | None: + def _extract_and_sign_template(bundle_name: str) -> tuple[str | None, dict]: + bundle_instance = self.get_bundle(name) + new_template_ = bundle_instance.view_url_template() + new_params_ = self._extract_template_params(bundle_instance) if 
new_template_: if not _is_safe_bundle_url(new_template_): self.log.warning( @@ -188,19 +191,14 @@ def _signed_template(new_template_: str | None, bundle_name: str) -> str | None: # Sign the URL for integrity verification new_template_ = _sign_bundle_url(new_template_, bundle_name) self.log.debug("Signed URL template for bundle %s", bundle_name) - return new_template_ + return new_template_, new_params_ stored = {b.name: b for b in session.query(DagBundleModel).all()} for name in self._bundle_config.keys(): - # Update URL template and parameters if they've changed - bundle_instance = self.get_bundle(name) - new_template = bundle_instance.view_url_template() - new_params = self._extract_template_params(bundle_instance) - new_template = _signed_template(new_template, name) - if bundle := stored.pop(name, None): bundle.active = True + new_template, new_params = _extract_and_sign_template(name) if new_template != bundle.signed_url_template: bundle.signed_url_template = new_template self.log.debug("Updated URL template for bundle %s", name) @@ -208,6 +206,7 @@ def _signed_template(new_template_: str | None, bundle_name: str) -> str | None: bundle.template_params = new_params self.log.debug("Updated template parameters for bundle %s", name) else: + new_template, new_params = _extract_and_sign_template(name) new_bundle = DagBundleModel(name=name) new_bundle.signed_url_template = new_template new_bundle.template_params = new_params diff --git a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py index de7912d27e0a5..224442e4983af 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py @@ -149,7 +149,9 @@ def view_url_template(self) -> str | None: if self.version: raise AirflowException("S3 url with version is not supported") if hasattr(self, "_view_url_template") and self._view_url_template: - # Backward 
compatibility for released Airflow versions + # Because we use this method in the view_url method, we need to handle + # backward compatibility for Airflow versions that doesn't have the + # _view_url_template attribute. Should be removed when we drop support for Airflow 3.0 return self._view_url_template # https://.s3..amazonaws.com/ url = f"https://{self.bucket_name}.s3" diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index 73d62e20b0850..b41bf01be04b6 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -224,11 +224,13 @@ def view_url(self, version: str | None = None) -> str | None: template = self.view_url_template() if not template: return None - return template.format(version=version) + return template.format(version=version, subdir=self.subdir) def view_url_template(self) -> str | None: if hasattr(self, "_view_url_template") and self._view_url_template: - # Backward compatibility for released Airflow versions + # Because we use this method in the view_url method, we need to handle + # backward compatibility for Airflow versions that doesn't have the + # _view_url_template attribute. Should be removed when we drop support for Airflow 3.0 return self._view_url_template if not self.repo_url: @@ -257,8 +259,14 @@ def view_url_template(self) -> str | None: "bitbucket.org": f"{url}/src/{{version}}", } - # Add subdir placeholder if applicable for allowed_host, template in host_patterns.items(): if host == allowed_host or host.endswith(f".{allowed_host}"): - return template + self._templated_url_fragment() + # Because we use this method in the view_url method, we need to handle + # backward compatibility for Airflow versions that doesn't have the + # _templated_url_fragment attribute. 
Should be removed when we drop support for Airflow 3.0 + if hasattr(self, "_templated_url_fragment"): + return template + self._templated_url_fragment() + if self.subdir: + return f"{template}/{self.subdir}" + return template return None From 7d483882b77eb5dfc0e35f0c85bc1fb4b3ba1ee1 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Wed, 9 Jul 2025 16:31:53 +0100 Subject: [PATCH 19/33] fixup! Fix backcompat & refactor template signing --- providers/git/src/airflow/providers/git/bundles/git.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index b41bf01be04b6..faa9587884689 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -224,6 +224,9 @@ def view_url(self, version: str | None = None) -> str | None: template = self.view_url_template() if not template: return None + if not self.subdir: + # remove {subdir} from the template if subdir is not set + template = template.replace("/{subdir}", "") return template.format(version=version, subdir=self.subdir) def view_url_template(self) -> str | None: From 8c636716ed9a1fcf271978b84d45939df7bff88d Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Wed, 9 Jul 2025 17:08:51 +0100 Subject: [PATCH 20/33] fixup! fixup! 
Fix backcompat & refactor template signing --- .../tests/unit/amazon/aws/bundles/test_s3.py | 1 - .../git/tests/unit/git/bundles/test_git.py | 26 +++++++++---------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py index e07755eed783a..50750b44cc8c4 100644 --- a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py +++ b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py @@ -113,7 +113,6 @@ def test_view_url_generates_presigned_url(self): url: str = bundle.view_url("test_version") assert url.startswith("https://my-airflow-dags-bucket.s3.amazonaws.com/project1/dags") - @pytest.mark.db_test def test_view_url_template_generates_presigned_url(self): bundle = S3DagBundle( name="test", aws_conn_id=AWS_CONN_ID_DEFAULT, prefix="project1/dags", bucket_name=S3_BUCKET_NAME diff --git a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py index 491f23cce4a0b..06684587dbf54 100644 --- a/providers/git/tests/unit/git/bundles/test_git.py +++ b/providers/git/tests/unit/git/bundles/test_git.py @@ -528,67 +528,67 @@ def test_view_url_subdir( ( "git@github.com:apache/airflow.git", None, - "https://github.com/apache/airflow/tree/{version}/subdir", + "https://github.com/apache/airflow/tree/{version}/{subdir}", ), ( "git@github.com:apache/airflow", None, - "https://github.com/apache/airflow/tree/{version}/subdir", + "https://github.com/apache/airflow/tree/{version}/{subdir}", ), ( "https://github.com/apache/airflow", None, - "https://github.com/apache/airflow/tree/{version}/subdir", + "https://github.com/apache/airflow/tree/{version}/{subdir}", ), ( "https://github.com/apache/airflow.git", None, - "https://github.com/apache/airflow/tree/{version}/subdir", + "https://github.com/apache/airflow/tree/{version}/{subdir}", ), ( "git@gitlab.com:apache/airflow.git", None, - 
"https://gitlab.com/apache/airflow/-/tree/{version}/subdir", + "https://gitlab.com/apache/airflow/-/tree/{version}/{subdir}", ), ( "git@bitbucket.org:apache/airflow.git", None, - "https://bitbucket.org/apache/airflow/src/{version}/subdir", + "https://bitbucket.org/apache/airflow/src/{version}/{subdir}", ), ( "git@myorg.github.com:apache/airflow.git", None, - "https://myorg.github.com/apache/airflow/tree/{version}/subdir", + "https://myorg.github.com/apache/airflow/tree/{version}/{subdir}", ), ( "https://myorg.github.com/apache/airflow.git", None, - "https://myorg.github.com/apache/airflow/tree/{version}/subdir", + "https://myorg.github.com/apache/airflow/tree/{version}/{subdir}", ), ( "https://github.com/apache/airflow", {"password": "abc123"}, - "https://github.com/apache/airflow/tree/{version}/subdir", + "https://github.com/apache/airflow/tree/{version}/{subdir}", ), ( "https://github.com/apache/airflow", {"login": "abc123"}, - "https://github.com/apache/airflow/tree/{version}/subdir", + "https://github.com/apache/airflow/tree/{version}/{subdir}", ), ( "https://github.com/apache/airflow", {"login": "abc123", "password": "def456"}, - "https://github.com/apache/airflow/tree/{version}/subdir", + "https://github.com/apache/airflow/tree/{version}/{subdir}", ), ( "https://github.com:443/apache/airflow", None, - "https://github.com:443/apache/airflow/tree/{version}/subdir", + "https://github.com:443/apache/airflow/tree/{version}/{subdir}", ), ( "https://github.com:443/apache/airflow", {"password": "abc123"}, - "https://github.com:443/apache/airflow/tree/{version}/subdir", + "https://github.com:443/apache/airflow/tree/{version}/{subdir}", ), ], ) From dcbffbb5ed1ec117569c36670b7735a4d5289abd Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Wed, 9 Jul 2025 19:07:57 +0100 Subject: [PATCH 21/33] skip some test if not airflow 3.1+ --- providers/git/tests/unit/git/bundles/test_git.py | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py index 06684587dbf54..cfe6ba6865554 100644 --- a/providers/git/tests/unit/git/bundles/test_git.py +++ b/providers/git/tests/unit/git/bundles/test_git.py @@ -35,6 +35,7 @@ from airflow.providers.git.hooks.git import GitHook from tests_common.test_utils.config import conf_vars +from tests_common.test_utils.version_compat import AIRFLOW_V_3_1_PLUS @pytest.fixture(autouse=True) @@ -522,6 +523,7 @@ def test_view_url_subdir( assert view_url == expected_url bundle.initialize.assert_not_called() + @pytest.mark.skipif(AIRFLOW_V_3_1_PLUS, reason="Airflow 3.0 does not support view_url_template") @pytest.mark.parametrize( "repo_url, extra_conn_kwargs, expected_url", [ From abff6ccb1c4099c5922ab005ab4d9b58ad78de90 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Thu, 10 Jul 2025 09:09:16 +0100 Subject: [PATCH 22/33] fixup! skip some test if not airflow 3.1+ --- providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py | 1 + providers/git/tests/unit/git/bundles/test_git.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py index 50750b44cc8c4..e07755eed783a 100644 --- a/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py +++ b/providers/amazon/tests/unit/amazon/aws/bundles/test_s3.py @@ -113,6 +113,7 @@ def test_view_url_generates_presigned_url(self): url: str = bundle.view_url("test_version") assert url.startswith("https://my-airflow-dags-bucket.s3.amazonaws.com/project1/dags") + @pytest.mark.db_test def test_view_url_template_generates_presigned_url(self): bundle = S3DagBundle( name="test", aws_conn_id=AWS_CONN_ID_DEFAULT, prefix="project1/dags", bucket_name=S3_BUCKET_NAME diff --git a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py index cfe6ba6865554..40ff39accdd16 100644 --- 
a/providers/git/tests/unit/git/bundles/test_git.py +++ b/providers/git/tests/unit/git/bundles/test_git.py @@ -523,7 +523,7 @@ def test_view_url_subdir( assert view_url == expected_url bundle.initialize.assert_not_called() - @pytest.mark.skipif(AIRFLOW_V_3_1_PLUS, reason="Airflow 3.0 does not support view_url_template") + @pytest.mark.skipif(not AIRFLOW_V_3_1_PLUS, reason="Airflow 3.0 does not support view_url_template") @pytest.mark.parametrize( "repo_url, extra_conn_kwargs, expected_url", [ From 9ee12d1f11426cc4457d5ff8282e067c84953a96 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Fri, 11 Jul 2025 09:51:44 +0100 Subject: [PATCH 23/33] fixup! fixup! skip some test if not airflow 3.1+ --- .../src/airflow/api_fastapi/core_api/datamodels/dag_versions.py | 2 ++ airflow-core/src/airflow/models/dagbundle.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py index 77f420a734f72..2475d49031fa1 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_versions.py @@ -53,6 +53,8 @@ def bundle_url(self) -> str | None: if bundle_model and hasattr(bundle_model, "signed_url_template"): return bundle_model.render_url(self.bundle_version) + # fallback to the deprecated option if the bundle model does not have a signed_url_template + # attribute try: return DagBundlesManager().view_url(self.bundle_name, self.bundle_version) except ValueError: diff --git a/airflow-core/src/airflow/models/dagbundle.py b/airflow-core/src/airflow/models/dagbundle.py index d83a90e765cb2..f0343d9de7cd7 100644 --- a/airflow-core/src/airflow/models/dagbundle.py +++ b/airflow-core/src/airflow/models/dagbundle.py @@ -35,7 +35,7 @@ class DagBundleModel(Base, LoggingMixin): - version: The latest version Airflow has seen for the bundle. 
- last_refreshed: When the bundle was last refreshed. - signed_url_template: Signed URL template for viewing the bundle - - template_params: JSON object containing template parameters (e.g., {"subdir": "dags"}) + - template_params: JSON object containing template parameters for constructing view url (e.g., {"subdir": "dags"}) """ From af03c99bf513422d20f79a6f674d29a4146eedb3 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Fri, 11 Jul 2025 10:01:10 +0100 Subject: [PATCH 24/33] Resolve conflict --- airflow-core/docs/img/airflow_erd.sha256 | 2 +- airflow-core/docs/img/airflow_erd.svg | 1668 +++++++++-------- airflow-core/docs/migrations-ref.rst | 4 +- ...and_template_params_to_dagbundle_model.py} | 0 4 files changed, 869 insertions(+), 805 deletions(-) rename airflow-core/src/airflow/migrations/versions/{0077_3_1_0_add_url_and_template_params_to_dagbundle_model.py => 0078_3_1_0_add_url_and_template_params_to_dagbundle_model.py} (100%) diff --git a/airflow-core/docs/img/airflow_erd.sha256 b/airflow-core/docs/img/airflow_erd.sha256 index 2e8d8e048e006..8bdc9f924785a 100644 --- a/airflow-core/docs/img/airflow_erd.sha256 +++ b/airflow-core/docs/img/airflow_erd.sha256 @@ -1 +1 @@ -8333e4b99aa5a6db3ad844dfe34bfdb31a4829089b7dc2cdb32773a2e956d201 \ No newline at end of file +eb323215a352312d87d9f2b44006e43d8d5493f1c372575b762dae09da3aad72 \ No newline at end of file diff --git a/airflow-core/docs/img/airflow_erd.svg b/airflow-core/docs/img/airflow_erd.svg index 991d8ddcc5196..07f0909aff6fe 100644 --- a/airflow-core/docs/img/airflow_erd.svg +++ b/airflow-core/docs/img/airflow_erd.svg @@ -4,11 +4,11 @@ - - + + %3 - + dag_priority_parsing_request @@ -692,24 +692,24 @@ dagrun_asset_event - -dagrun_asset_event - -dag_run_id - - [INTEGER] - NOT NULL - -event_id - - [INTEGER] - NOT NULL + +dagrun_asset_event + +dag_run_id + + [INTEGER] + NOT NULL + +event_id + + [INTEGER] + NOT NULL asset_event--dagrun_asset_event - -0..N + +0..N 1 @@ -752,637 +752,699 @@ task_instance - 
-task_instance - -id - - [UUID] - NOT NULL - -context_carrier - - [JSONB] - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - NOT NULL - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -last_heartbeat_at - - [TIMESTAMP] - -map_index - - [INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSONB] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -scheduled_dttm - - [TIMESTAMP] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] + +task_instance + +id + + [UUID] + NOT NULL + +context_carrier + + [JSONB] + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + NOT NULL + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +last_heartbeat_at + + [TIMESTAMP] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSONB] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] 
+ +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +scheduled_dttm + + [TIMESTAMP] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number + + [INTEGER] + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] trigger--task_instance - -0..N + +0..N {0,1} - + -task_map - -task_map - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -keys - - [JSONB] - -length - - [INTEGER] - NOT NULL - - +hitl_detail + +hitl_detail + +ti_id + + [UUID] + NOT NULL + +body + + [TEXT] + +chosen_options + + [JSON] + +defaults + + [JSON] + +multiple + + [BOOLEAN] + +options + + [JSON] + NOT NULL + +params + + [JSON] + NOT NULL + +params_input + + [JSON] + NOT NULL + +response_at + + [TIMESTAMP] + +subject + + [TEXT] + NOT NULL + +user_id + + [VARCHAR(128)] + + -task_instance--task_map - -0..N -1 +task_instance--hitl_detail + +1 +1 + + + +task_map + +task_map + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +keys + + [JSONB] + +length + + [INTEGER] + NOT NULL task_instance--task_map - -0..N -1 + +0..N +1 task_instance--task_map - -0..N -1 + +0..N +1 task_instance--task_map - -0..N -1 + +0..N +1 + + + +task_instance--task_map + +0..N +1 - + task_reschedule - -task_reschedule - -id - - [INTEGER] - NOT NULL - -duration - - [INTEGER] - NOT NULL - -end_date - - [TIMESTAMP] - NOT NULL - -reschedule_date - - [TIMESTAMP] - NOT NULL - -start_date - - [TIMESTAMP] - NOT NULL - -ti_id - - [UUID] - NOT NULL + +task_reschedule + +id + + [INTEGER] + NOT NULL 
+ +duration + + [INTEGER] + NOT NULL + +end_date + + [TIMESTAMP] + NOT NULL + +reschedule_date + + [TIMESTAMP] + NOT NULL + +start_date + + [TIMESTAMP] + NOT NULL + +ti_id + + [UUID] + NOT NULL - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + xcom - -xcom - -dag_run_id - - [INTEGER] - NOT NULL - -key - - [VARCHAR(512)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -timestamp - - [TIMESTAMP] - NOT NULL - -value - - [JSONB] - - - -task_instance--xcom - -0..N -1 + +xcom + +dag_run_id + + [INTEGER] + NOT NULL + +key + + [VARCHAR(512)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +timestamp + + [TIMESTAMP] + NOT NULL + +value + + [JSONB] task_instance--xcom - -0..N -1 + +0..N +1 task_instance--xcom - -0..N -1 + +0..N +1 task_instance--xcom - -0..N -1 + +0..N +1 + + + +task_instance--xcom + +0..N +1 - + task_instance_note - -task_instance_note - -ti_id - - [UUID] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +task_instance_note + +ti_id + + [UUID] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] - + task_instance--task_instance_note - -1 -1 + +1 +1 - + task_instance_history - -task_instance_history - -task_instance_id - - [UUID] - NOT NULL - -context_carrier - - [JSONB] - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -map_index - - 
[INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSONB] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -scheduled_dttm - - [TIMESTAMP] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - NOT NULL - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] - - - -task_instance--task_instance_history - -0..N -1 + +task_instance_history + +task_instance_id + + [UUID] + NOT NULL + +context_carrier + + [JSONB] + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSONB] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +scheduled_dttm + + [TIMESTAMP] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + 
[INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number + + [INTEGER] + NOT NULL + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] task_instance--task_instance_history - -0..N -1 + +0..N +1 task_instance--task_instance_history - -0..N -1 + +0..N +1 task_instance--task_instance_history - -0..N -1 + +0..N +1 + + + +task_instance--task_instance_history + +0..N +1 - + rendered_task_instance_fields - -rendered_task_instance_fields - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -k8s_pod_yaml - - [JSON] - -rendered_fields - - [JSON] - NOT NULL - - - -task_instance--rendered_task_instance_fields - -0..N -1 + +rendered_task_instance_fields + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +k8s_pod_yaml + + [JSON] + +rendered_fields + + [JSON] + NOT NULL task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 + + + +task_instance--rendered_task_instance_fields + +0..N +1 @@ -1779,173 +1841,173 @@ deadline - -deadline - -id - - [UUID] - NOT NULL - -callback - - [VARCHAR(500)] - NOT NULL - -callback_kwargs - - [JSON] - -dag_id - - [VARCHAR(250)] - -dagrun_id - - [INTEGER] - -deadline_time - - [TIMESTAMP] - NOT NULL + +deadline + +id + + [UUID] + NOT NULL + +callback + + [VARCHAR(500)] + NOT NULL + +callback_kwargs + + [JSON] + +dag_id + + [VARCHAR(250)] + +dagrun_id + + [INTEGER] + +deadline_time + + [TIMESTAMP] + NOT NULL dag--deadline - -0..N + +0..N {0,1} dag_version--task_instance - -0..N + +0..N 1 dag_run - -dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - [INTEGER] - -bundle_version - - [VARCHAR(250)] - -clear_number - - [INTEGER] - NOT NULL - -conf - - [JSONB] - -context_carrier - - 
[JSONB] - -created_dag_version_id - - [UUID] - -creating_job_id - - [INTEGER] - -dag_id - - [VARCHAR(250)] - NOT NULL - -data_interval_end - - [TIMESTAMP] - -data_interval_start - - [TIMESTAMP] - -end_date - - [TIMESTAMP] - -last_scheduling_decision - - [TIMESTAMP] - -log_template_id - - [INTEGER] - -logical_date - - [TIMESTAMP] - -queued_at - - [TIMESTAMP] - -run_after - - [TIMESTAMP] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -run_type - - [VARCHAR(50)] - NOT NULL - -scheduled_by_job_id - - [INTEGER] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(50)] - -triggered_by - - [VARCHAR(50)] - -triggering_user_name - - [VARCHAR(512)] - -updated_at - - [TIMESTAMP] + +dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + +bundle_version + + [VARCHAR(250)] + +clear_number + + [INTEGER] + NOT NULL + +conf + + [JSONB] + +context_carrier + + [JSONB] + +created_dag_version_id + + [UUID] + +creating_job_id + + [INTEGER] + +dag_id + + [VARCHAR(250)] + NOT NULL + +data_interval_end + + [TIMESTAMP] + +data_interval_start + + [TIMESTAMP] + +end_date + + [TIMESTAMP] + +last_scheduling_decision + + [TIMESTAMP] + +log_template_id + + [INTEGER] + +logical_date + + [TIMESTAMP] + +queued_at + + [TIMESTAMP] + +run_after + + [TIMESTAMP] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +run_type + + [VARCHAR(50)] + NOT NULL + +scheduled_by_job_id + + [INTEGER] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(50)] + +triggered_by + + [VARCHAR(50)] + +triggering_user_name + + [VARCHAR(512)] + +updated_at + + [TIMESTAMP] dag_version--dag_run - -0..N -{0,1} + +0..N +{0,1} @@ -2054,107 +2116,107 @@ dag_run--dagrun_asset_event - -0..N -1 + +0..N +1 dag_run--task_instance - -0..N -1 + +0..N +1 dag_run--task_instance - -0..N -1 + +0..N +1 dag_run--deadline - -0..N -{0,1} + +0..N +{0,1} backfill_dag_run - -backfill_dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - 
[INTEGER] - NOT NULL - -dag_run_id - - [INTEGER] - -exception_reason - - [VARCHAR(250)] - -logical_date - - [TIMESTAMP] - NOT NULL - -sort_ordinal - - [INTEGER] - NOT NULL + +backfill_dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + NOT NULL + +dag_run_id + + [INTEGER] + +exception_reason + + [VARCHAR(250)] + +logical_date + + [TIMESTAMP] + NOT NULL + +sort_ordinal + + [INTEGER] + NOT NULL dag_run--backfill_dag_run - -0..N -{0,1} + +0..N +{0,1} dag_run_note - -dag_run_note - -dag_run_id - - [INTEGER] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +dag_run_note + +dag_run_id + + [INTEGER] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] dag_run--dag_run_note - -1 -1 + +1 +1 @@ -2185,9 +2247,9 @@ log_template--dag_run - -0..N -{0,1} + +0..N +{0,1} @@ -2255,19 +2317,19 @@ backfill--dag_run - -0..N -{0,1} + +0..N +{0,1} backfill--backfill_dag_run - -0..N + +0..N 1 - + alembic_version alembic_version diff --git a/airflow-core/docs/migrations-ref.rst b/airflow-core/docs/migrations-ref.rst index bf77a5573bb94..28100bbcd1f00 100644 --- a/airflow-core/docs/migrations-ref.rst +++ b/airflow-core/docs/migrations-ref.rst @@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``3bda03debd04`` (head) | ``5d3072c51bac`` | ``3.1.0`` | Add url template and template params to DagBundleModel. 
| +| ``3bda03debd04`` (head) | ``40f7c30a228b`` | ``3.1.0`` | Add url template and template params to DagBundleModel. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``40f7c30a228b`` | ``5d3072c51bac`` | ``3.1.0`` | Add Human In the Loop Detail table. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``5d3072c51bac`` | ``ffdb0566c7c0`` | ``3.1.0`` | Make dag_version_id non-nullable in TaskInstance. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ diff --git a/airflow-core/src/airflow/migrations/versions/0077_3_1_0_add_url_and_template_params_to_dagbundle_model.py b/airflow-core/src/airflow/migrations/versions/0078_3_1_0_add_url_and_template_params_to_dagbundle_model.py similarity index 100% rename from airflow-core/src/airflow/migrations/versions/0077_3_1_0_add_url_and_template_params_to_dagbundle_model.py rename to airflow-core/src/airflow/migrations/versions/0078_3_1_0_add_url_and_template_params_to_dagbundle_model.py From f982f7f0ed4e2c08ec5830cee1798dd704ca62ac Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Fri, 11 Jul 2025 10:26:04 +0100 Subject: [PATCH 25/33] Add version to be removed for deprecated view_url --- airflow-core/src/airflow/dag_processing/bundles/manager.py | 3 ++- .../amazon/src/airflow/providers/amazon/aws/bundles/s3.py | 3 ++- providers/git/src/airflow/providers/git/bundles/git.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index c72490ab043e2..d4318a28f5f66 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -265,7 +265,8 @@ def get_all_dag_bundles(self) 
-> Iterable[BaseDagBundle]: def view_url(self, name: str, version: str | None = None) -> str | None: warnings.warn( - "The 'view_url' method is deprecated and will be removed in a future version. " + "The 'view_url' method is deprecated and will be removed when providers " + "have Airflow 3.1 as the minimum supported version. " "Use DagBundleModel.render_url() instead.", DeprecationWarning, stacklevel=2, diff --git a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py index 224442e4983af..16786929e66bb 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/bundles/s3.py @@ -140,7 +140,8 @@ def view_url(self, version: str | None = None) -> str | None: """ Return a URL for viewing the DAGs in S3. Currently, versioning is not supported. - This method is deprecated and will be removed in a future release. Use `view_url_template` instead. + This method is deprecated and will be removed when the minimum supported Airflow version is 3.1. + Use `view_url_template` instead. """ return self.view_url_template() diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index faa9587884689..dec8eb573a542 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -217,7 +217,8 @@ def view_url(self, version: str | None = None) -> str | None: """ Return a URL for viewing the DAGs in the repository. - This method is deprecated and will be removed in a future release. Use `view_url_template` instead. + This method is deprecated and will be removed when the minimum supported Airflow version is 3.1. + Use `view_url_template` instead. 
""" if not version: return None From 258ecad1c223c93a6297f3af1d107b124e762dbc Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Fri, 25 Jul 2025 15:00:06 +0100 Subject: [PATCH 26/33] Remove template_fields and use regex to extract placeholders --- .../dag-bundles.rst | 21 ++++++++++++++++ .../airflow/dag_processing/bundles/base.py | 1 - .../airflow/dag_processing/bundles/manager.py | 24 ++++++++++++++----- .../bundles/test_dag_bundle_manager.py | 2 -- .../src/airflow/providers/git/bundles/git.py | 1 - 5 files changed, 39 insertions(+), 10 deletions(-) diff --git a/airflow-core/docs/administration-and-deployment/dag-bundles.rst b/airflow-core/docs/administration-and-deployment/dag-bundles.rst index 48401b0c1439c..fda4dfe019de6 100644 --- a/airflow-core/docs/administration-and-deployment/dag-bundles.rst +++ b/airflow-core/docs/administration-and-deployment/dag-bundles.rst @@ -83,6 +83,27 @@ For example, adding multiple dag bundles to your ``airflow.cfg`` file: The whitespace, particularly on the last line, is important so a multi-line value works properly. More details can be found in the the `configparser docs `_. +If you want a view url different from the default provided by the dag bundle, you can change the url in the kwargs of the dag bundle configuration. +For example, if you want to use a custom URL for the git dag bundle: + +.. code-block:: ini + + [dag_processor] + dag_bundle_config_list = [ + { + "name": "my_git_repo", + "classpath": "airflow.providers.git.bundles.git.GitDagBundle", + "kwargs": { + "tracking_ref": "main", + "git_conn_id": "my_git_conn", + "view_url_template": "https://my.custom.git.repo/view/{subdir}" + } + } + ] + +Above, the ``view_url_template`` is set to a custom URL that will be used to view the Dags in the ``my_git_repo`` bundle. The ``{subdir}`` placeholder will be replaced +with the ``subdir`` attribute of the bundle. When you specify a custom URL, it overrides the default URL provided by the dag bundle.
+ You can also override the :ref:`config:dag_processor__refresh_interval` per dag bundle by passing it in kwargs. This controls how often the dag processor refreshes, or looks for new files, in the dag bundles. diff --git a/airflow-core/src/airflow/dag_processing/bundles/base.py b/airflow-core/src/airflow/dag_processing/bundles/base.py index 2ea94a6ad3e46..ec4aad09efa59 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/base.py +++ b/airflow-core/src/airflow/dag_processing/bundles/base.py @@ -250,7 +250,6 @@ class BaseDagBundle(ABC): """ supports_versioning: bool = False - template_fields: list[str] = [] _locked: bool = False diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index d4318a28f5f66..a1bd7ce3bdeb5 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -225,18 +225,30 @@ def _extract_and_sign_template(bundle_name: str) -> tuple[str | None, dict]: @staticmethod def _extract_template_params(bundle_instance: BaseDagBundle) -> dict: """ - Extract template parameters from a bundle instance using its template_fields. + Extract template parameters from a bundle instance's view_url_template method. 
:param bundle_instance: The bundle instance to extract parameters from :return: Dictionary of template parameters """ - params = {} + import re - # Extract values for each field specified in template_fields - for field_name in bundle_instance.template_fields: - field_value = getattr(bundle_instance, field_name, None) + params: dict[str, str] = {} + template = bundle_instance.view_url_template() + + if not template: + return params + + # Extract template placeholders using regex + # This matches {placeholder} patterns in the template + placeholder_pattern = r"\{([^}]+)\}" + placeholders = re.findall(placeholder_pattern, template) + + # Extract values for each placeholder found in the template + for placeholder in placeholders: + field_value = getattr(bundle_instance, placeholder, None) if field_value: - params[field_name] = field_value + params[placeholder] = field_value + return params def get_bundle(self, name: str, version: str | None = None) -> BaseDagBundle: diff --git a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py index fe54830f63ef5..b1e8b4f8b651a 100644 --- a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py +++ b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py @@ -197,8 +197,6 @@ def test_view_url(version): class BundleWithTemplate(BaseDagBundle): """Test bundle that provides a URL template.""" - template_fields = ["subdir"] - def __init__(self, *, subdir: str | None = None, **kwargs): super().__init__(**kwargs) self.subdir = subdir diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index dec8eb573a542..5152077e181da 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -46,7 +46,6 @@ class GitDagBundle(BaseDagBundle): """ supports_versioning = True - 
template_fields = ["subdir"] def __init__( self, From 23cdf577394a59bc03be9ae50a01839a0d1ab960 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Fri, 25 Jul 2025 15:16:47 +0100 Subject: [PATCH 27/33] Fix conflict --- airflow-core/docs/img/airflow_erd.sha256 | 2 +- airflow-core/docs/img/airflow_erd.svg | 3123 +++++++++-------- airflow-core/docs/migrations-ref.rst | 6 +- ...and_template_params_to_dagbundle_model.py} | 0 4 files changed, 1575 insertions(+), 1556 deletions(-) rename airflow-core/src/airflow/migrations/versions/{0078_3_1_0_add_url_and_template_params_to_dagbundle_model.py => 0080_3_1_0_add_url_and_template_params_to_dagbundle_model.py} (100%) diff --git a/airflow-core/docs/img/airflow_erd.sha256 b/airflow-core/docs/img/airflow_erd.sha256 index 8bdc9f924785a..07e93bb4ceafa 100644 --- a/airflow-core/docs/img/airflow_erd.sha256 +++ b/airflow-core/docs/img/airflow_erd.sha256 @@ -1 +1 @@ -eb323215a352312d87d9f2b44006e43d8d5493f1c372575b762dae09da3aad72 \ No newline at end of file +efbae2f1de68413e5a6f671a306e748581fe454b81e25eeb2927567f11ebd59c \ No newline at end of file diff --git a/airflow-core/docs/img/airflow_erd.svg b/airflow-core/docs/img/airflow_erd.svg index 07f0909aff6fe..537b1e5f6e3fd 100644 --- a/airflow-core/docs/img/airflow_erd.svg +++ b/airflow-core/docs/img/airflow_erd.svg @@ -4,11 +4,11 @@ - - + + %3 - + dag_priority_parsing_request @@ -305,1149 +305,1197 @@ asset_alias - -asset_alias - -id - - [INTEGER] - NOT NULL - -group - - [VARCHAR(1500)] - NOT NULL - -name - - [VARCHAR(1500)] - NOT NULL + +asset_alias + +id + + [INTEGER] + NOT NULL + +group + + [VARCHAR(1500)] + NOT NULL + +name + + [VARCHAR(1500)] + NOT NULL asset_alias_asset - -asset_alias_asset - -alias_id - - [INTEGER] - NOT NULL - -asset_id - - [INTEGER] - NOT NULL + +asset_alias_asset + +alias_id + + [INTEGER] + NOT NULL + +asset_id + + [INTEGER] + NOT NULL asset_alias--asset_alias_asset - -0..N -1 + +0..N +1 asset_alias_asset_event - -asset_alias_asset_event - -alias_id - - 
[INTEGER] - NOT NULL - -event_id - - [INTEGER] - NOT NULL + +asset_alias_asset_event + +alias_id + + [INTEGER] + NOT NULL + +event_id + + [INTEGER] + NOT NULL asset_alias--asset_alias_asset_event - -0..N -1 + +0..N +1 dag_schedule_asset_alias_reference - -dag_schedule_asset_alias_reference - -alias_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_alias_reference + +alias_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset_alias--dag_schedule_asset_alias_reference - -0..N -1 + +0..N +1 asset - -asset - -id - - [INTEGER] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -extra - - [JSON] - NOT NULL - -group - - [VARCHAR(1500)] - NOT NULL - -name - - [VARCHAR(1500)] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -uri - - [VARCHAR(1500)] - NOT NULL + +asset + +id + + [INTEGER] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +extra + + [JSON] + NOT NULL + +group + + [VARCHAR(1500)] + NOT NULL + +name + + [VARCHAR(1500)] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +uri + + [VARCHAR(1500)] + NOT NULL asset--asset_alias_asset - -0..N -1 + +0..N +1 asset_trigger - -asset_trigger - -asset_id - - [INTEGER] - NOT NULL - -trigger_id - - [INTEGER] - NOT NULL + +asset_trigger + +asset_id + + [INTEGER] + NOT NULL + +trigger_id + + [INTEGER] + NOT NULL asset--asset_trigger - -0..N -1 + +0..N +1 asset_active - -asset_active - -name - - [VARCHAR(1500)] - NOT NULL - -uri - - [VARCHAR(1500)] - NOT NULL + +asset_active + +name + + [VARCHAR(1500)] + NOT NULL + +uri + + [VARCHAR(1500)] + NOT NULL asset--asset_active - -1 -1 + +1 +1 asset--asset_active - -1 -1 + +1 +1 dag_schedule_asset_reference - -dag_schedule_asset_reference - -asset_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT 
NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_reference + +asset_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset--dag_schedule_asset_reference - -0..N -1 + +0..N +1 task_outlet_asset_reference - -task_outlet_asset_reference - -asset_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +task_outlet_asset_reference + +asset_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset--task_outlet_asset_reference - -0..N -1 + +0..N +1 task_inlet_asset_reference - -task_inlet_asset_reference - -asset_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +task_inlet_asset_reference + +asset_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset--task_inlet_asset_reference - -0..N -1 + +0..N +1 asset_dag_run_queue - -asset_dag_run_queue - -asset_id - - [INTEGER] - NOT NULL - -target_dag_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL + +asset_dag_run_queue + +asset_id + + [INTEGER] + NOT NULL + +target_dag_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL asset--asset_dag_run_queue - -0..N -1 + +0..N +1 asset_event - -asset_event - -id - - [INTEGER] - NOT NULL - -asset_id - - [INTEGER] - NOT NULL - -extra - - [JSON] - NOT NULL - -source_dag_id - - [VARCHAR(250)] - -source_map_index - - [INTEGER] - -source_run_id - - [VARCHAR(250)] - -source_task_id 
- - [VARCHAR(250)] - -timestamp - - [TIMESTAMP] - NOT NULL + +asset_event + +id + + [INTEGER] + NOT NULL + +asset_id + + [INTEGER] + NOT NULL + +extra + + [JSON] + NOT NULL + +source_dag_id + + [VARCHAR(250)] + +source_map_index + + [INTEGER] + +source_run_id + + [VARCHAR(250)] + +source_task_id + + [VARCHAR(250)] + +timestamp + + [TIMESTAMP] + NOT NULL asset_event--asset_alias_asset_event - -0..N -1 + +0..N +1 dagrun_asset_event - -dagrun_asset_event - -dag_run_id - - [INTEGER] - NOT NULL - -event_id - - [INTEGER] - NOT NULL + +dagrun_asset_event + +dag_run_id + + [INTEGER] + NOT NULL + +event_id + + [INTEGER] + NOT NULL asset_event--dagrun_asset_event - -0..N -1 + +0..N +1 trigger - -trigger - -id - - [INTEGER] - NOT NULL - -classpath - - [VARCHAR(1000)] - NOT NULL - -created_date - - [TIMESTAMP] - NOT NULL - -kwargs - - [TEXT] - NOT NULL - -triggerer_id - - [INTEGER] + +trigger + +id + + [INTEGER] + NOT NULL + +classpath + + [VARCHAR(1000)] + NOT NULL + +created_date + + [TIMESTAMP] + NOT NULL + +kwargs + + [TEXT] + NOT NULL + +triggerer_id + + [INTEGER] trigger--asset_trigger - -0..N -1 + +0..N +1 task_instance - -task_instance - -id - - [UUID] - NOT NULL - -context_carrier - - [JSONB] - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - NOT NULL - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -last_heartbeat_at - - [TIMESTAMP] - -map_index - - [INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSONB] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - 
-run_id - - [VARCHAR(250)] - NOT NULL - -scheduled_dttm - - [TIMESTAMP] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] + +task_instance + +id + + [UUID] + NOT NULL + +context_carrier + + [JSONB] + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + NOT NULL + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +last_heartbeat_at + + [TIMESTAMP] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSONB] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +scheduled_dttm + + [TIMESTAMP] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number + + [INTEGER] + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] trigger--task_instance - -0..N -{0,1} + +0..N +{0,1} + + + +deadline + +deadline + +id + + [UUID] + NOT NULL + +callback + + [VARCHAR(500)] + NOT NULL + +callback_kwargs + + [JSON] + +callback_state + + [VARCHAR(20)] + +dag_id + + [VARCHAR(250)] + +dagrun_id + + [INTEGER] + +deadline_time + + [TIMESTAMP] + NOT NULL 
+ +trigger_id + + [INTEGER] + + + +trigger--deadline + +0..N +{0,1} hitl_detail - -hitl_detail - -ti_id - - [UUID] - NOT NULL - -body - - [TEXT] - -chosen_options - - [JSON] - -defaults - - [JSON] - -multiple - - [BOOLEAN] - -options - - [JSON] - NOT NULL - -params - - [JSON] - NOT NULL - -params_input - - [JSON] - NOT NULL - -response_at - - [TIMESTAMP] - -subject - - [TEXT] - NOT NULL - -user_id - - [VARCHAR(128)] + +hitl_detail + +ti_id + + [UUID] + NOT NULL + +body + + [TEXT] + +chosen_options + + [JSON] + +defaults + + [JSON] + +multiple + + [BOOLEAN] + +options + + [JSON] + NOT NULL + +params + + [JSON] + NOT NULL + +params_input + + [JSON] + NOT NULL + +response_at + + [TIMESTAMP] + +subject + + [TEXT] + NOT NULL + +user_id + + [VARCHAR(128)] - + task_instance--hitl_detail - -1 -1 + +1 +1 task_map - -task_map - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -keys - - [JSONB] - -length - - [INTEGER] - NOT NULL - - - -task_instance--task_map - -0..N -1 + +task_map + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +keys + + [JSONB] + +length + + [INTEGER] + NOT NULL task_instance--task_map - -0..N -1 + +0..N +1 task_instance--task_map - -0..N -1 + +0..N +1 task_instance--task_map - -0..N -1 + +0..N +1 + + + +task_instance--task_map + +0..N +1 task_reschedule - -task_reschedule - -id - - [INTEGER] - NOT NULL - -duration - - [INTEGER] - NOT NULL - -end_date - - [TIMESTAMP] - NOT NULL - -reschedule_date - - [TIMESTAMP] - NOT NULL - -start_date - - [TIMESTAMP] - NOT NULL - -ti_id - - [UUID] - NOT NULL + +task_reschedule + +id + + [INTEGER] + NOT NULL + +duration + + [INTEGER] + NOT NULL + +end_date + + [TIMESTAMP] + NOT NULL + +reschedule_date + + [TIMESTAMP] + NOT NULL + +start_date + + [TIMESTAMP] + NOT NULL + +ti_id + + [UUID] + NOT NULL - + 
task_instance--task_reschedule - -0..N -1 + +0..N +1 xcom - -xcom - -dag_run_id - - [INTEGER] - NOT NULL - -key - - [VARCHAR(512)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -timestamp - - [TIMESTAMP] - NOT NULL - -value - - [JSONB] - - - -task_instance--xcom - -0..N -1 + +xcom + +dag_run_id + + [INTEGER] + NOT NULL + +key + + [VARCHAR(512)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +timestamp + + [TIMESTAMP] + NOT NULL + +value + + [JSONB] task_instance--xcom - -0..N -1 + +0..N +1 task_instance--xcom - -0..N -1 + +0..N +1 task_instance--xcom - -0..N -1 + +0..N +1 + + + +task_instance--xcom + +0..N +1 task_instance_note - -task_instance_note - -ti_id - - [UUID] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +task_instance_note + +ti_id + + [UUID] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] - + task_instance--task_instance_note - -1 -1 + +1 +1 task_instance_history - -task_instance_history - -task_instance_id - - [UUID] - NOT NULL - -context_carrier - - [JSONB] - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -map_index - - [INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSONB] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots 
- - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -scheduled_dttm - - [TIMESTAMP] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - NOT NULL - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] - - - -task_instance--task_instance_history - -0..N -1 + +task_instance_history + +task_instance_id + + [UUID] + NOT NULL + +context_carrier + + [JSONB] + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSONB] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +scheduled_dttm + + [TIMESTAMP] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number + + [INTEGER] + NOT NULL + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] task_instance--task_instance_history - -0..N -1 + +0..N +1 
task_instance--task_instance_history - -0..N -1 + +0..N +1 task_instance--task_instance_history - -0..N -1 + +0..N +1 + + + +task_instance--task_instance_history + +0..N +1 rendered_task_instance_fields - -rendered_task_instance_fields - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -k8s_pod_yaml - - [JSON] - -rendered_fields - - [JSON] - NOT NULL - - - -task_instance--rendered_task_instance_fields - -0..N -1 + +rendered_task_instance_fields + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +k8s_pod_yaml + + [JSON] + +rendered_fields + + [JSON] + NOT NULL task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 + + + +task_instance--rendered_task_instance_fields + +0..N +1 - + dag_bundle dag_bundle @@ -1478,7 +1526,7 @@ [VARCHAR(200)] - + dag dag @@ -1588,745 +1636,712 @@ [TEXT] - + dag_bundle--dag 0..N {0,1} - + dag--dag_schedule_asset_alias_reference - -0..N -1 + +0..N +1 - + dag--dag_schedule_asset_reference - -0..N -1 + +0..N +1 - + dag--task_outlet_asset_reference - -0..N -1 + +0..N +1 - + dag--task_inlet_asset_reference - -0..N -1 + +0..N +1 - + dag--asset_dag_run_queue - -0..N -1 + +0..N +1 + + + +dag--deadline + +0..N +{0,1} - + dag_schedule_asset_name_reference - -dag_schedule_asset_name_reference - -dag_id - - [VARCHAR(250)] - NOT NULL - -name - - [VARCHAR(1500)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_name_reference + +dag_id + + [VARCHAR(250)] + NOT NULL + +name + + [VARCHAR(1500)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL - + dag--dag_schedule_asset_name_reference - -0..N -1 + +0..N +1 - + dag_schedule_asset_uri_reference - 
-dag_schedule_asset_uri_reference - -dag_id - - [VARCHAR(250)] - NOT NULL - -uri - - [VARCHAR(1500)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_uri_reference + +dag_id + + [VARCHAR(250)] + NOT NULL + +uri + + [VARCHAR(1500)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL - + dag--dag_schedule_asset_uri_reference - -0..N -1 + +0..N +1 - + dag_version - -dag_version - -id - - [UUID] - NOT NULL - -bundle_name - - [VARCHAR(250)] - -bundle_version - - [VARCHAR(250)] - -created_at - - [TIMESTAMP] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -last_updated - - [TIMESTAMP] - NOT NULL - -version_number - - [INTEGER] - NOT NULL + +dag_version + +id + + [UUID] + NOT NULL + +bundle_name + + [VARCHAR(250)] + +bundle_version + + [VARCHAR(250)] + +created_at + + [TIMESTAMP] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +last_updated + + [TIMESTAMP] + NOT NULL + +version_number + + [INTEGER] + NOT NULL - + dag--dag_version - -0..N -1 + +0..N +1 - + dag_tag - -dag_tag - -dag_id - - [VARCHAR(250)] - NOT NULL - -name - - [VARCHAR(100)] - NOT NULL + +dag_tag + +dag_id + + [VARCHAR(250)] + NOT NULL + +name + + [VARCHAR(100)] + NOT NULL - + dag--dag_tag - -0..N -1 + +0..N +1 - + dag_owner_attributes - -dag_owner_attributes - -dag_id - - [VARCHAR(250)] - NOT NULL - -owner - - [VARCHAR(500)] - NOT NULL - -link - - [VARCHAR(500)] - NOT NULL + +dag_owner_attributes + +dag_id + + [VARCHAR(250)] + NOT NULL + +owner + + [VARCHAR(500)] + NOT NULL + +link + + [VARCHAR(500)] + NOT NULL - + dag--dag_owner_attributes - -0..N -1 + +0..N +1 - + dag_warning - -dag_warning - -dag_id - - [VARCHAR(250)] - NOT NULL - -warning_type - - [VARCHAR(50)] - NOT NULL - -message - - [TEXT] - NOT NULL - -timestamp - - [TIMESTAMP] - NOT NULL + +dag_warning + +dag_id + + [VARCHAR(250)] + NOT NULL + +warning_type + + [VARCHAR(50)] + NOT NULL + +message + + [TEXT] + NOT NULL + +timestamp + + [TIMESTAMP] + NOT NULL - + dag--dag_warning - -0..N -1 + +0..N +1 - + 
dag_favorite - -dag_favorite - -dag_id - - [VARCHAR(250)] - NOT NULL - -user_id - - [VARCHAR(250)] - NOT NULL + +dag_favorite + +dag_id + + [VARCHAR(250)] + NOT NULL + +user_id + + [VARCHAR(250)] + NOT NULL - -dag--dag_favorite - -0..N -1 - - - -deadline - -deadline - -id - - [UUID] - NOT NULL - -callback - - [VARCHAR(500)] - NOT NULL - -callback_kwargs - - [JSON] - -dag_id - - [VARCHAR(250)] - -dagrun_id - - [INTEGER] - -deadline_time - - [TIMESTAMP] - NOT NULL - - -dag--deadline - -0..N -{0,1} +dag--dag_favorite + +0..N +1 - + dag_version--task_instance - -0..N -1 + +0..N +1 dag_run - -dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - [INTEGER] - -bundle_version - - [VARCHAR(250)] - -clear_number - - [INTEGER] - NOT NULL - -conf - - [JSONB] - -context_carrier - - [JSONB] - -created_dag_version_id - - [UUID] - -creating_job_id - - [INTEGER] - -dag_id - - [VARCHAR(250)] - NOT NULL - -data_interval_end - - [TIMESTAMP] - -data_interval_start - - [TIMESTAMP] - -end_date - - [TIMESTAMP] - -last_scheduling_decision - - [TIMESTAMP] - -log_template_id - - [INTEGER] - -logical_date - - [TIMESTAMP] - -queued_at - - [TIMESTAMP] - -run_after - - [TIMESTAMP] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -run_type - - [VARCHAR(50)] - NOT NULL - -scheduled_by_job_id - - [INTEGER] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(50)] - -triggered_by - - [VARCHAR(50)] - -triggering_user_name - - [VARCHAR(512)] - -updated_at - - [TIMESTAMP] + +dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + +bundle_version + + [VARCHAR(250)] + +clear_number + + [INTEGER] + NOT NULL + +conf + + [JSONB] + +context_carrier + + [JSONB] + +created_dag_version_id + + [UUID] + +creating_job_id + + [INTEGER] + +dag_id + + [VARCHAR(250)] + NOT NULL + +data_interval_end + + [TIMESTAMP] + +data_interval_start + + [TIMESTAMP] + +end_date + + [TIMESTAMP] + +last_scheduling_decision + + [TIMESTAMP] + +log_template_id + + 
[INTEGER] + +logical_date + + [TIMESTAMP] + +queued_at + + [TIMESTAMP] + +run_after + + [TIMESTAMP] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +run_type + + [VARCHAR(50)] + NOT NULL + +scheduled_by_job_id + + [INTEGER] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(50)] + +triggered_by + + [VARCHAR(50)] + +triggering_user_name + + [VARCHAR(512)] + +updated_at + + [TIMESTAMP] - + dag_version--dag_run - -0..N -{0,1} + +0..N +{0,1} dag_code - -dag_code - -id - - [UUID] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - NOT NULL - -fileloc - - [VARCHAR(2000)] - NOT NULL - -last_updated - - [TIMESTAMP] - NOT NULL - -source_code - - [TEXT] - NOT NULL - -source_code_hash - - [VARCHAR(32)] - NOT NULL + +dag_code + +id + + [UUID] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + NOT NULL + +fileloc + + [VARCHAR(2000)] + NOT NULL + +last_updated + + [TIMESTAMP] + NOT NULL + +source_code + + [TEXT] + NOT NULL + +source_code_hash + + [VARCHAR(32)] + NOT NULL - + dag_version--dag_code - -0..N -1 + +0..N +1 serialized_dag - -serialized_dag - -id - - [UUID] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -dag_hash - - [VARCHAR(32)] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - NOT NULL - -data - - [JSON] - -data_compressed - - [BYTEA] - -last_updated - - [TIMESTAMP] - NOT NULL + +serialized_dag + +id + + [UUID] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +dag_hash + + [VARCHAR(32)] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + NOT NULL + +data + + [JSON] + +data_compressed + + [BYTEA] + +last_updated + + [TIMESTAMP] + NOT NULL - + dag_version--serialized_dag - -0..N -1 + +0..N +1 - + dag_run--dagrun_asset_event - -0..N -1 + +0..N +1 dag_run--task_instance - -0..N -1 + +0..N +1 
dag_run--task_instance - -0..N -1 + +0..N +1 - + dag_run--deadline - -0..N -{0,1} + +0..N +{0,1} backfill_dag_run - -backfill_dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - [INTEGER] - NOT NULL - -dag_run_id - - [INTEGER] - -exception_reason - - [VARCHAR(250)] - -logical_date - - [TIMESTAMP] - NOT NULL - -sort_ordinal - - [INTEGER] - NOT NULL + +backfill_dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + NOT NULL + +dag_run_id + + [INTEGER] + +exception_reason + + [VARCHAR(250)] + +logical_date + + [TIMESTAMP] + NOT NULL + +sort_ordinal + + [INTEGER] + NOT NULL - + dag_run--backfill_dag_run - -0..N -{0,1} + +0..N +{0,1} dag_run_note - -dag_run_note - -dag_run_id - - [INTEGER] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +dag_run_note + +dag_run_id + + [INTEGER] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] - + dag_run--dag_run_note - -1 -1 + +1 +1 log_template - -log_template - -id - - [INTEGER] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -elasticsearch_id - - [TEXT] - NOT NULL - -filename - - [TEXT] - NOT NULL + +log_template + +id + + [INTEGER] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +elasticsearch_id + + [TEXT] + NOT NULL + +filename + + [TEXT] + NOT NULL - + log_template--dag_run - -0..N -{0,1} + +0..N +{0,1} backfill - -backfill - -id - - [INTEGER] - NOT NULL - -completed_at - - [TIMESTAMP] - -created_at - - [TIMESTAMP] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_run_conf - - [JSON] - NOT NULL - -from_date - - [TIMESTAMP] - NOT NULL - -is_paused - - [BOOLEAN] - -max_active_runs - - [INTEGER] - NOT NULL - -reprocess_behavior - - [VARCHAR(250)] - NOT NULL - -to_date - - [TIMESTAMP] - NOT NULL - -triggering_user_name - - [VARCHAR(512)] - -updated_at - - [TIMESTAMP] - NOT NULL + 
+backfill + +id + + [INTEGER] + NOT NULL + +completed_at + + [TIMESTAMP] + +created_at + + [TIMESTAMP] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_run_conf + + [JSON] + NOT NULL + +from_date + + [TIMESTAMP] + NOT NULL + +is_paused + + [BOOLEAN] + +max_active_runs + + [INTEGER] + NOT NULL + +reprocess_behavior + + [VARCHAR(250)] + NOT NULL + +to_date + + [TIMESTAMP] + NOT NULL + +triggering_user_name + + [VARCHAR(512)] + +updated_at + + [TIMESTAMP] + NOT NULL - + backfill--dag_run - -0..N -{0,1} + +0..N +{0,1} - + backfill--backfill_dag_run - -0..N -1 + +0..N +1 diff --git a/airflow-core/docs/migrations-ref.rst b/airflow-core/docs/migrations-ref.rst index 28100bbcd1f00..cf9dc37553978 100644 --- a/airflow-core/docs/migrations-ref.rst +++ b/airflow-core/docs/migrations-ref.rst @@ -39,7 +39,11 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``3bda03debd04`` (head) | ``40f7c30a228b`` | ``3.1.0`` | Add url template and template params to DagBundleModel. | +| ``3bda03debd04`` (head) | ``f56f68b9e02f`` | ``3.1.0`` | Add url template and template params to DagBundleModel. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``f56f68b9e02f`` | ``09fa89ba1710`` | ``3.1.0`` | Add callback_state to deadline. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``09fa89ba1710`` | ``40f7c30a228b`` | ``3.1.0`` | Add trigger_id to deadline. 
| +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``40f7c30a228b`` | ``5d3072c51bac`` | ``3.1.0`` | Add Human In the Loop Detail table. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ diff --git a/airflow-core/src/airflow/migrations/versions/0078_3_1_0_add_url_and_template_params_to_dagbundle_model.py b/airflow-core/src/airflow/migrations/versions/0080_3_1_0_add_url_and_template_params_to_dagbundle_model.py similarity index 100% rename from airflow-core/src/airflow/migrations/versions/0078_3_1_0_add_url_and_template_params_to_dagbundle_model.py rename to airflow-core/src/airflow/migrations/versions/0080_3_1_0_add_url_and_template_params_to_dagbundle_model.py From 46f730e1a133d9d62bf7344e28fbbefd8e684e86 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Fri, 25 Jul 2025 19:42:56 +0100 Subject: [PATCH 28/33] fixup! Remove template_fields and use regex to extract placeholders --- .../airflow/dag_processing/bundles/base.py | 3 --- .../src/airflow/providers/git/bundles/git.py | 5 ---- .../git/tests/unit/git/bundles/test_git.py | 26 +++++++++---------- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/base.py b/airflow-core/src/airflow/dag_processing/bundles/base.py index ec4aad09efa59..3618c8d4c9052 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/base.py +++ b/airflow-core/src/airflow/dag_processing/bundles/base.py @@ -333,9 +333,6 @@ def view_url(self, version: str | None = None) -> str | None: ) return None - def _templated_url_fragment(self): - return "".join(f"/{{{item}}}" for item in self.template_fields) - def view_url_template(self) -> str | None: """ URL template to view the bundle on an external website. 
This is shown to users in the Airflow UI, allowing them to navigate to this url for more details about that version of the bundle. diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py index 5152077e181da..28241ab045dc4 100644 --- a/providers/git/src/airflow/providers/git/bundles/git.py +++ b/providers/git/src/airflow/providers/git/bundles/git.py @@ -264,11 +264,6 @@ def view_url_template(self) -> str | None: for allowed_host, template in host_patterns.items(): if host == allowed_host or host.endswith(f".{allowed_host}"): - # Because we use this method in the view_url method, we need to handle - # backward compatibility for Airflow versions that doesn't have the - # _templated_url_fragment attribute. Should be removed when we drop support for Airflow 3.0 - if hasattr(self, "_templated_url_fragment"): - return template + self._templated_url_fragment() if self.subdir: return f"{template}/{self.subdir}" return template diff --git a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py index 40ff39accdd16..6fc213bf19205 100644 --- a/providers/git/tests/unit/git/bundles/test_git.py +++ b/providers/git/tests/unit/git/bundles/test_git.py @@ -530,67 +530,67 @@ def test_view_url_subdir( ( "git@github.com:apache/airflow.git", None, - "https://github.com/apache/airflow/tree/{version}/{subdir}", + "https://github.com/apache/airflow/tree/{version}/subdir", ), ( "git@github.com:apache/airflow", None, - "https://github.com/apache/airflow/tree/{version}/{subdir}", + "https://github.com/apache/airflow/tree/{version}/subdir", ), ( "https://github.com/apache/airflow", None, - "https://github.com/apache/airflow/tree/{version}/{subdir}", + "https://github.com/apache/airflow/tree/{version}/subdir", ), ( "https://github.com/apache/airflow.git", None, - "https://github.com/apache/airflow/tree/{version}/{subdir}", + "https://github.com/apache/airflow/tree/{version}/subdir", 
), ( "git@gitlab.com:apache/airflow.git", None, - "https://gitlab.com/apache/airflow/-/tree/{version}/{subdir}", + "https://gitlab.com/apache/airflow/-/tree/{version}/subdir", ), ( "git@bitbucket.org:apache/airflow.git", None, - "https://bitbucket.org/apache/airflow/src/{version}/{subdir}", + "https://bitbucket.org/apache/airflow/src/{version}/subdir", ), ( "git@myorg.github.com:apache/airflow.git", None, - "https://myorg.github.com/apache/airflow/tree/{version}/{subdir}", + "https://myorg.github.com/apache/airflow/tree/{version}/subdir", ), ( "https://myorg.github.com/apache/airflow.git", None, - "https://myorg.github.com/apache/airflow/tree/{version}/{subdir}", + "https://myorg.github.com/apache/airflow/tree/{version}/subdir", ), ( "https://github.com/apache/airflow", {"password": "abc123"}, - "https://github.com/apache/airflow/tree/{version}/{subdir}", + "https://github.com/apache/airflow/tree/{version}/subdir", ), ( "https://github.com/apache/airflow", {"login": "abc123"}, - "https://github.com/apache/airflow/tree/{version}/{subdir}", + "https://github.com/apache/airflow/tree/{version}/subdir", ), ( "https://github.com/apache/airflow", {"login": "abc123", "password": "def456"}, - "https://github.com/apache/airflow/tree/{version}/{subdir}", + "https://github.com/apache/airflow/tree/{version}/subdir", ), ( "https://github.com:443/apache/airflow", None, - "https://github.com:443/apache/airflow/tree/{version}/{subdir}", + "https://github.com:443/apache/airflow/tree/{version}/subdir", ), ( "https://github.com:443/apache/airflow", {"password": "abc123"}, - "https://github.com:443/apache/airflow/tree/{version}/{subdir}", + "https://github.com:443/apache/airflow/tree/{version}/subdir", ), ], ) From 002dd5fe5a7544da995537977d9d965ca10c3324 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 28 Jul 2025 08:00:19 +0100 Subject: [PATCH 29/33] Remove added deadline in dag details --- .../tests/unit/api_fastapi/core_api/routes/public/test_dags.py | 1 - 1 file changed, 
1 deletion(-) diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py index 55adcb3e0e4c2..e745d2a0b4fe2 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py @@ -557,7 +557,6 @@ def test_dag_details( }, "description": None, "doc_md": "details", - "deadline": None, "end_date": None, "fileloc": __file__, "file_token": file_token, From 8f460fb07210344cdc9bdc97cbc2fe4a1afb8132 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Tue, 29 Jul 2025 09:57:43 +0100 Subject: [PATCH 30/33] Update docs --- .../docs/administration-and-deployment/dag-bundles.rst | 5 ++++- airflow-core/src/airflow/dag_processing/bundles/base.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/airflow-core/docs/administration-and-deployment/dag-bundles.rst b/airflow-core/docs/administration-and-deployment/dag-bundles.rst index fda4dfe019de6..53bd5e16afbdf 100644 --- a/airflow-core/docs/administration-and-deployment/dag-bundles.rst +++ b/airflow-core/docs/administration-and-deployment/dag-bundles.rst @@ -102,7 +102,10 @@ For example, if you want to use a custom URL for the git dag bundle: ] Above, the ``view_url_template`` is set to a custom URL that will be used to view the Dags in the ``my_git_repo`` bundle. The ``{subdir}`` placeholder will be replaced -with the ``subdir`` attribute of the bundle. When you specify a custom URL, it overrides the default URL provided by the dag bundle. +with the ``subdir`` attribute of the bundle. The placeholders are attributes of the bundle. You cannot use any placeholder outside of the bundle's attributes. +When you specify a custom URL, it overrides the default URL provided by the dag bundle. + +The url is verified for safety, and if it is not safe, the view url for the bundle will be set to ``None``. 
This is to prevent any potential security issues with unsafe URLs. You can also override the :ref:`config:dag_processor__refresh_interval` per dag bundle by passing it in kwargs. This controls how often the dag processor refreshes, or looks for new files, in the dag bundles. diff --git a/airflow-core/src/airflow/dag_processing/bundles/base.py b/airflow-core/src/airflow/dag_processing/bundles/base.py index 3618c8d4c9052..f85e9389cb5fa 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/base.py +++ b/airflow-core/src/airflow/dag_processing/bundles/base.py @@ -335,7 +335,10 @@ def view_url(self, version: str | None = None) -> str | None: def view_url_template(self) -> str | None: """ - URL template to view the bundle on an external website. This is shown to users in the Airflow UI, allowing them to navigate to this url for more details about that version of the bundle. + URL template to view the bundle on an external website. + + This is shown to users in the Airflow UI, allowing them to navigate to + this url for more details about that version of the bundle. The template should use format string placeholders like {version}, {subdir}, etc. 
Common placeholders: From 8fb7b0c73f1c526168a1f2ad358a66d0eb5d0ead Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Tue, 29 Jul 2025 11:02:49 +0100 Subject: [PATCH 31/33] Remove ; from url safety check --- airflow-core/src/airflow/dag_processing/bundles/manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index a1bd7ce3bdeb5..537dcfb646125 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -105,8 +105,6 @@ def _is_safe_bundle_url(url: str) -> bool: if not parsed.netloc: return False - if ";" in url: - return False if any(ord(c) < 32 for c in url): return False From b0e8a1ca798dbbb74f3e64998ab0d182df78bf04 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Tue, 29 Jul 2025 11:24:07 +0100 Subject: [PATCH 32/33] Log url sanity check errors --- .../src/airflow/dag_processing/bundles/manager.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 537dcfb646125..18bb019f593ba 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -92,24 +92,33 @@ def _is_safe_bundle_url(url: str) -> bool: - Is properly formatted - Doesn't contain malicious content """ + import logging from urllib.parse import urlparse + logger = logging.getLogger(__name__) + if not url: return False try: parsed = urlparse(url) if parsed.scheme not in {"http", "https"}: + logger.error( + "Bundle URL uses unsafe scheme '%s'. 
Only 'http' and 'https' are allowed", parsed.scheme + ) return False if not parsed.netloc: + logger.error("Bundle URL '%s' has no network location", url) return False if any(ord(c) < 32 for c in url): + logger.error("Bundle URL '%s' contains control characters (ASCII < 32)", url) return False return True - except Exception: + except Exception as e: + logger.error("Failed to parse bundle URL '%s': %s", url, str(e)) return False From 5bf2505f5d417251b6ea784ad3c13c81a0772ff2 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Wed, 30 Jul 2025 08:41:38 +0100 Subject: [PATCH 33/33] compile re --- airflow-core/src/airflow/dag_processing/bundles/manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 18bb019f593ba..cf3b7c5104824 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -247,8 +247,8 @@ def _extract_template_params(bundle_instance: BaseDagBundle) -> dict: # Extract template placeholders using regex # This matches {placeholder} patterns in the template - placeholder_pattern = r"\{([^}]+)\}" - placeholders = re.findall(placeholder_pattern, template) + PLACEHOLDER_PATTERN = re.compile(r"\{([^}]+)\}") + placeholders = PLACEHOLDER_PATTERN.findall(template) # Extract values for each placeholder found in the template for placeholder in placeholders: