From 8d72946d7df0e5269d230b092958d895ca437e5a Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Wed, 17 Dec 2025 15:18:40 +0100 Subject: [PATCH] Fix confusing cluster name and named collection name in cluster functions --- .../ObjectStorage/StorageObjectStorageCluster.cpp | 10 +++++++++- tests/integration/helpers/iceberg_utils.py | 9 ++++++--- tests/integration/test_storage_iceberg/test.py | 6 ++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 9cf1532fcd18..377bd3aff111 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -396,7 +396,15 @@ void StorageObjectStorageCluster::updateQueryToSendIfNeeded( } ASTPtr object_storage_type_arg; - configuration->extractDynamicStorageType(args, context, &object_storage_type_arg); + if (cluster_name_in_settings) + configuration->extractDynamicStorageType(args, context, &object_storage_type_arg); + else + { + auto args_copy = args; + // Remove cluster name from args to avoid confusing cluster name and named collection name + args_copy.erase(args_copy.begin()); + configuration->extractDynamicStorageType(args_copy, context, &object_storage_type_arg); + } ASTPtr settings_temporary_storage = nullptr; for (auto * it = args.begin(); it != args.end(); ++it) { diff --git a/tests/integration/helpers/iceberg_utils.py b/tests/integration/helpers/iceberg_utils.py index a8a662569b69..5479fdc5a938 100644 --- a/tests/integration/helpers/iceberg_utils.py +++ b/tests/integration/helpers/iceberg_utils.py @@ -196,6 +196,7 @@ def get_creation_expression( explicit_metadata_path="", storage_type_as_arg=False, storage_type_in_named_collection=False, + cluster_name_as_literal=True, additional_settings = [], **kwargs, ): @@ -224,6 +225,8 @@ def get_creation_expression( else: settings_expression = "" + cluster_name = "'cluster_simple'" if cluster_name_as_literal else "cluster_simple" + storage_arg = storage_type engine_part = "" if (storage_type_in_named_collection): @@ -252,7 +255,7 @@ def get_creation_expression( if run_on_cluster: assert table_function - return f"iceberg{engine_part}Cluster('cluster_simple', {storage_arg}, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')" + return f"iceberg{engine_part}Cluster({cluster_name}, {storage_arg}, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')" else: if table_function: return f"iceberg{engine_part}({storage_arg}, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')" @@ -271,7 +274,7 @@ def get_creation_expression( if run_on_cluster: assert table_function return f""" - iceberg{engine_part}Cluster('cluster_simple', {storage_arg}, container = '{cluster.azure_container_name}', storage_account_url = '{cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]}', blob_path = '/iceberg_data/default/{table_name}/', format={format}) + iceberg{engine_part}Cluster({cluster_name}, {storage_arg}, container = '{cluster.azure_container_name}', storage_account_url = '{cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]}', blob_path = '/iceberg_data/default/{table_name}/', format={format}) """ else: if table_function: @@ -293,7 +296,7 @@ def get_creation_expression( if run_on_cluster: assert table_function return f""" - iceberg{engine_part}Cluster('cluster_simple', {storage_arg}, path = '/iceberg_data/default/{table_name}/', format={format}) + iceberg{engine_part}Cluster({cluster_name}, {storage_arg}, path = '/iceberg_data/default/{table_name}/', format={format}) """ else: if table_function: diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index b2ccd8375b04..6bc1963a87b2 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -412,7 +412,8 @@ def count_secondary_subqueries(started_cluster, query_id, expected, comment): @pytest.mark.parametrize("format_version", ["1", "2"]) @pytest.mark.parametrize("storage_type", ["s3", "azure", "local"]) -def test_cluster_table_function(started_cluster, format_version, storage_type): +@pytest.mark.parametrize("cluster_name_as_literal", [True, False]) +def test_cluster_table_function(started_cluster, format_version, storage_type, cluster_name_as_literal): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session @@ -471,7 +472,7 @@ def add_df(mode): # Regular Query only node1 table_function_expr = get_creation_expression( - storage_type, TABLE_NAME, started_cluster, table_function=True + storage_type, TABLE_NAME, started_cluster, table_function=True, cluster_name_as_literal=cluster_name_as_literal ) select_regular = ( instance.query(f"SELECT * FROM {table_function_expr}").strip().split() @@ -492,6 +493,7 @@ def make_query_from_function( run_on_cluster=run_on_cluster, storage_type_as_arg=storage_type_as_arg, storage_type_in_named_collection=storage_type_in_named_collection, + cluster_name_as_literal=cluster_name_as_literal, ) query_id = str(uuid.uuid4()) settings = f"SETTINGS object_storage_cluster='cluster_simple'" if (alt_syntax and not run_on_cluster) else ""