From d09df401935f4be0bc3032d48b670a0d7622ee4e Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Wed, 1 Oct 2025 19:03:18 +0200 Subject: [PATCH 1/3] port changes --- src/Databases/DataLake/GlueCatalog.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/Databases/DataLake/GlueCatalog.cpp b/src/Databases/DataLake/GlueCatalog.cpp index 970eb3853efa..b659f9cc4467 100644 --- a/src/Databases/DataLake/GlueCatalog.cpp +++ b/src/Databases/DataLake/GlueCatalog.cpp @@ -405,10 +405,12 @@ bool GlueCatalog::empty() const bool GlueCatalog::classifyTimestampTZ(const String & column_name, const TableMetadata & table_metadata) const { String metadata_path; + String metadata_uri; if (auto table_specific_properties = table_metadata.getDataLakeSpecificProperties(); table_specific_properties.has_value()) { metadata_path = table_specific_properties->iceberg_metadata_file_location; + metadata_uri = metadata_path; if (metadata_path.starts_with("s3:/")) metadata_path = metadata_path.substr(5); @@ -420,22 +422,24 @@ bool GlueCatalog::classifyTimestampTZ(const String & column_name, const TableMet else throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Metadata specific properties should be defined"); - if (!metadata_objects.get(metadata_path)) + if (!metadata_objects.get(metadata_uri)) { DB::ASTStorage * storage = table_engine_definition->as(); DB::ASTs args = storage->engine->arguments->children; - auto table_endpoint = settings[DB::DatabaseDataLakeSetting::storage_endpoint].value; + String storage_endpoint = !settings[DB::DatabaseDataLakeSetting::storage_endpoint].empty() ? settings[DB::DatabaseDataLakeSetting::storage_endpoint].value : metadata_uri; + if (args.empty()) - args.emplace_back(std::make_shared(table_endpoint)); + args.emplace_back(std::make_shared(storage_endpoint)); else - args[0] = std::make_shared(table_endpoint); + args[0] = std::make_shared(storage_endpoint); - if (args.size() == 1 && table_metadata.hasStorageCredentials()) + if (args.size() == 1) { - auto storage_credentials = table_metadata.getStorageCredentials(); - if (storage_credentials) - storage_credentials->addCredentialsToEngineArgs(args); + if (table_metadata.hasStorageCredentials()) + table_metadata.getStorageCredentials()->addCredentialsToEngineArgs(args); + else if (!credentials.IsExpiredOrEmpty()) + DataLake::S3Credentials(credentials.GetAWSAccessKeyId(), credentials.GetAWSSecretKey(), credentials.GetSessionToken()).addCredentialsToEngineArgs(args); } auto storage_settings = std::make_shared(); @@ -454,9 +458,9 @@ bool GlueCatalog::classifyTimestampTZ(const String & column_name, const TableMet Poco::JSON::Parser parser; Poco::Dynamic::Var result = parser.parse(metadata_file); auto metadata_object = result.extract(); - metadata_objects.set(metadata_path, std::make_shared(metadata_object)); + metadata_objects.set(metadata_uri, std::make_shared(metadata_object)); } - auto metadata_object = *metadata_objects.get(metadata_path); + auto metadata_object = *metadata_objects.get(metadata_uri); auto current_schema_id = metadata_object->getValue("current-schema-id"); auto schemas = metadata_object->getArray(Iceberg::f_schemas); for (size_t i = 0; i < schemas->size(); ++i) From 0c5a8d142c42cdcea37b3650c58ba271a1330624 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Wed, 1 Oct 2025 14:35:36 -0400 Subject: [PATCH 2/3] disable new_tests_check --- ci/workflows/pull_request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/workflows/pull_request.py b/ci/workflows/pull_request.py index e828049d8485..17fce2181876 100644 --- a/ci/workflows/pull_request.py +++ b/ci/workflows/pull_request.py @@ -85,7 +85,7 @@ # "python3 ./ci/jobs/scripts/workflow_hooks/pr_description.py", # NOTE (strtgbb): relies on labels we don't use "python3 ./ci/jobs/scripts/workflow_hooks/version_log.py", # "python3 ./ci/jobs/scripts/workflow_hooks/quick_sync.py", # NOTE (strtgbb): we don't do this - "python3 ./ci/jobs/scripts/workflow_hooks/new_tests_check.py", + # "python3 ./ci/jobs/scripts/workflow_hooks/new_tests_check.py", # NOTE (strtgbb): we don't use this ], workflow_filter_hooks=[should_skip_job], post_hooks=[ From a988a045e1e35b3e68e34cfdfea5fa9c83a6216c Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Wed, 1 Oct 2025 21:05:41 +0200 Subject: [PATCH 3/3] fix build --- src/Databases/DataLake/GlueCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DataLake/GlueCatalog.cpp b/src/Databases/DataLake/GlueCatalog.cpp index b659f9cc4467..e8e0578b7381 100644 --- a/src/Databases/DataLake/GlueCatalog.cpp +++ b/src/Databases/DataLake/GlueCatalog.cpp @@ -427,7 +427,7 @@ bool GlueCatalog::classifyTimestampTZ(const String & column_name, const TableMet DB::ASTStorage * storage = table_engine_definition->as(); DB::ASTs args = storage->engine->arguments->children; - String storage_endpoint = !settings[DB::DatabaseDataLakeSetting::storage_endpoint].empty() ? settings[DB::DatabaseDataLakeSetting::storage_endpoint].value : metadata_uri; + String storage_endpoint = !settings[DB::DatabaseDataLakeSetting::storage_endpoint].value.empty() ? settings[DB::DatabaseDataLakeSetting::storage_endpoint].value : metadata_uri; if (args.empty()) args.emplace_back(std::make_shared(storage_endpoint));