From 8ec22854f8cd53bac0896ed5a6a6036a0dbe92fc Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Tue, 4 Nov 2025 16:59:14 +0100 Subject: [PATCH 1/3] More metrics for Iceberg --- src/Common/ProfileEvents.cpp | 8 +++++ .../AzureBlobStorage/AzureObjectStorage.cpp | 4 +++ .../ObjectStorages/S3/S3ObjectStorage.cpp | 4 +++ src/IO/S3/getObjectInfo.cpp | 3 ++ src/IO/S3Common.cpp | 10 ------ src/Interpreters/IcebergMetadataLog.cpp | 34 +++++++++++++++++++ src/Interpreters/IcebergMetadataLog.h | 9 +++++ .../Iceberg/AvroForIcebergDeserializer.cpp | 10 ++++++ .../DataLakes/Iceberg/IcebergMetadata.cpp | 14 ++------ .../ObjectStorage/DataLakes/Iceberg/Utils.cpp | 7 ++++ 10 files changed, 81 insertions(+), 22 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 610b732cafd2..119b25ba1d6f 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -299,6 +299,11 @@ M(IcebergTrivialCountOptimizationApplied, "Trivial count optimization applied while reading from Iceberg", ValueType::Number) \ M(IcebergVersionHintUsed, "Number of times version-hint.text has been used.", ValueType::Number) \ M(IcebergMinMaxIndexPrunedFiles, "Number of skipped files by using MinMax index in Iceberg", ValueType::Number) \ + M(IcebergAvroFileParsing, "Number of times avro metadata files have been parsed.", ValueType::Number) \ + M(IcebergAvroFileParsingMicroseconds, "Time spent for parsing avro metadata files for Iceberg tables.", ValueType::Microseconds) \ + M(IcebergJsonFileParsing, "Number of times json metadata files have been parsed.", ValueType::Number) \ + M(IcebergJsonFileParsingMicroseconds, "Time spent for parsing json metadata files for Iceberg tables.", ValueType::Microseconds) \ + \ M(JoinBuildTableRowCount, "Total number of rows in the build table for a JOIN operation.", ValueType::Number) \ M(JoinProbeTableRowCount, "Total number of rows in the probe table for a JOIN operation.", ValueType::Number) \ M(JoinResultRowCount, "Total number of rows in the result of a JOIN operation.", ValueType::Number) \ @@ -580,7 +585,9 @@ The server successfully detected this situation and will download merged part fr M(S3DeleteObjects, "Number of S3 API DeleteObject(s) calls.", ValueType::Number) \ M(S3CopyObject, "Number of S3 API CopyObject calls.", ValueType::Number) \ M(S3ListObjects, "Number of S3 API ListObjects calls.", ValueType::Number) \ + M(S3ListObjectsMicroseconds, "Time of S3 API ListObjects execution.", ValueType::Microseconds) \ M(S3HeadObject, "Number of S3 API HeadObject calls.", ValueType::Number) \ + M(S3HeadObjectMicroseconds, "Time of S3 API HeadObject execution.", ValueType::Microseconds) \ M(S3GetObjectAttributes, "Number of S3 API GetObjectAttributes calls.", ValueType::Number) \ M(S3CreateMultipartUpload, "Number of S3 API CreateMultipartUpload calls.", ValueType::Number) \ M(S3UploadPartCopy, "Number of S3 API UploadPartCopy calls.", ValueType::Number) \ @@ -634,6 +641,7 @@ The server successfully detected this situation and will download merged part fr M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls", ValueType::Number) \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.", ValueType::Number) \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.", ValueType::Number) \ + M(AzureListObjectsMicroseconds, "Time of Azure blob storage API ListObjects execution.", ValueType::Microseconds) \ M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.", ValueType::Number) \ M(AzureCreateContainer, "Number of Azure blob storage API CreateContainer calls.", ValueType::Number) \ \ diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 4fe9c5c43117..1d7039e0624e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace CurrentMetrics @@ -25,6 +26,7 @@ namespace CurrentMetrics namespace ProfileEvents { extern const Event AzureListObjects; + extern const Event AzureListObjectsMicroseconds; extern const Event DiskAzureListObjects; extern const Event AzureDeleteObjects; extern const Event DiskAzureDeleteObjects; @@ -76,6 +78,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync ProfileEvents::increment(ProfileEvents::AzureListObjects); if (client->IsClientForDisk()) ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + ProfileEventTimeIncrement watch(ProfileEvents::AzureListObjectsMicroseconds); chassert(batch.empty()); auto blob_list_response = client->ListBlobs(options); @@ -182,6 +185,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith ProfileEvents::increment(ProfileEvents::AzureListObjects); if (client_ptr->IsClientForDisk()) ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + ProfileEventTimeIncrement watch(ProfileEvents::AzureListObjectsMicroseconds); blob_list_response = client_ptr->ListBlobs(options); const auto & blobs_list = blob_list_response.Blobs; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 320173bd7a49..9953682e6348 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -30,11 +30,13 @@ #include #include #include +#include namespace ProfileEvents { extern const Event S3ListObjects; + extern const Event S3ListObjectsMicroseconds; extern const Event DiskS3DeleteObjects; extern const Event DiskS3ListObjects; } @@ -136,6 +138,7 @@ class S3IteratorAsync final : public IObjectStorageIteratorAsync { ProfileEvents::increment(ProfileEvents::S3ListObjects); ProfileEvents::increment(ProfileEvents::DiskS3ListObjects); + ProfileEventTimeIncrement watch(ProfileEvents::S3ListObjectsMicroseconds); auto outcome = client->ListObjectsV2(*request); @@ -263,6 +266,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet { ProfileEvents::increment(ProfileEvents::S3ListObjects); ProfileEvents::increment(ProfileEvents::DiskS3ListObjects); + ProfileEventTimeIncrement watch(ProfileEvents::S3ListObjectsMicroseconds); outcome = client.get()->ListObjectsV2(request); throwIfError(outcome); diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index b69f2a23a0dc..b85743e4c5fe 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #if USE_AWS_S3 @@ -15,6 +16,7 @@ namespace ProfileEvents extern const Event S3GetObject; extern const Event S3GetObjectAttributes; extern const Event S3HeadObject; + extern const Event S3HeadObjectMicroseconds; extern const Event DiskS3GetObject; extern const Event DiskS3GetObjectAttributes; extern const Event DiskS3HeadObject; @@ -32,6 +34,7 @@ namespace ProfileEvents::increment(ProfileEvents::S3HeadObject); if (client.isClientForDisk()) ProfileEvents::increment(ProfileEvents::DiskS3HeadObject); + ProfileEventTimeIncrement watch(ProfileEvents::S3HeadObjectMicroseconds); S3::HeadObjectRequest req; req.SetBucket(bucket); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index f12de6a7b546..cc321ba7af8c 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -19,16 +19,6 @@ #include -namespace ProfileEvents -{ - extern const Event S3GetObjectAttributes; - extern const Event S3GetObjectMetadata; - extern const Event S3HeadObject; - extern const Event DiskS3GetObjectAttributes; - extern const Event DiskS3GetObjectMetadata; - extern const Event DiskS3HeadObject; -} - namespace DB { diff --git a/src/Interpreters/IcebergMetadataLog.cpp b/src/Interpreters/IcebergMetadataLog.cpp index ab8266dfdc30..10cc34f9f2f6 100644 --- a/src/Interpreters/IcebergMetadataLog.cpp +++ b/src/Interpreters/IcebergMetadataLog.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -106,4 +107,37 @@ void insertRowToLogTable( .metadata_content = row, .row_in_file = row_in_file}); } + +String dumpMetadataObjectToString(const Poco::JSON::Object::Ptr & metadata_object) +{ + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + Poco::JSON::Stringifier::stringify(metadata_object, oss); + return removeEscapedSlashes(oss.str()); +} + +void insertRowToLogTable( + const ContextPtr & local_context, + const Poco::JSON::Object::Ptr metadata_object, + IcebergMetadataLogLevel row_log_level, + const String & table_path, + const String & file_path, + std::optional row_in_file) +{ + IcebergMetadataLogLevel set_log_level = local_context->getSettingsRef()[Setting::iceberg_metadata_log_level].value; + if (set_log_level < row_log_level) + return; + timespec spec{}; + if (clock_gettime(CLOCK_REALTIME, &spec)) + throw ErrnoException(ErrorCodes::CANNOT_CLOCK_GETTIME, "Cannot clock_gettime"); + + Context::getGlobalContextInstance()->getIcebergMetadataLog()->add( + DB::IcebergMetadataLogElement{ + .current_time = spec.tv_sec, + .query_id = local_context->getCurrentQueryId(), + .content_type = row_log_level, + .table_path = table_path, + .file_path = file_path, + .metadata_content = dumpMetadataObjectToString(metadata_object), + .row_in_file = row_in_file}); +} } diff --git a/src/Interpreters/IcebergMetadataLog.h b/src/Interpreters/IcebergMetadataLog.h index d4c65c4d6346..f1e6718585b3 100644 --- a/src/Interpreters/IcebergMetadataLog.h +++ b/src/Interpreters/IcebergMetadataLog.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -31,6 +32,14 @@ void insertRowToLogTable( const String & file_path, std::optional row_in_file); +void insertRowToLogTable( + const ContextPtr & local_context, + const Poco::JSON::Object::Ptr metadata_object, + IcebergMetadataLogLevel row_log_level, + const String & table_path, + const String & file_path, + std::optional row_in_file); + class IcebergMetadataLog : public SystemLog { using SystemLog::SystemLog; diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/AvroForIcebergDeserializer.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/AvroForIcebergDeserializer.cpp index 5057b5df9d2d..a574b06f2eeb 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/AvroForIcebergDeserializer.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/AvroForIcebergDeserializer.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB::ErrorCodes { @@ -19,6 +20,12 @@ namespace DB::ErrorCodes extern const int INCORRECT_DATA; } +namespace ProfileEvents +{ + extern const Event IcebergAvroFileParsing; + extern const Event IcebergAvroFileParsingMicroseconds; +} + namespace DB::Iceberg { @@ -30,6 +37,9 @@ try : buffer(std::move(buffer_)) , manifest_file_path(manifest_file_path_) { + ProfileEvents::increment(ProfileEvents::IcebergAvroFileParsing); + ProfileEventTimeIncrement watch(ProfileEvents::IcebergAvroFileParsingMicroseconds); + auto manifest_file_reader = std::make_unique(std::make_unique(*buffer)); diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp index 12fcb29b24ac..66235695eaf2 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp @@ -110,16 +110,6 @@ extern const SettingsBool allow_experimental_insert_into_iceberg; extern const SettingsBool allow_experimental_iceberg_compaction; } -namespace -{ -String dumpMetadataObjectToString(const Poco::JSON::Object::Ptr & metadata_object) -{ - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - Poco::JSON::Stringifier::stringify(metadata_object, oss); - return removeEscapedSlashes(oss.str()); -} -} - using namespace Iceberg; @@ -245,7 +235,7 @@ bool IcebergMetadata::update(const ContextPtr & local_context) insertRowToLogTable( local_context, - dumpMetadataObjectToString(metadata_object), + metadata_object, DB::IcebergMetadataLogLevel::Metadata, configuration_ptr->getRawPath().path, metadata_file_path, @@ -775,7 +765,7 @@ DataLakeMetadataPtr IcebergMetadata::create( insertRowToLogTable( local_context, - dumpMetadataObjectToString(object), + object, DB::IcebergMetadataLogLevel::Metadata, configuration_ptr->getRawPath().path, metadata_file_path, diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp index 260c50ea4080..644197a140fb 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp @@ -40,6 +40,8 @@ #include #include +#include + using namespace DB; @@ -65,6 +67,8 @@ namespace DB::DataLakeStorageSetting namespace ProfileEvents { extern const Event IcebergVersionHintUsed; + extern const Event IcebergJsonFileParsing; + extern const Event IcebergJsonFileParsingMicroseconds; } namespace DB::Setting @@ -310,6 +314,9 @@ Poco::JSON::Object::Ptr getMetadataJSONObject( return json_str; }; + ProfileEvents::increment(ProfileEvents::IcebergJsonFileParsing); + ProfileEventTimeIncrement watch(ProfileEvents::IcebergJsonFileParsingMicroseconds); + String metadata_json_str; if (cache_ptr) metadata_json_str = cache_ptr->getOrSetTableMetadata(IcebergMetadataFilesCache::getKey(configuration_ptr, metadata_file_path), create_fn); From d4b55b5225a5a3373f46234052c1e32c9e471fbe Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Wed, 5 Nov 2025 14:28:08 +0100 Subject: [PATCH 2/3] Move json serialization under condition in all insertRowToLogTable calls --- .../AzureBlobStorage/AzureObjectStorage.cpp | 7 +++- .../ObjectStorages/S3/S3ObjectStorage.cpp | 15 ++++++-- src/Interpreters/IcebergMetadataLog.cpp | 37 +------------------ src/Interpreters/IcebergMetadataLog.h | 10 +---- .../DataLakes/Iceberg/IcebergMetadata.cpp | 16 +++++++- .../DataLakes/Iceberg/ManifestFile.cpp | 6 ++- .../Iceberg/StatelessMetadataFileGetter.cpp | 6 ++- 7 files changed, 41 insertions(+), 56 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 1d7039e0624e..e5e139b04c5c 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -185,9 +185,12 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith ProfileEvents::increment(ProfileEvents::AzureListObjects); if (client_ptr->IsClientForDisk()) ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); - ProfileEventTimeIncrement watch(ProfileEvents::AzureListObjectsMicroseconds); - blob_list_response = client_ptr->ListBlobs(options); + { + ProfileEventTimeIncrement watch(ProfileEvents::AzureListObjectsMicroseconds); + blob_list_response = client_ptr->ListBlobs(options); + } + const auto & blobs_list = blob_list_response.Blobs; for (const auto & blob : blobs_list) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 9953682e6348..5ae8da17fa93 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -138,9 +138,13 @@ class S3IteratorAsync final : public IObjectStorageIteratorAsync { ProfileEvents::increment(ProfileEvents::S3ListObjects); ProfileEvents::increment(ProfileEvents::DiskS3ListObjects); - ProfileEventTimeIncrement watch(ProfileEvents::S3ListObjectsMicroseconds); - auto outcome = client->ListObjectsV2(*request); + Aws::S3::Model::ListObjectsV2Outcome outcome; + + { + ProfileEventTimeIncrement watch(ProfileEvents::S3ListObjectsMicroseconds); + outcome = client->ListObjectsV2(*request); + } /// Outcome failure will be handled on the caller side. if (outcome.IsSuccess()) @@ -266,9 +270,12 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet { ProfileEvents::increment(ProfileEvents::S3ListObjects); ProfileEvents::increment(ProfileEvents::DiskS3ListObjects); - ProfileEventTimeIncrement watch(ProfileEvents::S3ListObjectsMicroseconds); - outcome = client.get()->ListObjectsV2(request); + { + ProfileEventTimeIncrement watch(ProfileEvents::S3ListObjectsMicroseconds); + outcome = client.get()->ListObjectsV2(request); + } + throwIfError(outcome); auto result = outcome.GetResult(); diff --git a/src/Interpreters/IcebergMetadataLog.cpp b/src/Interpreters/IcebergMetadataLog.cpp index 10cc34f9f2f6..316665eb2fc5 100644 --- a/src/Interpreters/IcebergMetadataLog.cpp +++ b/src/Interpreters/IcebergMetadataLog.cpp @@ -84,7 +84,7 @@ void IcebergMetadataLogElement::appendToBlock(MutableColumns & columns) const void insertRowToLogTable( const ContextPtr & local_context, - String row, + std::function get_row, IcebergMetadataLogLevel row_log_level, const String & table_path, const String & file_path, @@ -104,40 +104,7 @@ void insertRowToLogTable( .content_type = row_log_level, .table_path = table_path, .file_path = file_path, - .metadata_content = row, - .row_in_file = row_in_file}); -} - -String dumpMetadataObjectToString(const Poco::JSON::Object::Ptr & metadata_object) -{ - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - Poco::JSON::Stringifier::stringify(metadata_object, oss); - return removeEscapedSlashes(oss.str()); -} - -void insertRowToLogTable( - const ContextPtr & local_context, - const Poco::JSON::Object::Ptr metadata_object, - IcebergMetadataLogLevel row_log_level, - const String & table_path, - const String & file_path, - std::optional row_in_file) -{ - IcebergMetadataLogLevel set_log_level = local_context->getSettingsRef()[Setting::iceberg_metadata_log_level].value; - if (set_log_level < row_log_level) - return; - timespec spec{}; - if (clock_gettime(CLOCK_REALTIME, &spec)) - throw ErrnoException(ErrorCodes::CANNOT_CLOCK_GETTIME, "Cannot clock_gettime"); - - Context::getGlobalContextInstance()->getIcebergMetadataLog()->add( - DB::IcebergMetadataLogElement{ - .current_time = spec.tv_sec, - .query_id = local_context->getCurrentQueryId(), - .content_type = row_log_level, - .table_path = table_path, - .file_path = file_path, - .metadata_content = dumpMetadataObjectToString(metadata_object), + .metadata_content = get_row(), .row_in_file = row_in_file}); } } diff --git a/src/Interpreters/IcebergMetadataLog.h b/src/Interpreters/IcebergMetadataLog.h index f1e6718585b3..2c408a155693 100644 --- a/src/Interpreters/IcebergMetadataLog.h +++ b/src/Interpreters/IcebergMetadataLog.h @@ -26,15 +26,7 @@ struct IcebergMetadataLogElement void insertRowToLogTable( const ContextPtr & local_context, - String row, - IcebergMetadataLogLevel row_log_level, - const String & table_path, - const String & file_path, - std::optional row_in_file); - -void insertRowToLogTable( - const ContextPtr & local_context, - const Poco::JSON::Object::Ptr metadata_object, + std::function get_row, IcebergMetadataLogLevel row_log_level, const String & table_path, const String & file_path, diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp index 66235695eaf2..2fab3e3349f1 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp @@ -110,6 +110,16 @@ extern const SettingsBool allow_experimental_insert_into_iceberg; extern const SettingsBool allow_experimental_iceberg_compaction; } +namespace +{ +String dumpMetadataObjectToString(const Poco::JSON::Object::Ptr & metadata_object) +{ + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + Poco::JSON::Stringifier::stringify(metadata_object, oss); + return removeEscapedSlashes(oss.str()); +} +} + using namespace Iceberg; @@ -233,9 +243,10 @@ bool IcebergMetadata::update(const ContextPtr & local_context) updateState(local_context, metadata_object); + auto dump_metadata = [&]()->String { return dumpMetadataObjectToString(metadata_object); }; insertRowToLogTable( local_context, - metadata_object, + dump_metadata, DB::IcebergMetadataLogLevel::Metadata, configuration_ptr->getRawPath().path, metadata_file_path, @@ -763,9 +774,10 @@ DataLakeMetadataPtr IcebergMetadata::create( auto format_version = object->getValue(f_format_version); + auto dump_metadata = [&]()->String { return dumpMetadataObjectToString(object); }; insertRowToLogTable( local_context, - object, + dump_metadata, DB::IcebergMetadataLogLevel::Metadata, configuration_ptr->getRawPath().path, metadata_file_path, diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp index 1fdde4972ef0..f4260051ff8d 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp @@ -155,9 +155,10 @@ ManifestFileContent::ManifestFileContent( const String & path_to_manifest_file_) : path_to_manifest_file(path_to_manifest_file_) { + auto dump_metadata = [&]()->String { return manifest_file_deserializer.getMetadataContent(); }; insertRowToLogTable( context, - manifest_file_deserializer.getMetadataContent(), + dump_metadata, DB::IcebergMetadataLogLevel::ManifestFileMetadata, common_path, path_to_manifest_file, @@ -230,9 +231,10 @@ ManifestFileContent::ManifestFileContent( for (size_t i = 0; i < manifest_file_deserializer.rows(); ++i) { + auto dump_row_metadata = [&]()->String { return manifest_file_deserializer.getContent(i); }; insertRowToLogTable( context, - manifest_file_deserializer.getContent(i), + dump_row_metadata, DB::IcebergMetadataLogLevel::ManifestFileEntry, common_path, path_to_manifest_file, diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp index 9372a13e2074..eb7123b0d8e1 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp @@ -148,9 +148,10 @@ ManifestFileCacheKeys getManifestList( ManifestFileCacheKeys manifest_file_cache_keys; + auto dump_metadata = [&]()->String { return manifest_list_deserializer.getMetadataContent(); }; insertRowToLogTable( local_context, - manifest_list_deserializer.getMetadataContent(), + dump_metadata, DB::IcebergMetadataLogLevel::ManifestListMetadata, configuration_ptr->getRawPath().path, filename, @@ -185,9 +186,10 @@ ManifestFileCacheKeys getManifestList( manifest_file_cache_keys.emplace_back( manifest_file_name, added_sequence_number, added_snapshot_id.safeGet(), content_type); + auto dump_row_metadata = [&]()->String { return manifest_list_deserializer.getContent(i); }; insertRowToLogTable( local_context, - manifest_list_deserializer.getContent(i), + dump_row_metadata, DB::IcebergMetadataLogLevel::ManifestListEntry, configuration_ptr->getRawPath().path, filename, From b746e015d76545fe3134831c73b37debea380fb5 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin <56930273+ianton-ru@users.noreply.github.com> Date: Fri, 7 Nov 2025 15:29:11 +0100 Subject: [PATCH 3/3] Add comment Refactor insertRowToLogTable to use get_row function for lazy evaluation and improve exit logic. --- src/Interpreters/IcebergMetadataLog.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/IcebergMetadataLog.h b/src/Interpreters/IcebergMetadataLog.h index 2c408a155693..042615a20a43 100644 --- a/src/Interpreters/IcebergMetadataLog.h +++ b/src/Interpreters/IcebergMetadataLog.h @@ -24,6 +24,8 @@ struct IcebergMetadataLogElement void appendToBlock(MutableColumns & columns) const; }; +/// Here `get_row` function is used instead `row` string to calculate string only when required. +/// Inside `insertRowToLogTable` code can exit immediately after `iceberg_metadata_log_level` setting check. void insertRowToLogTable( const ContextPtr & local_context, std::function get_row,