From 1a5d1d1839f620d855462a00b26b025c3d9d0f6b Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 8 Dec 2025 16:29:54 -0300
Subject: [PATCH 1/2] max msg size parquet reader v3

---
 src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp
index cd824639d8eb..16ef89c9bbea 100644
--- a/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp
+++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp
@@ -45,7 +45,13 @@ size_t deserializeThriftStruct(T & out, const char * buf, size_t limit)
     /// TMemoryBuffer promises to not write to the buffer (in OBSERVE mode),
     /// so it should be ok to const_cast.
     uint8_t * cast_buf = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(buf));
-    auto trans = std::make_shared<apache::thrift::transport::TMemoryBuffer>(cast_buf, uint32_t(limit));
+
+    /// Set max message size to avoid 'apache::thrift::transport::TTransportException: MaxMessageSize reached' on big files
+    /// Similar to https://github.com/ClickHouse/arrow/blob/5cfccd8ea65f33d4517e7409815d761c7650b45d/cpp/src/parquet/thrift_internal.h#L437
+    auto configuration = std::make_shared<apache::thrift::TConfiguration>();
+    configuration->setMaxMessageSize(std::numeric_limits<int>::max());
+    auto trans = std::make_shared<apache::thrift::transport::TMemoryBuffer>(cast_buf, uint32_t(limit), apache::thrift::transport::TMemoryBuffer::OBSERVE, configuration);
+
     apache::thrift::protocol::TCompactProtocolT<apache::thrift::transport::TMemoryBuffer> proto(trans);
     uint32_t bytes_read = out.read(&proto);
     chassert(size_t(bytes_read + trans->available_read()) == limit);

From ae038bbc56658267808e4b8794cf9d25c3144f84 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 8 Dec 2025 18:08:15 -0300
Subject: [PATCH 2/2] empty to retrigger ci