diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp index cd824639d8eb..16ef89c9bbea 100644 --- a/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp +++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp @@ -45,7 +45,13 @@ size_t deserializeThriftStruct(T & out, const char * buf, size_t limit) /// TMemoryBuffer promises to not write to the buffer (in OBSERVE mode), /// so it should be ok to const_cast. uint8_t * cast_buf = const_cast(reinterpret_cast(buf)); - auto trans = std::make_shared(cast_buf, uint32_t(limit)); + + /// Set max message size to avoid 'apache::thrift::transport::TTransportException: MaxMessageSize reached' on big files + /// Similar to https://github.com/ClickHouse/arrow/blob/5cfccd8ea65f33d4517e7409815d761c7650b45d/cpp/src/parquet/thrift_internal.h#L437 + auto configuration = std::make_shared(); + configuration->setMaxMessageSize(std::numeric_limits::max()); + auto trans = std::make_shared(cast_buf, uint32_t(limit), apache::thrift::transport::TMemoryBuffer::OBSERVE, configuration); + apache::thrift::protocol::TCompactProtocolT proto(trans); uint32_t bytes_read = out.read(&proto); chassert(size_t(bytes_read + trans->available_read()) == limit);