From d9966bac3093d409e5dae3ce90182f61dcf4f8a1 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 13 Jan 2025 14:17:56 -0300
Subject: [PATCH 1/4] fix parquet mem alignment issue

---
 src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
index 8f0b39c917c1..dcae54e6ef60 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
+++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
@@ -54,11 +54,14 @@ class ParquetDataBuffer
         auto necessary_bytes = count * sizeof(ParquetType);
         checkAvaible(necessary_bytes);
 
-        const ParquetType* src = reinterpret_cast(data);
-
         for (std::size_t i = 0; i < count; i++)
         {
-            dst[i] = static_cast(src[i]);
+            ParquetType temp;
+			auto offset = i * sizeof(ParquetType);
+
+			// necessary to prevent memory alignment issues https://github.com/ClickHouse/ClickHouse/issues/74512#issuecomment-2587260001
+            std::memcpy(&temp, data + offset, sizeof(ParquetType));
+            dst[i] = static_cast(temp);
         }
 
         consume(necessary_bytes);

From bdb237dff3abc0dcc9bdbf6f3ab8895f7671d357 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 13 Jan 2025 14:19:48 -0300
Subject: [PATCH 2/4] remove std namespace qualifier

---
 src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
index dcae54e6ef60..8a1b5f5616bc 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
+++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
@@ -60,7 +60,7 @@ class ParquetDataBuffer
 			auto offset = i * sizeof(ParquetType);
 
 			// necessary to prevent memory alignment issues https://github.com/ClickHouse/ClickHouse/issues/74512#issuecomment-2587260001
-            std::memcpy(&temp, data + offset,
sizeof(ParquetType));
+            memcpy(&temp, data + offset, sizeof(ParquetType));
             dst[i] = static_cast(temp);
         }
 

From b2d0ddaca4678cb57769d9caa0ba4233dd14e157 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 13 Jan 2025 14:21:34 -0300
Subject: [PATCH 3/4] fix tabs

---
 src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
index 8a1b5f5616bc..f5ebb0a5d512 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
+++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
@@ -57,9 +57,9 @@ class ParquetDataBuffer
         for (std::size_t i = 0; i < count; i++)
         {
             ParquetType temp;
-			auto offset = i * sizeof(ParquetType);
+            auto offset = i * sizeof(ParquetType);
 
-			// necessary to prevent memory alignment issues https://github.com/ClickHouse/ClickHouse/issues/74512#issuecomment-2587260001
+            // necessary to prevent memory alignment issues https://github.com/ClickHouse/ClickHouse/issues/74512#issuecomment-2587260001
             memcpy(&temp, data + offset, sizeof(ParquetType));
             dst[i] = static_cast(temp);
         }

From 1244161b9a3d435b336d1f0cff7e43d201b0166a Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 13 Jan 2025 15:43:35 -0300
Subject: [PATCH 4/4] call unalignedLoad

---
 src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
index f5ebb0a5d512..9e5603eebe26 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
+++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
@@ -56,12 +56,8 @@ class ParquetDataBuffer
 
         for (std::size_t i = 0; i < count; i++)
         {
-            ParquetType temp;
             auto offset = i * sizeof(ParquetType);
-
-            // necessary to prevent memory alignment issues
https://github.com/ClickHouse/ClickHouse/issues/74512#issuecomment-2587260001
-            memcpy(&temp, data + offset, sizeof(ParquetType));
-            dst[i] = static_cast(temp);
+            dst[i] = unalignedLoad(data + offset);
         }
 
         consume(necessary_bytes);