From 857e0b3f187b241799f04373ccc6d3c7b788bb41 Mon Sep 17 00:00:00 2001 From: BePPPower Date: Mon, 18 Mar 2024 16:08:48 +0800 Subject: [PATCH 1/2] fix: compute ParquetInt96 timestamps with signed integer types --- .../exec/format/parquet/parquet_column_convert.h | 13 +++++++------ be/src/vec/exec/format/parquet/parquet_common.cpp | 6 +++--- be/src/vec/exec/format/parquet/parquet_common.h | 11 ++++++----- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h index 39ee29f663f9ed..1d6cfd63515366 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.h +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h @@ -318,12 +318,13 @@ struct Int96toTimestamp : public ColumnConvert { auto& data = static_cast*>(dst_col.get())->get_data(); for (int i = 0; i < rows; i++) { - ParquetInt96 x = ParquetInt96_data[i]; - auto& num = data[start_idx + i]; - auto& value = reinterpret_cast&>(num); - int64_t micros = x.to_timestamp_micros(); - value.from_unixtime(micros / 1000000, *_convert_params->ctz); - value.set_microsecond(micros % 1000000); + ParquetInt96 src_cell_data = ParquetInt96_data[i]; + auto& dst_value = + reinterpret_cast&>(data[start_idx + i]); + + int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); + dst_value.from_unixtime(timestamp_with_micros / 1000000, *_convert_params->ctz); + dst_value.set_microsecond(timestamp_with_micros % 1000000); } return Status::OK(); } diff --git a/be/src/vec/exec/format/parquet/parquet_common.cpp b/be/src/vec/exec/format/parquet/parquet_common.cpp index cbef2a0f286d48..33e9f11242b46d 100644 --- a/be/src/vec/exec/format/parquet/parquet_common.cpp +++ b/be/src/vec/exec/format/parquet/parquet_common.cpp @@ -24,9 +24,9 @@ namespace doris::vectorized { -const uint32_t ParquetInt96::JULIAN_EPOCH_OFFSET_DAYS = 2440588; -const uint64_t ParquetInt96::MICROS_IN_DAY = 86400000000; -const uint64_t ParquetInt96::NANOS_PER_MICROSECOND = 1000; +const int32_t 
ParquetInt96::JULIAN_EPOCH_OFFSET_DAYS = 2440588; +const int64_t ParquetInt96::MICROS_IN_DAY = 86400000000; +const int64_t ParquetInt96::NANOS_PER_MICROSECOND = 1000; ColumnSelectVector::ColumnSelectVector(const uint8_t* filter_map, size_t filter_map_size, bool filter_all) { diff --git a/be/src/vec/exec/format/parquet/parquet_common.h b/be/src/vec/exec/format/parquet/parquet_common.h index 6667ab2c101637..6a63517350bbd0 100644 --- a/be/src/vec/exec/format/parquet/parquet_common.h +++ b/be/src/vec/exec/format/parquet/parquet_common.h @@ -51,8 +51,9 @@ struct ParquetInt96 { uint64_t lo; // time of nanoseconds in a day uint32_t hi; // days from julian epoch - inline uint64_t to_timestamp_micros() const { - return (hi - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY + lo / NANOS_PER_MICROSECOND; + inline int64_t to_timestamp_micros() const { + return ((int32_t)hi - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY + + int64_t(lo) / NANOS_PER_MICROSECOND; } inline __int128 to_int128() const { __int128 ans = 0; @@ -60,9 +61,9 @@ struct ParquetInt96 { return ans; } - static const uint32_t JULIAN_EPOCH_OFFSET_DAYS; - static const uint64_t MICROS_IN_DAY; - static const uint64_t NANOS_PER_MICROSECOND; + static const int32_t JULIAN_EPOCH_OFFSET_DAYS; + static const int64_t MICROS_IN_DAY; + static const int64_t NANOS_PER_MICROSECOND; }; #pragma pack() static_assert(sizeof(ParquetInt96) == 12, "The size of ParquetInt96 is not 12."); From 8760b82c0d978534be4485b66c4e94455e068574 Mon Sep 17 00:00:00 2001 From: BePPPower Date: Tue, 19 Mar 2024 10:39:51 +0800 Subject: [PATCH 2/2] fix2: declare ParquetInt96 lo/hi as signed and drop the casts --- be/src/vec/exec/format/parquet/parquet_common.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/be/src/vec/exec/format/parquet/parquet_common.h b/be/src/vec/exec/format/parquet/parquet_common.h index 6a63517350bbd0..2cf745882ee139 100644 --- a/be/src/vec/exec/format/parquet/parquet_common.h +++ b/be/src/vec/exec/format/parquet/parquet_common.h @@ -48,12 +48,11 @@ struct RowRange { 
#pragma pack(1) struct ParquetInt96 { - uint64_t lo; // time of nanoseconds in a day - uint32_t hi; // days from julian epoch + int64_t lo; // time of nanoseconds in a day + int32_t hi; // days from julian epoch inline int64_t to_timestamp_micros() const { - return ((int32_t)hi - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY + - int64_t(lo) / NANOS_PER_MICROSECOND; + return (hi - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY + lo / NANOS_PER_MICROSECOND; } inline __int128 to_int128() const { __int128 ans = 0;