Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions be/src/vec/exec/format/parquet/parquet_pred_cmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,17 @@ class ParquetPredicate {
ParquetInt96 datetime96_max =
*reinterpret_cast<const ParquetInt96*>(encoded_max.data());
int64_t micros_max = datetime96_max.to_timestamp_micros();

// From Trino: Parquet INT96 timestamp values were compared incorrectly
// for the purposes of producing statistics by older Parquet writers,
// so PARQUET-1065 deprecated them. As a result, any writer that produced stats
// was producing unusable, incorrect values, except in the special case where
// min == max, where an incorrect ordering is not material to the result.
// PARQUET-1026 made binary stats available and valid in that special case.
if (micros_min != micros_max) {
return false;
}

if constexpr (std::is_same_v<CppType, VecDateTimeValue> ||
std::is_same_v<CppType, DateV2Value<DateTimeV2ValueType>>) {
min_value.from_unixtime(micros_min / 1000000, ctz);
Expand Down