diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h index b993370a159895..e53b76afb8b19b 100644 --- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h +++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h @@ -256,6 +256,17 @@ class ParquetPredicate { ParquetInt96 datetime96_max = *reinterpret_cast(encoded_max.data()); int64_t micros_max = datetime96_max.to_timestamp_micros(); + + // From Trino: Parquet INT96 timestamp values were compared incorrectly + // for the purposes of producing statistics by older parquet writers, + // so PARQUET-1065 deprecated them. The result is that any writer that produced stats + // was producing unusable incorrect values, except the special case where min == max + // and an incorrect ordering would not be material to the result. + // PARQUET-1026 made binary stats available and valid in that special case. + if (micros_min != micros_max) { + return false; + } + if constexpr (std::is_same_v || std::is_same_v>) { min_value.from_unixtime(micros_min / 1000000, ctz);