From 78700f3d55f4389265e85eaf432dc3510822d35f Mon Sep 17 00:00:00 2001 From: shuai-xu Date: Fri, 25 Oct 2024 17:23:22 +0800 Subject: [PATCH 1/4] ORC-2054: [C++] fix return wrong result if lack of hasnull --- c++/src/sargs/PredicateLeaf.cc | 4 ++++ c++/test/TestPredicateLeaf.cc | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/c++/src/sargs/PredicateLeaf.cc b/c++/src/sargs/PredicateLeaf.cc index d9df1c5d5c..d625558024 100644 --- a/c++/src/sargs/PredicateLeaf.cc +++ b/c++/src/sargs/PredicateLeaf.cc @@ -701,6 +701,10 @@ namespace orc { } } + // Files written by Trino may lack the hasnull field. + if (!colStats.has_hasnull()) + return TruthValue::YES_NO_NULL; + bool allNull = colStats.has_null() && colStats.number_of_values() == 0; if (operator_ == Operator::IS_NULL || ((operator_ == Operator::EQUALS || operator_ == Operator::NULL_SAFE_EQUALS) && diff --git a/c++/test/TestPredicateLeaf.cc b/c++/test/TestPredicateLeaf.cc index 2703776e39..2738083d0d 100644 --- a/c++/test/TestPredicateLeaf.cc +++ b/c++/test/TestPredicateLeaf.cc @@ -168,6 +168,12 @@ namespace orc { return colStats; } + static proto::ColumnStatistics createIncompleteNullStats() { + proto::ColumnStatistics colStats; + colStats.set_numberofvalues(0); + return colStats; + } + static TruthValue evaluate(const PredicateLeaf& pred, const proto::ColumnStatistics& pbStats, const BloomFilter* bf = nullptr) { return pred.evaluate(WriterVersion_ORC_135, pbStats, bf); @@ -663,4 +669,10 @@ namespace orc { evaluate(pred8, createTimestampStats(2114380800, 1109000, 2114380800, 6789100))); } + TEST(TestPredicateLeaf, testLackOfSataistics) { + PredicateLeaf pred(PredicateLeaf::Operator::IS_NULL, PredicateDataType::STRING, 1, {}); + EXPECT_EQ(TruthValue::YES_NO, evaluate(pred, createStringStats("c", "d", true))); + EXPECT_EQ(TruthValue::YES_NO_NULL, evaluate(pred, createIncompleteNullStats())); + } + } // namespace orc From 082cd8b340a8548a572df9df9df718c20748a59b Mon Sep 17 00:00:00 2001 From: shuai-xu Date: 
Mon, 28 Oct 2024 10:16:58 +0800 Subject: [PATCH 2/4] fix build break --- c++/src/sargs/PredicateLeaf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c++/src/sargs/PredicateLeaf.cc b/c++/src/sargs/PredicateLeaf.cc index d625558024..cf9cf5b2f9 100644 --- a/c++/src/sargs/PredicateLeaf.cc +++ b/c++/src/sargs/PredicateLeaf.cc @@ -702,7 +702,7 @@ namespace orc { } // Files written by Trino may lack the hasnull field. - if (!colStats.has_hasnull()) + if (!colStats.has_has_null()) return TruthValue::YES_NO_NULL; bool allNull = colStats.has_null() && colStats.number_of_values() == 0; From af88c4084cd230c3951b484970500121b1334ba7 Mon Sep 17 00:00:00 2001 From: shuai-xu Date: Mon, 28 Oct 2024 11:32:12 +0800 Subject: [PATCH 3/4] fix build break --- c++/test/TestPredicateLeaf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c++/test/TestPredicateLeaf.cc b/c++/test/TestPredicateLeaf.cc index 2738083d0d..3946123ec5 100644 --- a/c++/test/TestPredicateLeaf.cc +++ b/c++/test/TestPredicateLeaf.cc @@ -170,7 +170,7 @@ namespace orc { static proto::ColumnStatistics createIncompleteNullStats() { proto::ColumnStatistics colStats; - colStats.set_numberofvalues(0); + colStats.set_number_of_values(0); return colStats; } From bf7c8d397904eb2aa0f7c62ec1be06d04cc23310 Mon Sep 17 00:00:00 2001 From: shuai-xu Date: Mon, 28 Oct 2024 14:11:23 +0800 Subject: [PATCH 4/4] fix code style --- c++/src/sargs/PredicateLeaf.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/c++/src/sargs/PredicateLeaf.cc b/c++/src/sargs/PredicateLeaf.cc index cf9cf5b2f9..5c77616836 100644 --- a/c++/src/sargs/PredicateLeaf.cc +++ b/c++/src/sargs/PredicateLeaf.cc @@ -702,8 +702,7 @@ namespace orc { } // Files written by Trino may lack the hasnull field. 
- if (!colStats.has_has_null()) - return TruthValue::YES_NO_NULL; + if (!colStats.has_has_null()) return TruthValue::YES_NO_NULL; bool allNull = colStats.has_null() && colStats.number_of_values() == 0; if (operator_ == Operator::IS_NULL ||