From b2696af4a48a74529a1bfff98eec7dbc39433cf7 Mon Sep 17 00:00:00 2001 From: Sandeep More Date: Wed, 12 Dec 2018 14:51:26 -0500 Subject: [PATCH 1/4] ORC-422 - Fix issue with Predicate push down when lower/upper bounds are set --- .../apache/orc/impl/ColumnStatisticsImpl.java | 5 --- .../org/apache/orc/impl/RecordReaderImpl.java | 5 +-- .../org/apache/orc/TestColumnStatistics.java | 1 - .../apache/orc/impl/TestRecordReaderImpl.java | 37 +++++++++++++++++++ 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java index e983f04c68..ed93a08592 100644 --- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java @@ -35,13 +35,8 @@ import org.apache.orc.TimestampColumnStatistics; import org.apache.orc.TypeDescription; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; -import java.util.Arrays; import java.util.TimeZone; public class ColumnStatisticsImpl implements ColumnStatistics { diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 3c4342a423..3f4f471b98 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -52,7 +52,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.io.DiskRange; import org.apache.hadoop.hive.common.io.DiskRangeList; import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -359,7 +358,7 @@ static Object getMax(ColumnStatistics index, boolean useUTCTimestamp) { } else if (index 
instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMaximum(); } else if (index instanceof StringColumnStatistics) { - return ((StringColumnStatistics) index).getMaximum(); + return ((StringColumnStatistics) index).getUpperBound(); } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMaximum(); } else if (index instanceof DecimalColumnStatistics) { @@ -406,7 +405,7 @@ static Object getMin(ColumnStatistics index, boolean useUTCTimestamp) { } else if (index instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMinimum(); } else if (index instanceof StringColumnStatistics) { - return ((StringColumnStatistics) index).getMinimum(); + return ((StringColumnStatistics) index).getLowerBound(); } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMinimum(); } else if (index instanceof DecimalColumnStatistics) { diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java b/java/core/src/test/org/apache/orc/TestColumnStatistics.java index b1f4dea13f..02091f017e 100644 --- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java +++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java @@ -37,7 +37,6 @@ import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Arrays; import java.util.TimeZone; import static junit.framework.Assert.assertEquals; diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index 529a08b12c..42a2fbd005 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -47,6 +47,7 @@ import java.util.List; import java.util.TimeZone; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; 
import org.apache.hadoop.fs.FileStatus; @@ -354,6 +355,22 @@ private static OrcProto.ColumnStatistics createStringStats(String min, String ma return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build()).build(); } + /* used for testing, simulate setting of upper and lower bounds */ + private static OrcProto.ColumnStatistics createStringStatsUpperLowerBounds(String lowerbound, String upperbound) { + OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder(); + if(upperbound.length() > 1024) { + strStats.setUpperBound(upperbound); + } else { + strStats.setMaximum(upperbound); + } + if(lowerbound.length() > 1024) { + strStats.setLowerBound(lowerbound); + } else { + strStats.setMinimum(lowerbound); + } + return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build()).build(); + } + private static OrcProto.ColumnStatistics createDateStats(int min, int max) { OrcProto.DateStatistics.Builder dateStats = OrcProto.DateStatistics.newBuilder(); dateStats.setMinimum(min); @@ -1649,6 +1666,26 @@ public void testStringEqualsBloomFilter() throws Exception { assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); } + /* Test predicate push down when upper and lower bounds are set */ + @Test + public void testStringBounds() throws Exception { + final String inputString = StringUtils.repeat("a", 1100); + final String testString = inputString+"_"+"15"; + + PredicateLeaf pred = createPredicateLeaf( + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", testString, null); + BloomFilter bf = new BloomFilter(10000); + for (int i = 20; i < 1000; i++) { + bf.addString(inputString+"_"+i); + } + ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createStringStatsUpperLowerBounds(inputString+"_"+"10", inputString+"_"+"500")); + assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); + + bf.addString(testString); + 
assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); + } + + @Test public void testStringInBloomFilter() throws Exception { List args = new ArrayList(); From f7a47c55f8cbfc84ac324ee2542aaac5b9d101ed Mon Sep 17 00:00:00 2001 From: Sandeep More Date: Thu, 13 Dec 2018 21:24:25 -0500 Subject: [PATCH 2/4] ORC-422 - Undo fix imports --- .../src/java/org/apache/orc/impl/ColumnStatisticsImpl.java | 5 +++++ java/core/src/test/org/apache/orc/TestColumnStatistics.java | 1 + 2 files changed, 6 insertions(+) diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java index ed93a08592..e983f04c68 100644 --- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java @@ -35,8 +35,13 @@ import org.apache.orc.TimestampColumnStatistics; import org.apache.orc.TypeDescription; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; +import java.util.Arrays; import java.util.TimeZone; public class ColumnStatisticsImpl implements ColumnStatistics { diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java b/java/core/src/test/org/apache/orc/TestColumnStatistics.java index 02091f017e..b1f4dea13f 100644 --- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java +++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java @@ -37,6 +37,7 @@ import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.Arrays; import java.util.TimeZone; import static junit.framework.Assert.assertEquals; From 03aa51730a08c57c311f1591756ebbe2e15b0238 Mon Sep 17 00:00:00 2001 From: Sandeep More Date: Tue, 26 Feb 2019 19:29:42 -0500 Subject: [PATCH 3/4] ORC-422 [WIP] some improvements --- 
.../org/apache/orc/impl/RecordReaderImpl.java | 171 +++++++++++----- .../orc/impl/TestPredicatePushDownBounds.java | 183 ++++++++++++++++++ .../apache/orc/impl/TestRecordReaderImpl.java | 86 +++----- 3 files changed, 330 insertions(+), 110 deletions(-) create mode 100644 java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 3f4f471b98..77c068ed60 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -17,22 +17,18 @@ */ package org.apache.orc.impl; -import org.apache.orc.CompressionKind; - -import java.io.IOException; -import java.math.BigDecimal; -import java.sql.Date; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TimeZone; - -import org.apache.orc.OrcFile; -import org.apache.orc.util.BloomFilter; -import org.apache.orc.util.BloomFilterIO; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.io.DiskRangeList; +import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; +import org.apache.hadoop.hive.ql.util.TimestampUtils; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.io.Text; import org.apache.orc.BooleanColumnStatistics; import org.apache.orc.ColumnStatistics; import org.apache.orc.CompressionCodec; @@ -42,6 +38,7 @@ import org.apache.orc.DoubleColumnStatistics; 
import org.apache.orc.IntegerColumnStatistics; import org.apache.orc.OrcConf; +import org.apache.orc.OrcFile; import org.apache.orc.OrcProto; import org.apache.orc.Reader; import org.apache.orc.RecordReader; @@ -49,20 +46,21 @@ import org.apache.orc.StripeInformation; import org.apache.orc.TimestampColumnStatistics; import org.apache.orc.TypeDescription; +import org.apache.orc.util.BloomFilter; +import org.apache.orc.util.BloomFilterIO; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.io.DiskRangeList; -import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.ql.util.TimestampUtils; -import org.apache.hadoop.io.Text; + +import java.io.IOException; +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TimeZone; public class RecordReaderImpl implements RecordReader { static final Logger LOG = LoggerFactory.getLogger(RecordReaderImpl.class); @@ -317,16 +315,25 @@ enum Location { * @param the type of the comparision * @return the location of the point */ - static Location compareToRange(Comparable point, T min, T max) { + static Location compareToRange(Comparable point, T min, T max, boolean isLowerBoundSet, boolean isUpperBoundSet) { + int minCompare = point.compareTo(min); if (minCompare < 0) { return Location.BEFORE; + } + /* since min value is truncated when 
we have compare=0, it means the predicate string is BEFORE the min value*/ + else if (minCompare == 0 && isLowerBoundSet) { + return Location.BEFORE; } else if (minCompare == 0) { return Location.MIN; } int maxCompare = point.compareTo(max); if (maxCompare > 0) { return Location.AFTER; + } + /* if upperbound is set then location here will be AFTER */ + else if (maxCompare == 0 && isUpperBoundSet) { + return Location.AFTER; } else if (maxCompare == 0) { return Location.MAX; } @@ -340,7 +347,7 @@ static Location compareToRange(Comparable point, T min, T max) { * @return the object for the maximum value or null if there isn't one */ static Object getMax(ColumnStatistics index) { - return getMax(index, false); + return getMax(index, false, null); } /** @@ -352,13 +359,31 @@ static Object getMax(ColumnStatistics index) { * @param useUTCTimestamp * @return the object for the maximum value or null if there isn't one */ - static Object getMax(ColumnStatistics index, boolean useUTCTimestamp) { + static Object getMax(ColumnStatistics index, boolean useUTCTimestamp, PredicateLeaf predicate) { if (index instanceof IntegerColumnStatistics) { return ((IntegerColumnStatistics) index).getMaximum(); } else if (index instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMaximum(); } else if (index instanceof StringColumnStatistics) { + + /* + final boolean isUpperBoundSet = ((StringColumnStatistics) index).getMaximum() == null + && ((StringColumnStatistics) index).getUpperBound() != null; + + // if the literal is bigger than the stats we won't be able to properly determine if is in the given stats. 
+ if((predicate.getLiteral().toString().getBytes().length > 1024)) { + return UNKNOWN_VALUE; + } + // if the literal is 1024 bytes then check the bounds + else if (predicate.getLiteral().toString().getBytes().length == 1024 && isUpperBoundSet) { + // We won't be able to correctly check for equality + if( (predicate.getOperator() == PredicateLeaf.Operator.EQUALS) || (predicate.getOperator() == PredicateLeaf.Operator.LESS_THAN_EQUALS) ) { + return UNKNOWN_VALUE; + } + } + */ return ((StringColumnStatistics) index).getUpperBound(); + } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMaximum(); } else if (index instanceof DecimalColumnStatistics) { @@ -387,7 +412,7 @@ static Object getMax(ColumnStatistics index, boolean useUTCTimestamp) { * @return the object for the minimum value or null if there isn't one */ static Object getMin(ColumnStatistics index) { - return getMin(index, false); + return getMin(index, false, null); } /** @@ -399,13 +424,30 @@ static Object getMin(ColumnStatistics index) { * @param useUTCTimestamp * @return the object for the minimum value or null if there isn't one */ - static Object getMin(ColumnStatistics index, boolean useUTCTimestamp) { + static Object getMin(ColumnStatistics index, boolean useUTCTimestamp, PredicateLeaf predicate) { if (index instanceof IntegerColumnStatistics) { return ((IntegerColumnStatistics) index).getMinimum(); } else if (index instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMinimum(); } else if (index instanceof StringColumnStatistics) { + /* + final boolean isLowerBoundSet = ((StringColumnStatistics) index).getMinimum() == null + && ((StringColumnStatistics) index).getLowerBound() != null; + + // if the literal is bigger than the stats we won't be able to properly determine if is in the given stats. 
+ if((predicate.getLiteral().toString().getBytes().length > 1024)) { + return UNKNOWN_VALUE; + } + // if the literal is 1024 bytes then check the bounds + else if (predicate.getLiteral().toString().getBytes().length == 1024 && isLowerBoundSet) { + // We won't be able to correctly check for equality + if( (predicate.getOperator() == PredicateLeaf.Operator.EQUALS) || (predicate.getOperator() == PredicateLeaf.Operator.LESS_THAN_EQUALS) ) { + return UNKNOWN_VALUE; + } + } + */ return ((StringColumnStatistics) index).getLowerBound(); + } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMinimum(); } else if (index instanceof DecimalColumnStatistics) { @@ -463,6 +505,7 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, * @return the set of truth values that may be returned for the given * predicate. */ + static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, PredicateLeaf predicate, OrcProto.Stream.Kind kind, @@ -472,8 +515,8 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, TypeDescription.Category type, boolean useUTCTimestamp) { ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, statsProto); - Object minValue = getMin(cs, useUTCTimestamp); - Object maxValue = getMax(cs, useUTCTimestamp); + Object minValue = getMin(cs, useUTCTimestamp, predicate); + Object maxValue = getMax(cs, useUTCTimestamp, predicate); // files written before ORC-135 stores timestamp wrt to local timezone causing issues with PPD. 
// disable PPD for timestamp for all old files if (type.equals(TypeDescription.Category.TIMESTAMP)) { @@ -489,9 +532,20 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, return TruthValue.YES_NO_NULL; } } + + boolean isLowerBoundSet = false; + boolean isUpperBoundSet = false; + if(cs instanceof StringColumnStatistics) { + isLowerBoundSet = ((StringColumnStatistics) cs).getMinimum() == null + && ((StringColumnStatistics) cs).getLowerBound() != null; + + isUpperBoundSet = ((StringColumnStatistics) cs).getMaximum() == null + && ((StringColumnStatistics) cs).getUpperBound() != null; + } + return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(), BloomFilterIO.deserialize(kind, encoding, writerVersion, type, bloomFilter), - useUTCTimestamp); + useUTCTimestamp, isLowerBoundSet, isUpperBoundSet); } /** @@ -524,13 +578,24 @@ public static TruthValue evaluatePredicate(ColumnStatistics stats, PredicateLeaf predicate, BloomFilter bloomFilter, boolean useUTCTimestamp) { - Object minValue = getMin(stats, useUTCTimestamp); - Object maxValue = getMax(stats, useUTCTimestamp); - return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp); + Object minValue = getMin(stats, useUTCTimestamp, predicate); + Object maxValue = getMax(stats, useUTCTimestamp, predicate); + + boolean isLowerBoundSet = false; + boolean isUpperBoundSet = false; + if(stats instanceof StringColumnStatistics) { + isLowerBoundSet = ((StringColumnStatistics) stats).getMinimum() == null + && ((StringColumnStatistics) stats).getLowerBound() != null; + + isUpperBoundSet = ((StringColumnStatistics) stats).getMaximum() == null + && ((StringColumnStatistics) stats).getUpperBound() != null; + } + + return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp, isLowerBoundSet, isUpperBoundSet); } static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, - Object 
max, boolean hasNull, BloomFilter bloomFilter, boolean useUTCTimestamp) { + Object max, boolean hasNull, BloomFilter bloomFilter, boolean useUTCTimestamp, boolean isLowerBoundSet, boolean isUpperBoundSet) { // if we didn't have any values, everything must have been null if (min == null) { if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) { @@ -542,6 +607,10 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, return TruthValue.YES_NO_NULL; } + if(max == UNKNOWN_VALUE) { + return TruthValue.YES_NO; + } + TruthValue result; Object baseObj = predicate.getLiteral(); // Predicate object and stats objects are converted to the type of the predicate object. @@ -549,7 +618,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, Object maxValue = getBaseObjectForComparison(predicate.getType(), max); Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj); - result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull); + result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull, isLowerBoundSet, isUpperBoundSet); if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) { return evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull, useUTCTimestamp); } else { @@ -576,19 +645,21 @@ private static boolean shouldEvaluateBloomFilter(PredicateLeaf predicate, private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Object predObj, Object minValue, Object maxValue, - boolean hasNull) { + boolean hasNull, + boolean isLowerBoundSet, + boolean isUpperBoundSet) { Location loc; switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: - loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); if (loc == Location.BEFORE || loc == Location.AFTER) { return TruthValue.NO; } else { return TruthValue.YES_NO; } case EQUALS: 
- loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); if (minValue.equals(maxValue) && loc == Location.MIN) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc == Location.BEFORE || loc == Location.AFTER) { @@ -597,7 +668,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } case LESS_THAN: - loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); if (loc == Location.AFTER) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc == Location.BEFORE || loc == Location.MIN) { @@ -606,7 +677,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } case LESS_THAN_EQUALS: - loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); if (loc == Location.AFTER || loc == Location.MAX) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc == Location.BEFORE) { @@ -620,7 +691,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec // set for (Object arg : predicate.getLiteralList()) { predObj = getBaseObjectForComparison(predicate.getType(), arg); - loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); if (loc == Location.MIN) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } @@ -630,7 +701,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec // are all of the values outside of the range? 
for (Object arg : predicate.getLiteralList()) { predObj = getBaseObjectForComparison(predicate.getType(), arg); - loc = compareToRange((Comparable) predObj, minValue, maxValue); + loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); if (loc == Location.MIN || loc == Location.MIDDLE || loc == Location.MAX) { return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; @@ -645,10 +716,10 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec } Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0)); - loc = compareToRange((Comparable) predObj1, minValue, maxValue); + loc = compareToRange((Comparable) predObj1, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); if (loc == Location.BEFORE || loc == Location.MIN) { Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1)); - Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue); + Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); if (loc2 == Location.AFTER || loc2 == Location.MAX) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc2 == Location.BEFORE) { diff --git a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java new file mode 100644 index 0000000000..41e480383d --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.orc.ColumnStatistics; +import org.apache.orc.OrcProto; +import org.apache.orc.TypeDescription; +import org.apache.orc.util.BloomFilter; +import org.junit.Test; + +import java.nio.charset.StandardCharsets; + +import static junit.framework.Assert.assertEquals; + +public class TestPredicatePushDownBounds { + + /** + * This test case handles the Equals corner case where the predicate is + * equal to truncated upper and lower bounds. + * @throws Exception + */ + @Test + public void testCornerCases() throws Exception { + + int stringLength = 1100; + //int stringLength = 11; + byte[] utf8F; + byte[] utf8P; + + final TypeDescription schema = TypeDescription.createString(); + final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema); + + BloomFilter bf = new BloomFilter(100); + // FFF... to PPP... 
+ for (int i = 70; i <= 80; i++) { + final String inputString = StringUtils.repeat(Character.toString((char)i), stringLength); + bf.addString(inputString); + } + + final String longStringF = StringUtils.repeat(Character.toString('F'), stringLength); + final String longStringP = StringUtils.repeat(Character.toString('P'), stringLength); + + /* String that matches the upperbound value after truncation */ + final String upperboundString = StringUtils.repeat(Character.toString('P'), 1023) + "Q"; + /* String that matches the lower value after truncation */ + final String lowerboundString = StringUtils.repeat(Character.toString('F'), 1024); + + + final String shortStringF = StringUtils.repeat(Character.toString('F'), 50); + final String shortStringP = StringUtils.repeat(Character.toString('P'), 50) + "Q"; + + + /* Test for a case EQUALS where only upperbound is set */ + final PredicateLeaf predicateUpperBoundEquals = TestRecordReaderImpl.createPredicateLeaf( + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", upperboundString, null); + + /* Test for a case LESS_THAN where only upperbound is set */ + final PredicateLeaf predicateUpperBoundLessThan = TestRecordReaderImpl.createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x", upperboundString, null); + + /* Test for a case LESS_THAN_EQUALS where only upperbound is set */ + final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl.createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x", upperboundString, null); + + utf8F = shortStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + + utf8P = longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + //assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl.evaluatePredicate(stat, predicateUpperBoundEquals, null)); + + 
//assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl.evaluatePredicate(stat, predicateUpperBoundLessThan, null)); + + //assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl.evaluatePredicate(stat, predicateUpperBoundLessThanEquals, null)); + + stat.reset(); + + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + + utf8P = shortStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + /* Test for a case Equals where only lowerbound is set */ + final PredicateLeaf predicateLowerBoundEquals = TestRecordReaderImpl.createPredicateLeaf( + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", lowerboundString, null); + + /* Test for a case LESS_THAN where only lowerbound is set */ + final PredicateLeaf predicateLowerBoundLessThan = TestRecordReaderImpl.createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x", lowerboundString, null); + + /* Test for a case LESS_THAN_EQUALS where only lowerbound is set */ + final PredicateLeaf predicateLowerBoundLessThanEquals = TestRecordReaderImpl.createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x", lowerboundString, null); + + + //assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundEquals, null)); + + //FIXME should be YES ? + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundLessThan, bf)); + + //FIXME should be YES ? + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundLessThanEquals, null)); + + + } + + /** + * A case where the search values fall within the upperbound and lower bound range. 
+ * @throws Exception + */ + @Test + public void testNormalCase() throws Exception { + + int stringLength = 1100; + /* length of string in BF */ + int bfStringLength = 50; + //int stringLength = 11; + byte[] utf8F; + byte[] utf8P; + + final TypeDescription schema = TypeDescription.createString(); + final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema); + + BloomFilter bf = new BloomFilter(100); + // FFF... to PPP... + for (int i = 70; i <= 80; i++) { + final String inputString = StringUtils.repeat(Character.toString((char)i), bfStringLength); + bf.addString(inputString); + } + + final String longStringF = StringUtils.repeat(Character.toString('F'), stringLength); + final String longStringP = StringUtils.repeat(Character.toString('P'), stringLength); + final String predicateString = StringUtils.repeat(Character.toString('I'), 50); + + + /* Test for a case where only upperbound is set */ + final PredicateLeaf predicateEquals = TestRecordReaderImpl.createPredicateLeaf( + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", predicateString, null); + + /* trigger lower bound */ + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + /* trigger upper bound */ + utf8P = longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + assertEquals(SearchArgument.TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(stat, predicateEquals, bf)); + + stat.reset(); + + } + + +} diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index 42a2fbd005..1d3605a28c 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -36,6 +36,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import 
java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; import java.text.DateFormat; @@ -222,61 +223,61 @@ public void testMaxLengthToReader() throws Exception { @Test public void testCompareToRangeInt() throws Exception { assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange(19L, 20L, 40L)); + RecordReaderImpl.compareToRange(19L, 20L, 40L, false, false)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange(41L, 20L, 40L)); + RecordReaderImpl.compareToRange(41L, 20L, 40L, false, false)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange(20L, 20L, 40L)); + RecordReaderImpl.compareToRange(20L, 20L, 40L, false, false)); assertEquals(Location.MIDDLE, - RecordReaderImpl.compareToRange(21L, 20L, 40L)); + RecordReaderImpl.compareToRange(21L, 20L, 40L, false, false)); assertEquals(Location.MAX, - RecordReaderImpl.compareToRange(40L, 20L, 40L)); + RecordReaderImpl.compareToRange(40L, 20L, 40L, false, false)); assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange(0L, 1L, 1L)); + RecordReaderImpl.compareToRange(0L, 1L, 1L, false, false)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange(1L, 1L, 1L)); + RecordReaderImpl.compareToRange(1L, 1L, 1L, false, false)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange(2L, 1L, 1L)); + RecordReaderImpl.compareToRange(2L, 1L, 1L, false, false)); } @Test public void testCompareToRangeString() throws Exception { assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("a", "b", "c")); + RecordReaderImpl.compareToRange("a", "b", "c", false, false)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("d", "b", "c")); + RecordReaderImpl.compareToRange("d", "b", "c", false, false)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("b", "b", "c")); + RecordReaderImpl.compareToRange("b", "b", "c", false, false)); assertEquals(Location.MIDDLE, - RecordReaderImpl.compareToRange("bb", "b", "c")); + 
RecordReaderImpl.compareToRange("bb", "b", "c", false, false)); assertEquals(Location.MAX, - RecordReaderImpl.compareToRange("c", "b", "c")); + RecordReaderImpl.compareToRange("c", "b", "c", false, false)); assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("a", "b", "b")); + RecordReaderImpl.compareToRange("a", "b", "b", false, false)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("b", "b", "b")); + RecordReaderImpl.compareToRange("b", "b", "b", false, false)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("c", "b", "b")); + RecordReaderImpl.compareToRange("c", "b", "b", false, false)); } @Test public void testCompareToCharNeedConvert() throws Exception { assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("apple", "hello", "world")); + RecordReaderImpl.compareToRange("apple", "hello", "world", false, false)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("zombie", "hello", "world")); + RecordReaderImpl.compareToRange("zombie", "hello", "world", false, false)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("hello", "hello", "world")); + RecordReaderImpl.compareToRange("hello", "hello", "world", false, false)); assertEquals(Location.MIDDLE, - RecordReaderImpl.compareToRange("pilot", "hello", "world")); + RecordReaderImpl.compareToRange("pilot", "hello", "world", false, false)); assertEquals(Location.MAX, - RecordReaderImpl.compareToRange("world", "hello", "world")); + RecordReaderImpl.compareToRange("world", "hello", "world", false, false)); assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("apple", "hello", "hello")); + RecordReaderImpl.compareToRange("apple", "hello", "hello", false, false)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("hello", "hello", "hello")); + RecordReaderImpl.compareToRange("hello", "hello", "hello", false, false)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("zombie", "hello", "hello")); + 
RecordReaderImpl.compareToRange("zombie", "hello", "hello", false, false)); } @Test @@ -339,6 +340,7 @@ private static OrcProto.ColumnStatistics createDoubleStats(double min, double ma return OrcProto.ColumnStatistics.newBuilder().setDoubleStatistics(dblStats.build()).build(); } + //fixme private static OrcProto.ColumnStatistics createStringStats(String min, String max, boolean hasNull) { OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder(); @@ -355,22 +357,6 @@ private static OrcProto.ColumnStatistics createStringStats(String min, String ma return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build()).build(); } - /* used for testing, simulate setting of upper and lower bounds */ - private static OrcProto.ColumnStatistics createStringStatsUpperLowerBounds(String lowerbound, String upperbound) { - OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder(); - if(upperbound.length() > 1024) { - strStats.setUpperBound(upperbound); - } else { - strStats.setMaximum(upperbound); - } - if(lowerbound.length() > 1024) { - strStats.setLowerBound(lowerbound); - } else { - strStats.setMinimum(lowerbound); - } - return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build()).build(); - } - private static OrcProto.ColumnStatistics createDateStats(int min, int max) { OrcProto.DateStatistics.Builder dateStats = OrcProto.DateStatistics.newBuilder(); dateStats.setMinimum(min); @@ -1666,26 +1652,6 @@ public void testStringEqualsBloomFilter() throws Exception { assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); } - /* Test predicate push down when upper and lower bounds are set */ - @Test - public void testStringBounds() throws Exception { - final String inputString = StringUtils.repeat("a", 1100); - final String testString = inputString+"_"+"15"; - - PredicateLeaf pred = createPredicateLeaf( - PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, 
"x", testString, null); - BloomFilter bf = new BloomFilter(10000); - for (int i = 20; i < 1000; i++) { - bf.addString(inputString+"_"+i); - } - ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createStringStatsUpperLowerBounds(inputString+"_"+"10", inputString+"_"+"500")); - assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); - - bf.addString(testString); - assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); - } - - @Test public void testStringInBloomFilter() throws Exception { List args = new ArrayList(); From 0b776d7f2e7608cb458de0d3b344e298dcf823b3 Mon Sep 17 00:00:00 2001 From: Sandeep More Date: Wed, 27 Mar 2019 14:26:00 -0400 Subject: [PATCH 4/4] ORC-422 Address review comments --- .../apache/orc/impl/ColumnStatisticsImpl.java | 2 +- .../org/apache/orc/impl/RecordReaderImpl.java | 128 ++++----- .../orc/impl/TestPredicatePushDownBounds.java | 242 ++++++++++++++---- .../apache/orc/impl/TestRecordReaderImpl.java | 48 ++-- 4 files changed, 273 insertions(+), 147 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java index e983f04c68..ae9b276005 100644 --- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java @@ -677,7 +677,7 @@ public String getMinimum() { @Override public String getMaximum() { - /* if we have upper bound is set (in case of truncation) + /* if we have upper bound set (in case of truncation) getMaximum will be null */ if(isUpperBoundSet) { return null; diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 77c068ed60..df78eac34f 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -315,19 +315,24 @@ enum 
Location { * @param the type of the comparision * @return the location of the point */ - static Location compareToRange(Comparable point, T min, T max, boolean isLowerBoundSet, boolean isUpperBoundSet) { + static Location compareToRange(Comparable point, T min, T max, T lowerBound, T upperBound) { - int minCompare = point.compareTo(min); + final boolean isLowerBoundSet = (min == null && lowerBound != null) ? true : false; + final boolean isUpperBoundSet = (max == null && upperBound != null) ? true : false; + + final int minCompare = isLowerBoundSet ? point.compareTo(lowerBound) : point.compareTo(min); if (minCompare < 0) { return Location.BEFORE; } + /* the lower bound is a truncated form of the real minimum, so a point that compares equal to it lies BEFORE the real minimum */ else if (minCompare == 0 && isLowerBoundSet) { return Location.BEFORE; } else if (minCompare == 0) { return Location.MIN; } - int maxCompare = point.compareTo(max); + + int maxCompare = isUpperBoundSet ? point.compareTo(upperBound) : point.compareTo(max); if (maxCompare > 0) { return Location.AFTER; } @@ -347,7 +352,7 @@ else if (maxCompare == 0 && isUpperBoundSet) { * @return the object for the maximum value or null if there isn't one */ static Object getMax(ColumnStatistics index) { - return getMax(index, false, null); + return getMax(index, false); } /** @@ -359,31 +364,13 @@ static Object getMax(ColumnStatistics index) { * @param useUTCTimestamp * @return the object for the maximum value or null if there isn't one */ - static Object getMax(ColumnStatistics index, boolean useUTCTimestamp, PredicateLeaf predicate) { + static Object getMax(ColumnStatistics index, boolean useUTCTimestamp) { if (index instanceof IntegerColumnStatistics) { return ((IntegerColumnStatistics) index).getMaximum(); } else if (index instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMaximum(); } else if (index instanceof StringColumnStatistics) { - - /* - final boolean isUpperBoundSet = 
((StringColumnStatistics) index).getMaximum() == null - && ((StringColumnStatistics) index).getUpperBound() != null; - - // if the literal is bigger than the stats we won't be able to properly determine if is in the given stats. - if((predicate.getLiteral().toString().getBytes().length > 1024)) { - return UNKNOWN_VALUE; - } - // if the literal is 1024 bytes then check the bounds - else if (predicate.getLiteral().toString().getBytes().length == 1024 && isUpperBoundSet) { - // We won't be able to correctly check for equality - if( (predicate.getOperator() == PredicateLeaf.Operator.EQUALS) || (predicate.getOperator() == PredicateLeaf.Operator.LESS_THAN_EQUALS) ) { - return UNKNOWN_VALUE; - } - } - */ return ((StringColumnStatistics) index).getUpperBound(); - } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMaximum(); } else if (index instanceof DecimalColumnStatistics) { @@ -412,7 +399,7 @@ else if (predicate.getLiteral().toString().getBytes().length == 1024 && isUpperB * @return the object for the minimum value or null if there isn't one */ static Object getMin(ColumnStatistics index) { - return getMin(index, false, null); + return getMin(index, false); } /** @@ -424,30 +411,13 @@ static Object getMin(ColumnStatistics index) { * @param useUTCTimestamp * @return the object for the minimum value or null if there isn't one */ - static Object getMin(ColumnStatistics index, boolean useUTCTimestamp, PredicateLeaf predicate) { + static Object getMin(ColumnStatistics index, boolean useUTCTimestamp) { if (index instanceof IntegerColumnStatistics) { return ((IntegerColumnStatistics) index).getMinimum(); } else if (index instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMinimum(); } else if (index instanceof StringColumnStatistics) { - /* - final boolean isLowerBoundSet = ((StringColumnStatistics) index).getMinimum() == null - && ((StringColumnStatistics) index).getLowerBound() != null; - - // if the 
literal is bigger than the stats we won't be able to properly determine if is in the given stats. - if((predicate.getLiteral().toString().getBytes().length > 1024)) { - return UNKNOWN_VALUE; - } - // if the literal is 1024 bytes then check the bounds - else if (predicate.getLiteral().toString().getBytes().length == 1024 && isLowerBoundSet) { - // We won't be able to correctly check for equality - if( (predicate.getOperator() == PredicateLeaf.Operator.EQUALS) || (predicate.getOperator() == PredicateLeaf.Operator.LESS_THAN_EQUALS) ) { - return UNKNOWN_VALUE; - } - } - */ return ((StringColumnStatistics) index).getLowerBound(); - } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMinimum(); } else if (index instanceof DecimalColumnStatistics) { @@ -515,8 +485,8 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, TypeDescription.Category type, boolean useUTCTimestamp) { ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, statsProto); - Object minValue = getMin(cs, useUTCTimestamp, predicate); - Object maxValue = getMax(cs, useUTCTimestamp, predicate); + Object minValue = getMin(cs, useUTCTimestamp); + Object maxValue = getMax(cs, useUTCTimestamp); // files written before ORC-135 stores timestamp wrt to local timezone causing issues with PPD. 
// disable PPD for timestamp for all old files if (type.equals(TypeDescription.Category.TIMESTAMP)) { @@ -533,19 +503,20 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, } } - boolean isLowerBoundSet = false; - boolean isUpperBoundSet = false; + String lowerBound = null; + String upperBound = null; + if(cs instanceof StringColumnStatistics) { - isLowerBoundSet = ((StringColumnStatistics) cs).getMinimum() == null - && ((StringColumnStatistics) cs).getLowerBound() != null; + lowerBound = ((StringColumnStatistics) cs).getLowerBound(); + minValue = ((StringColumnStatistics) cs).getMinimum(); - isUpperBoundSet = ((StringColumnStatistics) cs).getMaximum() == null - && ((StringColumnStatistics) cs).getUpperBound() != null; + upperBound = ((StringColumnStatistics) cs).getUpperBound(); + maxValue = ((StringColumnStatistics) cs).getMaximum(); } return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(), BloomFilterIO.deserialize(kind, encoding, writerVersion, type, bloomFilter), - useUTCTimestamp, isLowerBoundSet, isUpperBoundSet); + useUTCTimestamp, lowerBound, upperBound); } /** @@ -578,26 +549,28 @@ public static TruthValue evaluatePredicate(ColumnStatistics stats, PredicateLeaf predicate, BloomFilter bloomFilter, boolean useUTCTimestamp) { - Object minValue = getMin(stats, useUTCTimestamp, predicate); - Object maxValue = getMax(stats, useUTCTimestamp, predicate); + Object minValue = getMin(stats, useUTCTimestamp); + Object maxValue = getMax(stats, useUTCTimestamp); + + String lowerBound = null; + String upperBound = null; - boolean isLowerBoundSet = false; - boolean isUpperBoundSet = false; if(stats instanceof StringColumnStatistics) { - isLowerBoundSet = ((StringColumnStatistics) stats).getMinimum() == null - && ((StringColumnStatistics) stats).getLowerBound() != null; + lowerBound = ((StringColumnStatistics) stats).getLowerBound(); + minValue = ((StringColumnStatistics) stats).getMinimum(); - isUpperBoundSet = 
((StringColumnStatistics) stats).getMaximum() == null - && ((StringColumnStatistics) stats).getUpperBound() != null; + upperBound = ((StringColumnStatistics) stats).getUpperBound(); + maxValue = ((StringColumnStatistics) stats).getMaximum(); } - return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp, isLowerBoundSet, isUpperBoundSet); + return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp, lowerBound, upperBound); } static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, - Object max, boolean hasNull, BloomFilter bloomFilter, boolean useUTCTimestamp, boolean isLowerBoundSet, boolean isUpperBoundSet) { + Object max, boolean hasNull, BloomFilter bloomFilter, + boolean useUTCTimestamp, Object lowerBound, Object upperBound) { // if we didn't have any values, everything must have been null - if (min == null) { + if (min == null && lowerBound == null) { if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) { return TruthValue.YES; } else { @@ -618,7 +591,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, Object maxValue = getBaseObjectForComparison(predicate.getType(), max); Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj); - result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull, isLowerBoundSet, isUpperBoundSet); + result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull, lowerBound, upperBound); if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) { return evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull, useUTCTimestamp); } else { @@ -646,21 +619,21 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec Object minValue, Object maxValue, boolean hasNull, - boolean isLowerBoundSet, - boolean isUpperBoundSet) { + Object lowerBound, + Object upperBound) { Location loc; 
switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: - loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.BEFORE || loc == Location.AFTER) { return TruthValue.NO; } else { return TruthValue.YES_NO; } case EQUALS: - loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); - if (minValue.equals(maxValue) && loc == Location.MIN) { + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); + if (minValue != null && minValue.equals(maxValue) && loc == Location.MIN) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc == Location.BEFORE || loc == Location.AFTER) { return hasNull ? TruthValue.NO_NULL : TruthValue.NO; @@ -668,7 +641,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } case LESS_THAN: - loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.AFTER) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc == Location.BEFORE || loc == Location.MIN) { @@ -677,7 +650,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } case LESS_THAN_EQUALS: - loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.AFTER || loc == Location.MAX) { return hasNull ? 
TruthValue.YES_NULL : TruthValue.YES; } else if (loc == Location.BEFORE) { @@ -686,12 +659,17 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; } case IN: - if (minValue.equals(maxValue)) { + boolean minEqualsMax = predicate.getType() + .equals(PredicateLeaf.Type.STRING) ? + lowerBound.equals(upperBound) : + minValue.equals(maxValue); + + if (minEqualsMax) { // for a single value, look through to see if that value is in the // set for (Object arg : predicate.getLiteralList()) { predObj = getBaseObjectForComparison(predicate.getType(), arg); - loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.MIN) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } @@ -701,7 +679,7 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec // are all of the values outside of the range? for (Object arg : predicate.getLiteralList()) { predObj = getBaseObjectForComparison(predicate.getType(), arg); - loc = compareToRange((Comparable) predObj, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); + loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound); if (loc == Location.MIN || loc == Location.MIDDLE || loc == Location.MAX) { return hasNull ? 
TruthValue.YES_NO_NULL : TruthValue.YES_NO; @@ -716,10 +694,10 @@ private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Objec } Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0)); - loc = compareToRange((Comparable) predObj1, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); + loc = compareToRange((Comparable) predObj1, minValue, maxValue, lowerBound, upperBound); if (loc == Location.BEFORE || loc == Location.MIN) { Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1)); - Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue, isLowerBoundSet, isUpperBoundSet); + Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue, lowerBound, upperBound); if (loc2 == Location.AFTER || loc2 == Location.MAX) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; } else if (loc2 == Location.BEFORE) { diff --git a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java index 41e480383d..d018efa3e4 100644 --- a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java +++ b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java @@ -20,28 +20,29 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; -import org.apache.orc.ColumnStatistics; -import org.apache.orc.OrcProto; import org.apache.orc.TypeDescription; import org.apache.orc.util.BloomFilter; import org.junit.Test; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; import static junit.framework.Assert.assertEquals; +import static org.apache.orc.impl.TestRecordReaderImpl.createPredicateLeaf; public class TestPredicatePushDownBounds { /** - * This test case handles the Equals corner case where the predicate is - * equal to truncated upper and lower 
bounds. + * This test case handles the Equals corner case where the predicate is equal + * to truncated upper and lower bounds. + * * @throws Exception */ @Test - public void testCornerCases() throws Exception { + public void testCornerCases() { int stringLength = 1100; - //int stringLength = 11; byte[] utf8F; byte[] utf8P; @@ -51,49 +52,58 @@ public void testCornerCases() throws Exception { BloomFilter bf = new BloomFilter(100); // FFF... to PPP... for (int i = 70; i <= 80; i++) { - final String inputString = StringUtils.repeat(Character.toString((char)i), stringLength); + final String inputString = StringUtils + .repeat(Character.toString((char) i), stringLength); bf.addString(inputString); } - final String longStringF = StringUtils.repeat(Character.toString('F'), stringLength); - final String longStringP = StringUtils.repeat(Character.toString('P'), stringLength); + final String longStringF = StringUtils + .repeat(Character.toString('F'), stringLength); + final String longStringP = StringUtils + .repeat(Character.toString('P'), stringLength); /* String that matches the upperbound value after truncation */ - final String upperboundString = StringUtils.repeat(Character.toString('P'), 1023) + "Q"; + final String upperboundString = + StringUtils.repeat(Character.toString('P'), 1023) + "Q"; /* String that matches the lower value after truncation */ - final String lowerboundString = StringUtils.repeat(Character.toString('F'), 1024); - + final String lowerboundString = StringUtils + .repeat(Character.toString('F'), 1024); final String shortStringF = StringUtils.repeat(Character.toString('F'), 50); - final String shortStringP = StringUtils.repeat(Character.toString('P'), 50) + "Q"; - + final String shortStringP = + StringUtils.repeat(Character.toString('P'), 50) + "Q"; /* Test for a case EQUALS where only upperbound is set */ - final PredicateLeaf predicateUpperBoundEquals = TestRecordReaderImpl.createPredicateLeaf( - PredicateLeaf.Operator.EQUALS, 
PredicateLeaf.Type.STRING, "x", upperboundString, null); + final PredicateLeaf predicateUpperBoundEquals = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.EQUALS, + PredicateLeaf.Type.STRING, "x", upperboundString, null); /* Test for a case LESS_THAN where only upperbound is set */ - final PredicateLeaf predicateUpperBoundLessThan = TestRecordReaderImpl.createPredicateLeaf( - PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x", upperboundString, null); + final PredicateLeaf predicateUpperBoundLessThan = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN, + PredicateLeaf.Type.STRING, "x", upperboundString, null); /* Test for a case LESS_THAN_EQUALS where only upperbound is set */ - final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl.createPredicateLeaf( - PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x", upperboundString, null); + final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, + PredicateLeaf.Type.STRING, "x", upperboundString, null); utf8F = shortStringF.getBytes(StandardCharsets.UTF_8); stat.increment(); stat.updateString(utf8F, 0, utf8F.length, 1); - utf8P = longStringP.getBytes(StandardCharsets.UTF_8); stat.increment(); stat.updateString(utf8P, 0, utf8P.length, 1); - //assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl.evaluatePredicate(stat, predicateUpperBoundEquals, null)); + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateUpperBoundEquals, null)); - //assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl.evaluatePredicate(stat, predicateUpperBoundLessThan, null)); + assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl + .evaluatePredicate(stat, predicateUpperBoundLessThan, null)); - //assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl.evaluatePredicate(stat, 
predicateUpperBoundLessThanEquals, null)); + assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl + .evaluatePredicate(stat, predicateUpperBoundLessThanEquals, null)); stat.reset(); @@ -101,37 +111,40 @@ public void testCornerCases() throws Exception { stat.increment(); stat.updateString(utf8F, 0, utf8F.length, 1); - utf8P = shortStringP.getBytes(StandardCharsets.UTF_8); stat.increment(); stat.updateString(utf8P, 0, utf8P.length, 1); /* Test for a case Equals where only lowerbound is set */ - final PredicateLeaf predicateLowerBoundEquals = TestRecordReaderImpl.createPredicateLeaf( - PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", lowerboundString, null); + final PredicateLeaf predicateLowerBoundEquals = createPredicateLeaf( + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", + lowerboundString, null); /* Test for a case LESS_THAN where only lowerbound is set */ - final PredicateLeaf predicateLowerBoundLessThan = TestRecordReaderImpl.createPredicateLeaf( - PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x", lowerboundString, null); + final PredicateLeaf predicateLowerBoundLessThan = createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x", + lowerboundString, null); /* Test for a case LESS_THAN_EQUALS where only lowerbound is set */ - final PredicateLeaf predicateLowerBoundLessThanEquals = TestRecordReaderImpl.createPredicateLeaf( - PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x", lowerboundString, null); + final PredicateLeaf predicateLowerBoundLessThanEquals = createPredicateLeaf( + PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x", + lowerboundString, null); + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateLowerBoundEquals, null)); - //assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundEquals, null)); - - //FIXME should be YES ? 
- assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundLessThan, bf)); - - //FIXME should be YES ? - assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundLessThanEquals, null)); + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateLowerBoundLessThan, bf)); + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateLowerBoundLessThanEquals, null)); } /** - * A case where the search values fall within the upperbound and lower bound range. + * A case where the search values fall within the upperbound and lower bound + * range. + * * @throws Exception */ @Test @@ -150,18 +163,23 @@ public void testNormalCase() throws Exception { BloomFilter bf = new BloomFilter(100); // FFF... to PPP... for (int i = 70; i <= 80; i++) { - final String inputString = StringUtils.repeat(Character.toString((char)i), bfStringLength); + final String inputString = StringUtils + .repeat(Character.toString((char) i), bfStringLength); bf.addString(inputString); } - final String longStringF = StringUtils.repeat(Character.toString('F'), stringLength); - final String longStringP = StringUtils.repeat(Character.toString('P'), stringLength); - final String predicateString = StringUtils.repeat(Character.toString('I'), 50); + final String longStringF = StringUtils + .repeat(Character.toString('F'), stringLength); + final String longStringP = StringUtils + .repeat(Character.toString('P'), stringLength); + final String predicateString = StringUtils + .repeat(Character.toString('I'), 50); /* Test for a case where only upperbound is set */ - final PredicateLeaf predicateEquals = TestRecordReaderImpl.createPredicateLeaf( - PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", predicateString, null); + final PredicateLeaf predicateEquals = createPredicateLeaf( + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, 
"x", + predicateString, null); /* trigger lower bound */ utf8F = longStringF.getBytes(StandardCharsets.UTF_8); @@ -173,11 +191,141 @@ public void testNormalCase() throws Exception { stat.increment(); stat.updateString(utf8P, 0, utf8P.length, 1); - assertEquals(SearchArgument.TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(stat, predicateEquals, bf)); + assertEquals(SearchArgument.TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicate(stat, predicateEquals, bf)); + + } + + /** + * Test for IN search arg when upper and lower bounds are set. + * + * @throws Exception + */ + @Test + public void testIN() throws Exception { + int stringLength = 1100; + byte[] utf8F; + byte[] utf8P; + + final TypeDescription schema = TypeDescription.createString(); + final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema); + + final BloomFilter bf = new BloomFilter(100); + // FFF... to PPP... + for (int i = 70; i <= 80; i++) { + final String inputString = StringUtils + .repeat(Character.toString((char) i), stringLength); + bf.addString(inputString); + } + + final String longStringF = StringUtils + .repeat(Character.toString('F'), stringLength); + final String longStringP = StringUtils + .repeat(Character.toString('P'), stringLength); + + /* String that matches the upperbound value after truncation */ + final String upperboundString = + StringUtils.repeat(Character.toString('P'), 1023) + "Q"; + /* String that matches the lower value after truncation */ + final String lowerboundString = StringUtils + .repeat(Character.toString('F'), 1024); + + final String shortStringF = StringUtils.repeat(Character.toString('F'), 50); + final String shortStringP = + StringUtils.repeat(Character.toString('P'), 50) + "Q"; + + final List args = new ArrayList(); + args.add(upperboundString); + + /* set upper bound */ + utf8F = shortStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + utf8P = 
longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + /* Test for a case IN where only upper bound is set and test literal is equal to upperbound */ + final PredicateLeaf predicateUpperBoundSet = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + + assertEquals(SearchArgument.TruthValue.NO, + RecordReaderImpl.evaluatePredicate(stat, predicateUpperBoundSet, null)); + + /* Test for lower bound set only */ + args.clear(); + args.add(lowerboundString); stat.reset(); + /* set lower bound */ + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); - } + utf8P = shortStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + /* Test for a case IN where only lower bound is set and the test literal is lowerbound string */ + final PredicateLeaf predicateLowerBoundSet = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + + assertEquals(SearchArgument.TruthValue.NO, + RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundSet, null)); + + /* Second IN predicate over the same args (the lowerbound literal); not evaluated in this test */ + final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + + + /* Test the case where both upper and lower bounds are set */ + args.clear(); + args.add(lowerboundString); + args.add(upperboundString); + + stat.reset(); + /* set upper and lower bound */ + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + utf8P = longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + final PredicateLeaf
predicateUpperLowerBoundSet = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + + assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl + .evaluatePredicate(stat, predicateUpperLowerBoundSet, null)); + + /* test the boundary condition */ + args.clear(); + args.add(longStringF); + args.add(longStringP); + + stat.reset(); + /* set upper and lower bound */ + utf8F = longStringF.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8F, 0, utf8F.length, 1); + + utf8P = longStringP.getBytes(StandardCharsets.UTF_8); + stat.increment(); + stat.updateString(utf8P, 0, utf8P.length, 1); + + final PredicateLeaf predicateUpperLowerBoundSetBoundary = TestRecordReaderImpl + .createPredicateLeaf(PredicateLeaf.Operator.IN, + PredicateLeaf.Type.STRING, "x", null, args); + assertEquals(SearchArgument.TruthValue.YES_NO, RecordReaderImpl + .evaluatePredicate(stat, predicateUpperLowerBoundSetBoundary, null)); + + } } diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index 1d3605a28c..37083ee2fb 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -223,61 +223,61 @@ public void testMaxLengthToReader() throws Exception { @Test public void testCompareToRangeInt() throws Exception { assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange(19L, 20L, 40L, false, false)); + RecordReaderImpl.compareToRange(19L, 20L, 40L, null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange(41L, 20L, 40L, false, false)); + RecordReaderImpl.compareToRange(41L, 20L, 40L, null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange(20L, 20L, 40L, false, false)); + RecordReaderImpl.compareToRange(20L, 20L, 40L, null, null)); assertEquals(Location.MIDDLE, - 
RecordReaderImpl.compareToRange(21L, 20L, 40L, false, false)); + RecordReaderImpl.compareToRange(21L, 20L, 40L, null, null)); assertEquals(Location.MAX, - RecordReaderImpl.compareToRange(40L, 20L, 40L, false, false)); + RecordReaderImpl.compareToRange(40L, 20L, 40L, null, null)); assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange(0L, 1L, 1L, false, false)); + RecordReaderImpl.compareToRange(0L, 1L, 1L, null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange(1L, 1L, 1L, false, false)); + RecordReaderImpl.compareToRange(1L, 1L, 1L, null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange(2L, 1L, 1L, false, false)); + RecordReaderImpl.compareToRange(2L, 1L, 1L, null, null)); } @Test public void testCompareToRangeString() throws Exception { assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("a", "b", "c", false, false)); + RecordReaderImpl.compareToRange("a", "b", "c", null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("d", "b", "c", false, false)); + RecordReaderImpl.compareToRange("d", "b", "c", null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("b", "b", "c", false, false)); + RecordReaderImpl.compareToRange("b", "b", "c", null, null)); assertEquals(Location.MIDDLE, - RecordReaderImpl.compareToRange("bb", "b", "c", false, false)); + RecordReaderImpl.compareToRange("bb", "b", "c", null, null)); assertEquals(Location.MAX, - RecordReaderImpl.compareToRange("c", "b", "c", false, false)); + RecordReaderImpl.compareToRange("c", "b", "c", null, null)); assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("a", "b", "b", false, false)); + RecordReaderImpl.compareToRange("a", "b", "b", null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("b", "b", "b", false, false)); + RecordReaderImpl.compareToRange("b", "b", "b", null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("c", "b", "b", false, false)); + 
RecordReaderImpl.compareToRange("c", "b", "b", null, null)); } @Test public void testCompareToCharNeedConvert() throws Exception { assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("apple", "hello", "world", false, false)); + RecordReaderImpl.compareToRange("apple", "hello", "world", null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("zombie", "hello", "world", false, false)); + RecordReaderImpl.compareToRange("zombie", "hello", "world", null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("hello", "hello", "world", false, false)); + RecordReaderImpl.compareToRange("hello", "hello", "world", null, null)); assertEquals(Location.MIDDLE, - RecordReaderImpl.compareToRange("pilot", "hello", "world", false, false)); + RecordReaderImpl.compareToRange("pilot", "hello", "world", null, null)); assertEquals(Location.MAX, - RecordReaderImpl.compareToRange("world", "hello", "world", false, false)); + RecordReaderImpl.compareToRange("world", "hello", "world", null, null)); assertEquals(Location.BEFORE, - RecordReaderImpl.compareToRange("apple", "hello", "hello", false, false)); + RecordReaderImpl.compareToRange("apple", "hello", "hello", null, null)); assertEquals(Location.MIN, - RecordReaderImpl.compareToRange("hello", "hello", "hello", false, false)); + RecordReaderImpl.compareToRange("hello", "hello", "hello", null, null)); assertEquals(Location.AFTER, - RecordReaderImpl.compareToRange("zombie", "hello", "hello", false, false)); + RecordReaderImpl.compareToRange("zombie", "hello", "hello", null, null)); } @Test