From 0b9e106304d8a0bdf2a82d5d4d65799e3ab9b2b1 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 2 Nov 2022 21:37:41 -0700 Subject: [PATCH] fix nested column range index range computation (#13297) * fix nested column range index range computation * simplify, add missing bounds check for FixedIndexed --- .../druid/segment/data/FixedIndexed.java | 7 + ...NestedFieldLiteralColumnIndexSupplier.java | 36 +++-- .../druid/segment/data/FixedIndexedTest.java | 3 + ...edFieldLiteralColumnIndexSupplierTest.java | 130 +++++++++++++++++- 4 files changed, 160 insertions(+), 16 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java index 10a29dff9196..5275f19d0c86 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java +++ b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexed.java @@ -21,6 +21,7 @@ import com.google.common.base.Preconditions; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.TypeStrategy; @@ -112,6 +113,12 @@ public int size() @Override public T get(int index) { + if (index < 0) { + throw new IAE("Index[%s] < 0", index); + } + if (index >= size) { + throw new IAE("Index[%d] >= size[%d]", index, size); + } if (hasNull) { if (index == 0) { return null; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java index d3030c66f22c..df2d064f6d9d 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplier.java @@ -186,6 +186,7 @@ private IntIntPair getLocalRangeFromDictionary( { int globalStartIndex, globalEndIndex; int localStartIndex, localEndIndex; + // start with standard range finding in global value dictionary if (startValue == null) { globalStartIndex = adjust == 0 ? 1 : adjust; // global index 0 is always the null value } else { @@ -196,15 +197,6 @@ private IntIntPair getLocalRangeFromDictionary( globalStartIndex = adjust + (-(found + 1)); } } - // with starting global index settled, now lets find starting local index - int localFound = dictionary.indexOf(globalStartIndex); - if (localFound < 0) { - // the first valid global index is not within the local dictionary, so the insertion point is where we begin - localStartIndex = -(localFound + 1); - } else { - // valid global index in local dictionary, start here - localStartIndex = localFound; - } if (endValue == null) { globalEndIndex = globalDictionary.size() + adjust; @@ -217,16 +209,32 @@ private IntIntPair getLocalRangeFromDictionary( } } globalEndIndex = Math.max(globalStartIndex, globalEndIndex); - // end index is not inclusive, so we find the last value in the local dictionary that falls within the range - int localEndFound = dictionary.indexOf(globalEndIndex - 1); + + if (globalStartIndex == globalEndIndex) { + return new IntIntImmutablePair(0, 0); + } + + // with global dictionary id range settled, now lets map that onto a local dictionary id range + int localFound = dictionary.indexOf(globalStartIndex); + if (localFound < 0) { + // the first valid global index is not within the local dictionary, so the insertion point is where we begin + localStartIndex = -(localFound + 1); + } else { + // valid global index in local dictionary, start here + localStartIndex = localFound; + } + // global end index is exclusive already, so we don't adjust local end index even for missing values + int localEndFound = dictionary.indexOf(globalEndIndex); if (localEndFound < 0) { localEndIndex = -localEndFound; } else { - // add 1 because the last valid global end value is in the local dictionary, and end index is exclusive - localEndIndex = localEndFound + 1; + localEndIndex = localEndFound; } - return new IntIntImmutablePair(localStartIndex, Math.min(dictionary.size(), localEndIndex)); + localStartIndex = Math.min(localStartIndex, dictionary.size()); + localEndIndex = Math.max(localStartIndex, Math.min(dictionary.size(), localEndIndex)); + + return new IntIntImmutablePair(localStartIndex, localEndIndex); } diff --git a/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java index dcfc5c058e1c..d175c7cf7d61 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/FixedIndexedTest.java @@ -74,6 +74,9 @@ public void testGet() throws IOException Assert.assertEquals(LONGS[i], fixedIndexed.get(i)); Assert.assertEquals(i, fixedIndexed.indexOf(LONGS[i])); } + + Assert.assertThrows(IllegalArgumentException.class, () -> fixedIndexed.get(-1)); + Assert.assertThrows(IllegalArgumentException.class, () -> fixedIndexed.get(LONGS.length)); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java index 850baf4412bb..8931746102e2 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldLiteralColumnIndexSupplierTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableSet; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.query.filter.DruidPredicateFactory; @@ -34,6 +35,7 @@ import org.apache.druid.segment.column.LexicographicalRangeIndex; import org.apache.druid.segment.column.NullValueIndex; import org.apache.druid.segment.column.NumericRangeIndex; +import org.apache.druid.segment.column.SpatialIndex; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.column.TypeStrategies; import org.apache.druid.segment.data.BitmapSerdeFactory; @@ -136,6 +138,9 @@ public void testSingleTypeStringColumnValueIndex() throws IOException NullValueIndex nullIndex = indexSupplier.as(NullValueIndex.class); Assert.assertNotNull(nullIndex); + // sanity check to make sure we don't return indexes we don't support + Assert.assertNull(indexSupplier.as(SpatialIndex.class)); + // 10 rows // local: [b, foo, fooo, z] // column: [foo, b, fooo, b, z, fooo, z, b, b, foo] @@ -497,6 +502,9 @@ public void testSingleTypeLongColumnValueSetIndex() throws IOException StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); Assert.assertNotNull(valueSetIndex); + // sanity check to make sure we don't return indexes we don't support + Assert.assertNull(indexSupplier.as(SpatialIndex.class)); + // 10 rows // local: [1, 3, 100, 300] // column: [100, 1, 300, 1, 3, 3, 100, 300, 300, 1] @@ -611,6 +619,25 @@ public void testSingleTypeLongColumnWithNullValueSetIndex() throws IOException Assert.assertEquals(0.5, columnIndex.estimateSelectivity(10), 0.0); bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 1, 3, 4, 7, 9); + + // set index with null + TreeSet treeSet = new TreeSet<>(Comparators.naturalNullsFirst()); + treeSet.add(null); + treeSet.add("1"); + treeSet.add("3"); + treeSet.add("300"); + columnIndex = valueSetIndex.forSortedValues(treeSet); + Assert.assertNotNull(columnIndex); + Assert.assertEquals(0.8, columnIndex.estimateSelectivity(10), 0.0); + bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap, 1, 2, 3, 4, 5, 7, 8, 9); + + // null value should really use NullValueIndex, but this works for classic reasons + columnIndex = valueSetIndex.forValue(null); + Assert.assertNotNull(columnIndex); + Assert.assertEquals(0.3, columnIndex.estimateSelectivity(10), 0.0); + bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap, 2, 5, 8); } @Test @@ -674,6 +701,9 @@ public void testSingleTypeDoubleColumnValueSetIndex() throws IOException StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); Assert.assertNotNull(valueSetIndex); + // sanity check to make sure we don't return indexes we don't support + Assert.assertNull(indexSupplier.as(SpatialIndex.class)); + // 10 rows // local: [1.1, 1.2, 3.3, 6.6] // column: [1.1, 1.1, 1.2, 3.3, 1.2, 6.6, 3.3, 1.2, 1.1, 3.3] @@ -720,10 +750,10 @@ public void testSingleTypeDoubleColumnRangeIndex() throws IOException forRange = rangeIndex.forRange(1.1, true, 3.3, true); Assert.assertNotNull(forRange); - Assert.assertEquals(0.6, forRange.estimateSelectivity(10), 0.0); + Assert.assertEquals(0.3, forRange.estimateSelectivity(10), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); - checkBitmap(bitmap, 2, 3, 4, 6, 7, 9); + checkBitmap(bitmap, 2, 4, 7); forRange = rangeIndex.forRange(null, true, null, true); Assert.assertEquals(1.0, forRange.estimateSelectivity(10), 0.0); @@ -734,6 +764,55 @@ public void testSingleTypeDoubleColumnRangeIndex() throws IOException Assert.assertEquals(1.0, forRange.estimateSelectivity(10), 0.0); bitmap = forRange.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + + forRange = rangeIndex.forRange(1.111, true, 1.19, true); + Assert.assertNotNull(forRange); + Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0); + + bitmap = forRange.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap); + + forRange = rangeIndex.forRange(1.01, true, 1.09, true); + Assert.assertNotNull(forRange); + Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0); + + bitmap = forRange.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap); + + forRange = rangeIndex.forRange(0.05, true, 0.98, true); + Assert.assertNotNull(forRange); + Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0); + + bitmap = forRange.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap); + + forRange = rangeIndex.forRange(0.05, true, 1.1, true); + Assert.assertNotNull(forRange); + Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0); + + bitmap = forRange.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap); + + forRange = rangeIndex.forRange(8.99, true, 10.10, true); + Assert.assertNotNull(forRange); + Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0); + + bitmap = forRange.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap); + + forRange = rangeIndex.forRange(8.99, true, 10.10, true); + Assert.assertNotNull(forRange); + Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0); + + bitmap = forRange.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap); + + forRange = rangeIndex.forRange(10.00, true, 10.10, true); + Assert.assertNotNull(forRange); + Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0); + + bitmap = forRange.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap); } @Test @@ -802,6 +881,25 @@ public void testSingleTypeDoubleColumnWithNullValueSetIndex() throws IOException Assert.assertEquals(0.4, columnIndex.estimateSelectivity(10), 0.0); bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 2, 4, 7, 9); + + // set index with null + TreeSet treeSet = new TreeSet<>(Comparators.naturalNullsFirst()); + treeSet.add(null); + treeSet.add("1.2"); + treeSet.add("3.3"); + treeSet.add("7.7"); + columnIndex = valueSetIndex.forSortedValues(treeSet); + Assert.assertNotNull(columnIndex); + Assert.assertEquals(0.7, columnIndex.estimateSelectivity(10), 0.0); + bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap, 1, 2, 3, 4, 6, 7, 9); + + // null value should really use NullValueIndex, but this works for classic reasons + columnIndex = valueSetIndex.forValue(null); + Assert.assertNotNull(columnIndex); + Assert.assertEquals(0.3, columnIndex.estimateSelectivity(10), 0.0); + bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap, 1, 3, 6); } @Test @@ -865,6 +963,9 @@ public void testVariantNullValueIndex() throws IOException NullValueIndex nullIndex = indexSupplier.as(NullValueIndex.class); Assert.assertNotNull(nullIndex); + // sanity check to make sure we don't return indexes we don't support + Assert.assertNull(indexSupplier.as(SpatialIndex.class)); + // 10 rows // local: [null, b, z, 1, 300, 1.1, 9.9] // column: [1, b, null, 9.9, 300, 1, z, null, 1.1, b] @@ -912,6 +1013,26 @@ public void testVariantValueSetIndex() throws IOException Assert.assertEquals(0.4, columnIndex.estimateSelectivity(10), 0.0); bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); checkBitmap(bitmap, 1, 3, 4, 9); + + // set index with null + TreeSet treeSet = new TreeSet<>(Comparators.naturalNullsFirst()); + treeSet.add(null); + treeSet.add("b"); + treeSet.add("300"); + treeSet.add("9.9"); + treeSet.add("1.6"); + columnIndex = valueSetIndex.forSortedValues(treeSet); + Assert.assertNotNull(columnIndex); + Assert.assertEquals(0.6, columnIndex.estimateSelectivity(10), 0.0); + bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap, 1, 2, 3, 4, 7, 9); + + // null value should really use NullValueIndex, but this works for classic reasons + columnIndex = valueSetIndex.forValue(null); + Assert.assertNotNull(columnIndex); + Assert.assertEquals(0.2, columnIndex.estimateSelectivity(10), 0.0); + bitmap = columnIndex.computeBitmapResult(bitmapResultFactory); + checkBitmap(bitmap, 2, 7); } @Test @@ -969,6 +1090,11 @@ public void testDictionaryEncodedStringValueIndex() throws IOException Assert.assertEquals("300", lowLevelIndex.getValue(4)); Assert.assertEquals("1.1", lowLevelIndex.getValue(5)); Assert.assertEquals("9.9", lowLevelIndex.getValue(6)); + + Assert.assertEquals(7, lowLevelIndex.getCardinality()); + checkBitmap(lowLevelIndex.getBitmap(0), 2, 7); + checkBitmap(lowLevelIndex.getBitmap(1), 1, 9); + checkBitmap(lowLevelIndex.getBitmap(-1)); } private NestedFieldLiteralColumnIndexSupplier makeSingleTypeStringSupplier() throws IOException