From 467523857fa0e6168d8d30b65c59f2a3bfb9daa9 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 15 Nov 2018 23:39:43 -0800 Subject: [PATCH 1/3] add optional useBitmapIndex field to BloomDimFilter, defaulting to false for better performance --- .../druid/query/filter/BloomDimFilter.java | 68 ++--- .../query/filter/BloomDimFilterTest.java | 244 ++++++++++-------- 2 files changed, 169 insertions(+), 143 deletions(-) diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java index f235b59c30de..24b4f4877850 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java @@ -26,28 +26,30 @@ import com.google.common.collect.RangeSet; import com.google.common.collect.Sets; import com.google.common.hash.HashCode; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.segment.filter.DimensionPredicateFilter; +import javax.annotation.Nullable; import java.util.HashSet; +import java.util.Objects; /** */ public class BloomDimFilter implements DimFilter { - private final String dimension; private final BloomKFilter bloomKFilter; private final HashCode hash; private final ExtractionFn extractionFn; + private final boolean useBitmapIndex; @JsonCreator public BloomDimFilter( @JsonProperty("dimension") String dimension, @JsonProperty("bloomKFilter") BloomKFilterHolder bloomKFilterHolder, - @JsonProperty("extractionFn") ExtractionFn extractionFn + @JsonProperty("extractionFn") ExtractionFn extractionFn, + @Nullable @JsonProperty("useBitmapIndex") Boolean useBitmapIndex ) { Preconditions.checkArgument(dimension != null, "dimension must not be 
null"); @@ -56,6 +58,7 @@ public BloomDimFilter( this.bloomKFilter = bloomKFilterHolder.getFilter(); this.hash = bloomKFilterHolder.getFilterHash(); this.extractionFn = extractionFn; + this.useBitmapIndex = useBitmapIndex != null && useBitmapIndex; } @Override @@ -153,7 +156,14 @@ public boolean applyNull() } }, extractionFn - ); + ) + { + @Override + public boolean supportsBitmapIndex(BitmapIndexSelector selector) + { + return useBitmapIndex; + } + }; } @JsonProperty @@ -174,14 +184,17 @@ public ExtractionFn getExtractionFn() return extractionFn; } + @Override - public String toString() + public RangeSet getDimensionRangeSet(String dimension) { - if (extractionFn != null) { - return StringUtils.format("%s(%s) = %s", extractionFn, dimension, hash.toString()); - } else { - return StringUtils.format("%s = %s", dimension, hash.toString()); - } + return null; + } + + @Override + public HashSet getRequiredColumns() + { + return Sets.newHashSet(dimension); } @Override @@ -193,36 +206,27 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - BloomDimFilter that = (BloomDimFilter) o; - - if (!dimension.equals(that.dimension)) { - return false; - } - if (hash != null ? !hash.equals(that.hash) : that.hash != null) { - return false; - } - return extractionFn != null ? extractionFn.equals(that.extractionFn) : that.extractionFn == null; - } - - @Override - public RangeSet getDimensionRangeSet(String dimension) - { - return null; + return useBitmapIndex == that.useBitmapIndex && + Objects.equals(dimension, that.dimension) && + Objects.equals(hash, that.hash) && + Objects.equals(extractionFn, that.extractionFn); } @Override - public HashSet getRequiredColumns() + public int hashCode() { - return Sets.newHashSet(dimension); + return Objects.hash(dimension, hash, extractionFn, useBitmapIndex); } @Override - public int hashCode() + public String toString() { - int result = dimension.hashCode(); - result = 31 * result + (hash != null ? 
hash.hashCode() : 0); - result = 31 * result + (extractionFn != null ? extractionFn.hashCode() : 0); - return result; + return "BloomDimFilter{" + + "dimension='" + dimension + '\'' + + ", hash=" + hash + + ", extractionFn=" + extractionFn + + ", useBitmapIndex=" + useBitmapIndex + + '}'; } } diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java index fb8d31aa4a78..9a352deff2db 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java @@ -125,6 +125,59 @@ public static void tearDown() throws Exception BaseFilterTest.tearDown(BloomDimFilterTest.class.getName()); } + private static BloomKFilterHolder bloomKFilter(int expectedEntries, String... values) throws IOException + { + BloomKFilter filter = new BloomKFilter(expectedEntries); + for (String value : values) { + if (value == null) { + filter.addBytes(null, 0, 0); + } else { + filter.addString(value); + } + } + + return BloomKFilterHolder.fromBloomKFilter(filter); + } + + private static BloomKFilterHolder bloomKFilter(int expectedEntries, Float... values) throws IOException + { + BloomKFilter filter = new BloomKFilter(expectedEntries); + for (Float value : values) { + if (value == null) { + filter.addBytes(null, 0, 0); + } else { + filter.addFloat(value); + } + } + return BloomKFilterHolder.fromBloomKFilter(filter); + } + + private static BloomKFilterHolder bloomKFilter(int expectedEntries, Double... 
values) throws IOException + { + BloomKFilter filter = new BloomKFilter(expectedEntries); + for (Double value : values) { + if (value == null) { + filter.addBytes(null, 0, 0); + } else { + filter.addDouble(value); + } + } + return BloomKFilterHolder.fromBloomKFilter(filter); + } + + private static BloomKFilterHolder bloomKFilter(int expectedEntries, Long... values) throws IOException + { + BloomKFilter filter = new BloomKFilter(expectedEntries); + for (Long value : values) { + if (value == null) { + filter.addBytes(null, 0, 0); + } else { + filter.addLong(value); + } + } + return BloomKFilterHolder.fromBloomKFilter(filter); + } + @Test public void testSerde() throws IOException { @@ -134,7 +187,8 @@ public void testSerde() throws IOException BloomDimFilter bloomDimFilter = new BloomDimFilter( "abc", holder, - new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true), + false ); DimFilter filter = mapper.readValue(mapper.writeValueAsBytes(bloomDimFilter), DimFilter.class); Assert.assertTrue(filter instanceof BloomDimFilter); @@ -151,49 +205,59 @@ public void testWithTimeExtractionFnNull() throws IOException assertFilterMatches(new BloomDimFilter( "dim0", bloomKFilter(1000, null, ""), - new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true), + false ), ImmutableList.of()); assertFilterMatches(new BloomDimFilter( "dim6", bloomKFilter(1000, null, ""), - new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true), + false ), ImmutableList.of("3", "4", "5")); assertFilterMatches(new BloomDimFilter( "dim6", bloomKFilter(1000, "2017-07"), - new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true), + false ), ImmutableList.of("0", "1")); assertFilterMatches(new BloomDimFilter( "dim6", bloomKFilter(1000, "2017-05"), - new TimeDimExtractionFn("yyyy-MM-dd", 
"yyyy-MM", true) + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true), + false ), ImmutableList.of("2")); } @Test public void testSingleValueStringColumnWithoutNulls() throws IOException { - assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), null), ImmutableList.of()); - assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, ""), null), ImmutableList.of()); - assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, "0"), null), ImmutableList.of("0")); - assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, "1"), null), ImmutableList.of("1")); + assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), null, false), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, ""), null, false), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, "0"), null, false), ImmutableList.of("0")); + assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, "1"), null, false), ImmutableList.of("1")); } @Test public void testSingleValueStringColumnWithNulls() throws IOException { if (NullHandling.replaceWithDefault()) { - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, (String) null), null), ImmutableList.of("0")); + assertFilterMatches( + new BloomDimFilter("dim1", bloomKFilter(1000, (String) null), null, false), + ImmutableList.of("0") + ); } else { - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, (String) null), null), ImmutableList.of()); - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, ""), null), ImmutableList.of("0")); + assertFilterMatches( + new BloomDimFilter("dim1", bloomKFilter(1000, (String) null), null, false), + ImmutableList.of() + ); + assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, ""), null, false), ImmutableList.of("0")); } - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "10"), null), 
ImmutableList.of("1")); - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "2"), null), ImmutableList.of("2")); - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "1"), null), ImmutableList.of("3")); - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "def"), null), ImmutableList.of("4")); - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "abc"), null), ImmutableList.of("5")); - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "ab"), null), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "10"), null, false), ImmutableList.of("1")); + assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "2"), null, false), ImmutableList.of("2")); + assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "1"), null, false), ImmutableList.of("3")); + assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "def"), null, false), ImmutableList.of("4")); + assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "abc"), null, false), ImmutableList.of("5")); + assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "ab"), null, false), ImmutableList.of()); } @Test @@ -201,66 +265,66 @@ public void testMultiValueStringColumn() throws IOException { if (NullHandling.replaceWithDefault()) { assertFilterMatches( - new BloomDimFilter("dim2", bloomKFilter(1000, (String) null), null), + new BloomDimFilter("dim2", bloomKFilter(1000, (String) null), null, false), ImmutableList.of("1", "2", "5") ); } else { assertFilterMatches( - new BloomDimFilter("dim2", bloomKFilter(1000, (String) null), null), + new BloomDimFilter("dim2", bloomKFilter(1000, (String) null), null, false), ImmutableList.of("1", "5") ); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, ""), null), ImmutableList.of("2")); + assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, ""), null, false), ImmutableList.of("2")); } - 
assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "a"), null), ImmutableList.of("0", "3")); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "b"), null), ImmutableList.of("0")); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "c"), null), ImmutableList.of("4")); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "d"), null), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "a"), null, false), ImmutableList.of("0", "3")); + assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "b"), null, false), ImmutableList.of("0")); + assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "c"), null, false), ImmutableList.of("4")); + assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "d"), null, false), ImmutableList.of()); } @Test public void testMissingColumnSpecifiedInDimensionList() throws IOException { assertFilterMatches( - new BloomDimFilter("dim3", bloomKFilter(1000, (String) null), null), + new BloomDimFilter("dim3", bloomKFilter(1000, (String) null), null, false), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, ""), null), ImmutableList.of()); - assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, "a"), null), ImmutableList.of()); - assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, "b"), null), ImmutableList.of()); - assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, "c"), null), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, ""), null, false), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, "a"), null, false), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, "b"), null, false), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, "c"), null, false), 
ImmutableList.of()); } @Test public void testMissingColumnNotSpecifiedInDimensionList() throws IOException { assertFilterMatches( - new BloomDimFilter("dim4", bloomKFilter(1000, (String) null), null), + new BloomDimFilter("dim4", bloomKFilter(1000, (String) null), null, false), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, ""), null), ImmutableList.of()); - assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, "a"), null), ImmutableList.of()); - assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, "b"), null), ImmutableList.of()); - assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, "c"), null), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, ""), null, false), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, "a"), null, false), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, "b"), null, false), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, "c"), null, false), ImmutableList.of()); } @Test public void testExpressionVirtualColumn() throws IOException { assertFilterMatches( - new BloomDimFilter("expr", bloomKFilter(1000, 1.1F), null), + new BloomDimFilter("expr", bloomKFilter(1000, 1.1F), null, false), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("expr", bloomKFilter(1000, 1.2F), null), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("expr", bloomKFilter(1000, 1.2F), null, false), ImmutableList.of()); assertFilterMatches( - new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.1D), null), + new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.1D), null, false), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.2D), null), ImmutableList.of()); + 
assertFilterMatches(new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.2D), null, false), ImmutableList.of()); assertFilterMatches( - new BloomDimFilter("exprLong", bloomKFilter(1000, 3L), null), + new BloomDimFilter("exprLong", bloomKFilter(1000, 3L), null, false), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("exprLong", bloomKFilter(1000, 4L), null), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("exprLong", bloomKFilter(1000, 4L), null, false), ImmutableList.of()); } @Test @@ -275,33 +339,42 @@ public void testSelectorWithLookupExtractionFn() throws IOException LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); - assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, "HELLO"), lookupFn), ImmutableList.of("1")); assertFilterMatches( - new BloomDimFilter("dim0", bloomKFilter(1000, "UNKNOWN"), lookupFn), + new BloomDimFilter("dim0", bloomKFilter(1000, "HELLO"), lookupFn, false), + ImmutableList.of("1") + ); + assertFilterMatches( + new BloomDimFilter("dim0", bloomKFilter(1000, "UNKNOWN"), lookupFn, false), ImmutableList.of("0", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, "HELLO"), lookupFn), ImmutableList.of("3", "4")); assertFilterMatches( - new BloomDimFilter("dim1", bloomKFilter(1000, "UNKNOWN"), lookupFn), + new BloomDimFilter("dim1", bloomKFilter(1000, "HELLO"), lookupFn, false), + ImmutableList.of("3", "4") + ); + assertFilterMatches( + new BloomDimFilter("dim1", bloomKFilter(1000, "UNKNOWN"), lookupFn, false), ImmutableList.of("0", "1", "2", "5") ); - assertFilterMatches(new BloomDimFilter("dim2", bloomKFilter(1000, "HELLO"), lookupFn), ImmutableList.of("0", "3")); assertFilterMatches( - new BloomDimFilter("dim2", bloomKFilter(1000, "UNKNOWN"), lookupFn), + new BloomDimFilter("dim2", bloomKFilter(1000, "HELLO"), 
lookupFn, false), + ImmutableList.of("0", "3") + ); + assertFilterMatches( + new BloomDimFilter("dim2", bloomKFilter(1000, "UNKNOWN"), lookupFn, false), ImmutableList.of("0", "1", "2", "4", "5") ); - assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, "HELLO"), lookupFn), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim3", bloomKFilter(1000, "HELLO"), lookupFn, false), ImmutableList.of()); assertFilterMatches( - new BloomDimFilter("dim3", bloomKFilter(1000, "UNKNOWN"), lookupFn), + new BloomDimFilter("dim3", bloomKFilter(1000, "UNKNOWN"), lookupFn, false), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, "HELLO"), lookupFn), ImmutableList.of()); + assertFilterMatches(new BloomDimFilter("dim4", bloomKFilter(1000, "HELLO"), lookupFn, false), ImmutableList.of()); assertFilterMatches( - new BloomDimFilter("dim4", bloomKFilter(1000, "UNKNOWN"), lookupFn), + new BloomDimFilter("dim4", bloomKFilter(1000, "UNKNOWN"), lookupFn, false), ImmutableList.of("0", "1", "2", "3", "4", "5") ); @@ -310,7 +383,10 @@ public void testSelectorWithLookupExtractionFn() throws IOException ); LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false); LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true); - assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, "5"), lookupFn2), ImmutableList.of("2", "5")); + assertFilterMatches( + new BloomDimFilter("dim0", bloomKFilter(1000, "5"), lookupFn2, false), + ImmutableList.of("2", "5") + ); final Map stringMap3 = ImmutableMap.of( "1", "" @@ -320,16 +396,16 @@ public void testSelectorWithLookupExtractionFn() throws IOException if (NullHandling.replaceWithDefault()) { // Nulls and empty strings are considered equivalent assertFilterMatches( - new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), lookupFn3), + new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), 
lookupFn3, false), ImmutableList.of("0", "1", "2", "3", "4", "5") ); } else { assertFilterMatches( - new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), lookupFn3), + new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), lookupFn3, false), ImmutableList.of("0", "2", "3", "4", "5") ); assertFilterMatches( - new BloomDimFilter("dim0", bloomKFilter(1000, ""), lookupFn3), + new BloomDimFilter("dim0", bloomKFilter(1000, ""), lookupFn3, false), ImmutableList.of("1") ); } @@ -347,7 +423,8 @@ public void testCacheKeyIsNotGiantIfFilterIsGiant() throws IOException BloomDimFilter bloomDimFilter = new BloomDimFilter( "abc", holder, - new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true), + false ); byte[] bloomFilterBytes = BloomFilterSerializersModule.bloomKFilterToBytes(bloomFilter); @@ -376,7 +453,6 @@ public void testStringHiveCompat() throws IOException Assert.assertFalse(druidFilter.testString("not_match")); } - @Test public void testFloatHiveCompat() throws IOException { @@ -395,7 +471,6 @@ public void testFloatHiveCompat() throws IOException Assert.assertFalse(druidFilter.testFloat(0.3F)); } - @Test public void testDoubleHiveCompat() throws IOException { @@ -431,57 +506,4 @@ public void testLongHiveCompat() throws IOException Assert.assertTrue(druidFilter.testLong(664L)); Assert.assertFalse(druidFilter.testLong(3L)); } - - private static BloomKFilterHolder bloomKFilter(int expectedEntries, String... values) throws IOException - { - BloomKFilter filter = new BloomKFilter(expectedEntries); - for (String value : values) { - if (value == null) { - filter.addBytes(null, 0, 0); - } else { - filter.addString(value); - } - } - - return BloomKFilterHolder.fromBloomKFilter(filter); - } - - private static BloomKFilterHolder bloomKFilter(int expectedEntries, Float... 
values) throws IOException - { - BloomKFilter filter = new BloomKFilter(expectedEntries); - for (Float value : values) { - if (value == null) { - filter.addBytes(null, 0, 0); - } else { - filter.addFloat(value); - } - } - return BloomKFilterHolder.fromBloomKFilter(filter); - } - - private static BloomKFilterHolder bloomKFilter(int expectedEntries, Double... values) throws IOException - { - BloomKFilter filter = new BloomKFilter(expectedEntries); - for (Double value : values) { - if (value == null) { - filter.addBytes(null, 0, 0); - } else { - filter.addDouble(value); - } - } - return BloomKFilterHolder.fromBloomKFilter(filter); - } - - private static BloomKFilterHolder bloomKFilter(int expectedEntries, Long... values) throws IOException - { - BloomKFilter filter = new BloomKFilter(expectedEntries); - for (Long value : values) { - if (value == null) { - filter.addBytes(null, 0, 0); - } else { - filter.addLong(value); - } - } - return BloomKFilterHolder.fromBloomKFilter(filter); - } } From eb4434b8a2c179ffe6eb4162739e1d84dc99742d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 16 Nov 2018 00:55:54 -0800 Subject: [PATCH 2/3] docs --- docs/content/development/extensions-core/bloom-filter.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/content/development/extensions-core/bloom-filter.md b/docs/content/development/extensions-core/bloom-filter.md index 3dd2cca20fb1..ea0fa5a7a849 100644 --- a/docs/content/development/extensions-core/bloom-filter.md +++ b/docs/content/development/extensions-core/bloom-filter.md @@ -43,7 +43,8 @@ Internally, this implementation of bloom filter uses Murmur3 fast non-cryptograp "type" : "bloom", "dimension" : , "bloomKFilter" : , - "extractionFn" : + "extractionFn" : , + "useBitmapIndex" : } ``` @@ -53,6 +54,7 @@ Internally, this implementation of bloom filter uses Murmur3 fast non-cryptograp |`dimension` |The dimension to filter over. 
| yes | |`bloomKFilter` |Base64 encoded Binary representation of `org.apache.hive.common.util.BloomKFilter`| yes | |`extractionFn`|[Extraction function](./../dimensionspecs.html#extraction-functions) to apply to the dimension values |no| +|`useBitmapIndex` |Use bitmap indexes to filter the dimension. This can be slower for higher cardinality dimensions.|no (default `false`)| ### Serialized Format for BloomKFilter From c9924150a5c6ec68abc03430f01bfa66533c9a2f Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 16 Nov 2018 00:58:33 -0800 Subject: [PATCH 3/3] revert unintended change --- .../sql/BloomFilterOperatorConversion.java | 3 +- .../query/filter/BloomDimFilterTest.java | 106 +++++++++--------- .../filter/sql/BloomDimFilterSqlTest.java | 6 +- 3 files changed, 58 insertions(+), 57 deletions(-) diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/sql/BloomFilterOperatorConversion.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/sql/BloomFilterOperatorConversion.java index ab9419e8df6c..872080895a36 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/sql/BloomFilterOperatorConversion.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/sql/BloomFilterOperatorConversion.java @@ -91,7 +91,8 @@ public DimFilter toDruidFilter( return new BloomDimFilter( druidExpression.getSimpleExtraction().getColumn(), holder, - druidExpression.getSimpleExtraction().getExtractionFn() + druidExpression.getSimpleExtraction().getExtractionFn(), + false ); } else { // expression virtual columns not currently supported diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java index 9a352deff2db..73c0f26faa44 100644 ---
a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java @@ -125,59 +125,6 @@ public static void tearDown() throws Exception BaseFilterTest.tearDown(BloomDimFilterTest.class.getName()); } - private static BloomKFilterHolder bloomKFilter(int expectedEntries, String... values) throws IOException - { - BloomKFilter filter = new BloomKFilter(expectedEntries); - for (String value : values) { - if (value == null) { - filter.addBytes(null, 0, 0); - } else { - filter.addString(value); - } - } - - return BloomKFilterHolder.fromBloomKFilter(filter); - } - - private static BloomKFilterHolder bloomKFilter(int expectedEntries, Float... values) throws IOException - { - BloomKFilter filter = new BloomKFilter(expectedEntries); - for (Float value : values) { - if (value == null) { - filter.addBytes(null, 0, 0); - } else { - filter.addFloat(value); - } - } - return BloomKFilterHolder.fromBloomKFilter(filter); - } - - private static BloomKFilterHolder bloomKFilter(int expectedEntries, Double... values) throws IOException - { - BloomKFilter filter = new BloomKFilter(expectedEntries); - for (Double value : values) { - if (value == null) { - filter.addBytes(null, 0, 0); - } else { - filter.addDouble(value); - } - } - return BloomKFilterHolder.fromBloomKFilter(filter); - } - - private static BloomKFilterHolder bloomKFilter(int expectedEntries, Long... 
values) throws IOException - { - BloomKFilter filter = new BloomKFilter(expectedEntries); - for (Long value : values) { - if (value == null) { - filter.addBytes(null, 0, 0); - } else { - filter.addLong(value); - } - } - return BloomKFilterHolder.fromBloomKFilter(filter); - } - @Test public void testSerde() throws IOException { @@ -506,4 +453,57 @@ public void testLongHiveCompat() throws IOException Assert.assertTrue(druidFilter.testLong(664L)); Assert.assertFalse(druidFilter.testLong(3L)); } + + private static BloomKFilterHolder bloomKFilter(int expectedEntries, String... values) throws IOException + { + BloomKFilter filter = new BloomKFilter(expectedEntries); + for (String value : values) { + if (value == null) { + filter.addBytes(null, 0, 0); + } else { + filter.addString(value); + } + } + + return BloomKFilterHolder.fromBloomKFilter(filter); + } + + private static BloomKFilterHolder bloomKFilter(int expectedEntries, Float... values) throws IOException + { + BloomKFilter filter = new BloomKFilter(expectedEntries); + for (Float value : values) { + if (value == null) { + filter.addBytes(null, 0, 0); + } else { + filter.addFloat(value); + } + } + return BloomKFilterHolder.fromBloomKFilter(filter); + } + + private static BloomKFilterHolder bloomKFilter(int expectedEntries, Double... values) throws IOException + { + BloomKFilter filter = new BloomKFilter(expectedEntries); + for (Double value : values) { + if (value == null) { + filter.addBytes(null, 0, 0); + } else { + filter.addDouble(value); + } + } + return BloomKFilterHolder.fromBloomKFilter(filter); + } + + private static BloomKFilterHolder bloomKFilter(int expectedEntries, Long... 
values) throws IOException + { + BloomKFilter filter = new BloomKFilter(expectedEntries); + for (Long value : values) { + if (value == null) { + filter.addBytes(null, 0, 0); + } else { + filter.addLong(value); + } + } + return BloomKFilterHolder.fromBloomKFilter(filter); + } } diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java index 3f1f719df42d..eaa7562a0709 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java @@ -100,7 +100,7 @@ public void testBloomFilter() throws Exception .intervals(QSS(Filtration.eternity())) .granularity(Granularities.ALL) .filters( - new BloomDimFilter("dim1", BloomKFilterHolder.fromBloomKFilter(filter), null) + new BloomDimFilter("dim1", BloomKFilterHolder.fromBloomKFilter(filter), null, false) ) .aggregators(AGGS(new CountAggregatorFactory("a0"))) .context(TIMESERIES_CONTEXT_DEFAULT) @@ -134,8 +134,8 @@ public void testBloomFilters() throws Exception .granularity(Granularities.ALL) .filters( new OrDimFilter( - new BloomDimFilter("dim1", BloomKFilterHolder.fromBloomKFilter(filter), null), - new BloomDimFilter("dim2", BloomKFilterHolder.fromBloomKFilter(filter2), null) + new BloomDimFilter("dim1", BloomKFilterHolder.fromBloomKFilter(filter), null, false), + new BloomDimFilter("dim2", BloomKFilterHolder.fromBloomKFilter(filter2), null, false) ) ) .aggregators(AGGS(new CountAggregatorFactory("a0")))