From a5dce44f55eca64b038f743b9c48ad878a50ee55 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 29 Oct 2018 19:02:40 -0700 Subject: [PATCH 1/2] use BloomFilter instead of BloomKFilter since the latters test method is not threadsafe --- .../guice/BloomFilterSerializersModule.java | 46 +++++++++---------- .../druid/query/filter/BloomDimFilter.java | 32 ++++++------- ...lterHolder.java => BloomFilterHolder.java} | 24 +++++----- .../query/filter/BloomDimFilterTest.java | 42 ++++++++--------- 4 files changed, 72 insertions(+), 72 deletions(-) rename extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/{BloomKFilterHolder.java => BloomFilterHolder.java} (69%) diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java index 0072ff38d275..f11f120c1a53 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java @@ -27,8 +27,8 @@ import com.fasterxml.jackson.databind.module.SimpleModule; import com.fasterxml.jackson.databind.ser.std.StdSerializer; import org.apache.druid.query.filter.BloomDimFilter; -import org.apache.druid.query.filter.BloomKFilterHolder; -import org.apache.hive.common.util.BloomKFilter; +import org.apache.druid.query.filter.BloomFilterHolder; +import org.apache.hive.common.util.BloomFilter; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -41,65 +41,65 @@ public class BloomFilterSerializersModule extends SimpleModule public BloomFilterSerializersModule() { registerSubtypes(new NamedType(BloomDimFilter.class, BLOOM_FILTER_TYPE_NAME)); - addSerializer(BloomKFilter.class, new BloomKFilterSerializer()); - addDeserializer(BloomKFilter.class, new BloomKFilterDeserializer()); - addDeserializer(BloomKFilterHolder.class, new BloomKFilterHolderDeserializer()); + addSerializer(BloomFilter.class, new BloomFilterSerializer()); + addDeserializer(BloomFilter.class, new BloomKFilterDeserializer()); + addDeserializer(BloomFilterHolder.class, new BloomFilterHolderDeserializer()); } - private static class BloomKFilterSerializer extends StdSerializer + private static class BloomFilterSerializer extends StdSerializer { - BloomKFilterSerializer() + BloomFilterSerializer() { - super(BloomKFilter.class); + super(BloomFilter.class); } @Override - public void serialize(BloomKFilter bloomKFilter, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) + public void serialize(BloomFilter bloomFilter, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException { - jsonGenerator.writeBinary(bloomKFilterToBytes(bloomKFilter)); + jsonGenerator.writeBinary(bloomFilterToBytes(bloomFilter)); } } - private static class BloomKFilterDeserializer extends StdDeserializer + private static class BloomKFilterDeserializer extends StdDeserializer { BloomKFilterDeserializer() { - super(BloomKFilter.class); + super(BloomFilter.class); } @Override - public BloomKFilter deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) + public BloomFilter deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException { - return bloomKFilterFromBytes(jsonParser.getBinaryValue()); + return bloomFilterFromBytes(jsonParser.getBinaryValue()); } } - private static class BloomKFilterHolderDeserializer extends StdDeserializer + private static class BloomFilterHolderDeserializer extends StdDeserializer { - BloomKFilterHolderDeserializer() + BloomFilterHolderDeserializer() { - super(BloomKFilterHolder.class); + super(BloomFilterHolder.class); } @Override - public BloomKFilterHolder deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) + public BloomFilterHolder deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException { - return BloomKFilterHolder.fromBytes(jsonParser.getBinaryValue()); + return BloomFilterHolder.fromBytes(jsonParser.getBinaryValue()); } } - public static byte[] bloomKFilterToBytes(BloomKFilter bloomKFilter) throws IOException + public static byte[] bloomFilterToBytes(BloomFilter bloomFilter) throws IOException { ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - BloomKFilter.serialize(byteArrayOutputStream, bloomKFilter); + BloomFilter.serialize(byteArrayOutputStream, bloomFilter); return byteArrayOutputStream.toByteArray(); } - public static BloomKFilter bloomKFilterFromBytes(byte[] bytes) throws IOException + public static BloomFilter bloomFilterFromBytes(byte[] bytes) throws IOException { - return BloomKFilter.deserialize(new ByteArrayInputStream(bytes)); + return BloomFilter.deserialize(new ByteArrayInputStream(bytes)); } } diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java index 383488cc41ac..d5492ae7aff0 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java @@ -30,7 +30,7 @@ import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.segment.filter.DimensionPredicateFilter; -import org.apache.hive.common.util.BloomKFilter; +import org.apache.hive.common.util.BloomFilter; import java.util.HashSet; @@ -40,22 +40,22 @@ public class BloomDimFilter implements DimFilter { private final String dimension; - private final BloomKFilter bloomKFilter; + private final BloomFilter bloomFilter; private final HashCode hash; private final ExtractionFn extractionFn; @JsonCreator public BloomDimFilter( @JsonProperty("dimension") String dimension, - @JsonProperty("bloomKFilter") BloomKFilterHolder bloomKFilterHolder, + @JsonProperty("bloomFilter") BloomFilterHolder bloomFilterHolder, @JsonProperty("extractionFn") ExtractionFn extractionFn ) { Preconditions.checkArgument(dimension != null, "dimension must not be null"); - Preconditions.checkNotNull(bloomKFilterHolder); + Preconditions.checkNotNull(bloomFilterHolder); this.dimension = dimension; - this.bloomKFilter = bloomKFilterHolder.getFilter(); - this.hash = bloomKFilterHolder.getFilterHash(); + this.bloomFilter = bloomFilterHolder.getFilter(); + this.hash = bloomFilterHolder.getFilterHash(); this.extractionFn = extractionFn; } @@ -90,9 +90,9 @@ public Predicate makeStringPredicate() { return str -> { if (str == null) { - return bloomKFilter.testBytes(null, 0, 0); + return bloomFilter.testBytes(null, 0, 0); } - return bloomKFilter.testString(str); + return bloomFilter.testString(str); }; } @@ -104,13 +104,13 @@ public DruidLongPredicate makeLongPredicate() @Override public boolean applyLong(long input) { - return bloomKFilter.testLong(input); + return bloomFilter.testLong(input); } @Override public boolean applyNull() { - return bloomKFilter.testBytes(null, 0, 0); + return bloomFilter.testBytes(null, 0, 0); } }; } @@ -123,13 +123,13 @@ public DruidFloatPredicate makeFloatPredicate() @Override public boolean applyFloat(float input) { - return bloomKFilter.testFloat(input); + return bloomFilter.testDouble(input); } @Override public boolean applyNull() { - return bloomKFilter.testBytes(null, 0, 0); + return bloomFilter.testBytes(null, 0, 0); } }; } @@ -142,13 +142,13 @@ public DruidDoublePredicate makeDoublePredicate() @Override public boolean applyDouble(double input) { - return bloomKFilter.testDouble(input); + return bloomFilter.testDouble(input); } @Override public boolean applyNull() { - return bloomKFilter.testBytes(null, 0, 0); + return bloomFilter.testBytes(null, 0, 0); } }; } @@ -164,9 +164,9 @@ public String getDimension() } @JsonProperty - public BloomKFilter getBloomKFilter() + public BloomFilter getBloomFilter() { - return bloomKFilter; + return bloomFilter; } @JsonProperty diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomKFilterHolder.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomFilterHolder.java similarity index 69% rename from extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomKFilterHolder.java rename to extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomFilterHolder.java index e06632f0a60c..801b2464fb0f 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomKFilterHolder.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomFilterHolder.java @@ -22,23 +22,23 @@ import com.google.common.hash.HashCode; import com.google.common.hash.Hashing; import org.apache.druid.guice.BloomFilterSerializersModule; -import org.apache.hive.common.util.BloomKFilter; +import org.apache.hive.common.util.BloomFilter; import java.io.IOException; import java.util.Objects; -public class BloomKFilterHolder +public class BloomFilterHolder { - private final BloomKFilter filter; + private final BloomFilter filter; private final HashCode hash; - public BloomKFilterHolder(BloomKFilter filter, HashCode hash) + public BloomFilterHolder(BloomFilter filter, HashCode hash) { this.filter = filter; this.hash = hash; } - BloomKFilter getFilter() + BloomFilter getFilter() { return filter; } @@ -48,17 +48,17 @@ HashCode getFilterHash() return hash; } - public static BloomKFilterHolder fromBloomKFilter(BloomKFilter filter) throws IOException + public static BloomFilterHolder fromBloomFilter(BloomFilter filter) throws IOException { - byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter); + byte[] bytes = BloomFilterSerializersModule.bloomFilterToBytes(filter); - return new BloomKFilterHolder(filter, Hashing.sha512().hashBytes(bytes)); + return new BloomFilterHolder(filter, Hashing.sha512().hashBytes(bytes)); } - public static BloomKFilterHolder fromBytes(byte[] bytes) throws IOException + public static BloomFilterHolder fromBytes(byte[] bytes) throws IOException { - return new BloomKFilterHolder( - BloomFilterSerializersModule.bloomKFilterFromBytes(bytes), + return new BloomFilterHolder( + BloomFilterSerializersModule.bloomFilterFromBytes(bytes), Hashing.sha512().hashBytes(bytes) ); } @@ -73,7 +73,7 @@ public boolean equals(Object o) return false; } - BloomKFilterHolder that = (BloomKFilterHolder) o; + BloomFilterHolder that = (BloomFilterHolder) o; return Objects.equals(this.hash, that.hash); } diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java index 81a272f5f95a..0318aaa628fb 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java @@ -41,7 +41,7 @@ import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.filter.BaseFilterTest; import org.apache.druid.segment.incremental.IncrementalIndexSchema; -import org.apache.hive.common.util.BloomKFilter; +import org.apache.hive.common.util.BloomFilter; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -128,9 +128,9 @@ public static void tearDown() throws Exception @Test public void testSerde() throws IOException { - BloomKFilter bloomFilter = new BloomKFilter(1500); + BloomFilter bloomFilter = new BloomFilter(1500); bloomFilter.addString("myTestString"); - BloomKFilterHolder holder = new BloomKFilterHolder(bloomFilter, null); + BloomFilterHolder holder = new BloomFilterHolder(bloomFilter, null); BloomDimFilter bloomDimFilter = new BloomDimFilter( "abc", holder, @@ -141,8 +141,8 @@ public void testSerde() throws IOException BloomDimFilter serde = (BloomDimFilter) filter; Assert.assertEquals(bloomDimFilter.getDimension(), serde.getDimension()); Assert.assertEquals(bloomDimFilter.getExtractionFn(), serde.getExtractionFn()); - Assert.assertTrue(bloomDimFilter.getBloomKFilter().testString("myTestString")); - Assert.assertFalse(bloomDimFilter.getBloomKFilter().testString("not_match")); + Assert.assertTrue(bloomDimFilter.getBloomFilter().testString("myTestString")); + Assert.assertFalse(bloomDimFilter.getBloomFilter().testString("not_match")); } @Test @@ -338,11 +338,11 @@ public void testSelectorWithLookupExtractionFn() throws IOException @Test public void testCacheKeyIsNotGiantIfFilterIsGiant() throws IOException { - BloomKFilter bloomFilter = new BloomKFilter(10_000_000); + BloomFilter bloomFilter = new BloomFilter(10_000_000); // FILL IT UP! bloomFilter.addString("myTestString"); - BloomKFilterHolder holder = BloomKFilterHolder.fromBloomKFilter(bloomFilter); + BloomFilterHolder holder = BloomFilterHolder.fromBloomFilter(bloomFilter); BloomDimFilter bloomDimFilter = new BloomDimFilter( "abc", @@ -350,7 +350,7 @@ public void testCacheKeyIsNotGiantIfFilterIsGiant() throws IOException new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) ); - byte[] bloomFilterBytes = BloomFilterSerializersModule.bloomKFilterToBytes(bloomFilter); + byte[] bloomFilterBytes = BloomFilterSerializersModule.bloomFilterToBytes(bloomFilter); // serialized filter can be quite large for high capacity bloom filters... Assert.assertTrue(bloomFilterBytes.length > 7794000); @@ -360,9 +360,9 @@ public void testCacheKeyIsNotGiantIfFilterIsGiant() throws IOException Assert.assertTrue(actualSize < 100); } - private static BloomKFilterHolder bloomKFilter(int expectedEntries, String... values) throws IOException + private static BloomFilterHolder bloomKFilter(int expectedEntries, String... values) throws IOException { - BloomKFilter filter = new BloomKFilter(expectedEntries); + BloomFilter filter = new BloomFilter(expectedEntries); for (String value : values) { if (value == null) { filter.addBytes(null, 0, 0); @@ -371,25 +371,25 @@ private static BloomKFilterHolder bloomKFilter(int expectedEntries, String... va } } - return BloomKFilterHolder.fromBloomKFilter(filter); + return BloomFilterHolder.fromBloomFilter(filter); } - private static BloomKFilterHolder bloomKFilter(int expectedEntries, Float... values) throws IOException + private static BloomFilterHolder bloomKFilter(int expectedEntries, Float... values) throws IOException { - BloomKFilter filter = new BloomKFilter(expectedEntries); + BloomFilter filter = new BloomFilter(expectedEntries); for (Float value : values) { if (value == null) { filter.addBytes(null, 0, 0); } else { - filter.addFloat(value); + filter.addDouble(value); } } - return BloomKFilterHolder.fromBloomKFilter(filter); + return BloomFilterHolder.fromBloomFilter(filter); } - private static BloomKFilterHolder bloomKFilter(int expectedEntries, Double... values) throws IOException + private static BloomFilterHolder bloomKFilter(int expectedEntries, Double... values) throws IOException { - BloomKFilter filter = new BloomKFilter(expectedEntries); + BloomFilter filter = new BloomFilter(expectedEntries); for (Double value : values) { if (value == null) { filter.addBytes(null, 0, 0); @@ -397,12 +397,12 @@ private static BloomKFilterHolder bloomKFilter(int expectedEntries, Double... va filter.addDouble(value); } } - return BloomKFilterHolder.fromBloomKFilter(filter); + return BloomFilterHolder.fromBloomFilter(filter); } - private static BloomKFilterHolder bloomKFilter(int expectedEntries, Long... values) throws IOException + private static BloomFilterHolder bloomKFilter(int expectedEntries, Long... values) throws IOException { - BloomKFilter filter = new BloomKFilter(expectedEntries); + BloomFilter filter = new BloomFilter(expectedEntries); for (Long value : values) { if (value == null) { filter.addBytes(null, 0, 0); @@ -410,6 +410,6 @@ private static BloomKFilterHolder bloomKFilter(int expectedEntries, Long... valu filter.addLong(value); } } - return BloomKFilterHolder.fromBloomKFilter(filter); + return BloomFilterHolder.fromBloomFilter(filter); } } From 0214ab2b2718f87a6ec2a88ed5199db518936f11 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 29 Oct 2018 19:59:35 -0700 Subject: [PATCH 2/2] fixes and update docs --- .../extensions-core/bloom-filter.md | 12 +- .../guice/BloomFilterSerializersModule.java | 9 +- .../query/filter/BloomDimFilterTest.java | 116 +++++++++--------- 3 files changed, 69 insertions(+), 68 deletions(-) diff --git a/docs/content/development/extensions-core/bloom-filter.md b/docs/content/development/extensions-core/bloom-filter.md index 140111d123a3..baff804ae9df 100644 --- a/docs/content/development/extensions-core/bloom-filter.md +++ b/docs/content/development/extensions-core/bloom-filter.md @@ -23,7 +23,7 @@ Internally, this implementation of bloom filter uses Murmur3 fast non-cryptograp { "type" : "bloom", "dimension" : , - "bloomKFilter" : , + "bloomFilter" : , "extractionFn" : } ``` @@ -32,14 +32,14 @@ Internally, this implementation of bloom filter uses Murmur3 fast non-cryptograp |-------------------------|------------------------------|----------------------------------| |`type` |Filter Type. Should always be `bloom`|yes| |`dimension` |The dimension to filter over. | yes | -|`bloomKFilter` |Base64 encoded Binary representation of `org.apache.hive.common.util.BloomKFilter`| yes | +|`bloomFilter` |Base64 encoded Binary representation of `org.apache.hive.common.util.BloomFilter`| yes | |`extractionFn`|[Extraction function](./../dimensionspecs.html#extraction-functions) to apply to the dimension values |no| -### Serialized Format for BloomKFilter - Serialized BloomKFilter format: +### Serialized Format for BloomFilter + Serialized BloomFilter format: - 1 byte for the number of hash functions. - 1 big endian int(That is how OutputStream works) for the number of longs in the bitset - - big endian longs in the BloomKFilter bitset + - big endian longs in the BloomFilter bitset -Note: `org.apache.hive.common.util.BloomKFilter` provides a serialize method which can be used to serialize bloom filters to outputStream. \ No newline at end of file +Note: `org.apache.hive.common.util.BloomFilter` provides a serialize method which can be used to serialize bloom filters to outputStream. \ No newline at end of file diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java index f11f120c1a53..5ac758e01ba0 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java @@ -42,7 +42,7 @@ public BloomFilterSerializersModule() { registerSubtypes(new NamedType(BloomDimFilter.class, BLOOM_FILTER_TYPE_NAME)); addSerializer(BloomFilter.class, new BloomFilterSerializer()); - addDeserializer(BloomFilter.class, new BloomKFilterDeserializer()); + addDeserializer(BloomFilter.class, new BloomFilterDeserializer()); addDeserializer(BloomFilterHolder.class, new BloomFilterHolderDeserializer()); } @@ -61,9 +61,9 @@ public void serialize(BloomFilter bloomFilter, JsonGenerator jsonGenerator, Seri } } - private static class BloomKFilterDeserializer extends StdDeserializer + private static class BloomFilterDeserializer extends StdDeserializer { - BloomKFilterDeserializer() + BloomFilterDeserializer() { super(BloomFilter.class); } @@ -87,7 +87,8 @@ private static class BloomFilterHolderDeserializer extends StdDeserializer stringMap3 = ImmutableMap.of( "1", "" @@ -320,16 +320,16 @@ public void testSelectorWithLookupExtractionFn() throws IOException if (NullHandling.replaceWithDefault()) { // Nulls and empty strings are considered equivalent assertFilterMatches( - new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), lookupFn3), + new BloomDimFilter("dim0", createBloomFilterHolder(1000, (String) null), lookupFn3), ImmutableList.of("0", "1", "2", "3", "4", "5") ); } else { assertFilterMatches( - new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), lookupFn3), + new BloomDimFilter("dim0", createBloomFilterHolder(1000, (String) null), lookupFn3), ImmutableList.of("0", "2", "3", "4", "5") ); assertFilterMatches( - new BloomDimFilter("dim0", bloomKFilter(1000, ""), lookupFn3), + new BloomDimFilter("dim0", createBloomFilterHolder(1000, ""), lookupFn3), ImmutableList.of("1") ); } @@ -360,7 +360,7 @@ public void testCacheKeyIsNotGiantIfFilterIsGiant() throws IOException Assert.assertTrue(actualSize < 100); } - private static BloomFilterHolder bloomKFilter(int expectedEntries, String... values) throws IOException + private static BloomFilterHolder createBloomFilterHolder(int expectedEntries, String... values) throws IOException { BloomFilter filter = new BloomFilter(expectedEntries); for (String value : values) { @@ -374,7 +374,7 @@ private static BloomFilterHolder bloomKFilter(int expectedEntries, String... val return BloomFilterHolder.fromBloomFilter(filter); } - private static BloomFilterHolder bloomKFilter(int expectedEntries, Float... values) throws IOException + private static BloomFilterHolder createBloomFilterHolder(int expectedEntries, Float... values) throws IOException { BloomFilter filter = new BloomFilter(expectedEntries); for (Float value : values) { @@ -387,7 +387,7 @@ private static BloomFilterHolder bloomKFilter(int expectedEntries, Float... valu return BloomFilterHolder.fromBloomFilter(filter); } - private static BloomFilterHolder bloomKFilter(int expectedEntries, Double... values) throws IOException + private static BloomFilterHolder createBloomFilterHolder(int expectedEntries, Double... values) throws IOException { BloomFilter filter = new BloomFilter(expectedEntries); for (Double value : values) { @@ -400,7 +400,7 @@ private static BloomFilterHolder bloomKFilter(int expectedEntries, Double... val return BloomFilterHolder.fromBloomFilter(filter); } - private static BloomFilterHolder bloomKFilter(int expectedEntries, Long... values) throws IOException + private static BloomFilterHolder createBloomFilterHolder(int expectedEntries, Long... values) throws IOException { BloomFilter filter = new BloomFilter(expectedEntries); for (Long value : values) {