From 76aa704cd4d6ffa50eb4c5a2a4f3794488dcba6f Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 29 Apr 2020 11:30:49 -0700 Subject: [PATCH 01/16] Avoid sorting values in InDimFilter if possible --- .../druid/common/config/NullHandling.java | 5 + .../druid/query/filter/AndDimFilter.java | 11 +- .../apache/druid/query/filter/DimFilter.java | 4 +- .../druid/query/filter/DimFilterUtils.java | 1 + .../druid/query/filter/FalseDimFilter.java | 63 +++++++++ .../druid/query/filter/InDimFilter.java | 73 ++++++---- .../druid/query/filter/OrDimFilter.java | 11 +- .../query/filter/InDimFilterSerDesrTest.java | 85 ----------- .../druid/query/filter/InDimFilterTest.java | 132 ++++++++++++++++++ .../sql/calcite/filtration/Filtration.java | 12 +- 10 files changed, 275 insertions(+), 122 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java delete mode 100644 processing/src/test/java/org/apache/druid/query/filter/InDimFilterSerDesrTest.java create mode 100644 processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java diff --git a/core/src/main/java/org/apache/druid/common/config/NullHandling.java b/core/src/main/java/org/apache/druid/common/config/NullHandling.java index bd0f0ee8ae51..51cb2658628b 100644 --- a/core/src/main/java/org/apache/druid/common/config/NullHandling.java +++ b/core/src/main/java/org/apache/druid/common/config/NullHandling.java @@ -94,6 +94,11 @@ public static String emptyToNullIfNeeded(@Nullable String value) //CHECKSTYLE.ON: Regexp } + public static boolean needsEmptyToNull(@Nullable String value) + { + return replaceWithDefault() && Strings.isNullOrEmpty(value); + } + @Nullable public static String defaultStringValue() { diff --git a/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java index 682c2470c30d..f5aaab06226c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java @@ -33,6 +33,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; /** */ @@ -73,7 +74,15 @@ public byte[] getCacheKey() public DimFilter optimize() { List elements = DimFilters.optimize(fields); - return elements.size() == 1 ? elements.get(0) : new AndDimFilter(elements); + if (elements.size() == 1) { + return elements.get(0); + } else if (elements.stream().anyMatch(filter -> filter instanceof FalseDimFilter)) { + return new FalseDimFilter(); + } else { + return new AndDimFilter( + elements.stream().filter(filter -> filter instanceof TrueDimFilter).collect(Collectors.toList()) + ); + } } @Override diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java index 7688d362b3e8..6a38457f30c4 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java @@ -47,7 +47,8 @@ @JsonSubTypes.Type(name = "interval", value = IntervalDimFilter.class), @JsonSubTypes.Type(name = "like", value = LikeDimFilter.class), @JsonSubTypes.Type(name = "expression", value = ExpressionDimFilter.class), - @JsonSubTypes.Type(name = "true", value = TrueDimFilter.class) + @JsonSubTypes.Type(name = "true", value = TrueDimFilter.class), + @JsonSubTypes.Type(name = "false", value = FalseDimFilter.class) }) public interface DimFilter extends Cacheable { @@ -78,6 +79,7 @@ public interface DimFilter extends Cacheable * @return a RangeSet that represent the possible range of the input dimension, or null if it is not possible to * determine for this DimFilter. */ + @Nullable RangeSet getDimensionRangeSet(String dimension); /** diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java index c980adb92cfb..3fcd719e25f5 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java @@ -52,6 +52,7 @@ public class DimFilterUtils static final byte COLUMN_COMPARISON_CACHE_ID = 0xD; static final byte EXPRESSION_CACHE_ID = 0xE; static final byte TRUE_CACHE_ID = 0xF; + static final byte FALSE_CACHE_ID = 0x11; public static final byte BLOOM_DIM_FILTER_CACHE_ID = 0x10; public static final byte STRING_SEPARATOR = (byte) 0xFF; diff --git a/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java new file mode 100644 index 000000000000..864e85f7a63a --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.google.common.collect.ImmutableRangeSet; +import com.google.common.collect.RangeSet; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.filter.FalseFilter; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.Set; + +public class FalseDimFilter implements DimFilter +{ + @Override + public DimFilter optimize() + { + return this; + } + + @Override + public Filter toFilter() + { + return FalseFilter.instance(); + } + + @Nullable + @Override + public RangeSet getDimensionRangeSet(String dimension) + { + return ImmutableRangeSet.of(); + } + + @Override + public Set getRequiredColumns() + { + return Collections.emptySet(); + } + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder(DimFilterUtils.FALSE_CACHE_ID).build(); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index d97d08644e1f..32b596d789c4 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -20,6 +20,7 @@ package org.apache.druid.query.filter; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; @@ -32,13 +33,14 @@ import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; +import com.google.common.hash.Hasher; +import com.google.common.hash.Hashing; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.lookup.LookupExtractionFn; @@ -47,14 +49,15 @@ import org.apache.druid.segment.filter.InFilter; import javax.annotation.Nullable; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.stream.Collectors; public class InDimFilter implements DimFilter { @@ -63,7 +66,7 @@ public class InDimFilter implements DimFilter public static final int NUMERIC_HASHING_THRESHOLD = 16; // Values can contain `null` object - private final SortedSet values; + private final Set values; private final String dimension; @Nullable private final ExtractionFn extractionFn; @@ -73,6 +76,9 @@ public class InDimFilter implements DimFilter private final Supplier floatPredicateSupplier; private final Supplier doublePredicateSupplier; + @JsonIgnore + private byte[] cacheKey; + @JsonCreator public InDimFilter( @JsonProperty("dimension") String dimension, @@ -82,11 +88,13 @@ public InDimFilter( ) { Preconditions.checkNotNull(dimension, "dimension can not be null"); - Preconditions.checkArgument(values != null, "values can not be null"); + Preconditions.checkArgument(values != null && !values.isEmpty(), "values can not be null"); - this.values = new TreeSet<>(Comparators.naturalNullsFirst()); - for (String value : values) { - this.values.add(NullHandling.emptyToNullIfNeeded(value)); + // The values set can be huge. Try to avoid copying the set if possible. + if (values instanceof Set && values.stream().noneMatch(NullHandling::needsEmptyToNull)) { + this.values = Collections.unmodifiableSet((Set) values); + } else { + this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet()); } this.dimension = dimension; this.extractionFn = extractionFn; @@ -115,6 +123,7 @@ public Set getValues() } @Nullable + @JsonInclude(JsonInclude.Include.NON_NULL) @JsonProperty public ExtractionFn getExtractionFn() { @@ -132,31 +141,42 @@ public FilterTuning getFilterTuning() @Override public byte[] getCacheKey() { - boolean hasNull = false; - for (String value : values) { - if (value == null) { - hasNull = true; - break; - } + if (cacheKey == null) { + final boolean hasNull = values.stream().anyMatch(Objects::isNull); + final List sortedValues = new ArrayList<>(values); + Collections.sort(sortedValues); + final Hasher hasher = Hashing.sha256().newHasher(); + sortedValues.forEach(v -> { + if (v == null) { + hasher.putInt(0); + } else { + hasher.putString(v, StandardCharsets.UTF_8); + } + }); + cacheKey = new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID) + .appendString(dimension) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByte(hasNull ? NullHandling.IS_NULL_BYTE : NullHandling.IS_NOT_NULL_BYTE) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(hasher.hash().asBytes()) + .build(); } - return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID) - .appendString(dimension) - .appendByte(DimFilterUtils.STRING_SEPARATOR) - .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) - .appendByte(DimFilterUtils.STRING_SEPARATOR) - .appendByte(hasNull ? NullHandling.IS_NULL_BYTE : NullHandling.IS_NOT_NULL_BYTE) - .appendByte(DimFilterUtils.STRING_SEPARATOR) - .appendStrings(values).build(); + return cacheKey; } @Override public DimFilter optimize() { InDimFilter inFilter = optimizeLookup(); + if (NullHandling.sqlCompatible() && inFilter.values.contains(null)) { + return new FalseDimFilter(); + } if (inFilter.values.size() == 1) { return new SelectorDimFilter( inFilter.dimension, - inFilter.values.first(), + inFilter.values.iterator().next(), inFilter.getExtractionFn(), filterTuning ); @@ -215,6 +235,7 @@ public Filter toFilter() ); } + @Nullable @Override public RangeSet getDimensionRangeSet(String dimension) { @@ -302,7 +323,7 @@ private DruidLongPredicate createLongPredicate() // This supplier must be thread-safe, since this DimFilter will be accessed in the query runners. private Supplier getLongPredicateSupplier() { - Supplier longPredicate = () -> createLongPredicate(); + Supplier longPredicate = this::createLongPredicate; return Suppliers.memoize(longPredicate); } @@ -330,7 +351,7 @@ private DruidFloatPredicate createFloatPredicate() private Supplier getFloatPredicateSupplier() { - Supplier floatPredicate = () -> createFloatPredicate(); + Supplier floatPredicate = this::createFloatPredicate; return Suppliers.memoize(floatPredicate); } @@ -358,7 +379,7 @@ private DruidDoublePredicate createDoublePredicate() private Supplier getDoublePredicateSupplier() { - Supplier doublePredicate = () -> createDoublePredicate(); + Supplier doublePredicate = this::createDoublePredicate; return Suppliers.memoize(doublePredicate); } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java index 762ad72ea5fe..a11a42fac99f 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java @@ -34,6 +34,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; /** */ @@ -83,7 +84,15 @@ public byte[] getCacheKey() public DimFilter optimize() { List elements = DimFilters.optimize(fields); - return elements.size() == 1 ? elements.get(0) : new OrDimFilter(elements); + if (elements.size() == 1) { + return elements.get(0); + } else if (elements.stream().anyMatch(filter -> filter instanceof TrueDimFilter)) { + return new TrueDimFilter(); + } else { + return new OrDimFilter( + elements.stream().filter(filter -> filter instanceof FalseDimFilter).collect(Collectors.toList()) + ); + } } @Override diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterSerDesrTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterSerDesrTest.java deleted file mode 100644 index 691769aa7ba4..000000000000 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterSerDesrTest.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.filter; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.inject.Injector; -import com.google.inject.Key; -import org.apache.druid.guice.GuiceInjectors; -import org.apache.druid.guice.annotations.Json; -import org.apache.druid.query.extraction.RegexDimExtractionFn; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; - -public class InDimFilterSerDesrTest -{ - private static ObjectMapper mapper; - - private final String serializedFilter = - "{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"bad\",\"good\"],\"extractionFn\":null}"; - - @Before - public void setUp() - { - Injector defaultInjector = GuiceInjectors.makeStartupInjector(); - mapper = defaultInjector.getInstance(Key.get(ObjectMapper.class, Json.class)); - } - - @Test - public void testDeserialization() throws IOException - { - final InDimFilter actualInDimFilter = mapper.readerFor(DimFilter.class).readValue(serializedFilter); - final InDimFilter expectedInDimFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null); - Assert.assertEquals(expectedInDimFilter, actualInDimFilter); - } - - @Test - public void testSerialization() throws IOException - { - final InDimFilter dimInFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null); - final String actualSerializedFilter = mapper.writeValueAsString(dimInFilter); - Assert.assertEquals(serializedFilter, actualSerializedFilter); - } - - @Test - public void testGetCacheKey() - { - final InDimFilter inDimFilter_1 = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null); - final InDimFilter inDimFilter_2 = new InDimFilter("dimTest", Collections.singletonList("good,bad"), null); - Assert.assertNotEquals(inDimFilter_1.getCacheKey(), inDimFilter_2.getCacheKey()); - - RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null); - final InDimFilter inDimFilter_3 = new InDimFilter("dimTest", Arrays.asList("good", "bad"), regexFn); - final InDimFilter inDimFilter_4 = new InDimFilter("dimTest", Collections.singletonList("good,bad"), regexFn); - Assert.assertNotEquals(inDimFilter_3.getCacheKey(), inDimFilter_4.getCacheKey()); - } - - @Test - public void testGetCacheKeyNullValue() throws IOException - { - InDimFilter inDimFilter = mapper.readValue("{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[null]}", InDimFilter.class); - Assert.assertNotNull(inDimFilter.getCacheKey()); - } -} diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java new file mode 100644 index 000000000000..6ac66cc56654 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.query.extraction.RegexDimExtractionFn; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.Set; + +public class InDimFilterTest extends InitializedNullHandlingTest +{ + private ObjectMapper mapper = new DefaultObjectMapper(); + + private final String serializedFilter = + "{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"bad\",\"good\"]}"; + + @Test + public void testDeserialization() throws IOException + { + final InDimFilter actualInDimFilter = mapper.readerFor(DimFilter.class).readValue(serializedFilter); + final InDimFilter expectedInDimFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null); + Assert.assertEquals(expectedInDimFilter, actualInDimFilter); + } + + @Test + public void testSerialization() throws IOException + { + final InDimFilter dimInFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null); + final String actualSerializedFilter = mapper.writeValueAsString(dimInFilter); + Assert.assertEquals(serializedFilter, actualSerializedFilter); + } + + @Test + public void testGetValuesWithValuesSetOfNonEmptyStringsUseTheGivenSet() + { + final Set values = ImmutableSet.of("v1", "v2", "v3"); + final InDimFilter filter = new InDimFilter("dim", values, null); + Assert.assertSame(values, filter.getValues()); + } + + @Test + public void testGetValuesWithValuesSetIncludingEmptyString() + { + final Set values = Sets.newHashSet("v1", "", "v3"); + final InDimFilter filter = new InDimFilter("dim", values, null); + if (NullHandling.replaceWithDefault()) { + Assert.assertNotSame(values, filter.getValues()); + Assert.assertEquals(Sets.newHashSet("v1", null, "v3"), filter.getValues()); + } else { + Assert.assertSame(values, filter.getValues()); + } + } + + @Test + public void testGetCacheKeyReturningSameKeyForValuesOfDifferentOrders() + { + final InDimFilter dimFilter1 = new InDimFilter("dim", ImmutableList.of("v1", "v2"), null); + final InDimFilter dimFilter2 = new InDimFilter("dim", ImmutableList.of("v2", "v1"), null); + Assert.assertArrayEquals(dimFilter1.getCacheKey(), dimFilter2.getCacheKey()); + } + + @Test + public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfLists() + { + final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null); + final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Collections.singletonList("good,bad"), null); + Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey())); + } + + @Test + public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfListsWithExtractFn() + { + RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null); + final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList("good", "bad"), regexFn); + final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Collections.singletonList("good,bad"), regexFn); + Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey())); + } + + @Test + public void testGetCacheKeyNullValue() throws IOException + { + InDimFilter inDimFilter = mapper.readValue( + "{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[null]}", + InDimFilter.class + ); + Assert.assertNotNull(inDimFilter.getCacheKey()); + } + + @Test + public void testGetCacheKeyReturningCachedCacheKey() + { + final InDimFilter filter = new InDimFilter("dim", ImmutableList.of("v1", "v2"), null); + // Compares the array object, not the elements of the array + Assert.assertSame(filter.getCacheKey(), filter.getCacheKey()); + } + + @Test + public void testGetDimensionRangeSetValuesOfDifferentOrdersReturningSameResult() + { + final InDimFilter dimFilter1 = new InDimFilter("dim", ImmutableList.of("v1", "v2", "v3"), null); + final InDimFilter dimFilter2 = new InDimFilter("dim", ImmutableList.of("v3", "v2", "v1"), null); + Assert.assertEquals(dimFilter1.getDimensionRangeSet("dim"), dimFilter2.getDimensionRangeSet("dim")); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java index 677961eacea8..229d7a42dacd 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java @@ -23,9 +23,9 @@ import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.filter.DimFilter; -import org.apache.druid.query.filter.ExpressionDimFilter; +import org.apache.druid.query.filter.FalseDimFilter; +import org.apache.druid.query.filter.TrueDimFilter; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.query.spec.QuerySegmentSpec; import org.apache.druid.segment.column.RowSignature; @@ -36,12 +36,8 @@ public class Filtration { - private static final DimFilter MATCH_NOTHING = new ExpressionDimFilter( - "1 == 2", ExprMacroTable.nil() - ); - private static final DimFilter MATCH_EVERYTHING = new ExpressionDimFilter( - "1 == 1", ExprMacroTable.nil() - ); + private static final DimFilter MATCH_NOTHING = new FalseDimFilter(); + private static final DimFilter MATCH_EVERYTHING = new TrueDimFilter(); // 1) If "dimFilter" is null, it should be ignored and not affect filtration. // 2) There is an implicit AND between "intervals" and "dimFilter" (if dimFilter is non-null). From 18991e14681dfb0d6b1120ef493bd99b4bcee7bc Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 30 Apr 2020 00:29:30 -0700 Subject: [PATCH 02/16] tests --- .../apache/druid/query/filter/AndDimFilterTest.java | 10 ++++++++++ .../org/apache/druid/query/filter/OrDimFilterTest.java | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java index 7aeae013f9ff..4f89cc018f85 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java @@ -65,4 +65,14 @@ public void testToFilterWithDuplicateFilters() ); Assert.assertEquals(expected, dimFilter.toFilter()); } + + @Test + public void testOptimieShortCircuitWithFalseFilter() + { + DimFilter filter = DimFilterTestUtils.and( + DimFilterTestUtils.selector("col1", "1"), + new FalseDimFilter() + ); + Assert.assertTrue(filter.optimize() instanceof FalseDimFilter); + } } diff --git a/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java index 9f02194f840f..99ed12579489 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java @@ -50,4 +50,14 @@ public void testToFilterWithDuplicateFilters() ); Assert.assertEquals(expected, dimFilter.toFilter()); } + + @Test + public void testOptimieShortCircuitWithTrueFilter() + { + DimFilter filter = DimFilterTestUtils.or( + DimFilterTestUtils.selector("col1", "1"), + new TrueDimFilter() + ); + Assert.assertTrue(filter.optimize() instanceof TrueDimFilter); + } } From 0346843569f9bc23169696a1718c0c3b25a9d481 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 30 Apr 2020 00:33:41 -0700 Subject: [PATCH 03/16] more tests --- .../druid/query/filter/InDimFilterTest.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index 6ac66cc56654..1a2bbfe27626 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -129,4 +129,22 @@ public void testGetDimensionRangeSetValuesOfDifferentOrdersReturningSameResult() final InDimFilter dimFilter2 = new InDimFilter("dim", ImmutableList.of("v3", "v2", "v1"), null); Assert.assertEquals(dimFilter1.getDimensionRangeSet("dim"), dimFilter2.getDimensionRangeSet("dim")); } + + @Test + public void testOptimizeShortCircuitWithNull() + { + final InDimFilter filter = new InDimFilter("dim", Sets.newHashSet(null, "v1"), null); + if (NullHandling.sqlCompatible()) { + Assert.assertTrue(filter.optimize() instanceof FalseDimFilter); + } else { + Assert.assertEquals(filter, filter.optimize()); + } + } + + @Test + public void testOptimizeSingleValueInToSelector() + { + final InDimFilter filter = new InDimFilter("dim", Collections.singleton("v1"), null); + Assert.assertEquals(new SelectorDimFilter("dim", "v1", null), filter.optimize()); + } } From 4d6e7a5cbdd370eed0650a8c0ce20ce96101d5d5 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 30 Apr 2020 10:29:05 -0700 Subject: [PATCH 04/16] fix and and or filters --- .../druid/query/filter/AndDimFilter.java | 16 +++++--- .../druid/query/filter/FalseDimFilter.java | 31 ++++++++++++++ .../druid/query/filter/InDimFilter.java | 2 +- .../druid/query/filter/OrDimFilter.java | 20 +++++----- .../druid/query/filter/TrueDimFilter.java | 33 ++++++++++++++- .../FilteredAggregatorFactoryTest.java | 6 +-- .../druid/query/filter/AndDimFilterTest.java | 9 ++++- .../query/filter/FalseDimFilterTest.java | 40 +++++++++++++++++++ .../druid/query/filter/OrDimFilterTest.java | 9 ++++- .../druid/query/filter/TrueDimFilterTest.java | 40 +++++++++++++++++++ .../sql/calcite/filtration/Filtration.java | 4 +- 11 files changed, 185 insertions(+), 25 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java create mode 100644 processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java diff --git a/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java index f5aaab06226c..18df9d67a170 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java @@ -73,15 +73,19 @@ public byte[] getCacheKey() @Override public DimFilter optimize() { - List elements = DimFilters.optimize(fields); - if (elements.size() == 1) { + List elements = DimFilters.optimize(fields) + .stream() + .filter(filter -> !(filter instanceof TrueDimFilter)) + .collect(Collectors.toList()); + if (elements.isEmpty()) { + // All elements were TrueDimFilter after optimization + return TrueDimFilter.instance(); + } else if (elements.size() == 1) { return elements.get(0); } else if (elements.stream().anyMatch(filter -> filter instanceof FalseDimFilter)) { - return new FalseDimFilter(); + return FalseDimFilter.instance(); } else { - return new AndDimFilter( - elements.stream().filter(filter -> filter instanceof TrueDimFilter).collect(Collectors.toList()) - ); + return new AndDimFilter(elements); } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java index 864e85f7a63a..7a1e854593f5 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java @@ -19,6 +19,7 @@ package org.apache.druid.query.filter; +import com.fasterxml.jackson.annotation.JsonCreator; import com.google.common.collect.ImmutableRangeSet; import com.google.common.collect.RangeSet; import org.apache.druid.query.cache.CacheKeyBuilder; @@ -30,6 +31,18 @@ public class FalseDimFilter implements DimFilter { + private static final FalseDimFilter INSTANCE = new FalseDimFilter(); + + @JsonCreator + public static FalseDimFilter instance() + { + return INSTANCE; + } + + private FalseDimFilter() + { + } + @Override public DimFilter optimize() { @@ -60,4 +73,22 @@ public byte[] getCacheKey() { return new CacheKeyBuilder(DimFilterUtils.FALSE_CACHE_ID).build(); } + + @Override + public int hashCode() + { + return DimFilterUtils.FALSE_CACHE_ID; + } + + @Override + public boolean equals(Object o) + { + return o == this; + } + + @Override + public String toString() + { + return "FALSE"; + } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 32b596d789c4..3cc2e36eb95b 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -171,7 +171,7 @@ public DimFilter optimize() { InDimFilter inFilter = optimizeLookup(); if (NullHandling.sqlCompatible() && inFilter.values.contains(null)) { - return new FalseDimFilter(); + return FalseDimFilter.instance(); } if (inFilter.values.size() == 1) { return new SelectorDimFilter( diff --git a/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java index a11a42fac99f..ca8c105b7a3d 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java @@ -45,9 +45,7 @@ public class OrDimFilter implements DimFilter private final List fields; @JsonCreator - public OrDimFilter( - @JsonProperty("fields") List fields - ) + public OrDimFilter(@JsonProperty("fields") List fields) { fields = DimFilters.filterNulls(fields); Preconditions.checkArgument(fields.size() > 0, "OR operator requires at least one field"); @@ -83,15 +81,19 @@ public byte[] getCacheKey() @Override public DimFilter optimize() { - List elements = DimFilters.optimize(fields); - if (elements.size() == 1) { + List elements = DimFilters.optimize(fields) + .stream() + .filter(filter -> !(filter instanceof FalseDimFilter)) + .collect(Collectors.toList()); + if (elements.isEmpty()) { + // All elements were FalseDimFilter after optimization + return FalseDimFilter.instance(); + } else if (elements.size() == 1) { return elements.get(0); } else if (elements.stream().anyMatch(filter -> filter instanceof TrueDimFilter)) { - return new TrueDimFilter(); + return TrueDimFilter.instance(); } else { - return new OrDimFilter( - elements.stream().filter(filter -> filter instanceof FalseDimFilter).collect(Collectors.toList()) - ); + return new OrDimFilter(elements); } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java index 22543580e47b..2e7bd2a72a07 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java @@ -19,6 +19,7 @@ package org.apache.druid.query.filter; +import com.fasterxml.jackson.annotation.JsonCreator; import com.google.common.collect.RangeSet; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.segment.filter.TrueFilter; @@ -26,10 +27,20 @@ import java.util.Collections; import java.util.Set; -/** - */ public class TrueDimFilter implements DimFilter { + private static final TrueDimFilter INSTANCE = new TrueDimFilter(); + + @JsonCreator + public static TrueDimFilter instance() + { + return INSTANCE; + } + + private TrueDimFilter() + { + } + @Override public byte[] getCacheKey() { @@ -59,4 +70,22 @@ public Set getRequiredColumns() { return Collections.emptySet(); } + + @Override + public int hashCode() + { + return DimFilterUtils.TRUE_CACHE_ID; + } + + @Override + public boolean equals(Object o) + { + return o == this; + } + + @Override + public String toString() + { + return "TRUE"; + } } diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/FilteredAggregatorFactoryTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/FilteredAggregatorFactoryTest.java index 4db7b0e5c542..879a252b4948 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/FilteredAggregatorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/FilteredAggregatorFactoryTest.java @@ -30,17 +30,17 @@ public void testSimpleNaming() { Assert.assertEquals("overrideName", new FilteredAggregatorFactory( new CountAggregatorFactory("foo"), - new TrueDimFilter(), + TrueDimFilter.instance(), "overrideName" ).getName()); Assert.assertEquals("delegateName", new FilteredAggregatorFactory( new CountAggregatorFactory("delegateName"), - new TrueDimFilter(), + TrueDimFilter.instance(), "" ).getName()); Assert.assertEquals("delegateName", new FilteredAggregatorFactory( new CountAggregatorFactory("delegateName"), - new TrueDimFilter(), + TrueDimFilter.instance(), null ).getName()); } diff --git a/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java index 4f89cc018f85..8b919820bc82 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java @@ -71,8 +71,15 @@ public void testOptimieShortCircuitWithFalseFilter() { DimFilter filter = DimFilterTestUtils.and( DimFilterTestUtils.selector("col1", "1"), - new FalseDimFilter() + FalseDimFilter.instance() ); Assert.assertTrue(filter.optimize() instanceof FalseDimFilter); } + + @Test + public void testOptimizeOringTrueFilters() + { + DimFilter filter = DimFilterTestUtils.and(TrueDimFilter.instance(), TrueDimFilter.instance()); + Assert.assertSame(TrueDimFilter.instance(), filter.optimize()); + } } diff --git a/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java new file mode 100644 index 000000000000..97d856d22e7f --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class FalseDimFilterTest +{ + @Test + public void testSerde() throws IOException + { + final ObjectMapper mapper = new DefaultObjectMapper(); + final FalseDimFilter original = FalseDimFilter.instance(); + final byte[] bytes = mapper.writeValueAsBytes(original); + final FalseDimFilter fromBytes = (FalseDimFilter) mapper.readValue(bytes, DimFilter.class); + Assert.assertSame(original, fromBytes); + } +} diff --git a/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java index 99ed12579489..c1584a6f3d81 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java @@ -56,8 +56,15 @@ public void testOptimieShortCircuitWithTrueFilter() { DimFilter filter = DimFilterTestUtils.or( DimFilterTestUtils.selector("col1", "1"), - new TrueDimFilter() + TrueDimFilter.instance() ); Assert.assertTrue(filter.optimize() instanceof TrueDimFilter); } + + @Test + public void testOptimizeOringFalseFilters() + { + DimFilter filter = DimFilterTestUtils.or(FalseDimFilter.instance(), FalseDimFilter.instance()); + Assert.assertSame(FalseDimFilter.instance(), filter.optimize()); + } } diff --git a/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java new file mode 100644 index 000000000000..f80701cfa20a --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class TrueDimFilterTest +{ + @Test + public void testSerde() throws IOException + { + final ObjectMapper mapper = new DefaultObjectMapper(); + final TrueDimFilter original = TrueDimFilter.instance(); + final byte[] bytes = mapper.writeValueAsBytes(original); + final TrueDimFilter fromBytes = (TrueDimFilter) mapper.readValue(bytes, DimFilter.class); + Assert.assertSame(original, fromBytes); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java index 229d7a42dacd..8f2f7604fbca 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java @@ -36,8 +36,8 @@ public class Filtration { - private static final DimFilter MATCH_NOTHING = new FalseDimFilter(); - private static final DimFilter MATCH_EVERYTHING = new TrueDimFilter(); + private static final DimFilter MATCH_NOTHING = FalseDimFilter.instance(); + private static final DimFilter MATCH_EVERYTHING = TrueDimFilter.instance(); // 1) If "dimFilter" is null, it should be ignored and not affect filtration. // 2) There is an implicit AND between "intervals" and "dimFilter" (if dimFilter is non-null). From e97ce94d189bdbe5d61cb4fd5378f73a08f1e323 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 30 Apr 2020 11:40:43 -0700 Subject: [PATCH 05/16] fix build --- .../firehose/IngestSegmentFirehoseFactoryTimelineTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java index 7c2760cafbed..e8dff5dc4bf1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java @@ -348,7 +348,7 @@ public DataSegment fetchUsedSegment(String dataSource, String segmentId) DATA_SOURCE, testCase.interval, null, - new TrueDimFilter(), + TrueDimFilter.instance(), Arrays.asList(DIMENSIONS), Arrays.asList(METRICS), // Split as much as possible From 2e40df19324f958acff7f03c01a1f852e2926018 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 30 Apr 2020 17:03:22 -0700 Subject: [PATCH 06/16] false and true vector matchers --- .../druid/query/filter/InDimFilter.java | 6 ++- .../filter/vector/FalseVectorMatcher.java | 51 ++++++++++++++++++ .../filter/vector/TrueVectorMatcher.java | 52 +++++++++++++++++++ .../filter/vector/VectorValueMatcher.java | 1 + .../druid/segment/filter/FalseFilter.java | 15 ++++++ .../druid/segment/filter/TrueFilter.java | 15 ++++++ 6 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/FalseVectorMatcher.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/vector/TrueVectorMatcher.java diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 3cc2e36eb95b..f481982d9b6f 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -88,7 +88,7 @@ public InDimFilter( ) { Preconditions.checkNotNull(dimension, "dimension can not be null"); - Preconditions.checkArgument(values != null && !values.isEmpty(), "values can not be null"); + Preconditions.checkArgument(values != null, "values can not be null"); // The values set can be huge. Try to avoid copying the set if possible. if (values instanceof Set && values.stream().noneMatch(NullHandling::needsEmptyToNull)) { @@ -170,6 +170,10 @@ public byte[] getCacheKey() public DimFilter optimize() { InDimFilter inFilter = optimizeLookup(); + + if (inFilter.values.isEmpty()) { + return FalseDimFilter.instance(); + } if (NullHandling.sqlCompatible() && inFilter.values.contains(null)) { return FalseDimFilter.instance(); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/FalseVectorMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/FalseVectorMatcher.java new file mode 100644 index 000000000000..398f25667d85 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/FalseVectorMatcher.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +public class FalseVectorMatcher implements VectorValueMatcher +{ + private final int maxVectorSize; + + private int currentVectorSize; + + public FalseVectorMatcher(int maxVectorSize) + { + this.maxVectorSize = maxVectorSize; + } + + @Override + public ReadableVectorMatch match(ReadableVectorMatch mask) + { + currentVectorSize = mask.getSelectionSize(); + return VectorMatch.allFalse(); + } + + @Override + public int getMaxVectorSize() + { + return maxVectorSize; + } + + @Override + public int getCurrentVectorSize() + { + return currentVectorSize; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/TrueVectorMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/TrueVectorMatcher.java new file mode 100644 index 000000000000..73eaa04f98b2 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/TrueVectorMatcher.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +public class TrueVectorMatcher implements VectorValueMatcher +{ + private final int maxVectorSize; + + private int currentVectorSize; + + public TrueVectorMatcher(int maxVectorSize) + { + this.maxVectorSize = maxVectorSize; + } + + @Override + public ReadableVectorMatch match(ReadableVectorMatch mask) + { + currentVectorSize = mask.getSelectionSize(); + // The given mask is all true for its valid selections. + return mask; + } + + @Override + public int getMaxVectorSize() + { + return maxVectorSize; + } + + @Override + public int getCurrentVectorSize() + { + return currentVectorSize; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java index 242166115b99..cf0f7beade1b 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java @@ -33,6 +33,7 @@ public interface VectorValueMatcher extends VectorSizeInspector { /** * Examine the current vector and return a match indicating what is accepted. + * This method should be called before calling {@link #getCurrentVectorSize()} for the current vector. * * @param mask must not be null; use {@link VectorMatch#allTrue} if you don't need a mask. * diff --git a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java index e143b70235e4..6e8c9c987f45 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java @@ -23,8 +23,11 @@ import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.FalseVectorMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.Collections; import java.util.Set; @@ -60,6 +63,12 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) return FalseValueMatcher.instance(); } + @Override + public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) + { + return new FalseVectorMatcher(factory.getMaxVectorSize()); + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { @@ -78,6 +87,12 @@ public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, Bitm return true; } + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public Set getRequiredColumns() { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java index 58cda8c1c018..03f28c42300b 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java @@ -23,8 +23,11 @@ import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.TrueVectorMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.Collections; import java.util.Set; @@ -56,6 +59,12 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) return TrueValueMatcher.instance(); } + @Override + public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) + { + return new TrueVectorMatcher(factory.getMaxVectorSize()); + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { @@ -74,6 +83,12 @@ public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, Bitm return true; } + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public Set getRequiredColumns() { From d1f3d171230faf538da88d060e12e68ec91a7427 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 30 Apr 2020 18:02:15 -0700 Subject: [PATCH 07/16] fix vector matchers --- .../query/filter/vector/FalseVectorMatcher.java | 15 +++++++-------- .../query/filter/vector/TrueVectorMatcher.java | 15 +++++++-------- .../query/filter/vector/VectorValueMatcher.java | 1 - .../apache/druid/segment/filter/FalseFilter.java | 2 +- .../apache/druid/segment/filter/TrueFilter.java | 2 +- ...QueryableIndexVectorColumnSelectorFactory.java | 4 ++-- .../vector/VectorColumnSelectorFactory.java | 10 +++++++++- 7 files changed, 27 insertions(+), 22 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/FalseVectorMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/FalseVectorMatcher.java index 398f25667d85..9d20c1dd25ee 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/FalseVectorMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/FalseVectorMatcher.java @@ -19,33 +19,32 @@ package org.apache.druid.query.filter.vector; +import org.apache.druid.segment.vector.VectorSizeInspector; + public class FalseVectorMatcher implements VectorValueMatcher { - private final int maxVectorSize; - - private int currentVectorSize; + private final VectorSizeInspector vectorSizeInspector; - public FalseVectorMatcher(int maxVectorSize) + public FalseVectorMatcher(VectorSizeInspector vectorSizeInspector) { - this.maxVectorSize = maxVectorSize; + this.vectorSizeInspector = vectorSizeInspector; } @Override public ReadableVectorMatch match(ReadableVectorMatch mask) { - currentVectorSize = mask.getSelectionSize(); return VectorMatch.allFalse(); } @Override public int getMaxVectorSize() { - return maxVectorSize; + return vectorSizeInspector.getMaxVectorSize(); } @Override public int getCurrentVectorSize() { - return currentVectorSize; + return vectorSizeInspector.getCurrentVectorSize(); } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/TrueVectorMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/TrueVectorMatcher.java index 73eaa04f98b2..24b2b273a46c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/TrueVectorMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/TrueVectorMatcher.java @@ -19,21 +19,20 @@ package org.apache.druid.query.filter.vector; +import org.apache.druid.segment.vector.VectorSizeInspector; + public class TrueVectorMatcher implements VectorValueMatcher { - private final int maxVectorSize; - - private int currentVectorSize; + private final VectorSizeInspector vectorSizeInspector; - public TrueVectorMatcher(int maxVectorSize) + public TrueVectorMatcher(VectorSizeInspector vectorSizeInspector) { - this.maxVectorSize = maxVectorSize; + this.vectorSizeInspector = vectorSizeInspector; } @Override public ReadableVectorMatch match(ReadableVectorMatch mask) { - currentVectorSize = mask.getSelectionSize(); // The given mask is all true for its valid selections. return mask; } @@ -41,12 +40,12 @@ public ReadableVectorMatch match(ReadableVectorMatch mask) @Override public int getMaxVectorSize() { - return maxVectorSize; + return vectorSizeInspector.getMaxVectorSize(); } @Override public int getCurrentVectorSize() { - return currentVectorSize; + return vectorSizeInspector.getCurrentVectorSize(); } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java index cf0f7beade1b..242166115b99 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java @@ -33,7 +33,6 @@ public interface VectorValueMatcher extends VectorSizeInspector { /** * Examine the current vector and return a match indicating what is accepted. - * This method should be called before calling {@link #getCurrentVectorSize()} for the current vector. * * @param mask must not be null; use {@link VectorMatch#allTrue} if you don't need a mask. * diff --git a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java index 6e8c9c987f45..ecfe4d795b7f 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java @@ -66,7 +66,7 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) @Override public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) { - return new FalseVectorMatcher(factory.getMaxVectorSize()); + return new FalseVectorMatcher(factory.getVectorSizeInspector()); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java index 03f28c42300b..2dde512ce107 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java @@ -62,7 +62,7 @@ public ValueMatcher makeMatcher(ColumnSelectorFactory factory) @Override public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) { - return new TrueVectorMatcher(factory.getMaxVectorSize()); + return new TrueVectorMatcher(factory.getVectorSizeInspector()); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index 83110499862f..b81c371fc854 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -66,9 +66,9 @@ public QueryableIndexVectorColumnSelectorFactory( } @Override - public int getMaxVectorSize() + public VectorSizeInspector getVectorSizeInspector() { - return offset.getMaxVectorSize(); + return offset; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java index 1634cc6ab3a1..bea845f7f5fb 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java @@ -31,12 +31,20 @@ */ public interface VectorColumnSelectorFactory { + /** + * Returns a {@link VectorSizeInspector} for the {@link VectorCursor} that generated this object. + */ + VectorSizeInspector getVectorSizeInspector(); + /** * Returns the maximum vector size for the {@link VectorCursor} that generated this object. * * @see VectorCursor#getMaxVectorSize() */ - int getMaxVectorSize(); + default int getMaxVectorSize() + { + return getVectorSizeInspector().getMaxVectorSize(); + } /** * Returns a string-typed, single-value-per-row column selector. From e4e4590ac2feffddaf917bfaef7eb36a17f97d7b Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 30 Apr 2020 18:36:42 -0700 Subject: [PATCH 08/16] checkstyle --- .../main/java/org/apache/druid/segment/filter/FalseFilter.java | 2 +- .../main/java/org/apache/druid/segment/filter/TrueFilter.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java index ecfe4d795b7f..902dae48ab11 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/FalseFilter.java @@ -88,7 +88,7 @@ public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, Bitm } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher() { return true; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java index 2dde512ce107..bb35b4f5a314 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/TrueFilter.java @@ -84,7 +84,7 @@ public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, Bitm } @Override - public boolean canVectorizeMatcher() + public boolean canVectorizeMatcher() { return true; } From 07cbeff02c36dab31fc90cf5dd5eb5b75041f69d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 30 Apr 2020 21:04:54 -0700 Subject: [PATCH 09/16] in filter null handling --- .../java/org/apache/druid/query/filter/InDimFilter.java | 5 +---- .../java/org/apache/druid/query/filter/NotDimFilter.java | 9 ++++++++- .../java/org/apache/druid/segment/filter/InFilter.java | 5 +++++ .../org/apache/druid/segment/filter/SelectorFilter.java | 5 +++++ 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index f481982d9b6f..9fb72dbe8e93 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -92,7 +92,7 @@ public InDimFilter( // The values set can be huge. Try to avoid copying the set if possible. if (values instanceof Set && values.stream().noneMatch(NullHandling::needsEmptyToNull)) { - this.values = Collections.unmodifiableSet((Set) values); + this.values = (Set) values; } else { this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet()); } @@ -174,9 +174,6 @@ public DimFilter optimize() if (inFilter.values.isEmpty()) { return FalseDimFilter.instance(); } - if (NullHandling.sqlCompatible() && inFilter.values.contains(null)) { - return FalseDimFilter.instance(); - } if (inFilter.values.size() == 1) { return new SelectorDimFilter( inFilter.dimension, diff --git a/processing/src/main/java/org/apache/druid/query/filter/NotDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NotDimFilter.java index 039667eb7edb..0cd2969fe698 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/NotDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/NotDimFilter.java @@ -60,10 +60,17 @@ public byte[] getCacheKey() return ByteBuffer.allocate(1 + subKey.length).put(DimFilterUtils.NOT_CACHE_ID).put(subKey).array(); } + @SuppressWarnings("ObjectEquality") @Override public DimFilter optimize() { - return new NotDimFilter(this.getField().optimize()); + final DimFilter optimized = this.getField().optimize(); + if (optimized == FalseDimFilter.instance()) { + return TrueDimFilter.instance(); + } else if (optimized == TrueDimFilter.instance()) { + return FalseDimFilter.instance(); + } + return new NotDimFilter(optimized); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java index 6e1da91f11f0..d609a5418ebe 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java @@ -54,6 +54,11 @@ * given {@link #values}. * For multi-valued dimension, this filter returns true if one of the dimension values matches to one of the * given {@link #values}. + * + * In SQL-compatible null handling mode, this filter is equivalent to {@code (dimension IN [values])} or + * {@code (dimension IN [non-null values] OR dimension IS NULL)} when {@link #values} contains nulls. + * In default null handling mode, this filter is equivalent to {@code (dimension IN [values])} or + * {@code (dimension IN [non-null values, ''])} when {@link #values} contains nulls. */ public class InFilter implements Filter { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java index 75874278d7a0..f56bfacdab69 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java @@ -38,6 +38,11 @@ import java.util.Set; /** + * This filter is to select the rows where the {@link #dimension} has the {@link #value}. The value can be null. + * In SQL-compatible null handling mode, this filter is equivalent to {@code dimension = value} + * or {@code dimension IS NULL} when the value is null. + * In default null handling mode, this filter is equivalent to {@code dimension = value} or + * {@code dimension = ''} when the value is null. */ public class SelectorFilter implements Filter { From 5e67d9cb7bd60bf9d36104371e45c0f97968b285 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 30 Apr 2020 21:22:30 -0700 Subject: [PATCH 10/16] remove wrong test --- .../apache/druid/query/filter/InDimFilterTest.java | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index 1a2bbfe27626..a10fcd02392f 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -130,17 +130,6 @@ public void testGetDimensionRangeSetValuesOfDifferentOrdersReturningSameResult() Assert.assertEquals(dimFilter1.getDimensionRangeSet("dim"), dimFilter2.getDimensionRangeSet("dim")); } - @Test - public void testOptimizeShortCircuitWithNull() - { - final InDimFilter filter = new InDimFilter("dim", Sets.newHashSet(null, "v1"), null); - if (NullHandling.sqlCompatible()) { - Assert.assertTrue(filter.optimize() instanceof FalseDimFilter); - } else { - Assert.assertEquals(filter, filter.optimize()); - } - } - @Test public void testOptimizeSingleValueInToSelector() { From ef6938d5844818e6a388b4cb4471b2ef18648dbf Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 4 May 2020 10:30:14 -0700 Subject: [PATCH 11/16] address comments --- .../org/apache/druid/query/filter/InDimFilter.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 9fb72dbe8e93..b1f4742565e9 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -82,6 +82,8 @@ public class InDimFilter implements DimFilter @JsonCreator public InDimFilter( @JsonProperty("dimension") String dimension, + // This 'values' collection instance can be reused if possible to avoid copying a big collection. + // Callers should _not_ modify the collection after it is passed to this constructor. @JsonProperty("values") Collection values, @JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn, @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning @@ -91,7 +93,8 @@ public InDimFilter( Preconditions.checkArgument(values != null, "values can not be null"); // The values set can be huge. Try to avoid copying the set if possible. - if (values instanceof Set && values.stream().noneMatch(NullHandling::needsEmptyToNull)) { + if (values instanceof Set + && (NullHandling.sqlCompatible() || values.stream().noneMatch(NullHandling::needsEmptyToNull))) { this.values = (Set) values; } else { this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet()); @@ -142,17 +145,18 @@ public FilterTuning getFilterTuning() public byte[] getCacheKey() { if (cacheKey == null) { - final boolean hasNull = values.stream().anyMatch(Objects::isNull); final List sortedValues = new ArrayList<>(values); Collections.sort(sortedValues); final Hasher hasher = Hashing.sha256().newHasher(); - sortedValues.forEach(v -> { + boolean hasNull = false; + for (String v : sortedValues) { if (v == null) { + hasNull = true; hasher.putInt(0); } else { hasher.putString(v, StandardCharsets.UTF_8); } - }); + } cacheKey = new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID) .appendString(dimension) .appendByte(DimFilterUtils.STRING_SEPARATOR) From 9ce40f929347443c17ca15a3143f40417ce5324c Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 4 May 2020 10:42:55 -0700 Subject: [PATCH 12/16] remove unnecessary null check --- .../java/org/apache/druid/query/filter/InDimFilter.java | 8 +++----- .../org/apache/druid/query/filter/InDimFilterTest.java | 8 ++++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index b1f4742565e9..6f6ad7e8a251 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -30,6 +30,7 @@ import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Ordering; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; @@ -53,7 +54,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.Objects; import java.util.Set; @@ -146,12 +147,10 @@ public byte[] getCacheKey() { if (cacheKey == null) { final List sortedValues = new ArrayList<>(values); - Collections.sort(sortedValues); + sortedValues.sort(Comparator.nullsFirst(Ordering.natural())); final Hasher hasher = Hashing.sha256().newHasher(); - boolean hasNull = false; for (String v : sortedValues) { if (v == null) { - hasNull = true; hasher.putInt(0); } else { hasher.putString(v, StandardCharsets.UTF_8); @@ -162,7 +161,6 @@ public byte[] getCacheKey() .appendByte(DimFilterUtils.STRING_SEPARATOR) .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) .appendByte(DimFilterUtils.STRING_SEPARATOR) - .appendByte(hasNull ? NullHandling.IS_NULL_BYTE : NullHandling.IS_NOT_NULL_BYTE) .appendByte(DimFilterUtils.STRING_SEPARATOR) .appendByteArray(hasher.hash().asBytes()) .build(); diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index a10fcd02392f..105247151a3e 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -114,6 +114,14 @@ public void testGetCacheKeyNullValue() throws IOException Assert.assertNotNull(inDimFilter.getCacheKey()); } + @Test + public void testGetCacheKeyReturningDifferentKeysWithAndWithoutNull() + { + InDimFilter filter1 = new InDimFilter("dim", Arrays.asList("val", null), null); + InDimFilter filter2 = new InDimFilter("dim", Collections.singletonList("val"), null); + Assert.assertFalse(Arrays.equals(filter1.getCacheKey(), filter2.getCacheKey())); + } + @Test public void testGetCacheKeyReturningCachedCacheKey() { From f033ccbcd24bab9781e88609f5f77933031a5007 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 4 May 2020 18:18:11 -0700 Subject: [PATCH 13/16] redundant separator --- .../src/main/java/org/apache/druid/query/filter/InDimFilter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 6f6ad7e8a251..d7cf0572e393 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -161,7 +161,6 @@ public byte[] getCacheKey() .appendByte(DimFilterUtils.STRING_SEPARATOR) .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) .appendByte(DimFilterUtils.STRING_SEPARATOR) - .appendByte(DimFilterUtils.STRING_SEPARATOR) .appendByteArray(hasher.hash().asBytes()) .build(); } From fe69355a87a1f7419e4b8b23b99b95d37ea8fd3c Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Tue, 5 May 2020 13:54:11 -0700 Subject: [PATCH 14/16] address comments --- .../java/org/apache/druid/query/Druids.java | 7 +++++-- .../druid/query/filter/FalseDimFilter.java | 5 +++-- .../apache/druid/query/filter/InDimFilter.java | 13 +++++++------ .../druid/query/filter/SelectorDimFilter.java | 2 +- .../druid/query/filter/TrueDimFilter.java | 5 +++-- .../druid/query/topn/TopNQueryBuilder.java | 7 +++++-- .../druid/query/filter/AndDimFilterTest.java | 18 ++++++++++++++++++ .../druid/query/filter/FalseDimFilterTest.java | 7 +++++++ .../druid/query/filter/OrDimFilterTest.java | 18 ++++++++++++++++++ .../druid/query/filter/TrueDimFilterTest.java | 7 +++++++ .../filtration/ConvertSelectorsToIns.java | 4 +++- 11 files changed, 77 insertions(+), 16 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/Druids.java b/processing/src/main/java/org/apache/druid/query/Druids.java index 1dcd0767af0e..a71331099938 100644 --- a/processing/src/main/java/org/apache/druid/query/Druids.java +++ b/processing/src/main/java/org/apache/druid/query/Druids.java @@ -23,7 +23,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.query.aggregation.AggregatorFactory; @@ -58,6 +58,7 @@ import java.util.EnumSet; import java.util.List; import java.util.Map; +import java.util.Set; /** */ @@ -202,7 +203,9 @@ public TimeseriesQueryBuilder filters(String dimensionName, String value) public TimeseriesQueryBuilder filters(String dimensionName, String value, String... values) { - dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null, null); + final Set filterValues = Sets.newHashSet(value); + filterValues.add(value); + dimFilter = new InDimFilter(dimensionName, filterValues, null, null); return this; } diff --git a/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java index 7a1e854593f5..4b21e5eb4939 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java @@ -32,6 +32,7 @@ public class FalseDimFilter implements DimFilter { private static final FalseDimFilter INSTANCE = new FalseDimFilter(); + private static final byte[] CACHE_KEY = new CacheKeyBuilder(DimFilterUtils.FALSE_CACHE_ID).build(); @JsonCreator public static FalseDimFilter instance() @@ -71,7 +72,7 @@ public Set getRequiredColumns() @Override public byte[] getCacheKey() { - return new CacheKeyBuilder(DimFilterUtils.FALSE_CACHE_ID).build(); + return CACHE_KEY; } @Override @@ -83,7 +84,7 @@ public int hashCode() @Override public boolean equals(Object o) { - return o == this; + return o != null && o.getClass() == this.getClass(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index d7cf0572e393..a82f71a52e85 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -55,6 +55,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -85,7 +86,7 @@ public InDimFilter( @JsonProperty("dimension") String dimension, // This 'values' collection instance can be reused if possible to avoid copying a big collection. // Callers should _not_ modify the collection after it is passed to this constructor. - @JsonProperty("values") Collection values, + @JsonProperty("values") Set values, @JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn, @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning ) @@ -94,9 +95,9 @@ public InDimFilter( Preconditions.checkArgument(values != null, "values can not be null"); // The values set can be huge. Try to avoid copying the set if possible. - if (values instanceof Set - && (NullHandling.sqlCompatible() || values.stream().noneMatch(NullHandling::needsEmptyToNull))) { - this.values = (Set) values; + // Note that we may still need to copy values to a list for caching. See getCacheKey(). + if ((NullHandling.sqlCompatible() || values.stream().noneMatch(NullHandling::needsEmptyToNull))) { + this.values = values; } else { this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet()); } @@ -111,7 +112,7 @@ public InDimFilter( @VisibleForTesting public InDimFilter(String dimension, Collection values, @Nullable ExtractionFn extractionFn) { - this(dimension, values, extractionFn, null); + this(dimension, new HashSet<>(values), extractionFn, null); } @JsonProperty @@ -193,7 +194,7 @@ private InDimFilter optimizeLookup() LookupExtractionFn exFn = (LookupExtractionFn) extractionFn; LookupExtractor lookup = exFn.getLookup(); - final List keys = new ArrayList<>(); + final Set keys = new HashSet<>(); for (String value : values) { // We cannot do an unapply()-based optimization if the selector value diff --git a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java index 91ec334fc1e3..d27f4136ac02 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java @@ -96,7 +96,7 @@ public byte[] getCacheKey() @Override public DimFilter optimize() { - return new InDimFilter(dimension, Collections.singletonList(value), extractionFn, filterTuning).optimize(); + return new InDimFilter(dimension, Collections.singleton(value), extractionFn, filterTuning).optimize(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java index 2e7bd2a72a07..af297103aa71 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java @@ -30,6 +30,7 @@ public class TrueDimFilter implements DimFilter { private static final TrueDimFilter INSTANCE = new TrueDimFilter(); + private static final byte[] CACHE_KEY = new CacheKeyBuilder(DimFilterUtils.TRUE_CACHE_ID).build(); @JsonCreator public static TrueDimFilter instance() @@ -44,7 +45,7 @@ private TrueDimFilter() @Override public byte[] getCacheKey() { - return new CacheKeyBuilder(DimFilterUtils.TRUE_CACHE_ID).build(); + return CACHE_KEY; } @Override @@ -80,7 +81,7 @@ public int hashCode() @Override public boolean equals(Object o) { - return o == this; + return o != null && o.getClass() == this.getClass(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryBuilder.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryBuilder.java index d50c5657686a..c6715df88171 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryBuilder.java +++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryBuilder.java @@ -19,7 +19,7 @@ package org.apache.druid.query.topn; -import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.query.DataSource; @@ -42,6 +42,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Set; /** * A Builder for TopNQuery. @@ -230,7 +231,9 @@ public TopNQueryBuilder filters(String dimensionName, String value) public TopNQueryBuilder filters(String dimensionName, String value, String... values) { - dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null, null); + final Set filterValues = Sets.newHashSet(values); + filterValues.add(value); + dimFilter = new InDimFilter(dimensionName, filterValues, null, null); return this; } diff --git a/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java index 8b919820bc82..e45ed299464c 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/AndDimFilterTest.java @@ -82,4 +82,22 @@ public void testOptimizeOringTrueFilters() DimFilter filter = DimFilterTestUtils.and(TrueDimFilter.instance(), TrueDimFilter.instance()); Assert.assertSame(TrueDimFilter.instance(), filter.optimize()); } + + @Test + public void testOptimizeAndOfSingleFilterUnwrapAnd() + { + DimFilter expected = DimFilterTestUtils.selector("col1", "1"); + DimFilter filter = DimFilterTestUtils.and(expected); + Assert.assertEquals(expected, filter.optimize()); + } + + @Test + public void testOptimizeAndOfMultipleFiltersReturningAsItIs() + { + DimFilter filter = DimFilterTestUtils.and( + DimFilterTestUtils.selector("col1", "1"), + DimFilterTestUtils.selector("col1", "2") + ); + Assert.assertEquals(filter, filter.optimize()); + } } diff --git a/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java index 97d856d22e7f..a512bcac7389 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java @@ -20,6 +20,7 @@ package org.apache.druid.query.filter; import com.fasterxml.jackson.databind.ObjectMapper; +import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.druid.jackson.DefaultObjectMapper; import org.junit.Assert; import org.junit.Test; @@ -37,4 +38,10 @@ public void testSerde() throws IOException final FalseDimFilter fromBytes = (FalseDimFilter) mapper.readValue(bytes, DimFilter.class); Assert.assertSame(original, fromBytes); } + + @Test + public void testEquals() + { + EqualsVerifier.forClass(FalseDimFilter.class).usingGetClass().verify(); + } } diff --git a/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java index c1584a6f3d81..fe080dbf6503 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/OrDimFilterTest.java @@ -67,4 +67,22 @@ public void testOptimizeOringFalseFilters() DimFilter filter = DimFilterTestUtils.or(FalseDimFilter.instance(), FalseDimFilter.instance()); Assert.assertSame(FalseDimFilter.instance(), filter.optimize()); } + + @Test + public void testOptimizeOrOfSingleFilterUnwrapOr() + { + DimFilter expected = DimFilterTestUtils.selector("col1", "1"); + DimFilter filter = DimFilterTestUtils.or(expected); + Assert.assertEquals(expected, filter.optimize()); + } + + @Test + public void testOptimizeOrOfMultipleFiltersReturningAsItIs() + { + DimFilter filter = DimFilterTestUtils.or( + DimFilterTestUtils.selector("col1", "1"), + DimFilterTestUtils.selector("col1", "2") + ); + Assert.assertEquals(filter, filter.optimize()); + } } diff --git a/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java index f80701cfa20a..3c8838278ef2 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java @@ -20,6 +20,7 @@ package org.apache.druid.query.filter; import com.fasterxml.jackson.databind.ObjectMapper; +import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.druid.jackson.DefaultObjectMapper; import org.junit.Assert; import org.junit.Test; @@ -37,4 +38,10 @@ public void testSerde() throws IOException final TrueDimFilter fromBytes = (TrueDimFilter) mapper.readValue(bytes, DimFilter.class); Assert.assertSame(original, fromBytes); } + + @Test + public void testEquals() + { + EqualsVerifier.forClass(TrueDimFilter.class).usingGetClass().verify(); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java index 83da05716c3f..54b2625988b1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java @@ -20,6 +20,7 @@ package org.apache.druid.sql.calcite.filtration; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.InDimFilter; @@ -33,6 +34,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; public class ConvertSelectorsToIns extends BottomUpTransform { @@ -78,7 +80,7 @@ public DimFilter process(DimFilter filter) final List filterList = entry.getValue(); if (filterList.size() > 1) { // We found a simplification. Remove the old filters and add new ones. - final List values = new ArrayList<>(); + final Set values = Sets.newHashSetWithExpectedSize(filterList.size()); for (final SelectorDimFilter selector : filterList) { values.add(selector.getValue()); From ef4f62f289185c91829b6771908f94b49b1e5a2d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 6 May 2020 13:18:50 -0700 Subject: [PATCH 15/16] typo --- processing/src/main/java/org/apache/druid/query/Druids.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/Druids.java b/processing/src/main/java/org/apache/druid/query/Druids.java index a71331099938..522f26b36899 100644 --- a/processing/src/main/java/org/apache/druid/query/Druids.java +++ b/processing/src/main/java/org/apache/druid/query/Druids.java @@ -203,7 +203,7 @@ public TimeseriesQueryBuilder filters(String dimensionName, String value) public TimeseriesQueryBuilder filters(String dimensionName, String value, String... values) { - final Set filterValues = Sets.newHashSet(value); + final Set filterValues = Sets.newHashSet(values); filterValues.add(value); dimFilter = new InDimFilter(dimensionName, filterValues, null, null); return this; From e2dc504e1e8506d491446e0149854b30cc5a42aa Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 6 May 2020 14:06:08 -0700 Subject: [PATCH 16/16] tests --- .../main/java/org/apache/druid/query/filter/InDimFilter.java | 3 +++ .../java/org/apache/druid/query/filter/InDimFilterTest.java | 4 ++-- .../expression/builtin/ArrayOverlapOperatorConversion.java | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index a82f71a52e85..d5ec5588ba38 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -109,6 +109,9 @@ public InDimFilter( this.doublePredicateSupplier = getDoublePredicateSupplier(); } + /** + * This constructor should be called only in unit tests since it creates a new hash set wrapping the given values. + */ @VisibleForTesting public InDimFilter(String dimension, Collection values, @Nullable ExtractionFn extractionFn) { diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index 105247151a3e..9cf5b8f67bdb 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -62,7 +62,7 @@ public void testSerialization() throws IOException public void testGetValuesWithValuesSetOfNonEmptyStringsUseTheGivenSet() { final Set values = ImmutableSet.of("v1", "v2", "v3"); - final InDimFilter filter = new InDimFilter("dim", values, null); + final InDimFilter filter = new InDimFilter("dim", values, null, null); Assert.assertSame(values, filter.getValues()); } @@ -70,7 +70,7 @@ public void testGetValuesWithValuesSetOfNonEmptyStringsUseTheGivenSet() public void testGetValuesWithValuesSetIncludingEmptyString() { final Set values = Sets.newHashSet("v1", "", "v3"); - final InDimFilter filter = new InDimFilter("dim", values, null); + final InDimFilter filter = new InDimFilter("dim", values, null, null); if (NullHandling.replaceWithDefault()) { Assert.assertNotSame(values, filter.getValues()); Assert.assertEquals(Sets.newHashSet("v1", null, "v3"), filter.getValues()); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java index 175670ec8292..cb1ddf526a56 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java @@ -123,7 +123,8 @@ public DimFilter toDruidFilter( return new InDimFilter( simpleExtractionExpr.getSimpleExtraction().getColumn(), Sets.newHashSet(arrayElements), - simpleExtractionExpr.getSimpleExtraction().getExtractionFn() + simpleExtractionExpr.getSimpleExtraction().getExtractionFn(), + null ); } }