From a3f708454a1f8ff93bb54c6f01b77505e3b0904c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 4 Mar 2024 21:32:29 -0800 Subject: [PATCH 01/15] add new typed in filter changes: * adds TypedInFilter which preserves matching sets in the native match value type * SQL planner uses new TypedInFilter when druid.generic.useDefaultValueForNull=false (the default) --- .../druid/benchmark/InFilterBenchmark.java | 43 +- .../druid/benchmark/query/SqlBenchmark.java | 2 +- .../query/SqlNestedDataBenchmark.java | 66 +- extensions-core/druid-ranger-security/pom.xml | 1 + .../apache/druid/query/filter/DimFilter.java | 3 +- .../druid/query/filter/DimFilterUtils.java | 1 + .../druid/query/filter/TypedInFilter.java | 660 ++++++++++++++++++ .../index/IndexedUtf8ValueIndexes.java | 218 +++--- .../semantic/DictionaryEncodedValueIndex.java | 1 - .../index/semantic/StringValueSetIndexes.java | 8 +- .../index/semantic/Utf8ValueSetIndexes.java | 4 +- .../index/semantic/ValueSetIndexes.java | 273 ++++++++ .../ScalarDoubleColumnAndIndexSupplier.java | 71 ++ .../ScalarLongColumnAndIndexSupplier.java | 71 ++ .../serde/StringUtf8ColumnIndexSupplier.java | 4 +- .../druid/segment/filter/InFilterTest.java | 2 +- .../segment/filter/TypedInFilterTests.java | 543 ++++++++++++++ .../ArrayOverlapOperatorConversion.java | 34 +- .../filtration/CollectComparisons.java | 12 +- .../filtration/ConvertSelectorsToIns.java | 41 +- .../planner/SqlParameterizerShuttle.java | 2 + .../sql/calcite/rule/ReverseLookupRule.java | 8 +- .../sql/calcite/BaseCalciteQueryTest.java | 23 +- .../sql/calcite/CalciteArraysQueryTest.java | 55 +- .../sql/calcite/CalciteJoinQueryTest.java | 66 +- .../CalciteLookupFunctionQueryTest.java | 60 +- .../CalciteMultiValueStringQueryTest.java | 9 +- .../calcite/CalciteNestedDataQueryTest.java | 19 +- .../calcite/CalciteParameterQueryTest.java | 7 +- .../druid/sql/calcite/CalciteQueryTest.java | 80 +-- .../sql/calcite/CalciteSelectQueryTest.java | 2 +- 
.../sql/calcite/CalciteSubqueryTest.java | 10 +- .../sql/calcite/CalciteUnionQueryTest.java | 10 +- 33 files changed, 2018 insertions(+), 391 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java create mode 100644 processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java create mode 100644 processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java index 7bdcaf1c6abd..821209f6d8a4 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java @@ -28,6 +28,8 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.filter.ColumnIndexSelector; import org.apache.druid.query.filter.InDimFilter; +import org.apache.druid.query.filter.TypedInFilter; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; @@ -53,8 +55,8 @@ @State(Scope.Benchmark) @Fork(value = 1) -@Warmup(iterations = 10) -@Measurement(iterations = 10) +@Warmup(iterations = 2) +@Measurement(iterations = 3) public class InFilterBenchmark { static { @@ -65,6 +67,8 @@ public class InFilterBenchmark private InDimFilter inFilter; private InDimFilter endInDimFilter; + private TypedInFilter newInFilter; + private TypedInFilter newEndInFilter; // cardinality of the dictionary. it will contain this many ints (as strings, of course), starting at START_INT, // even numbers only. 
@@ -110,12 +114,29 @@ public void setup() "dummy", IntStream.range(START_INT, START_INT + filterSize).mapToObj(String::valueOf).collect(Collectors.toSet()) ); + newInFilter = (TypedInFilter) new TypedInFilter( + "dummy", + ColumnType.STRING, + IntStream.range(START_INT, START_INT + filterSize).mapToObj(String::valueOf).collect(Collectors.toList()), + null, + null + ).toFilter(); endInDimFilter = new InDimFilter( "dummy", IntStream.range(START_INT + dictionarySize * 2, START_INT + dictionarySize * 2 + 1) .mapToObj(String::valueOf) .collect(Collectors.toSet()) ); + + newEndInFilter = (TypedInFilter) new TypedInFilter( + "dummy", + ColumnType.STRING, + IntStream.range(START_INT + dictionarySize * 2, START_INT + dictionarySize * 2 + 1) + .mapToObj(String::valueOf) + .collect(Collectors.toList()), + null, + null + ).toFilter(); } @Benchmark @@ -136,6 +157,24 @@ public void doFilterAtEnd(Blackhole blackhole) blackhole.consume(bitmapIndex); } + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void doFilter2(Blackhole blackhole) + { + final ImmutableBitmap bitmapIndex = Filters.computeDefaultBitmapResults(newInFilter, selector); + blackhole.consume(bitmapIndex); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void doFilterAtEnd2(Blackhole blackhole) + { + final ImmutableBitmap bitmapIndex = Filters.computeDefaultBitmapResults(newEndInFilter, selector); + blackhole.consume(bitmapIndex); + } + private Iterable intGenerator() { // i * 2 => half of these values will be present in the inFilter, half won't. 
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java index 60300d88d0cc..71fde96c887e 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java @@ -770,4 +770,4 @@ public void planSql(Blackhole blackhole) blackhole.consume(plannerResult); } } -} +} \ No newline at end of file diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java index 1628babb97c4..69a38434f5e2 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java @@ -19,6 +19,9 @@ package org.apache.druid.benchmark.query; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -28,6 +31,8 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.guava.Yielder; +import org.apache.druid.java.util.common.guava.Yielders; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.math.expr.ExpressionProcessing; @@ -198,7 +203,17 @@ public String getFormatString() "SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000'", "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, 
'$.nesteder.string5') LIKE '%1%' AND JSON_VALUE(nested, '$.nesteder.string1') = '1000'", "SELECT SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE '%1%'", - "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') = '1000' AND JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'" + "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') = '1000' AND JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'", + //48,49 bigger in + "SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204)", + //50, 51 bigger in group + "SELECT long2 FROM foo WHERE long2 IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) IN (1, 19, 21, 23, 25, 26, 46, 50, 51, 55, 60, 61, 66, 68, 69, 70, 77, 88, 90, 92, 93, 94, 95, 100, 101, 102, 104, 109, 111, 113, 114, 115, 120, 121, 122, 134, 135, 136, 140, 142, 150, 155, 170, 172, 173, 174, 180, 181, 190, 199, 200, 201, 202, 203, 204) GROUP BY 1", + "SELECT long2 FROM 
foo WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)", + "SELECT long2 FROM foo WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1", + "SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1" ); @Param({"5000000"}) @@ -271,7 +286,15 @@ public String getFormatString() "44", "45", "46", - "47" + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55" }) 
private String query; @@ -386,8 +409,41 @@ public void setup() QUERIES.get(Integer.parseInt(query)) ); } - catch (Throwable ignored) { - // the show must go on + catch (Throwable ex) { +// log.warn(ex, "failed to sanity check"); + } + + final String sql = QUERIES.get(Integer.parseInt(query)); + final ObjectMapper jsonMapper = CalciteTests.getJsonMapper(); + try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) { + final PlannerResult plannerResult = planner.plan(); + final Sequence resultSequence = plannerResult.run().getResults(); + final Object[] planResult = resultSequence.toList().get(0); + log.info("Native query plan:\n" + + jsonMapper.writerWithDefaultPrettyPrinter() + .writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class)) + ); + } + catch (JsonMappingException e) { + throw new RuntimeException(e); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + + try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) { + final PlannerResult plannerResult = planner.plan(); + final Sequence resultSequence = plannerResult.run().getResults(); + final Yielder yielder = Yielders.each(resultSequence); + int rowCounter = 0; + while (!yielder.isDone()) { + rowCounter++; + yielder.next(yielder.get()); + } + log.info("Total result row count:" + rowCounter); + } + catch (Throwable ex) { +// log.warn(ex, "failed to count rows"); } } @@ -414,4 +470,4 @@ public void querySql(Blackhole blackhole) blackhole.consume(lastRow); } } -} +} \ No newline at end of file diff --git a/extensions-core/druid-ranger-security/pom.xml b/extensions-core/druid-ranger-security/pom.xml index 9680e885dc99..9d52cc2ac193 100644 --- a/extensions-core/druid-ranger-security/pom.xml +++ b/extensions-core/druid-ranger-security/pom.xml @@ -45,6 +45,7 @@ com.amazonaws aws-java-sdk-bundle ${aws.sdk.version} + 
provided diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java index e78bc9870290..99ad72f0ee93 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java @@ -55,7 +55,8 @@ @JsonSubTypes.Type(name = "range", value = RangeFilter.class), @JsonSubTypes.Type(name = "isfalse", value = IsFalseDimFilter.class), @JsonSubTypes.Type(name = "istrue", value = IsTrueDimFilter.class), - @JsonSubTypes.Type(name = "arrayContainsElement", value = ArrayContainsElementFilter.class) + @JsonSubTypes.Type(name = "arrayContainsElement", value = ArrayContainsElementFilter.class), + @JsonSubTypes.Type(name = "inType", value = TypedInFilter.class) }) public interface DimFilter extends Cacheable { diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java index b60ac9572cac..af61cf1efa82 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java @@ -62,6 +62,7 @@ public class DimFilterUtils static final byte RANGE_CACHE_ID = 0x14; static final byte IS_FILTER_BOOLEAN_FILTER_CACHE_ID = 0x15; static final byte ARRAY_CONTAINS_CACHE_ID = 0x16; + static final byte NEW_IN_CACHE_ID = 0x17; public static final byte STRING_SEPARATOR = (byte) 0xFF; diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java new file mode 100644 index 000000000000..e8cf0aa0b360 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -0,0 +1,660 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.google.common.collect.Sets; +import com.google.common.collect.TreeRangeSet; +import com.google.common.hash.Hasher; +import com.google.common.hash.Hashing; +import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet; +import it.unimi.dsi.fastutil.floats.FloatOpenHashSet; +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import it.unimi.dsi.fastutil.objects.ObjectAVLTreeSet; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.InvalidInput; +import org.apache.druid.java.util.common.ByteBufferUtils; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; +import 
org.apache.druid.math.expr.Evals; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnProcessors; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.column.ColumnIndexSupplier; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.NullableTypeStrategy; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.semantic.Utf8ValueSetIndexes; +import org.apache.druid.segment.index.semantic.ValueSetIndexes; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.SortedSet; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class TypedInFilter extends AbstractOptimizableDimFilter implements Filter +{ + private final String column; + private final ColumnType matchValueType; + @Nullable + private final List unsortedValues; + private final Supplier> lazyMatchValues; + @Nullable + private final Supplier> lazyMatchValueBytes; + @Nullable + private final FilterTuning filterTuning; + private final Supplier predicateFactorySupplier; + + @JsonIgnore + private final Supplier cacheKeySupplier; + + /** + * Creates a new filter. + * + * @param column column to search + * @param values set of values to match. 
This collection may be reused to avoid copying a big collection. + * Therefore, callers should not modify the collection after it is passed to this + * constructor. + * @param matchValueType type of values contained in set + * @param filterTuning optional tuning + */ + @JsonCreator + public TypedInFilter( + @JsonProperty("column") String column, + @JsonProperty("matchValueType") ColumnType matchValueType, + @JsonProperty("values") @Nullable List values, + @JsonProperty("sortedValues") @Nullable List sortedValues, + @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning + ) + { + this.column = column; + if (column == null) { + throw InvalidInput.exception("Invalid IN filter, column cannot be null"); + } + this.filterTuning = filterTuning; + this.matchValueType = matchValueType; + if (matchValueType == null) { + throw InvalidInput.exception("Invalid IN filter on column [%s], matchValueType cannot be null", column); + } + // at least one of values or sortedValues is required; sortedValues takes precedence when both are set (NOTE(review): the message below says "exactly one", but both being non-null is accepted) + if (values == null && sortedValues == null) { + throw InvalidInput.exception( + "Invalid IN filter on column [%s], exactly one of values or sortedValues must be non-null", + column + ); + } + if (sortedValues != null) { + this.unsortedValues = null; + // NOTE(review): jackson may deserialize longs as ints, and sortedValues are used as-is here without coercion or re-sorting
+ this.lazyMatchValues = () -> sortedValues; + } else { + this.unsortedValues = values; + this.lazyMatchValues = Suppliers.memoize(() -> sortValues(unsortedValues, matchValueType)); + } + if (matchValueType.is(ValueType.STRING)) { + this.lazyMatchValueBytes = Suppliers.memoize(() -> { + final SortedSet matchValueBytes = new ObjectAVLTreeSet<>(ByteBufferUtils.utf8Comparator()); + for (Object s : lazyMatchValues.get()) { + matchValueBytes.add(StringUtils.toUtf8ByteBuffer(Evals.asString(s))); + } + return matchValueBytes; + }); + } else { + this.lazyMatchValueBytes = null; + } + + this.predicateFactorySupplier = Suppliers.memoize( + () -> new InFilterDruidPredicateFactory(lazyMatchValues.get(), matchValueType) + ); + this.cacheKeySupplier = Suppliers.memoize(this::computeCacheKey); + } + + @JsonProperty + public String getColumn() + { + return column; + } + + @JsonProperty + public List getSortedValues() + { + return lazyMatchValues.get(); + } + + @JsonProperty + public ColumnType getMatchValueType() + { + return matchValueType; + } + + @Nullable + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonProperty + public FilterTuning getFilterTuning() + { + return filterTuning; + } + + @Override + public byte[] getCacheKey() + { + return cacheKeySupplier.get(); + } + + @Override + public DimFilter optimize(final boolean mayIncludeUnknown) + { + final List matchValues = lazyMatchValues.get(); + if (matchValues.isEmpty()) { + return FalseDimFilter.instance(); + } else if (matchValues.size() == 1) { + if (matchValues.get(0) == null) { + return NullFilter.forColumn(column); + } + return new EqualityFilter( + column, + matchValueType, + matchValues.iterator().next(), + filterTuning + ); + } + return this; + } + + @Override + public Filter toFilter() + { + return this; + } + + @Nullable + @Override + public RangeSet getDimensionRangeSet(String dimension) + { + if (!Objects.equals(getColumn(), dimension)) { + return null; + } + RangeSet retSet = TreeRangeSet.create(); + for 
(Object value : lazyMatchValues.get()) { + String valueEquivalent = NullHandling.nullToEmptyIfNeeded(Evals.asString(value)); + if (valueEquivalent == null) { + // Case when SQL compatible null handling is enabled + // Range.singleton(null) is invalid, so use the fact that + // only null values are less than empty string. + retSet.add(Range.lessThan("")); + } else { + retSet.add(Range.singleton(valueEquivalent)); + } + } + return retSet; + } + + @Override + public Set getRequiredColumns() + { + return ImmutableSet.of(column); + } + + @Override + @Nullable + public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) + { + if (!Filters.checkFilterTuningUseIndex(column, selector, filterTuning)) { + return null; + } + final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column); + + if (indexSupplier == null) { + // column doesn't exist, match against null + DruidPredicateMatch match = predicateFactorySupplier.get().makeStringPredicate().apply(null); + return Filters.makeMissingColumnNullIndex(match, selector); + } + + if (lazyMatchValueBytes != null) { + final Utf8ValueSetIndexes utf8ValueSetIndexes = indexSupplier.as(Utf8ValueSetIndexes.class); + if (utf8ValueSetIndexes != null) { + return utf8ValueSetIndexes.forSortedValuesUtf8(lazyMatchValueBytes.get()); + } + } + + final ValueSetIndexes valueSetIndexes = indexSupplier.as(ValueSetIndexes.class); + if (valueSetIndexes != null) { + return valueSetIndexes.forSortedValues(lazyMatchValues.get(), matchValueType); + } + + return Filters.makePredicateIndex( + column, + selector, + predicateFactorySupplier.get() + ); + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + return Filters.makeValueMatcher(factory, column, predicateFactorySupplier.get()); + } + + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return ColumnProcessors.makeVectorProcessor( + column, + 
VectorValueMatcherColumnProcessorFactory.instance(), + factory + ).makeMatcher(predicateFactorySupplier.get()); + } + + @Override + public boolean canVectorizeMatcher(ColumnInspector inspector) + { + return true; + } + + @Override + public boolean supportsRequiredColumnRewrite() + { + return true; + } + + @Override + public Filter rewriteRequiredColumns(Map columnRewrites) + { + String rewriteDimensionTo = columnRewrites.get(column); + if (rewriteDimensionTo == null) { + throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, column); + } + + if (rewriteDimensionTo.equals(column)) { + return this; + } else { + return new TypedInFilter( + rewriteDimensionTo, + matchValueType, + null, + lazyMatchValues.get(), + filterTuning + ); + } + } + + @Override + public String toString() + { + final DimFilter.DimFilterToStringBuilder builder = new DimFilter.DimFilterToStringBuilder(); + return builder.appendDimension(column, null) + .append(" IN (") + .append(Joiner.on(", ").join(Iterables.transform(lazyMatchValues.get(), String::valueOf))) + .append(")") + .append(" (" + matchValueType + ")") + .appendFilterTuning(filterTuning) + .build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TypedInFilter that = (TypedInFilter) o; + return column.equals(that.column) && + Objects.equals(matchValueType, that.matchValueType) && + compareValues(lazyMatchValues.get(), that.lazyMatchValues.get(), matchValueType) && + Objects.equals(filterTuning, that.filterTuning); + } + + @Override + public int hashCode() + { + return Objects.hash(lazyMatchValues.get(), column, matchValueType, filterTuning); + } + + private byte[] computeCacheKey() + { + // Hash all values, in sorted order, as their length followed by their content. 
+ final Hasher hasher = Hashing.sha256().newHasher(); + for (Object v : lazyMatchValues.get()) { + if (v == null) { + // Encode null as length -1, no content. + hasher.putInt(-1); + } else { + final String s = Evals.asString(v); + hasher.putInt(s.length()); + hasher.putString(s, StandardCharsets.UTF_8); + } + } + + return new CacheKeyBuilder(DimFilterUtils.NEW_IN_CACHE_ID) + .appendString(column) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendString(matchValueType.asTypeString()) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(hasher.hash().asBytes()) + .build(); + } + + private static List sortValues(List unsortedValues, ColumnType matchValueType) + { + final Stream coerced; + if (matchValueType.is(ValueType.LONG)) { + coerced = unsortedValues.stream().map(DimensionHandlerUtils::convertObjectToLong).distinct(); + } else if (matchValueType.is(ValueType.DOUBLE)) { + coerced = unsortedValues.stream().map(DimensionHandlerUtils::convertObjectToDouble).distinct(); + } else if (matchValueType.is(ValueType.FLOAT)) { + coerced = unsortedValues.stream().map(DimensionHandlerUtils::convertObjectToFloat).distinct(); + } else { + coerced = unsortedValues.stream().distinct(); + } + return coerced.sorted(matchValueType.getNullableStrategy()).collect(Collectors.toList()); + } + + /** + * Since jackson might translate longs into ints and such, we use the type comparator to check lists + * for {@link #equals(Object)} instead of directly using {@link Objects#equals(Object, Object)} + */ + private static boolean compareValues(List o1, List o2, ColumnType matchValueType) + { + final NullableTypeStrategy comparator = matchValueType.getNullableStrategy(); + //noinspection ObjectEquality + if (o1 == o2) { + return true; + } + if (o1 == null) { + return false; + } + if (o2 == null) { + return false; + } + final int iter = Math.min(o1.size(), o2.size()); + for (int i = 0; i < iter; i++) { + final int cmp = comparator.compare(o1.get(i), o2.get(i)); + if (cmp == 0) 
{ + continue; + } + return false; + } + return o1.size() == o2.size(); + } + + private static DruidObjectPredicate createStringPredicate( + final List sortedValues, + final ColumnType matchValueType + ) + { + Preconditions.checkNotNull(sortedValues, "values"); + final boolean containsNull = sortedValues.get(0) == null; + final Comparator comparator = matchValueType.getNullableStrategy(); + if (matchValueType.is(ValueType.STRING)) { + return value -> { + if (value == null) { + return containsNull ? DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + final int index = Collections.binarySearch(sortedValues, value, comparator); + return DruidPredicateMatch.of(index >= 0); + }; + } + // convert set to strings + final Set stringSet = Sets.newHashSetWithExpectedSize(sortedValues.size()); + for (Object o : sortedValues) { + stringSet.add(Evals.asString(o)); + } + return value -> { + if (value == null) { + return containsNull ? DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + return DruidPredicateMatch.of(stringSet.contains(value)); + }; + } + + private static DruidLongPredicate createLongPredicate(final List sortedValues, ColumnType matchValueType) + { + boolean matchNulls = sortedValues.get(0) == null; + if (matchValueType.is(ValueType.LONG)) { + final Comparator comparator = matchValueType.getNullableStrategy(); + return new DruidLongPredicate() + { + @Override + public DruidPredicateMatch applyLong(long input) + { + final int index = Collections.binarySearch(sortedValues, input, comparator); + return DruidPredicateMatch.of(index >= 0); + } + + @Override + public DruidPredicateMatch applyNull() + { + return matchNulls ? 
DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + }; + } + // convert set to longs + LongOpenHashSet longs = new LongOpenHashSet(); + for (Object value : sortedValues) { + final Long longValue = DimensionHandlerUtils.convertObjectToLong(value); + if (longValue != null) { + longs.add(longValue.longValue()); + } + } + return new DruidLongPredicate() + { + @Override + public DruidPredicateMatch applyLong(long input) + { + return DruidPredicateMatch.of(longs.contains(input)); + } + + @Override + public DruidPredicateMatch applyNull() + { + return matchNulls ? DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + }; + } + + private static DruidFloatPredicate createFloatPredicate(final List sortedValues, ColumnType matchValueType) + { + boolean matchNulls = sortedValues.get(0) == null; + if (matchValueType.is(ValueType.FLOAT)) { + final Comparator comparator = matchValueType.getNullableStrategy(); + return new DruidFloatPredicate() + { + @Override + public DruidPredicateMatch applyFloat(float input) + { + final int index = Collections.binarySearch(sortedValues, input, comparator); + return DruidPredicateMatch.of(index >= 0); + } + + @Override + public DruidPredicateMatch applyNull() + { + return matchNulls ? DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + }; + } + // convert set to floats + final FloatOpenHashSet floatSet = new FloatOpenHashSet(); + for (Object value : sortedValues) { + final Float floatValue = DimensionHandlerUtils.convertObjectToFloat(value); + if (floatValue != null) { + floatSet.add(floatValue.floatValue()); + } + } + return new DruidFloatPredicate() + { + @Override + public DruidPredicateMatch applyFloat(float input) + { + return DruidPredicateMatch.of(floatSet.contains(input)); + } + + @Override + public DruidPredicateMatch applyNull() + { + return matchNulls ? 
DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + }; + } + + private static DruidDoublePredicate createDoublePredicate(final List sortedValues, ColumnType matchValueType) + { + boolean matchNulls = sortedValues.get(0) == null; + if (matchValueType.is(ValueType.DOUBLE)) { + final Comparator comparator = matchValueType.getNullableStrategy(); + return new DruidDoublePredicate() + { + @Override + public DruidPredicateMatch applyDouble(double input) + { + final int index = Collections.binarySearch(sortedValues, input, comparator); + return DruidPredicateMatch.of(index >= 0); + } + + @Override + public DruidPredicateMatch applyNull() + { + return matchNulls ? DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + }; + } + + // convert set to doubles + final DoubleOpenHashSet doubleSet = new DoubleOpenHashSet(sortedValues.size()); + for (Object value : sortedValues) { + Double doubleValue = DimensionHandlerUtils.convertObjectToDouble(value); + if (doubleValue != null) { + doubleSet.add(doubleValue.doubleValue()); + } + } + return new DruidDoublePredicate() + { + @Override + public DruidPredicateMatch applyDouble(double input) + { + return DruidPredicateMatch.of(doubleSet.contains(input)); + } + + @Override + public DruidPredicateMatch applyNull() + { + return matchNulls ? 
DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + }; + } + + public static class InFilterDruidPredicateFactory implements DruidPredicateFactory + { + private final ColumnType matchValueType; + private final List sortedValues; + private final Supplier> stringPredicateSupplier; + private final Supplier longPredicateSupplier; + private final Supplier floatPredicateSupplier; + private final Supplier doublePredicateSupplier; + + public InFilterDruidPredicateFactory(final List sortedValues, final ColumnType matchValueType) + { + this.sortedValues = sortedValues; + this.matchValueType = matchValueType; + + // As the set of filtered values can be large, parsing them as numbers should be done only if needed, and + // only once. Pass in a common long predicate supplier to all filters created by .toFilter(), so that we only + // compute the long hashset/array once per query. This supplier must be thread-safe, since this DimFilter will be + // accessed in the query runners. + this.stringPredicateSupplier = Suppliers.memoize(() -> createStringPredicate(sortedValues, matchValueType)); + this.longPredicateSupplier = Suppliers.memoize(() -> createLongPredicate(sortedValues, matchValueType)); + this.floatPredicateSupplier = Suppliers.memoize(() -> createFloatPredicate(sortedValues, matchValueType)); + this.doublePredicateSupplier = Suppliers.memoize(() -> createDoublePredicate(sortedValues, matchValueType)); + } + + @Override + public DruidObjectPredicate makeStringPredicate() + { + return stringPredicateSupplier.get(); + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + return longPredicateSupplier.get(); + } + + @Override + public DruidFloatPredicate makeFloatPredicate() + { + return floatPredicateSupplier.get(); + } + + @Override + public DruidDoublePredicate makeDoublePredicate() + { + return doublePredicateSupplier.get(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() 
!= o.getClass()) { + return false; + } + InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o; + return Objects.equals(matchValueType, that.matchValueType) && + Objects.equals(sortedValues, that.sortedValues); + } + + @Override + public int hashCode() + { + return Objects.hash(matchValueType, sortedValues); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java index 8ab99f05787f..c175640b99fa 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java @@ -20,10 +20,9 @@ package org.apache.druid.segment.index; import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; -import com.google.common.collect.Iterators; -import com.google.common.collect.PeekingIterator; import org.apache.druid.annotations.SuppressFBWarnings; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; @@ -33,23 +32,25 @@ import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.BitmapResultFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.column.TypeSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.index.semantic.Utf8ValueSetIndexes; import org.apache.druid.segment.index.semantic.ValueIndexes; +import org.apache.druid.segment.index.semantic.ValueSetIndexes; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.nio.ByteBuffer; +import 
java.util.Collections; import java.util.Comparator; -import java.util.Iterator; -import java.util.NoSuchElementException; +import java.util.List; import java.util.SortedSet; public final class IndexedUtf8ValueIndexes> - implements StringValueSetIndexes, Utf8ValueSetIndexes, ValueIndexes + implements StringValueSetIndexes, Utf8ValueSetIndexes, ValueIndexes, ValueSetIndexes { // This determines the cut-off point to switch the merging algorithm from doing binary-search per element in the value // set to doing a sorted merge algorithm between value set and dictionary. The ratio here represents the ratio b/w @@ -159,149 +160,100 @@ private ImmutableBitmap getBitmap(int idx) /** * Helper used by {@link #forSortedValues} and {@link #forSortedValuesUtf8}. */ - private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8(Iterable valuesUtf8, int size, boolean valuesContainsNull) + private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8( + Iterable valuesUtf8, + int size, + boolean valuesContainsNull + ) { // for large number of in-filter values in comparison to the dictionary size, use the sorted merge algorithm. 
if (size > SORTED_MERGE_RATIO_THRESHOLD * dictionary.size()) { - return new SimpleImmutableBitmapDelegatingIterableIndex() - { - @Override - public Iterable getBitmapIterable() - { - return () -> new Iterator() - { - final PeekingIterator valuesIterator = Iterators.peekingIterator(valuesUtf8.iterator()); - final PeekingIterator dictionaryIterator = Iterators.peekingIterator(dictionary.iterator()); - int next = -1; - int idx = 0; - - @Override - public boolean hasNext() - { - if (next < 0) { - findNext(); - } - return next >= 0; - } - - @Override - public ImmutableBitmap next() - { - if (next < 0) { - findNext(); - if (next < 0) { - throw new NoSuchElementException(); - } - } - final int swap = next; - next = -1; - return getBitmap(swap); - } - - private void findNext() - { - while (next < 0 && valuesIterator.hasNext() && dictionaryIterator.hasNext()) { - final ByteBuffer nextValue = valuesIterator.peek(); - final ByteBuffer nextDictionaryKey = dictionaryIterator.peek(); - final int comparison = COMPARATOR.compare(nextValue, nextDictionaryKey); - if (comparison == 0) { - next = idx; - valuesIterator.next(); - break; - } else if (comparison < 0) { - valuesIterator.next(); - } else { - dictionaryIterator.next(); - idx++; - } - } + return ValueSetIndexes.getIndexFromSortedIteratorSortedMerged( + bitmapFactory, + COMPARATOR, + valuesUtf8, + dictionary, + bitmaps, + () -> { + if (!valuesContainsNull && NullHandling.isNullOrEquivalent(dictionary.get(0))) { + return bitmaps.get(0); } - }; - } + return null; + } + ); + } - @Nullable - @Override - protected ImmutableBitmap getUnknownsBitmap() - { + // if the size of in-filter values is less than the threshold percentage of dictionary size, then use binary search + // based lookup per value. The algorithm works well for smaller number of values. 
+ return ValueSetIndexes.getIndexFromSortedIterator( + bitmapFactory, + valuesUtf8, + dictionary, + bitmaps, + () -> { if (!valuesContainsNull && NullHandling.isNullOrEquivalent(dictionary.get(0))) { return bitmaps.get(0); } return null; } - }; - } - - // if the size of in-filter values is less than the threshold percentage of dictionary size, then use binary search - // based lookup per value. The algorithm works well for smaller number of values. - return getSimpleImmutableBitmapIterableIndexFromIterator(valuesUtf8, valuesContainsNull); + ); } - /** - * Iterates over the value set, using binary search to look up each element. The algorithm works well for smaller - * number of values, and must be used if the values are not sorted in the same manner as {@link #dictionary} - */ - private SimpleImmutableBitmapDelegatingIterableIndex getSimpleImmutableBitmapIterableIndexFromIterator(Iterable valuesUtf8, boolean valuesContainsNull) + @Nullable + @Override + public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSignature matchValueType) { - return new SimpleImmutableBitmapDelegatingIterableIndex() - { - @Override - public Iterable getBitmapIterable() - { - return () -> new Iterator() - { - final int dictionarySize = dictionary.size(); - final Iterator iterator = valuesUtf8.iterator(); - int next = -1; - - @Override - public boolean hasNext() - { - if (next < 0) { - findNext(); - } - return next >= 0; - } - - @Override - public ImmutableBitmap next() - { - if (next < 0) { - findNext(); - if (next < 0) { - throw new NoSuchElementException(); - } - } - final int swap = next; - next = -1; - return getBitmap(swap); - } - - private void findNext() - { - while (next < 0 && iterator.hasNext()) { - ByteBuffer nextValue = iterator.next(); - next = dictionary.indexOf(nextValue); - - if (next == -dictionarySize - 1) { - // nextValue is past the end of the dictionary so we can break early - // Note: we can rely on indexOf returning (-(insertion point) - 1), 
because of the earlier check - // for Indexed.isSorted(), which guarantees this behavior - break; - } - } - } - }; + final boolean matchNull = sortedValues.get(0) == null; + final Supplier unknownsIndex = () -> { + if (!matchNull && dictionary.get(0) == null) { + return bitmaps.get(0); } + return null; + }; + if (matchValueType.is(ValueType.STRING)) { + final List tailSet; + final List baseSet = (List) sortedValues; - @Nullable - @Override - protected ImmutableBitmap getUnknownsBitmap() - { - if (!valuesContainsNull && NullHandling.isNullOrEquivalent(dictionary.get(0))) { - return bitmaps.get(0); - } - return null; + if (sortedValues.size() >= ValueSetIndexes.SIZE_WORTH_CHECKING_MIN) { + final Object minValueInColumn = dictionary.get(0); + final int position = Collections.binarySearch( + sortedValues, + StringUtils.fromUtf8((ByteBuffer) minValueInColumn), + matchValueType.getNullableStrategy() + ); + tailSet = baseSet.subList(position >= 0 ? position : -(position + 1), baseSet.size()); + } else { + tailSet = baseSet; } - }; + if (tailSet.size() > ValueSetIndexes.SORTED_MERGE_RATIO_THRESHOLD * dictionary.size()) { + return ValueSetIndexes.getIndexFromSortedIteratorSortedMerged( + bitmapFactory, + ByteBufferUtils.utf8Comparator(), + Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer), + dictionary, + bitmaps, + unknownsIndex + ); + } + // fall through to value iteration + return ValueSetIndexes.getIndexFromSortedIterator( + bitmapFactory, + Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer), + dictionary, + bitmaps, + unknownsIndex + ); + } else { + return ValueSetIndexes.getIndexFromIterator( + bitmapFactory, + Iterables.transform( + sortedValues, + x -> StringUtils.toUtf8ByteBuffer(DimensionHandlerUtils.convertObjectToString(x)) + ), + dictionary, + bitmaps, + unknownsIndex + ); + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java 
b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java index a99f91c2dc1d..b60115e4066a 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java @@ -54,6 +54,5 @@ public interface DictionaryEncodedValueIndex @Nullable T getValue(int index); - @SuppressWarnings({"unreachable", "unused"}) BitmapFactory getBitmapFactory(); } diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java index acb4b6712716..f25a16b7b106 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java @@ -31,14 +31,14 @@ public interface StringValueSetIndexes { /** - * Get the {@link ImmutableBitmap} corresponding to the supplied value. Generates an empty bitmap when passed a - * value that doesn't exist. Never returns null. + * Get the wrapped {@link ImmutableBitmap} corresponding to the supplied value. Generates an empty bitmap when + * passed a value that doesn't exist. Never returns null. */ BitmapColumnIndex forValue(@Nullable String value); /** - * Get an {@link Iterable} of {@link ImmutableBitmap} corresponding to the specified set of values (if they are - * contained in the underlying column). The set must be sorted using + * Get the wrapped {@link ImmutableBitmap} corresponding to the specified set of values (if they are contained in the + * underlying column). The set must be sorted using * {@link org.apache.druid.java.util.common.guava.Comparators#naturalNullsFirst()}. 
*/ BitmapColumnIndex forSortedValues(SortedSet values); diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java index 1bf2792e444d..e4cc9894ca8b 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java @@ -28,8 +28,8 @@ public interface Utf8ValueSetIndexes { /** - * Get an {@link Iterable} of {@link ImmutableBitmap} corresponding to the specified set of values (if they are - * contained in the underlying column). The set must be sorted using + * Get the wrapped {@link ImmutableBitmap} corresponding to the specified set of values (if they are contained in the + * underlying column). The set must be sorted using * {@link org.apache.druid.java.util.common.ByteBufferUtils#utf8Comparator()}. */ BitmapColumnIndex forSortedValuesUtf8(SortedSet valuesUtf8); diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java new file mode 100644 index 000000000000..ed6df2597a28 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.index.semantic; + +import com.google.common.base.Supplier; +import com.google.common.collect.Iterators; +import com.google.common.collect.PeekingIterator; +import org.apache.druid.collections.bitmap.BitmapFactory; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.SimpleImmutableBitmapDelegatingIterableIndex; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +public interface ValueSetIndexes +{ + double SORTED_MERGE_RATIO_THRESHOLD = 0.12D; + int SIZE_WORTH_CHECKING_MIN = 8; + + /** + * Get the wrapped {@link ImmutableBitmap} corresponding to the specified set of values (if they are contained in the + * underlying column). The set must be sorted using the comparator of the supplied matchValueType. 
+ * + * @param sortedValues values to match, sorted in matchValueType order + * @param matchValueType type of the value to match, used to assist conversion from the match value type to the column + * value type + * @return {@link ImmutableBitmap} corresponding to the rows which match the values, or null if an index + * connot be computed for the supplied value type + */ + @Nullable + BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSignature matchValueType); + + static BitmapColumnIndex getIndexFromSortedIteratorSortedMerged( + BitmapFactory bitmapFactory, + Comparator comparator, + Iterable values, + Indexed dictionary, + Indexed bitmaps, + Supplier unknownsBitmap + ) + { + return new BaseValueSetIndexesFromIterable(bitmapFactory, bitmaps, unknownsBitmap) + { + @Override + public Iterable getBitmapIterable() + { + return () -> new Iterator() + { + final PeekingIterator valuesIterator = Iterators.peekingIterator(values.iterator()); + final PeekingIterator dictionaryIterator = Iterators.peekingIterator(dictionary.iterator()); + int next = -1; + int idx = 0; + + @Override + public boolean hasNext() + { + if (next < 0) { + findNext(); + } + return next >= 0; + } + + @Override + public ImmutableBitmap next() + { + if (next < 0) { + findNext(); + if (next < 0) { + throw new NoSuchElementException(); + } + } + final int swap = next; + next = -1; + return getBitmap(swap); + } + + private void findNext() + { + while (next < 0 && valuesIterator.hasNext() && dictionaryIterator.hasNext()) { + final T nextValue = valuesIterator.peek(); + final T nextDictionaryKey = dictionaryIterator.peek(); + final int comparison = comparator.compare(nextValue, nextDictionaryKey); + if (comparison == 0) { + next = idx; + valuesIterator.next(); + break; + } else if (comparison < 0) { + valuesIterator.next(); + } else { + dictionaryIterator.next(); + idx++; + } + } + } + }; + } + }; + } + + static BitmapColumnIndex getIndexFromSortedIterator( + BitmapFactory bitmapFactory, + 
Iterable values, + Indexed dictionary, + Indexed bitmaps, + Supplier getUnknownsIndex + ) + { + return new BaseValueSetIndexesFromIterable(bitmapFactory, bitmaps, getUnknownsIndex) + { + @Override + public Iterable getBitmapIterable() + { + return () -> new Iterator() + { + final int dictionarySize = dictionary.size(); + final Iterator iterator = values.iterator(); + int next = -1; + + @Override + public boolean hasNext() + { + if (next < 0) { + findNext(); + } + return next >= 0; + } + + @Override + public ImmutableBitmap next() + { + if (next < 0) { + findNext(); + if (next < 0) { + throw new NoSuchElementException(); + } + } + final int swap = next; + next = -1; + return getBitmap(swap); + } + + private void findNext() + { + while (next < 0 && iterator.hasNext()) { + T nextValue = iterator.next(); + next = dictionary.indexOf(nextValue); + + if (next == -dictionarySize - 1) { + // nextValue is past the end of the dictionary so we can break early + // Note: we can rely on indexOf returning (-(insertion point) - 1), because of the earlier check + // for Indexed.isSorted(), which guarantees this behavior + break; + } + } + } + }; + } + }; + } + + static BitmapColumnIndex getIndexFromIterator( + BitmapFactory bitmapFactory, + Iterable values, + Indexed dictionary, + Indexed bitmaps, + Supplier getUnknownsIndex + ) + { + return new BaseValueSetIndexesFromIterable(bitmapFactory, bitmaps, getUnknownsIndex) + { + @Override + public Iterable getBitmapIterable() + { + return () -> new Iterator() + { + final Iterator iterator = values.iterator(); + int next = -1; + + @Override + public boolean hasNext() + { + if (next < 0) { + findNext(); + } + return next >= 0; + } + + @Override + public ImmutableBitmap next() + { + if (next < 0) { + findNext(); + if (next < 0) { + throw new NoSuchElementException(); + } + } + final int swap = next; + next = -1; + return getBitmap(swap); + } + + private void findNext() + { + while (next < 0 && iterator.hasNext()) { + T nextValue = 
iterator.next(); + next = dictionary.indexOf(nextValue); + } + } + }; + } + }; + } + + abstract class BaseValueSetIndexesFromIterable extends SimpleImmutableBitmapDelegatingIterableIndex + { + private final Indexed bitmaps; + private final BitmapFactory bitmapFactory; + private final Supplier unknownsBitmap; + + public BaseValueSetIndexesFromIterable( + BitmapFactory bitmapFactory, + Indexed bitmaps, + Supplier unknownsBitmap + ) + { + this.bitmaps = bitmaps; + this.bitmapFactory = bitmapFactory; + this.unknownsBitmap = unknownsBitmap; + } + + @Nullable + @Override + protected ImmutableBitmap getUnknownsBitmap() + { + return unknownsBitmap.get(); + } + + protected ImmutableBitmap getBitmap(int idx) + { + if (idx < 0) { + return bitmapFactory.makeEmptyImmutableBitmap(); + } + + final ImmutableBitmap bitmap = bitmaps.get(idx); + return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index 97788148288c..6e73ac530813 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -22,6 +22,7 @@ import com.google.common.base.Strings; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import com.google.common.primitives.Doubles; import it.unimi.dsi.fastutil.doubles.DoubleArraySet; import it.unimi.dsi.fastutil.doubles.DoubleIterator; @@ -40,6 +41,7 @@ import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.filter.DruidDoublePredicate; import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import 
org.apache.druid.segment.IntListUtils; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; @@ -67,6 +69,7 @@ import org.apache.druid.segment.index.semantic.NumericRangeIndexes; import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.index.semantic.ValueIndexes; +import org.apache.druid.segment.index.semantic.ValueSetIndexes; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nonnull; @@ -74,7 +77,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.Collections; import java.util.Iterator; +import java.util.List; import java.util.NoSuchElementException; import java.util.SortedSet; @@ -195,6 +200,8 @@ public T as(Class clazz) return (T) (NullValueIndex) () -> nullIndex; } else if (clazz.equals(ValueIndexes.class)) { return (T) new DoubleValueIndexes(); + } else if (clazz.equals(ValueSetIndexes.class)) { + return (T) new DoubleValueSetIndexes(); } else if (clazz.equals(StringValueSetIndexes.class)) { return (T) new DoubleStringValueSetIndexes(); } else if (clazz.equals(NumericRangeIndexes.class)) { @@ -258,6 +265,70 @@ public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boo } } + private final class DoubleValueSetIndexes implements ValueSetIndexes + { + final FixedIndexed dictionary = doubleDictionarySupplier.get(); + + @Nullable + @Override + public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSignature matchValueType) + { + final boolean matchNull = sortedValues.get(0) == null; + final Supplier unknownsIndex = () -> { + if (!matchNull && dictionary.get(0) == null) { + return valueIndexes.get(0); + } + return null; + }; + + // values are doubles and ordered in double order + if (matchValueType.is(ValueType.DOUBLE)) { + final List tailSet; + final List baseSet = (List) sortedValues; + + if (sortedValues.size() >= 
ValueSetIndexes.SIZE_WORTH_CHECKING_MIN) { + final double minValueInColumn = dictionary.get(0) == null ? dictionary.get(1) : dictionary.get(0); + final int position = Collections.binarySearch( + sortedValues, + minValueInColumn, + matchValueType.getNullableStrategy() + ); + + tailSet = baseSet.subList(position >= 0 ? position : -(position + 1), baseSet.size()); + } else { + tailSet = baseSet; + } + if (tailSet.size() > ValueSetIndexes.SORTED_MERGE_RATIO_THRESHOLD * dictionary.size()) { + return ValueSetIndexes.getIndexFromSortedIteratorSortedMerged( + bitmapFactory, + ColumnType.DOUBLE.getNullableStrategy(), + tailSet, + dictionary, + valueIndexes, + unknownsIndex + ); + } + // fall through to sorted value iteration + return ValueSetIndexes.getIndexFromSortedIterator( + bitmapFactory, + tailSet, + dictionary, + valueIndexes, + unknownsIndex + ); + } else { + // values in set are not sorted in double order, transform them on the fly and iterate them all + return ValueSetIndexes.getIndexFromIterator( + bitmapFactory, + Iterables.transform(sortedValues, DimensionHandlerUtils::convertObjectToDouble), + dictionary, + valueIndexes, + unknownsIndex + ); + } + } + } + private class DoubleStringValueSetIndexes implements StringValueSetIndexes { @Override diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index d2aa90e607e9..f3cb3f7d73f8 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -21,6 +21,7 @@ import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import it.unimi.dsi.fastutil.ints.IntIntPair; import it.unimi.dsi.fastutil.ints.IntIterator; import 
it.unimi.dsi.fastutil.longs.LongArraySet; @@ -39,6 +40,7 @@ import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.IntListUtils; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; @@ -66,6 +68,7 @@ import org.apache.druid.segment.index.semantic.NumericRangeIndexes; import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.index.semantic.ValueIndexes; +import org.apache.druid.segment.index.semantic.ValueSetIndexes; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nonnull; @@ -73,7 +76,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.Collections; import java.util.Iterator; +import java.util.List; import java.util.NoSuchElementException; import java.util.SortedSet; @@ -195,6 +200,8 @@ public T as(Class clazz) return (T) (NullValueIndex) () -> nullIndex; } else if (clazz.equals(ValueIndexes.class)) { return (T) new LongValueIndexes(); + } else if (clazz.equals(ValueSetIndexes.class)) { + return (T) new LongValueSetIndexes(); } else if (clazz.equals(StringValueSetIndexes.class)) { return (T) new LongStringValueSetIndexes(); } else if (clazz.equals(NumericRangeIndexes.class)) { @@ -259,6 +266,70 @@ public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boo } } + private final class LongValueSetIndexes implements ValueSetIndexes + { + final FixedIndexed dictionary = longDictionarySupplier.get(); + + @Nullable + @Override + public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSignature matchValueType) + { + final boolean matchNull = sortedValues.get(0) == null; + final Supplier unknownsIndex = () -> { + if (!matchNull && dictionary.get(0) 
== null) { + return valueIndexes.get(0); + } + return null; + }; + if (matchValueType.is(ValueType.LONG)) { + final List tailSet; + final List baseSet = (List) sortedValues; + + if (sortedValues.size() >= ValueSetIndexes.SIZE_WORTH_CHECKING_MIN) { + final long minValueInColumn = dictionary.get(0) == null ? dictionary.get(1) : dictionary.get(0); + final int position = Collections.binarySearch( + sortedValues, + minValueInColumn, + matchValueType.getNullableStrategy() + ); + tailSet = baseSet.subList(position >= 0 ? position : -(position + 1), baseSet.size()); + } else { + tailSet = baseSet; + } + if (tailSet.size() > ValueSetIndexes.SORTED_MERGE_RATIO_THRESHOLD * dictionary.size()) { + return ValueSetIndexes.getIndexFromSortedIteratorSortedMerged( + bitmapFactory, + ColumnType.LONG.getNullableStrategy(), + tailSet, + dictionary, + valueIndexes, + unknownsIndex + ); + } + // fall through to sort value iteration + return ValueSetIndexes.getIndexFromSortedIterator( + bitmapFactory, + tailSet, + dictionary, + valueIndexes, + unknownsIndex + ); + } else { + // values in set are not sorted in double order, transform them on the fly and iterate them all + return ValueSetIndexes.getIndexFromIterator( + bitmapFactory, + Iterables.transform( + sortedValues, + DimensionHandlerUtils::convertObjectToLong + ), + dictionary, + valueIndexes, + unknownsIndex + ); + } + } + } + private class LongStringValueSetIndexes implements StringValueSetIndexes { final FixedIndexed dictionary = longDictionarySupplier.get(); diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java index 23e86ce45a23..51e09434fb06 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java @@ -43,6 +43,7 @@ import 
org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.index.semantic.Utf8ValueSetIndexes; import org.apache.druid.segment.index.semantic.ValueIndexes; +import org.apache.druid.segment.index.semantic.ValueSetIndexes; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -100,7 +101,8 @@ public T as(Class clazz) } else if ( clazz.equals(StringValueSetIndexes.class) || clazz.equals(Utf8ValueSetIndexes.class) || - clazz.equals(ValueIndexes.class) + clazz.equals(ValueIndexes.class) || + clazz.equals(ValueSetIndexes.class) ) { return (T) new IndexedUtf8ValueIndexes<>( bitmapFactory, diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java index fd25166bcd17..aeb408b7961c 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java @@ -56,7 +56,7 @@ @RunWith(Parameterized.class) public class InFilterTest extends BaseFilterTest { - private static final List ROWS = ImmutableList.of( + static final List ROWS = ImmutableList.of( makeDefaultSchemaRow("a", "", ImmutableList.of("a", "b"), "2017-07-25", "", 0.0, 0.0f, 0L), makeDefaultSchemaRow("b", "10", ImmutableList.of(), "2017-07-25", "a", 10.1, 10.1f, 100L), makeDefaultSchemaRow("c", "2", ImmutableList.of(""), "2017-05-25", null, null, 5.5f, 40L), diff --git a/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java new file mode 100644 index 000000000000..4842c2d0717e --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java @@ -0,0 +1,543 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.filter; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Function; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.RangeSet; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.TypedInFilter; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import 
org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +@RunWith(Enclosed.class) +public class TypedInFilterTests +{ + @RunWith(Parameterized.class) + public static class TypedInFilterTest extends BaseFilterTest + { + private final ObjectMapper jsonMapper = new DefaultObjectMapper(); + + public TypedInFilterTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean cnf, + boolean optimize + ) + { + super(testName, InFilterTest.ROWS, indexBuilder, finisher, cnf, optimize); + } + + + @AfterClass + public static void tearDown() throws Exception + { + BaseFilterTest.tearDown(TypedInFilterTest.class.getName()); + } + + @Test + public void testSingleValueStringColumnWithNulls() + { + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "")), + NullHandling.sqlCompatible() ? ImmutableList.of("a") : ImmutableList.of() + ); + + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Collections.singletonList("")), + NullHandling.sqlCompatible() ? 
ImmutableList.of("a") : ImmutableList.of() + ); + + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de")), + ImmutableList.of() + ); + + assertFilterMatches( + inFilter("s0", ColumnType.STRING, Arrays.asList("a", "b")), + ImmutableList.of("b", "d", "f") + ); + assertFilterMatches( + inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist")), + ImmutableList.of() + ); + + if (NullHandling.replaceWithDefault()) { + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "10", "abc")), + ImmutableList.of("a", "b", "f") + ); + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "10", "abc")), + ImmutableList.of("a", "b", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de"))), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("s0", ColumnType.STRING, Arrays.asList("a", "b"))), + ImmutableList.of("a", "c", "e") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist"))), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + } else { + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "10", "abc")), + ImmutableList.of("b", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de"))), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("s0", ColumnType.STRING, Arrays.asList("a", "b"))), + ImmutableList.of("a", "e") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist"))), + ImmutableList.of("a", "b", "d", "e", "f") + ); + } + } + + @Test + public void testMultiValueStringColumn() + { + if (isAutoSchema()) { + return; + } + if (NullHandling.replaceWithDefault()) { + assertFilterMatches( + 
inFilter("dim2", ColumnType.STRING, Arrays.asList("b", "d")), + ImmutableList.of("a") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList(null)), + ImmutableList.of("b", "c", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "a")), + ImmutableList.of("a", "b", "c", "d", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "b")), + ImmutableList.of("a", "b", "c", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList("")), + ImmutableList.of("b", "c", "f") + ); + } else { + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList(null)), + ImmutableList.of("b", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "a")), + ImmutableList.of("a", "b", "d", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "b")), + ImmutableList.of("a", "b", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList("")), + ImmutableList.of("c") + ); + } + + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList("", null)), + ImmutableList.of("b", "c", "f") + ); + + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList("c")), + ImmutableList.of("e") + ); + + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList("d")), + ImmutableList.of() + ); + } + + @Test + public void testMissingColumn() + { + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Arrays.asList(null, null)), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Arrays.asList(null, null))), + ImmutableList.of() + ); + + if (NullHandling.replaceWithDefault()) { + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("")), + 
ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + } else { + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("")), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Collections.singletonList(""))), + ImmutableList.of() + ); + } + + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Arrays.asList(null, "a")), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Arrays.asList(null, "a"))), + ImmutableList.of() + ); + + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("a")), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Collections.singletonList("a"))), + NullHandling.sqlCompatible() ? ImmutableList.of() : ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("b")), + ImmutableList.of() + ); + + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("c")), + ImmutableList.of() + ); + } + + @Test + public void testNumeric() + { + assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(0f)), ImmutableList.of("a")); + assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(0.0)), ImmutableList.of("a")); + assertFilterMatches(inFilter("l0", ColumnType.LONG, Collections.singletonList(0L)), ImmutableList.of("a")); + assertFilterMatches( + NotDimFilter.of(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(0f))), + NullHandling.sqlCompatible() + ? ImmutableList.of("b", "c", "d", "f") + : ImmutableList.of("b", "c", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(0.0))), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("b", "d", "e", "f") + : ImmutableList.of("b", "c", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("l0", ColumnType.LONG, Collections.singletonList(0L))), + NullHandling.sqlCompatible() + ? ImmutableList.of("b", "c", "e", "f") + : ImmutableList.of("b", "c", "d", "e", "f") + ); + assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(null)), ImmutableList.of("e")); + assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(null)), ImmutableList.of("c")); + assertFilterMatches(inFilter("l0", ColumnType.LONG, Collections.singletonList(null)), ImmutableList.of("d")); + assertFilterMatches( + NotDimFilter.of(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(null))), + ImmutableList.of("a", "b", "c", "d", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(null))), + ImmutableList.of("a", "b", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("l0", ColumnType.LONG, Collections.singletonList(null))), + ImmutableList.of("a", "b", "c", "e", "f") + ); + + assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Arrays.asList("0", "999")), ImmutableList.of("a")); + assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Arrays.asList("0", "999")), ImmutableList.of("a")); + assertFilterMatches(inFilter("l0", ColumnType.LONG, Arrays.asList("0", "999")), ImmutableList.of("a")); + assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Arrays.asList(null, "999")), ImmutableList.of("e")); + assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Arrays.asList(null, "999")), ImmutableList.of("c")); + assertFilterMatches(inFilter("l0", ColumnType.LONG, Arrays.asList(null, "999")), ImmutableList.of("d")); + } + + @Override + protected void assertFilterMatches(DimFilter filter, List expectedRows) + { + super.assertFilterMatches(filter, expectedRows); + try { + // make sure round trip json serde is cool + 
super.assertFilterMatches( + jsonMapper.readValue(jsonMapper.writeValueAsString(filter), DimFilter.class), + expectedRows + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + } + + public static class TypedInFilterFilterNonParameterizedTests extends InitializedNullHandlingTest + { + @Test + public void testSerde() throws JsonProcessingException + { + ObjectMapper mapper = new DefaultObjectMapper(); + TypedInFilter filter = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "c")); + String s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + + filter = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", null, "c")); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + + filter = inFilter("column", ColumnType.LONG, Arrays.asList(1L, 2L, 2L, null, 3L)); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + + filter = inFilter("column", ColumnType.DOUBLE, Arrays.asList(1.1, 2.2, 2.3, null, 3.3)); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + + filter = inFilter("column", ColumnType.FLOAT, Arrays.asList(1.1f, 2.2f, 2.2f, null, 3.3f)); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + + filter = inFilter("column", ColumnType.FLOAT, Arrays.asList(1.1, 2.2, 2.3, null, 3.3)); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + } + + @Test + public void testGetCacheKey() + { + TypedInFilter filterUnsorted = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", null, "c")); + TypedInFilter filterDifferent = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", "c")); + TypedInFilter filterPresorted = new 
TypedInFilter( + "column", + ColumnType.STRING, + null, + Arrays.asList(null, "a", "b", "c"), + null + ); + + Assert.assertEquals(filterPresorted, filterUnsorted); + Assert.assertNotEquals(filterDifferent, filterPresorted); + Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); + Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); + + filterUnsorted = inFilter("column", ColumnType.LONG, Arrays.asList(2L, -2L, 1L, null, 2L, 3L)); + filterDifferent = inFilter("column", ColumnType.LONG, Arrays.asList(2L, -2L, 1L, 3L)); + filterPresorted = new TypedInFilter( + "column", + ColumnType.LONG, + null, + Arrays.asList(null, -2L, 1L, 2L, 3L), + null + ); + + Assert.assertEquals(filterPresorted, filterUnsorted); + Assert.assertNotEquals(filterDifferent, filterPresorted); + Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); + Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); + + filterUnsorted = inFilter("column", ColumnType.DOUBLE, Arrays.asList(2.2, -2.2, 1.1, null, 2.2, 3.3)); + filterDifferent = inFilter("column", ColumnType.DOUBLE, Arrays.asList(2.2, -2.2, 1.1, 3.3)); + filterPresorted = new TypedInFilter( + "column", + ColumnType.DOUBLE, + null, + Arrays.asList(null, -2.2, 1.1, 2.2, 3.3), + null + ); + + Assert.assertEquals(filterPresorted, filterUnsorted); + Assert.assertNotEquals(filterDifferent, filterPresorted); + Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); + Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); + + filterUnsorted = inFilter("column", ColumnType.FLOAT, Arrays.asList(2.2f, -2.2f, 1.1f, null, 2.2f, 3.3f)); + filterDifferent = inFilter("column", ColumnType.FLOAT, Arrays.asList(2.2f, -2.2f, 1.1f, 3.3f)); + filterPresorted = new TypedInFilter( + "column", + ColumnType.FLOAT, + null, + Arrays.asList(null, -2.2f, 
1.1f, 2.2f, 3.3f), + null + ); + + Assert.assertEquals(filterPresorted, filterUnsorted); + Assert.assertNotEquals(filterDifferent, filterPresorted); + Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); + Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); + } + + @Test + public void testInvalidParameters() + { + Throwable t = Assert.assertThrows( + DruidException.class, + () -> new TypedInFilter(null, ColumnType.STRING, null, null, null) + ); + Assert.assertEquals("Invalid IN filter, column cannot be null", t.getMessage()); + t = Assert.assertThrows( + DruidException.class, + () -> new TypedInFilter("dim0", null, null, null, null) + ); + Assert.assertEquals("Invalid IN filter on column [dim0], matchValueType cannot be null", t.getMessage()); + t = Assert.assertThrows( + DruidException.class, + () -> new TypedInFilter("dim0", ColumnType.STRING, null, null, null) + ); + Assert.assertEquals( + "Invalid IN filter on column [dim0], exactly one of values or sortedValues must be non-null", + t.getMessage() + ); + } + + @Test + public void testGetDimensionRangeSet() + { + TypedInFilter filter = inFilter("x", ColumnType.STRING, Arrays.asList(null, "a", "b", "c")); + TypedInFilter filter2 = inFilter("x", ColumnType.STRING, Arrays.asList("a", "b", null, "c")); + + Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); + RangeSet range = filter.getDimensionRangeSet("x"); + Assert.assertTrue(range.contains("b")); + + filter = inFilter("x", ColumnType.LONG, Arrays.asList(null, 1L, 2L, 3L)); + filter2 = inFilter("x", ColumnType.LONG, Arrays.asList(3L, 1L, null, 2L)); + Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); + range = filter.getDimensionRangeSet("x"); + Assert.assertTrue(range.contains("2")); + + filter = inFilter("x", ColumnType.DOUBLE, Arrays.asList(null, 1.1, 2.2, 3.3)); + filter2 = inFilter("x", 
ColumnType.DOUBLE, Arrays.asList(3.3, 1.1, null, 2.2)); + range = filter.getDimensionRangeSet("x"); + Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); + Assert.assertTrue(range.contains("2.2")); + + filter = inFilter("x", ColumnType.FLOAT, Arrays.asList(null, 1.1f, 2.2f, 3.3f)); + filter2 = inFilter("x", ColumnType.FLOAT, Arrays.asList(3.3f, 1.1f, null, 2.2f)); + range = filter.getDimensionRangeSet("x"); + Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); + Assert.assertTrue(range.contains("2.2")); + } + + @Test + public void testRequiredColumnRewrite() + { + TypedInFilter filter = inFilter("dim0", ColumnType.STRING, Arrays.asList("a", "c")); + TypedInFilter filter2 = inFilter("dim1", ColumnType.STRING, Arrays.asList("a", "c")); + + Assert.assertTrue(filter.supportsRequiredColumnRewrite()); + Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); + + Filter rewrittenFilter = filter.rewriteRequiredColumns(ImmutableMap.of("dim0", "dim1")); + Assert.assertEquals(filter2, rewrittenFilter); + + Throwable t = Assert.assertThrows( + IAE.class, + () -> filter.rewriteRequiredColumns(ImmutableMap.of("invalidName", "dim1")) + ); + Assert.assertEquals( + "Received a non-applicable rewrite: {invalidName=dim1}, filter's dimension: dim0", + t.getMessage() + ); + } + + @Test + public void test_equals() + { + EqualsVerifier.forClass(TypedInFilter.class).usingGetClass() + .withNonnullFields( + "column", + "matchValueType", + "unsortedValues", + "lazyMatchValues", + "optimizedFilterIncludeUnknown", + "optimizedFilterNoIncludeUnknown" + ) + .withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE) + .withPrefabValues( + Supplier.class, + Suppliers.ofInstance(ImmutableList.of("a", "b")), + Suppliers.ofInstance(ImmutableList.of("b", "c")) + ) + .withIgnoredFields( + "unsortedValues", + "lazyMatchValueBytes", + "predicateFactorySupplier", + "cacheKeySupplier", + 
"optimizedFilterIncludeUnknown", + "optimizedFilterNoIncludeUnknown" + ) + .verify(); + } + } + + private static TypedInFilter inFilter(String columnName, ColumnType matchValueType, List values) + { + return new TypedInFilter( + columnName, + matchValueType, + values, + null, + null + ); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java index 23cfcfaa4a45..3b95d8578823 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java @@ -36,6 +36,7 @@ import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.OrDimFilter; +import org.apache.druid.query.filter.TypedInFilter; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; @@ -46,6 +47,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOperatorConversion @@ -127,7 +129,7 @@ public DimFilter toDruidFilter( // to create an empty array with no argument, we just return null. 
return null; } else if (arrayElements.length == 1) { - if (plannerContext.isUseBoundsAndSelectors()) { + if (plannerContext.isUseBoundsAndSelectors() || (!simpleExtractionExpr.isDirectColumnAccess() && virtualColumnRegistry == null)) { return newSelectorDimFilter(simpleExtractionExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); } else { final String column = simpleExtractionExpr.isDirectColumnAccess() @@ -148,17 +150,27 @@ public DimFilter toDruidFilter( ); } } else { - final InDimFilter.ValuesSet valuesSet = InDimFilter.ValuesSet.create(); - for (final Object arrayElement : arrayElements) { - valuesSet.add(Evals.asString(arrayElement)); - } + if (plannerContext.isUseBoundsAndSelectors() || !simpleExtractionExpr.isDirectColumnAccess()) { + final InDimFilter.ValuesSet valuesSet = InDimFilter.ValuesSet.create(); + for (final Object arrayElement : arrayElements) { + valuesSet.add(Evals.asString(arrayElement)); + } - return new InDimFilter( - simpleExtractionExpr.getSimpleExtraction().getColumn(), - valuesSet, - simpleExtractionExpr.getSimpleExtraction().getExtractionFn(), - null - ); + return new InDimFilter( + simpleExtractionExpr.getSimpleExtraction().getColumn(), + valuesSet, + simpleExtractionExpr.getSimpleExtraction().getExtractionFn(), + null + ); + } else { + return new TypedInFilter( + simpleExtractionExpr.getSimpleExtraction().getColumn(), + ExpressionType.toColumnType((ExpressionType) exprEval.type().getElementType()), + Arrays.asList(arrayElements), + null, + null + ); + } } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/CollectComparisons.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/CollectComparisons.java index 52b518ff8cfb..e67600bb6da5 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/CollectComparisons.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/CollectComparisons.java @@ -23,10 +23,10 @@ import it.unimi.dsi.fastutil.ints.IntSet; import 
it.unimi.dsi.fastutil.objects.ObjectIntPair; import org.apache.druid.java.util.common.Pair; -import org.apache.druid.query.filter.InDimFilter; import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Collection; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -37,7 +37,7 @@ * comparisons with the same {@link CollectionKey} can potentially become a single {@link CollectedType}. * For example: x = 'a', x = 'b' can become x IN ('a', 'b'). */ -public abstract class CollectComparisons +public abstract class CollectComparisons> { /** * List of {@link BaseType} that were ORed together. @@ -94,7 +94,7 @@ public List collect() // Emit a collected comparison (e.g. IN filters) for each collection. for (Map.Entry>, List>> entry : collectMap.entrySet()) { final List> comparisonList = entry.getValue(); - final InDimFilter.ValuesSet values = new InDimFilter.ValuesSet(); + final CollectionType values = makeCollection(); for (ObjectIntPair subEntry : comparisonList) { final ComparisonType selector = subEntry.first(); @@ -153,6 +153,8 @@ public List collect() @Nullable protected abstract Pair> getCollectibleComparison(BaseType expr); + protected abstract CollectionType makeCollection(); + /** * Given a comparison, returns its collection key, which will be used to group it together with like comparisons. * This method will be called on objects returned by {@link #getCollectibleComparison(Object)}. If this method returns @@ -164,14 +166,14 @@ public List collect() /** * Given a comparison, returns the strings that it matches. */ - protected abstract Set getMatchValues(ComparisonType comparison); + protected abstract Set getMatchValues(ComparisonType comparison); /** * Given a set of strings from {@link #getMatchValues(Object)} from various comparisons, returns a single collected * comparison that matches all those strings. 
*/ @Nullable - protected abstract CollectedType makeCollectedComparison(CollectionKey key, InDimFilter.ValuesSet values); + protected abstract CollectedType makeCollectedComparison(CollectionKey key, CollectionType values); /** * Given a list of expressions, returns an AND expression with those exprs as children. Only called if diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java index 5f457096a31b..da5efeb6a08f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java @@ -21,16 +21,14 @@ import com.google.common.collect.Lists; import org.apache.druid.java.util.common.Pair; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.query.filter.TypedInFilter; import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.sql.calcite.expression.SimpleExtraction; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -131,7 +129,7 @@ private static Pair> splitAnd( * Helper for collecting {@link SelectorDimFilter} into {@link InDimFilter}. 
*/ private static class CollectSelectors - extends CollectComparisons + extends CollectComparisons { private final RowSignature sourceRowSignature; @@ -155,6 +153,12 @@ protected Pair> getCollectibleComparison(DimF ); } + @Override + protected InDimFilter.ValuesSet makeCollection() + { + return new InDimFilter.ValuesSet(); + } + @Nullable @Override protected BoundRefKey getCollectionKey(SelectorDimFilter selector) @@ -195,7 +199,7 @@ protected DimFilter makeAnd(List exprs) /** * Helper for collecting {@link EqualityFilter} into {@link InDimFilter}. */ - private static class CollectEqualities extends CollectComparisons + private static class CollectEqualities extends CollectComparisons> { public CollectEqualities(final List orExprs) { @@ -216,38 +220,33 @@ protected Pair> getCollectibleComparison(DimFilt ); } + @Override + protected List makeCollection() + { + return new ArrayList<>(); + } + @Nullable @Override protected RangeRefKey getCollectionKey(EqualityFilter selector) { - if (!selector.getMatchValueType().is(ValueType.STRING)) { - // skip non-string equality filters since InDimFilter uses a sorted string set, which is a different sort - // than numbers or other types might use - return null; - } - return RangeRefKey.from(selector); } @Override - protected Set getMatchValues(EqualityFilter selector) + protected Set getMatchValues(EqualityFilter selector) { - return Collections.singleton( - ExprEval.ofType(ExpressionType.fromColumnType(selector.getMatchValueType()), selector.getMatchValue()) - .castTo(ExpressionType.STRING) - .asString() - ); + return Collections.singleton(selector.getMatchValue()); } @Nullable @Override - protected InDimFilter makeCollectedComparison(RangeRefKey rangeRefKey, InDimFilter.ValuesSet values) + protected TypedInFilter makeCollectedComparison(RangeRefKey rangeRefKey, List values) { if (values.size() > 1) { - return new InDimFilter(rangeRefKey.getColumn(), values, null, null); - } else { - return null; + return new 
TypedInFilter(rangeRefKey.getColumn(), rangeRefKey.getMatchValueType(), values, null, null); } + return null; } @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/SqlParameterizerShuttle.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/SqlParameterizerShuttle.java index 29da45b085fd..5273a65ccf7a 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/SqlParameterizerShuttle.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/SqlParameterizerShuttle.java @@ -135,6 +135,8 @@ private SqlNode createArrayLiteral(Object value, int posn) SqlNode node; if (element instanceof String) { node = SqlLiteral.createCharString((String) element, SqlParserPos.ZERO); + } else if (element instanceof Float || element instanceof Double) { + node = SqlLiteral.createApproxNumeric(element.toString(), SqlParserPos.ZERO); } else if (element instanceof Integer || element instanceof Long) { // No direct way to create a literal from an Integer or Long, have // to parse a string, sadly. diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/ReverseLookupRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/ReverseLookupRule.java index f53cc3e282a0..95ad2b11334b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/ReverseLookupRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/ReverseLookupRule.java @@ -310,7 +310,7 @@ private RexNode visitComparison(final RexCall call) * Collect and reverse a set of lookups that appear as children to OR. 
*/ private class CollectReverseLookups - extends CollectComparisons + extends CollectComparisons { private final RexBuilder rexBuilder; @@ -335,6 +335,12 @@ protected Pair> getCollectibleComparison(RexNode expr) } } + @Override + protected InDimFilter.ValuesSet makeCollection() + { + return new InDimFilter.ValuesSet(); + } + @Nullable @Override protected ReverseLookupKey getCollectionKey(RexCall call) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index 378486bf2249..f8629ce9be8b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -64,6 +64,7 @@ import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.query.filter.TypedInFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.having.DimFilterHavingSpec; import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider; @@ -368,11 +369,31 @@ public static IsTrueDimFilter istrue(DimFilter filter) return new IsTrueDimFilter(filter); } - public static InDimFilter in(String dimension, Collection values, ExtractionFn extractionFn) + public static DimFilter in(String dimension, Collection values) { + if (NullHandling.sqlCompatible()) { + return in(dimension, ColumnType.STRING, new ArrayList<>(values)); + } + return new InDimFilter(dimension, values, null); + } + + public static DimFilter in(String dimension, Collection values, ExtractionFn extractionFn) + { + if (NullHandling.sqlCompatible() && extractionFn == null) { + return in(dimension, ColumnType.STRING, new ArrayList<>(values)); + } return new InDimFilter(dimension, values, extractionFn); } + public static DimFilter in(String dimension, ColumnType matchValueType, 
List values) + { + if (NullHandling.sqlCompatible()) { + return new TypedInFilter(dimension, matchValueType, values, null, null); + } + Set set = values.stream().map(Evals::asString).collect(Collectors.toSet()); + return in(dimension, set, null); + } + public static DimFilter isNull(final String fieldName) { return isNull(fieldName, null); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 61b484b6a8b2..2c34f5050f98 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -841,7 +841,7 @@ public void testArrayOverlapFilter() newScanQueryBuilder() .dataSource(CalciteTests.DATASOURCE3) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(new InDimFilter("dim3", ImmutableList.of("a", "b"), null)) + .filters(in("dim3", ImmutableList.of("a", "b"))) .columns("dim3") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(5) @@ -4403,7 +4403,7 @@ public void testUnnestTwiceWithFiltersAndExpressions() "string_to_array(\"dim1\",'\\u005C.')", ColumnType.STRING_ARRAY ), - in("j0.unnest", ImmutableList.of("1", "2"), null) + in("j0.unnest", ImmutableList.of("1", "2")) ), expressionVirtualColumn( "_j0.unnest", @@ -5319,7 +5319,7 @@ public void testUnnestWithInFiltersWithExpressionInInnerQuery() .dataSource(UnnestDataSource.create( FilteredDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - new InDimFilter("dim2", ImmutableList.of("a", "b"), null) + in("dim2", ImmutableList.of("a", "b")) ), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null @@ -5455,7 +5455,7 @@ public void testUnnestWithFiltersInsideAndOutside1() ), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), or( - in("j0.unnest", ImmutableList.of("a", "c"), null), + in("j0.unnest", ImmutableList.of("a", 
"c")), new LikeDimFilter("j0.unnest", "_", null, null) ) )) @@ -5497,7 +5497,7 @@ public void testUnnestWithFiltersOutside() ), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), or( - in("j0.unnest", ImmutableList.of("a", "c"), null), + in("j0.unnest", ImmutableList.of("a", "c")), new LikeDimFilter("j0.unnest", "_", null, null) ) )) @@ -5532,7 +5532,7 @@ public void testUnnestWithInFilters() .dataSource(UnnestDataSource.create( FilteredDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null) + in("dim2", ImmutableList.of("a", "b", "ab", "abc")) ), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null @@ -5674,7 +5674,7 @@ public void testUnnestWithJoinOnTheLeft() .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .filters(new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null)) + .filters(in("dim2", ImmutableList.of("a", "b", "ab", "abc"))) .columns("dim2") .context(QUERY_CONTEXT_UNNEST) .build() @@ -5837,7 +5837,7 @@ public void testUnnestWithINFiltersWithLeftRewrite() range("m1", ColumnType.LONG, null, 10L, false, true) ), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null) + in("j0.unnest", ImmutableSet.of("a", "b")) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -5867,7 +5867,7 @@ public void testUnnestWithINFiltersWithNoLeftRewrite() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), - new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null) + in("j0.unnest", ImmutableSet.of("a", "b")) )) .intervals(querySegmentSpec(Filtration.eternity())) 
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -5900,7 +5900,7 @@ public void testUnnestWithInvalidINFiltersOnUnnestedColumn() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - new InDimFilter("j0.unnest", ImmutableSet.of("foo", "bar"), null) + in("j0.unnest", ImmutableSet.of("foo", "bar")) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -6028,7 +6028,7 @@ public void testUnnestWithSelectorFiltersOnVirtualStringColumn() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), - new InDimFilter("j0.unnest", ImmutableSet.of("a", "ab"), null) + in("j0.unnest", ImmutableSet.of("a", "ab")) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -6131,7 +6131,7 @@ public void testUnnestWithMultipleAndFiltersOnSelectedUnnestedColumns() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null) + in("j0.unnest", ImmutableSet.of("a", "b")) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -6161,7 +6161,7 @@ public void testUnnestWithMultipleOrFiltersOnUnnestedColumns() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - new InDimFilter("j0.unnest", ImmutableSet.of("b", "d"), null) + in("j0.unnest", ImmutableSet.of("b", "d")) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -6281,7 
+6281,7 @@ public void testUnnestWithMultipleOrFiltersOnSelectedVirtualColumns() .context(QUERY_CONTEXT_UNNEST) .filters( or( - new InDimFilter("j0.unnest", ImmutableSet.of("a", "aa"), null), + in("j0.unnest", ImmutableSet.of("a", "aa")), range("m1", ColumnType.LONG, null, 2L, false, true) ) ) @@ -6523,7 +6523,7 @@ public void testUnnestWithGroupByHavingWithWhereOnAggCol() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - new InDimFilter("j0.unnest", ImmutableSet.of("a", "c"), null) + in("j0.unnest", ImmutableSet.of("a", "c")) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -6554,7 +6554,7 @@ public void testUnnestWithGroupByHavingWithWhereOnUnnestCol() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - new InDimFilter("j0.unnest", ImmutableSet.of("a", "c"), null) + in("j0.unnest", ImmutableSet.of("a", "c")) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -6584,13 +6584,7 @@ public void testUnnestWithGroupByWithWhereOnUnnestArrayCol() .setDataSource(UnnestDataSource.create( new TableDataSource(DATA_SOURCE_ARRAYS), expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), - NullHandling.sqlCompatible() - ? 
or( - equality("j0.unnest", 1L, ColumnType.LONG), - equality("j0.unnest", 2L, ColumnType.LONG), - equality("j0.unnest", 3L, ColumnType.LONG) - ) - : in("j0.unnest", ImmutableList.of("1", "2", "3"), null) + in("j0.unnest", ColumnType.LONG, ImmutableList.of(1L, 2L, 3L)) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -6621,13 +6615,7 @@ public void testUnnestWithGroupByHavingWithWhereOnUnnestArrayCol() .setDataSource(UnnestDataSource.create( new TableDataSource(DATA_SOURCE_ARRAYS), expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), - NullHandling.sqlCompatible() - ? or( - equality("j0.unnest", 1L, ColumnType.LONG), - equality("j0.unnest", 2L, ColumnType.LONG), - equality("j0.unnest", 3L, ColumnType.LONG) - ) - : in("j0.unnest", ImmutableList.of("1", "2", "3"), null) + in("j0.unnest", ColumnType.LONG, ImmutableList.of(1L, 2L, 3L)) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -6960,12 +6948,7 @@ public void testUnnestWithTimeFilterInsideSubquery() ) .intervals(querySegmentSpec(Filtration.eternity())) .filters( - NullHandling.sqlCompatible() ? 
- or( - equality("m1", 1.0f, ColumnType.FLOAT), - equality("m1", 2.0f, ColumnType.FLOAT) - ) : - new InDimFilter("m1", ImmutableList.of("1", "2"), null) + in("m1", ColumnType.FLOAT, ImmutableList.of(1.0f, 2.0f)) ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index 5420be74e7f2..e525e5e05d95 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -65,7 +65,6 @@ import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.ExtractionDimensionSpec; import org.apache.druid.query.extraction.SubstringDimExtractionFn; -import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.ResultRow; @@ -3999,7 +3998,7 @@ public void testTwoSemiJoinsSimultaneously(Map queryContext) ) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(in("dim1", ImmutableList.of("abc", "def"), null)) + .setDimFilter(in("dim1", ImmutableList.of("abc", "def"))) .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) .setContext(queryContext) @@ -4096,7 +4095,7 @@ public void testSemiAndAntiJoinSimultaneouslyUsingWhereInSubquery(Map(Arrays.asList( - "abc", - "xyz" - )) - )) + .filters(in("channel", Arrays.asList("abc", "xyz"))) .context(QUERY_CONTEXT_DEFAULT) .build() ), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteUnionQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteUnionQueryTest.java index af0066bba051..2ff53b4b7424 100644 --- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteUnionQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteUnionQueryTest.java @@ -58,7 +58,7 @@ public void testUnionAllDifferentTablesWithMapping() ) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(in("dim2", ImmutableList.of("def", "a"), null)) + .setDimFilter(in("dim2", ImmutableList.of("def", "a"))) .setDimensions( new DefaultDimensionSpec("dim1", "d0"), new DefaultDimensionSpec("dim2", "d1") @@ -100,7 +100,7 @@ public void testJoinUnionAllDifferentTablesWithMapping() ) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(in("dim2", ImmutableList.of("def", "a"), null)) + .setDimFilter(in("dim2", ImmutableList.of("def", "a"))) .setDimensions( new DefaultDimensionSpec("dim1", "d0"), new DefaultDimensionSpec("dim2", "d1") @@ -166,7 +166,7 @@ public void testUnionAllTablesColumnTypeMismatchFloatLong() ) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(in("dim2", ImmutableList.of("en", "a"), null)) + .setDimFilter(in("dim2", ImmutableList.of("en", "a"))) .setDimensions( new DefaultDimensionSpec("dim1", "d0"), new DefaultDimensionSpec("dim2", "d1") @@ -271,7 +271,7 @@ public void testUnionAllSameTableTwice() ) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(in("dim2", ImmutableList.of("def", "a"), null)) + .setDimFilter(in("dim2", ImmutableList.of("def", "a"))) .setDimensions( new DefaultDimensionSpec("dim1", "d0"), new DefaultDimensionSpec("dim2", "d1") @@ -313,7 +313,7 @@ public void testUnionAllSameTableTwiceWithSameMapping() ) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setDimFilter(in("dim2", ImmutableList.of("def", "a"), null)) + .setDimFilter(in("dim2", ImmutableList.of("def", "a"))) .setDimensions( new 
DefaultDimensionSpec("dim1", "d0"), new DefaultDimensionSpec("dim2", "d1") From da21223b3544e5545fa75bd6fe4b376d8a1da942 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 5 Mar 2024 02:08:21 -0800 Subject: [PATCH 02/15] adjust --- .../java/org/apache/druid/benchmark/query/SqlBenchmark.java | 2 +- .../druid/benchmark/query/SqlNestedDataBenchmark.java | 6 +++--- .../druid/sql/calcite/planner/SqlParameterizerShuttle.java | 2 -- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java index 71fde96c887e..60300d88d0cc 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java @@ -770,4 +770,4 @@ public void planSql(Blackhole blackhole) blackhole.consume(plannerResult); } } -} \ No newline at end of file +} diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java index 69a38434f5e2..0be1f4d52e6a 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java @@ -410,7 +410,7 @@ public void setup() ); } catch (Throwable ex) { -// log.warn(ex, "failed to sanity check"); + log.warn(ex, "failed to sanity check"); } final String sql = QUERIES.get(Integer.parseInt(query)); @@ -443,7 +443,7 @@ public void setup() log.info("Total result row count:" + rowCounter); } catch (Throwable ex) { -// log.warn(ex, "failed to count rows"); + log.warn(ex, "failed to count rows"); } } @@ -470,4 +470,4 @@ public void querySql(Blackhole blackhole) blackhole.consume(lastRow); } } -} \ No newline at end of file +} diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/SqlParameterizerShuttle.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/SqlParameterizerShuttle.java index 5273a65ccf7a..29da45b085fd 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/SqlParameterizerShuttle.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/SqlParameterizerShuttle.java @@ -135,8 +135,6 @@ private SqlNode createArrayLiteral(Object value, int posn) SqlNode node; if (element instanceof String) { node = SqlLiteral.createCharString((String) element, SqlParserPos.ZERO); - } else if (element instanceof Float || element instanceof Double) { - node = SqlLiteral.createApproxNumeric(element.toString(), SqlParserPos.ZERO); } else if (element instanceof Integer || element instanceof Long) { // No direct way to create a literal from an Integer or Long, have // to parse a string, sadly. From 68feab474a004bb00081efbe452e08a36e6571dc Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 5 Mar 2024 15:26:47 -0800 Subject: [PATCH 03/15] only use in sql compatible mode --- .../druid/query/filter/TypedInFilter.java | 6 +- .../segment/filter/TypedInFilterTests.java | 186 +++++++----------- .../ArrayOverlapOperatorConversion.java | 3 +- .../filtration/ConvertSelectorsToIns.java | 77 +++++++- .../sql/calcite/CalciteArraysQueryTest.java | 4 +- .../sql/calcite/CalciteJoinQueryTest.java | 4 +- .../CalciteLookupFunctionQueryTest.java | 6 +- .../druid/sql/calcite/CalciteQueryTest.java | 9 +- 8 files changed, 176 insertions(+), 119 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index e8cf0aa0b360..aebaede81633 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -110,6 +110,11 @@ public TypedInFilter( @JsonProperty("filterTuning") 
@Nullable FilterTuning filterTuning ) { + if (NullHandling.replaceWithDefault()) { + throw InvalidInput.exception( + "Invalid IN filter, typed in filter only supports SQL compatible null handling mode, set druid.generic.useDefaultValue=false to use this filter" + ); + } this.column = column; if (column == null) { throw InvalidInput.exception("Invalid IN filter, column cannot be null"); @@ -128,7 +133,6 @@ public TypedInFilter( } if (sortedValues != null) { this.unsortedValues = null; - // jackson is a jerk and turns longs into ints sometimes... this.lazyMatchValues = () -> sortedValues; } else { this.unsortedValues = values; diff --git a/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java index 4842c2d0717e..de140c40199f 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java @@ -43,6 +43,7 @@ import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.AfterClass; import org.junit.Assert; +import org.junit.Assume; import org.junit.Test; import org.junit.experimental.runners.Enclosed; import org.junit.runner.RunWith; @@ -82,14 +83,16 @@ public static void tearDown() throws Exception @Test public void testSingleValueStringColumnWithNulls() { + Assume.assumeTrue(NullHandling.sqlCompatible()); + assertFilterMatches( inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "")), - NullHandling.sqlCompatible() ? ImmutableList.of("a") : ImmutableList.of() + ImmutableList.of("a") ); assertFilterMatches( inFilter("dim1", ColumnType.STRING, Collections.singletonList("")), - NullHandling.sqlCompatible() ? 
ImmutableList.of("a") : ImmutableList.of() + ImmutableList.of("a") ); assertFilterMatches( @@ -106,92 +109,46 @@ public void testSingleValueStringColumnWithNulls() ImmutableList.of() ); - if (NullHandling.replaceWithDefault()) { - assertFilterMatches( - inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "10", "abc")), - ImmutableList.of("a", "b", "f") - ); - assertFilterMatches( - inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "10", "abc")), - ImmutableList.of("a", "b", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de"))), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("s0", ColumnType.STRING, Arrays.asList("a", "b"))), - ImmutableList.of("a", "c", "e") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist"))), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - } else { - assertFilterMatches( - inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "10", "abc")), - ImmutableList.of("b", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de"))), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("s0", ColumnType.STRING, Arrays.asList("a", "b"))), - ImmutableList.of("a", "e") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist"))), - ImmutableList.of("a", "b", "d", "e", "f") - ); - } + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "10", "abc")), + ImmutableList.of("b", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de"))), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("s0", ColumnType.STRING, Arrays.asList("a", "b"))), + 
ImmutableList.of("a", "e") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist"))), + ImmutableList.of("a", "b", "d", "e", "f") + ); } @Test public void testMultiValueStringColumn() { - if (isAutoSchema()) { - return; - } - if (NullHandling.replaceWithDefault()) { - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Arrays.asList("b", "d")), - ImmutableList.of("a") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Collections.singletonList(null)), - ImmutableList.of("b", "c", "f") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "a")), - ImmutableList.of("a", "b", "c", "d", "f") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "b")), - ImmutableList.of("a", "b", "c", "f") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Collections.singletonList("")), - ImmutableList.of("b", "c", "f") - ); - } else { - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Collections.singletonList(null)), - ImmutableList.of("b", "f") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "a")), - ImmutableList.of("a", "b", "d", "f") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "b")), - ImmutableList.of("a", "b", "f") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Collections.singletonList("")), - ImmutableList.of("c") - ); - } + Assume.assumeTrue(NullHandling.sqlCompatible()); + Assume.assumeFalse(isAutoSchema()); + + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList(null)), + ImmutableList.of("b", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "a")), + ImmutableList.of("a", "b", "d", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "b")), + ImmutableList.of("a", "b", "f") + ); + 
assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList("")), + ImmutableList.of("c") + ); assertFilterMatches( inFilter("dim2", ColumnType.STRING, Arrays.asList("", null)), @@ -212,6 +169,7 @@ public void testMultiValueStringColumn() @Test public void testMissingColumn() { + Assume.assumeTrue(NullHandling.sqlCompatible()); assertFilterMatches( inFilter("dim3", ColumnType.STRING, Arrays.asList(null, null)), ImmutableList.of("a", "b", "c", "d", "e", "f") @@ -221,21 +179,14 @@ public void testMissingColumn() ImmutableList.of() ); - if (NullHandling.replaceWithDefault()) { - assertFilterMatches( - inFilter("dim3", ColumnType.STRING, Collections.singletonList("")), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - } else { - assertFilterMatches( - inFilter("dim3", ColumnType.STRING, Collections.singletonList("")), - ImmutableList.of() - ); - assertFilterMatches( - NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Collections.singletonList(""))), - ImmutableList.of() - ); - } + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("")), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Collections.singletonList(""))), + ImmutableList.of() + ); assertFilterMatches( inFilter("dim3", ColumnType.STRING, Arrays.asList(null, "a")), @@ -252,7 +203,7 @@ public void testMissingColumn() ); assertFilterMatches( NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Collections.singletonList("a"))), - NullHandling.sqlCompatible() ? 
ImmutableList.of() : ImmutableList.of("a", "b", "c", "d", "e", "f") + ImmutableList.of() ); assertFilterMatches( @@ -269,26 +220,27 @@ public void testMissingColumn() @Test public void testNumeric() { - assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(0f)), ImmutableList.of("a")); - assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(0.0)), ImmutableList.of("a")); + Assume.assumeTrue(NullHandling.sqlCompatible()); + assertFilterMatches( + inFilter("f0", ColumnType.FLOAT, Collections.singletonList(0f)), + ImmutableList.of("a") + ); + assertFilterMatches( + inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(0.0)), + ImmutableList.of("a") + ); assertFilterMatches(inFilter("l0", ColumnType.LONG, Collections.singletonList(0L)), ImmutableList.of("a")); assertFilterMatches( NotDimFilter.of(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(0f))), - NullHandling.sqlCompatible() - ? ImmutableList.of("b", "c", "d", "f") - : ImmutableList.of("b", "c", "d", "e", "f") + ImmutableList.of("b", "c", "d", "f") ); assertFilterMatches( NotDimFilter.of(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(0.0))), - NullHandling.sqlCompatible() - ? ImmutableList.of("b", "d", "e", "f") - : ImmutableList.of("b", "c", "d", "e", "f") + ImmutableList.of("b", "d", "e", "f") ); assertFilterMatches( NotDimFilter.of(inFilter("l0", ColumnType.LONG, Collections.singletonList(0L))), - NullHandling.sqlCompatible() - ? 
ImmutableList.of("b", "c", "e", "f") - : ImmutableList.of("b", "c", "d", "e", "f") + ImmutableList.of("b", "c", "e", "f") ); assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(null)), ImmutableList.of("e")); assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(null)), ImmutableList.of("c")); @@ -336,6 +288,7 @@ public static class TypedInFilterFilterNonParameterizedTests extends Initialized @Test public void testSerde() throws JsonProcessingException { + Assume.assumeTrue(NullHandling.sqlCompatible()); ObjectMapper mapper = new DefaultObjectMapper(); TypedInFilter filter = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "c")); String s = mapper.writeValueAsString(filter); @@ -365,6 +318,7 @@ public void testSerde() throws JsonProcessingException @Test public void testGetCacheKey() { + Assume.assumeTrue(NullHandling.sqlCompatible()); TypedInFilter filterUnsorted = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", null, "c")); TypedInFilter filterDifferent = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", "c")); TypedInFilter filterPresorted = new TypedInFilter( @@ -429,6 +383,15 @@ public void testGetCacheKey() @Test public void testInvalidParameters() { + if (NullHandling.replaceWithDefault()) { + Throwable t = Assert.assertThrows( + DruidException.class, + () -> new TypedInFilter(null, ColumnType.STRING, null, null, null) + ); + Assert.assertEquals("Invalid IN filter, typed in filter only supports SQL compatible null handling mode, set druid.generic.useDefaultValue=false to use this filter", t.getMessage()); + } + + Assume.assumeTrue(NullHandling.sqlCompatible()); Throwable t = Assert.assertThrows( DruidException.class, () -> new TypedInFilter(null, ColumnType.STRING, null, null, null) @@ -452,6 +415,7 @@ public void testInvalidParameters() @Test public void testGetDimensionRangeSet() { + Assume.assumeTrue(NullHandling.sqlCompatible()); TypedInFilter filter = 
inFilter("x", ColumnType.STRING, Arrays.asList(null, "a", "b", "c")); TypedInFilter filter2 = inFilter("x", ColumnType.STRING, Arrays.asList("a", "b", null, "c")); @@ -481,6 +445,7 @@ public void testGetDimensionRangeSet() @Test public void testRequiredColumnRewrite() { + Assume.assumeTrue(NullHandling.sqlCompatible()); TypedInFilter filter = inFilter("dim0", ColumnType.STRING, Arrays.asList("a", "c")); TypedInFilter filter2 = inFilter("dim1", ColumnType.STRING, Arrays.asList("a", "c")); @@ -503,6 +468,7 @@ public void testRequiredColumnRewrite() @Test public void test_equals() { + Assume.assumeTrue(NullHandling.sqlCompatible()); EqualsVerifier.forClass(TypedInFilter.class).usingGetClass() .withNonnullFields( "column", diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java index 3b95d8578823..9eea97241b4c 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java @@ -25,6 +25,7 @@ import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; @@ -150,7 +151,7 @@ public DimFilter toDruidFilter( ); } } else { - if (plannerContext.isUseBoundsAndSelectors() || !simpleExtractionExpr.isDirectColumnAccess()) { + if (plannerContext.isUseBoundsAndSelectors() || NullHandling.replaceWithDefault() || !simpleExtractionExpr.isDirectColumnAccess()) { final InDimFilter.ValuesSet valuesSet = InDimFilter.ValuesSet.create(); for (final Object arrayElement : arrayElements) { 
valuesSet.add(Evals.asString(arrayElement)); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java index da5efeb6a08f..1c29889e040c 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java @@ -20,7 +20,9 @@ package org.apache.druid.sql.calcite.filtration; import com.google.common.collect.Lists; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.Pair; +import org.apache.druid.math.expr.Evals; import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.EqualityFilter; @@ -29,6 +31,7 @@ import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.filter.TypedInFilter; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.sql.calcite.expression.SimpleExtraction; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -64,7 +67,11 @@ public DimFilter process(DimFilter filter) children = new CollectSelectors(children, sourceRowSignature).collect(); // Process "equality" filters, which are used when "sqlUseBoundAndSelectors" is false. - children = new CollectEqualities(children).collect(); + if (NullHandling.sqlCompatible()) { + children = new CollectEqualities(children).collect(); + } else { + children = new CollectEqualitiesDefaultValueMode(children).collect(); + } if (!children.equals(((OrDimFilter) filter).getFields())) { return children.size() == 1 ? children.get(0) : new OrDimFilter(children); @@ -199,7 +206,8 @@ protected DimFilter makeAnd(List exprs) /** * Helper for collecting {@link EqualityFilter} into {@link InDimFilter}. 
*/ - private static class CollectEqualities extends CollectComparisons> + private static class CollectEqualities + extends CollectComparisons> { public CollectEqualities(final List orExprs) { @@ -255,4 +263,69 @@ protected DimFilter makeAnd(List exprs) return new AndDimFilter(exprs); } } + + private static class CollectEqualitiesDefaultValueMode + extends CollectComparisons + { + public CollectEqualitiesDefaultValueMode(final List orExprs) + { + super(orExprs); + } + + @Nullable + @Override + protected Pair> getCollectibleComparison(DimFilter filter) + { + return ConvertSelectorsToIns.splitAnd( + filter, + EqualityFilter.class, + + // Prefer extracting nonnull vs null comparisons when ANDed, as nonnull comparisons are more likely to + // find companions in other ORs. + Comparator.comparing(equality -> equality.getMatchValue() == null ? 0 : 1) + ); + } + + @Override + protected InDimFilter.ValuesSet makeCollection() + { + return new InDimFilter.ValuesSet(); + } + + @Nullable + @Override + protected RangeRefKey getCollectionKey(EqualityFilter selector) + { + if (!selector.getMatchValueType().is(ValueType.STRING)) { + // skip non-string equality filters since InDimFilter uses a sorted string set, which is a different sort + // than numbers or other types might use + return null; + } + return RangeRefKey.from(selector); + } + + @Override + protected Set getMatchValues(EqualityFilter selector) + { + return Collections.singleton(Evals.asString(selector.getMatchValue())); + } + + @Nullable + @Override + protected InDimFilter makeCollectedComparison(RangeRefKey rangeRefKey, InDimFilter.ValuesSet values) + { + if (values.size() > 1) { + // skip non-string equality filters since InDimFilter uses a sorted string set, which is a different sort + // than numbers or other types might use + return new InDimFilter(rangeRefKey.getColumn(), values, null, null); + } + return null; + } + + @Override + protected DimFilter makeAnd(List exprs) + { + return new AndDimFilter(exprs); + } + 
} } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 2c34f5050f98..34291b3081c6 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -6948,7 +6948,9 @@ public void testUnnestWithTimeFilterInsideSubquery() ) .intervals(querySegmentSpec(Filtration.eternity())) .filters( - in("m1", ColumnType.FLOAT, ImmutableList.of(1.0f, 2.0f)) + NullHandling.sqlCompatible() + ? in("m1", ColumnType.FLOAT, ImmutableList.of(1.0f, 2.0f)) + : in("m1", ImmutableList.of("1", "2")) ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index e525e5e05d95..7c1535f280ac 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -6388,7 +6388,9 @@ public void testJoinsOverUnnestOverFilterDSOverJoin() "(\"dim2\" == \"j0.dim2\")", JoinType.INNER ), - in("m1", ColumnType.FLOAT, ImmutableList.of(1.0, 4.0)) + NullHandling.sqlCompatible() + ? 
in("m1", ColumnType.FLOAT, ImmutableList.of(1.0, 4.0)) + : in("m1", ImmutableList.of("1", "4")) ), expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING), equality("_j0.unnest", "a", ColumnType.STRING) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteLookupFunctionQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteLookupFunctionQueryTest.java index 7e9ac48efd9a..7d73a894c8fe 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteLookupFunctionQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteLookupFunctionQueryTest.java @@ -817,7 +817,11 @@ public void testFilterMvOverlapNullInjective() testQuery( buildFilterTestSql("MV_OVERLAP(lookup(dim1, 'lookyloo121'), ARRAY['xabc', 'x6', 'nonexistent', NULL])"), QUERY_CONTEXT, - buildFilterTestExpectedQuery(in("dim1", Arrays.asList(null, "abc"))), + buildFilterTestExpectedQuery( + NullHandling.sqlCompatible() + ? in("dim1", Arrays.asList(null, "abc")) + : equality("dim1", "abc", ColumnType.STRING) + ), ImmutableList.of(new Object[]{"xabc", 1L}) ); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 7e76edf47272..f03cf8b46c7c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -86,6 +86,7 @@ import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.LikeDimFilter; import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.query.filter.RegexDimFilter; import org.apache.druid.query.groupby.GroupByQuery; @@ -3920,7 +3921,7 @@ public void testCoalesceColumnsFilterWithEquality() ) .setDimFilter(or( in("dim2", ImmutableSet.of("a", "abc")), - and(isNull("dim2"), in("dim1", 
ImmutableList.of("a", "abc"))) + and(NullFilter.forColumn("dim2"), in("dim1", ImmutableList.of("a", "abc"))) )) .setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING))) .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) @@ -4582,7 +4583,11 @@ public void testCountStarWithLongColumnFiltersOnFloatLiterals() .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .filters(in("cnt", ColumnType.DOUBLE, ImmutableList.of(1.0, 100000001.0))) + .filters( + NullHandling.sqlCompatible() + ? in("cnt", ColumnType.DOUBLE, ImmutableList.of(1.0, 100000001.0)) + : in("cnt", ImmutableList.of("1.0", "100000001.0"), null) + ) .aggregators(aggregators(new CountAggregatorFactory("a0"))) .context(QUERY_CONTEXT_DEFAULT) .build() From f76ec093ef6fda6f94df4617cbdb7f017305f0a7 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 8 Mar 2024 14:14:32 -0800 Subject: [PATCH 04/15] check for sortedness --- .../druid/query/filter/TypedInFilter.java | 85 +++++++++++++++---- .../segment/filter/TypedInFilterTests.java | 10 +-- .../druid/sql/calcite/CalciteQueryTest.java | 12 +-- 3 files changed, 78 insertions(+), 29 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index aebaede81633..f31ae38d57da 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -39,6 +39,7 @@ import it.unimi.dsi.fastutil.floats.FloatOpenHashSet; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import it.unimi.dsi.fastutil.objects.ObjectAVLTreeSet; +import it.unimi.dsi.fastutil.objects.ObjectArrays; import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.ByteBufferUtils; @@ -65,6 +66,7 @@ import 
javax.annotation.Nullable; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -72,8 +74,6 @@ import java.util.Objects; import java.util.Set; import java.util.SortedSet; -import java.util.stream.Collectors; -import java.util.stream.Stream; public class TypedInFilter extends AbstractOptimizableDimFilter implements Filter { @@ -135,8 +135,13 @@ public TypedInFilter( this.unsortedValues = null; this.lazyMatchValues = () -> sortedValues; } else { - this.unsortedValues = values; - this.lazyMatchValues = Suppliers.memoize(() -> sortValues(unsortedValues, matchValueType)); + if (checkSorted(values, matchValueType)) { + this.unsortedValues = null; + this.lazyMatchValues = () -> values; + } else { + this.unsortedValues = values; + this.lazyMatchValues = Suppliers.memoize(() -> sortValues(unsortedValues, matchValueType)); + } } if (matchValueType.is(ValueType.STRING)) { this.lazyMatchValueBytes = Suppliers.memoize(() -> { @@ -151,7 +156,7 @@ public TypedInFilter( } this.predicateFactorySupplier = Suppliers.memoize( - () -> new InFilterDruidPredicateFactory(lazyMatchValues.get(), matchValueType) + () -> new PredicateFactory(lazyMatchValues.get(), matchValueType) ); this.cacheKeySupplier = Suppliers.memoize(this::computeCacheKey); } @@ -384,19 +389,63 @@ private byte[] computeCacheKey() .build(); } + private static boolean checkSorted(List unsortedValues, ColumnType matchValueType) + { + final Comparator comparator = matchValueType.getNullableStrategy(); + Object prev = null; + boolean needsCoerceCheck = true; + for (Object o : unsortedValues) { + if (needsCoerceCheck && o != null) { + Object coerced = coerceValue(o, matchValueType); + //noinspection ObjectEquality + if (coerced != o) { + return false; + } + needsCoerceCheck = false; + } + if (prev != null && comparator.compare(prev, o) >= 0) { + return false; + } + prev = o; + } + return true; + } + 
+ @Nullable + private static Object coerceValue(@Nullable Object o, ColumnType matchValueType) + { + if (o == null) { + return o; + } + switch (matchValueType.getType()) { + case STRING: + return DimensionHandlerUtils.convertObjectToString(o); + case LONG: + return DimensionHandlerUtils.convertObjectToLong(o); + case FLOAT: + return DimensionHandlerUtils.convertObjectToFloat(o); + case DOUBLE: + return DimensionHandlerUtils.convertObjectToDouble(o); + default: + throw InvalidInput.exception("Unsupported matchValueType[%s]", matchValueType); + } + } + private static List sortValues(List unsortedValues, ColumnType matchValueType) { - final Stream coerced; - if (matchValueType.is(ValueType.LONG)) { - coerced = unsortedValues.stream().map(DimensionHandlerUtils::convertObjectToLong).distinct(); - } else if (matchValueType.is(ValueType.DOUBLE)) { - coerced = unsortedValues.stream().map(DimensionHandlerUtils::convertObjectToDouble).distinct(); - } else if (matchValueType.is(ValueType.FLOAT)) { - coerced = unsortedValues.stream().map(DimensionHandlerUtils::convertObjectToFloat).distinct(); - } else { - coerced = unsortedValues.stream().distinct(); + final Object[] a = unsortedValues.toArray(); + // check if values need coerced + for (int i = 0; i < a.length; i++) { + Object coerced = coerceValue(a[i], matchValueType); + //noinspection ObjectEquality + if (coerced != null && a[i] == coerced) { + // assume list is all same type objects... 
+ break; + } + a[i] = coerced; } - return coerced.sorted(matchValueType.getNullableStrategy()).collect(Collectors.toList()); + ObjectArrays.quickSort(a, matchValueType.getNullableStrategy()); + return Arrays.asList(a); } /** @@ -593,7 +642,7 @@ public DruidPredicateMatch applyNull() }; } - public static class InFilterDruidPredicateFactory implements DruidPredicateFactory + public static class PredicateFactory implements DruidPredicateFactory { private final ColumnType matchValueType; private final List sortedValues; @@ -602,7 +651,7 @@ public static class InFilterDruidPredicateFactory implements DruidPredicateFacto private final Supplier floatPredicateSupplier; private final Supplier doublePredicateSupplier; - public InFilterDruidPredicateFactory(final List sortedValues, final ColumnType matchValueType) + public PredicateFactory(final List sortedValues, final ColumnType matchValueType) { this.sortedValues = sortedValues; this.matchValueType = matchValueType; @@ -650,7 +699,7 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o; + PredicateFactory that = (PredicateFactory) o; return Objects.equals(matchValueType, that.matchValueType) && Objects.equals(sortedValues, that.sortedValues); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java index de140c40199f..9a0b52b48d60 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java @@ -319,8 +319,8 @@ public void testSerde() throws JsonProcessingException public void testGetCacheKey() { Assume.assumeTrue(NullHandling.sqlCompatible()); - TypedInFilter filterUnsorted = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", null, "c")); - TypedInFilter 
filterDifferent = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", "c")); + TypedInFilter filterUnsorted = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", null, "c")); + TypedInFilter filterDifferent = inFilter("column", ColumnType.STRING, Arrays.asList("a", "c", "b")); TypedInFilter filterPresorted = new TypedInFilter( "column", ColumnType.STRING, @@ -334,7 +334,7 @@ public void testGetCacheKey() Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); - filterUnsorted = inFilter("column", ColumnType.LONG, Arrays.asList(2L, -2L, 1L, null, 2L, 3L)); + filterUnsorted = inFilter("column", ColumnType.LONG, Arrays.asList(2L, -2L, 1L, null, 3L)); filterDifferent = inFilter("column", ColumnType.LONG, Arrays.asList(2L, -2L, 1L, 3L)); filterPresorted = new TypedInFilter( "column", @@ -349,7 +349,7 @@ public void testGetCacheKey() Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); - filterUnsorted = inFilter("column", ColumnType.DOUBLE, Arrays.asList(2.2, -2.2, 1.1, null, 2.2, 3.3)); + filterUnsorted = inFilter("column", ColumnType.DOUBLE, Arrays.asList(2.2, -2.2, 1.1, null, 3.3)); filterDifferent = inFilter("column", ColumnType.DOUBLE, Arrays.asList(2.2, -2.2, 1.1, 3.3)); filterPresorted = new TypedInFilter( "column", @@ -364,7 +364,7 @@ public void testGetCacheKey() Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); - filterUnsorted = inFilter("column", ColumnType.FLOAT, Arrays.asList(2.2f, -2.2f, 1.1f, null, 2.2f, 3.3f)); + filterUnsorted = inFilter("column", ColumnType.FLOAT, Arrays.asList(2.2f, -2.2f, 1.1f, null, 3.3f)); filterDifferent = 
inFilter("column", ColumnType.FLOAT, Arrays.asList(2.2f, -2.2f, 1.1f, 3.3f)); filterPresorted = new TypedInFilter( "column", diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index eb502c3ad105..c757969207c2 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -5435,7 +5435,7 @@ public void testExpressionFilteringAndGroupingOnStringCastToNumber() public void testInFilter() { testQuery( - "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 IN ('abc', 'def', 'ghi') GROUP BY dim1", + "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 IN ('ghi', 'abc', 'def') GROUP BY dim1", ImmutableList.of( GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) @@ -5462,7 +5462,7 @@ public void testInFilter() public void testNotInFilter() { testQuery( - "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 NOT IN ('abc', 'def', 'ghi') GROUP BY dim1", + "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 NOT IN ('ghi', 'abc', 'def') GROUP BY dim1", ImmutableList.of( GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) @@ -5491,7 +5491,7 @@ public void testNotInFilter() public void testInIsNotTrueFilter() { testQuery( - "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 IN ('abc', 'def', 'ghi') IS NOT TRUE GROUP BY dim1", + "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 IN ('ghi', 'abc', 'def') IS NOT TRUE GROUP BY dim1", ImmutableList.of( GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) @@ -5523,7 +5523,7 @@ public void testInIsNotTrueFilter() public void testNotInOrIsNullFilter() { testQuery( - "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 NOT IN ('abc', 'def', 'ghi') OR dim1 IS NULL GROUP BY dim1", + "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 NOT IN ('ghi', 'abc', 'def') OR dim1 IS NULL GROUP BY dim1", ImmutableList.of( GroupByQuery.builder() 
.setDataSource(CalciteTests.DATASOURCE1) @@ -5555,7 +5555,7 @@ public void testNotInOrIsNullFilter() public void testNotInAndLessThanFilter() { testQuery( - "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 NOT IN ('abc', 'def', 'ghi') AND dim1 < 'zzz' GROUP BY dim1", + "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 NOT IN ('ghi', 'abc', 'def') AND dim1 < 'zzz' GROUP BY dim1", ImmutableList.of( GroupByQuery.builder() .setDataSource(CalciteTests.DATASOURCE1) @@ -5589,7 +5589,7 @@ public void testNotInAndLessThanFilter() public void testInIsNotTrueAndLessThanFilter() { testQuery( - "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 IN ('abc', 'def', 'ghi') IS NOT TRUE " + "SELECT dim1, COUNT(*) FROM druid.foo WHERE dim1 IN ('def', 'abc', 'ghi') IS NOT TRUE " + "AND dim1 < 'zzz' GROUP BY dim1", ImmutableList.of( GroupByQuery.builder() From d52430669c159707c4ac4c03ad7f70cb65b64b84 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 8 Mar 2024 19:43:11 -0800 Subject: [PATCH 05/15] fix java 8 --- .../apache/druid/query/filter/TypedInFilter.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index f31ae38d57da..4d69542da25c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -433,19 +433,19 @@ private static Object coerceValue(@Nullable Object o, ColumnType matchValueType) private static List sortValues(List unsortedValues, ColumnType matchValueType) { - final Object[] a = unsortedValues.toArray(); + final Object[] array = unsortedValues.toArray(new Object[0]); // check if values need coerced - for (int i = 0; i < a.length; i++) { - Object coerced = coerceValue(a[i], matchValueType); + for (int i = 0; i < array.length; i++) { + Object coerced = coerceValue(array[i], 
matchValueType); //noinspection ObjectEquality - if (coerced != null && a[i] == coerced) { + if (coerced != null && array[i] == coerced) { // assume list is all same type objects... break; } - a[i] = coerced; + array[i] = coerced; } - ObjectArrays.quickSort(a, matchValueType.getNullableStrategy()); - return Arrays.asList(a); + ObjectArrays.quickSort(array, matchValueType.getNullableStrategy()); + return Arrays.asList(array); } /** From 0e0f3154d66d052e5dcb60c228d3d651518b53d9 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 12 Mar 2024 17:08:51 -0700 Subject: [PATCH 06/15] dont explode benchmark on exception in setup --- .../org/apache/druid/benchmark/query/SqlBenchmark.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java index 60300d88d0cc..f04f7438d7bf 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java @@ -583,8 +583,8 @@ public void setup() .writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class)) ); } - catch (JsonProcessingException e) { - throw new RuntimeException(e); + catch (JsonProcessingException ignored) { + } try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) { @@ -598,6 +598,9 @@ public void setup() } log.info("Total result row count:" + rowCounter); } + catch (Throwable ignored) { + + } } private StringEncodingStrategy getStringEncodingStrategy() From ac7d4f25aaad5d7560dbefcb1ff3a422777d09e2 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 21 Mar 2024 06:25:18 -0700 Subject: [PATCH 07/15] tweaks and javadocs --- ...ryEncodedStringIndexSupplierBenchmark.java | 14 +- .../druid/query/filter/BoundDimFilter.java | 4 + .../druid/query/filter/InDimFilter.java | 17 +- 
.../druid/query/filter/SelectorDimFilter.java | 3 +- .../druid/query/filter/TypedInFilter.java | 175 ++++++++++-------- .../druid/segment/filter/BoundFilter.java | 4 + .../druid/segment/filter/SelectorFilter.java | 3 + .../index/IndexedUtf8ValueIndexes.java | 30 +-- .../index/semantic/Utf8ValueSetIndexes.java | 4 +- .../index/semantic/ValueSetIndexes.java | 67 ++++++- .../ScalarDoubleColumnAndIndexSupplier.java | 8 +- .../ScalarLongColumnAndIndexSupplier.java | 8 +- .../druid/segment/filter/BoundFilterTest.java | 4 + .../druid/segment/filter/InFilterTest.java | 4 + .../segment/filter/SelectorFilterTest.java | 4 + 15 files changed, 234 insertions(+), 115 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java index c80a618a84bb..8491a1283296 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java @@ -25,11 +25,11 @@ import org.apache.druid.collections.bitmap.MutableBitmap; import org.apache.druid.collections.bitmap.RoaringBitmapFactory; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.ByteBufferUtils; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.IndexedUtf8ValueIndexes; import org.apache.druid.segment.index.semantic.StringValueSetIndexes; import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; @@ -73,7 +73,7 @@ public static class BenchmarkState { @Nullable private IndexedUtf8ValueIndexes 
stringValueSetIndex; - private final TreeSet values = new TreeSet<>(); + private final List values = new ArrayList<>(); private static final int START_INT = 10_000_000; // cardinality of the dictionary. it will contain this many ints (as strings, of course), starting at START_INT, @@ -122,14 +122,16 @@ public void setup() Random r = new Random(9001); Collections.shuffle(filterValues); Collections.shuffle(nonFilterValues); - values.clear(); + TreeSet sortedValues = new TreeSet<>(ByteBufferUtils.utf8Comparator()); for (int i = 0; i < filterToDictionaryPercentage * dictionarySize / 100; i++) { if (r.nextInt(100) < selectivityPercentage) { - values.add(ByteBuffer.wrap((filterValues.get(i).toString()).getBytes(StandardCharsets.UTF_8))); + sortedValues.add(ByteBuffer.wrap((filterValues.get(i).toString()).getBytes(StandardCharsets.UTF_8))); } else { - values.add(ByteBuffer.wrap((nonFilterValues.get(i).toString()).getBytes(StandardCharsets.UTF_8))); + sortedValues.add(ByteBuffer.wrap((nonFilterValues.get(i).toString()).getBytes(StandardCharsets.UTF_8))); } } + values.clear(); + values.addAll(sortedValues); } private Iterable intGenerator() @@ -144,6 +146,6 @@ private Iterable intGenerator() @OutputTimeUnit(TimeUnit.MICROSECONDS) public void doValueSetCheck(Blackhole blackhole, BenchmarkState state) { - BitmapColumnIndex bitmapIndex = state.stringValueSetIndex.forSortedValuesUtf8(state.values); + blackhole.consume(state.stringValueSetIndex.forSortedValuesUtf8(state.values)); } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java index c5e0bee077c4..347940de991f 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java @@ -47,6 +47,10 @@ import java.util.Objects; import java.util.Set; +/** + * @deprecated use {@link RangeFilter} instead + */ +@Deprecated 
public class BoundDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 035fd18eeabb..649a8dff4cfa 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -31,6 +31,7 @@ import com.google.common.collect.ForwardingSortedSet; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.Sets; @@ -42,7 +43,6 @@ import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import org.apache.druid.common.config.NullHandling; -import org.apache.druid.java.util.common.ByteBufferUtils; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.Comparators; @@ -69,12 +69,21 @@ import java.util.Collection; import java.util.Collections; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +/** + * Approximately like the SQL 'IN' filter, with the main difference being that this will match NULL values if contained + * in the values list instead of ignoring them. + *

+ * This filter specifies all match values as a sorted string set; matching against other column types must incur the + * cost of converting values to check for matches. For the most part, {@link TypedInFilter} should be used instead. + */ +@Deprecated public class InDimFilter extends AbstractOptimizableDimFilter implements Filter { /** @@ -84,7 +93,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter */ private final ValuesSet values; // Computed eagerly, not lazily, because lazy computations would block all processing threads for a given query. - private final SortedSet valuesUtf8; + private final List valuesUtf8; private final String dimension; @Nullable private final ExtractionFn extractionFn; @@ -806,9 +815,9 @@ public static ValuesSet copyOf(final Collection values) return copyOf(values.iterator()); } - public SortedSet toUtf8() + public List toUtf8() { - final TreeSet valuesUtf8 = new TreeSet<>(ByteBufferUtils.utf8Comparator()); + final List valuesUtf8 = Lists.newArrayListWithCapacity(values.size()); for (final String value : values) { if (value == null) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java index 4489a9fa601d..b8c01a215b72 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java @@ -39,8 +39,9 @@ import java.util.Set; /** - * + * @deprecated use {@link EqualityFilter} or {@link NullFilter} instead. 
*/ +@Deprecated public class SelectorDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index 4d69542da25c..1f33b84a377e 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -29,6 +29,7 @@ import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.Sets; @@ -38,11 +39,9 @@ import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet; import it.unimi.dsi.fastutil.floats.FloatOpenHashSet; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; -import it.unimi.dsi.fastutil.objects.ObjectAVLTreeSet; -import it.unimi.dsi.fastutil.objects.ObjectArrays; +import it.unimi.dsi.fastutil.objects.ObjectRBTreeSet; import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.InvalidInput; -import org.apache.druid.java.util.common.ByteBufferUtils; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.Evals; @@ -66,28 +65,58 @@ import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.SortedSet; +/** + * Approximately like the SQL 'IN' filter, with the main difference being that this will match NULL values if contained + * in the values list instead of ignoring them. + *

+ * This is a typed version of {@link InDimFilter}, which allows the match values to exist in their native type and + * sorted in their type native order for better performance matching against all column types. + */ public class TypedInFilter extends AbstractOptimizableDimFilter implements Filter { + /** + * Column to match {@link #sortedMatchValues} or {@link #sortedUtf8MatchValueBytes} against. + */ private final String column; + + /** + * Type of values contained in {@link #sortedMatchValues}. This might be the same or different than the + * {@link ColumnType} of {@link #column}, but is encouraged to be the same, since there are several optimizations available + * if they match. + */ private final ColumnType matchValueType; + + /** + * Unsorted values. This will be null if the values are found to be sorted, or have been already sorted "upstream". + * Otherwise, this set of values will be lazily computed into {@link #sortedMatchValues} as needed, e.g. for + * JSON serialization, cache key building, building a hashcode, or checking equality. + */ @Nullable private final List unsortedValues; - private final Supplier> lazyMatchValues; + + /** + * Supplier for list of values sorted by {@link #matchValueType}. This is lazily computed if + * {@link #unsortedValues} is not null, i.e. the values were not previously sorted. + */ + private final Supplier> sortedMatchValues; + + /** + * Supplier for list of utf8 byte values sorted by {@link #matchValueType}. If {@link #sortedMatchValues} was supplied + * directly instead of lazily computed, and {@link #matchValueType} is {@link ColumnType#STRING}, the backing list + * will be eagerly computed. If {@link #sortedMatchValues} is lazily computed, this value will be null.
+ */ @Nullable - private final Supplier> lazyMatchValueBytes; + private final Supplier> sortedUtf8MatchValueBytes; @Nullable private final FilterTuning filterTuning; private final Supplier predicateFactorySupplier; - @JsonIgnore private final Supplier cacheKeySupplier; @@ -95,10 +124,12 @@ public class TypedInFilter extends AbstractOptimizableDimFilter implements Filte * Creates a new filter. * * @param column column to search - * @param values set of values to match. This collection may be reused to avoid copying a big collection. - * Therefore, callers should not modify the collection after it is passed to this - * constructor. - * @param matchValueType type of values contained in set + * @param values set of values to match, may or may not be sorted. + * @param sortedValues set of values to match, sorted in matchValueType order. These values absolutely must + * be sorted in the specified order for proper operation. This value is computed from values to + * be used 'downstream' to avoid repeating the work of sorting and checking for sortedness over + * and over.
+ * @param matchValueType type of values contained in values/sortedValues * @param filterTuning optional tuning */ @JsonCreator @@ -110,11 +141,6 @@ public TypedInFilter( @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning ) { - if (NullHandling.replaceWithDefault()) { - throw InvalidInput.exception( - "Invalid IN filter, typed in filter only supports SQL compatible null handling mode, set druid.generic.useDefaultValue=false to use this filter" - ); - } this.column = column; if (column == null) { throw InvalidInput.exception("Invalid IN filter, column cannot be null"); @@ -125,7 +151,7 @@ public TypedInFilter( throw InvalidInput.exception("Invalid IN filter on column [%s], matchValueType cannot be null", column); } // one of sorted or not sorted - if (values == null && sortedValues == null) { + if ((values == null && sortedValues == null) || (values != null && sortedValues != null)) { throw InvalidInput.exception( "Invalid IN filter on column [%s], exactly one of values or sortedValues must be non-null", column @@ -133,30 +159,29 @@ public TypedInFilter( } if (sortedValues != null) { this.unsortedValues = null; - this.lazyMatchValues = () -> sortedValues; + this.sortedMatchValues = () -> sortedValues; + if (matchValueType.is(ValueType.STRING)) { + final List matchValueBytes = Lists.newArrayListWithCapacity(sortedValues.size()); + for (Object s : sortedMatchValues.get()) { + matchValueBytes.add(StringUtils.toUtf8ByteBuffer(Evals.asString(s))); + } + this.sortedUtf8MatchValueBytes = () -> matchValueBytes; + } else { + this.sortedUtf8MatchValueBytes = null; + } } else { if (checkSorted(values, matchValueType)) { this.unsortedValues = null; - this.lazyMatchValues = () -> values; + this.sortedMatchValues = () -> values; } else { this.unsortedValues = values; - this.lazyMatchValues = Suppliers.memoize(() -> sortValues(unsortedValues, matchValueType)); + this.sortedMatchValues = Suppliers.memoize(() -> sortValues(unsortedValues, matchValueType)); } - } - if 
(matchValueType.is(ValueType.STRING)) { - this.lazyMatchValueBytes = Suppliers.memoize(() -> { - final SortedSet matchValueBytes = new ObjectAVLTreeSet<>(ByteBufferUtils.utf8Comparator()); - for (Object s : lazyMatchValues.get()) { - matchValueBytes.add(StringUtils.toUtf8ByteBuffer(Evals.asString(s))); - } - return matchValueBytes; - }); - } else { - this.lazyMatchValueBytes = null; + this.sortedUtf8MatchValueBytes = null; } this.predicateFactorySupplier = Suppliers.memoize( - () -> new PredicateFactory(lazyMatchValues.get(), matchValueType) + () -> new PredicateFactory(sortedMatchValues.get(), matchValueType) ); this.cacheKeySupplier = Suppliers.memoize(this::computeCacheKey); } @@ -170,7 +195,7 @@ public String getColumn() @JsonProperty public List getSortedValues() { - return lazyMatchValues.get(); + return sortedMatchValues.get(); } @JsonProperty @@ -196,7 +221,7 @@ public byte[] getCacheKey() @Override public DimFilter optimize(final boolean mayIncludeUnknown) { - final List matchValues = lazyMatchValues.get(); + final List matchValues = this.sortedMatchValues.get(); if (matchValues.isEmpty()) { return FalseDimFilter.instance(); } else if (matchValues.size() == 1) { @@ -216,6 +241,11 @@ public DimFilter optimize(final boolean mayIncludeUnknown) @Override public Filter toFilter() { + if (NullHandling.replaceWithDefault()) { + throw InvalidInput.exception( + "Invalid IN filter, typed in filter only supports SQL compatible null handling mode, set druid.generic.useDefaultValue=false to use this filter" + ); + } return this; } @@ -227,12 +257,9 @@ public RangeSet getDimensionRangeSet(String dimension) return null; } RangeSet retSet = TreeRangeSet.create(); - for (Object value : lazyMatchValues.get()) { - String valueEquivalent = NullHandling.nullToEmptyIfNeeded(Evals.asString(value)); + for (Object value : sortedMatchValues.get()) { + String valueEquivalent = Evals.asString(value); if (valueEquivalent == null) { - // Case when SQL compatible null handling is 
enabled - // Range.singleton(null) is invalid, so use the fact that - // only null values are less than empty string. retSet.add(Range.lessThan("")); } else { retSet.add(Range.singleton(valueEquivalent)); @@ -262,16 +289,16 @@ public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) return Filters.makeMissingColumnNullIndex(match, selector); } - if (lazyMatchValueBytes != null) { + if (sortedUtf8MatchValueBytes != null) { final Utf8ValueSetIndexes utf8ValueSetIndexes = indexSupplier.as(Utf8ValueSetIndexes.class); if (utf8ValueSetIndexes != null) { - return utf8ValueSetIndexes.forSortedValuesUtf8(lazyMatchValueBytes.get()); + return utf8ValueSetIndexes.forSortedValuesUtf8(sortedUtf8MatchValueBytes.get()); } } final ValueSetIndexes valueSetIndexes = indexSupplier.as(ValueSetIndexes.class); if (valueSetIndexes != null) { - return valueSetIndexes.forSortedValues(lazyMatchValues.get(), matchValueType); + return valueSetIndexes.forSortedValues(sortedMatchValues.get(), matchValueType); } return Filters.makePredicateIndex( @@ -324,7 +351,7 @@ public Filter rewriteRequiredColumns(Map columnRewrites) rewriteDimensionTo, matchValueType, null, - lazyMatchValues.get(), + sortedMatchValues.get(), filterTuning ); } @@ -336,7 +363,7 @@ public String toString() final DimFilter.DimFilterToStringBuilder builder = new DimFilter.DimFilterToStringBuilder(); return builder.appendDimension(column, null) .append(" IN (") - .append(Joiner.on(", ").join(Iterables.transform(lazyMatchValues.get(), String::valueOf))) + .append(Joiner.on(", ").join(Iterables.transform(sortedMatchValues.get(), String::valueOf))) .append(")") .append(" (" + matchValueType + ")") .appendFilterTuning(filterTuning) @@ -355,21 +382,21 @@ public boolean equals(Object o) TypedInFilter that = (TypedInFilter) o; return column.equals(that.column) && Objects.equals(matchValueType, that.matchValueType) && - compareValues(lazyMatchValues.get(), that.lazyMatchValues.get(), matchValueType) && + 
compareValues(sortedMatchValues.get(), that.sortedMatchValues.get(), matchValueType) && Objects.equals(filterTuning, that.filterTuning); } @Override public int hashCode() { - return Objects.hash(lazyMatchValues.get(), column, matchValueType, filterTuning); + return Objects.hash(sortedMatchValues.get(), column, matchValueType, filterTuning); } private byte[] computeCacheKey() { // Hash all values, in sorted order, as their length followed by their content. final Hasher hasher = Hashing.sha256().newHasher(); - for (Object v : lazyMatchValues.get()) { + for (Object v : sortedMatchValues.get()) { if (v == null) { // Encode null as length -1, no content. hasher.putInt(-1); @@ -393,15 +420,13 @@ private static boolean checkSorted(List unsortedValues, ColumnType matchValue { final Comparator comparator = matchValueType.getNullableStrategy(); Object prev = null; - boolean needsCoerceCheck = true; for (Object o : unsortedValues) { - if (needsCoerceCheck && o != null) { + if (o != null) { Object coerced = coerceValue(o, matchValueType); //noinspection ObjectEquality if (coerced != o) { return false; } - needsCoerceCheck = false; } if (prev != null && comparator.compare(prev, o) >= 0) { return false; @@ -415,7 +440,7 @@ private static boolean checkSorted(List unsortedValues, ColumnType matchValue private static Object coerceValue(@Nullable Object o, ColumnType matchValueType) { if (o == null) { - return o; + return null; } switch (matchValueType.getType()) { case STRING: @@ -433,19 +458,13 @@ private static Object coerceValue(@Nullable Object o, ColumnType matchValueType) private static List sortValues(List unsortedValues, ColumnType matchValueType) { - final Object[] array = unsortedValues.toArray(new Object[0]); - // check if values need coerced - for (int i = 0; i < array.length; i++) { - Object coerced = coerceValue(array[i], matchValueType); - //noinspection ObjectEquality - if (coerced != null && array[i] == coerced) { - // assume list is all same type objects... 
- break; - } - array[i] = coerced; + final ObjectRBTreeSet sortedSet = new ObjectRBTreeSet<>(matchValueType.getNullableStrategy()); + for (Object value : unsortedValues) { + sortedSet.add(coerceValue(value, matchValueType)); } - ObjectArrays.quickSort(array, matchValueType.getNullableStrategy()); - return Arrays.asList(array); + final List sortedList = Lists.newArrayListWithCapacity(unsortedValues.size()); + sortedList.addAll(sortedSet); + return sortedList; } /** @@ -454,7 +473,7 @@ private static List sortValues(List unsortedValues, ColumnType matchValueT */ private static boolean compareValues(List o1, List o2, ColumnType matchValueType) { - final NullableTypeStrategy comparator = matchValueType.getNullableStrategy(); + final NullableTypeStrategy comparator = matchValueType.getNullableStrategy(); //noinspection ObjectEquality if (o1 == o2) { return true; @@ -465,25 +484,29 @@ private static boolean compareValues(List o1, List o2, ColumnType matchVal if (o2 == null) { return false; } - final int iter = Math.min(o1.size(), o2.size()); - for (int i = 0; i < iter; i++) { + final int size1 = o1.size(); + final int size2 = o2.size(); + if (size1 != size2) { + return false; + } + for (int i = 0; i < size1; i++) { final int cmp = comparator.compare(o1.get(i), o2.get(i)); if (cmp == 0) { continue; } return false; } - return o1.size() == o2.size(); + return true; } private static DruidObjectPredicate createStringPredicate( - final List sortedValues, + final List sortedValues, final ColumnType matchValueType ) { Preconditions.checkNotNull(sortedValues, "values"); final boolean containsNull = sortedValues.get(0) == null; - final Comparator comparator = matchValueType.getNullableStrategy(); + final Comparator comparator = matchValueType.getNullableStrategy(); if (matchValueType.is(ValueType.STRING)) { return value -> { if (value == null) { @@ -506,11 +529,11 @@ private static DruidObjectPredicate createStringPredicate( }; } - private static DruidLongPredicate 
createLongPredicate(final List sortedValues, ColumnType matchValueType) + private static DruidLongPredicate createLongPredicate(final List sortedValues, ColumnType matchValueType) { boolean matchNulls = sortedValues.get(0) == null; if (matchValueType.is(ValueType.LONG)) { - final Comparator comparator = matchValueType.getNullableStrategy(); + final Comparator comparator = matchValueType.getNullableStrategy(); return new DruidLongPredicate() { @Override @@ -551,11 +574,11 @@ public DruidPredicateMatch applyNull() }; } - private static DruidFloatPredicate createFloatPredicate(final List sortedValues, ColumnType matchValueType) + private static DruidFloatPredicate createFloatPredicate(final List sortedValues, ColumnType matchValueType) { boolean matchNulls = sortedValues.get(0) == null; if (matchValueType.is(ValueType.FLOAT)) { - final Comparator comparator = matchValueType.getNullableStrategy(); + final Comparator comparator = matchValueType.getNullableStrategy(); return new DruidFloatPredicate() { @Override @@ -596,11 +619,11 @@ public DruidPredicateMatch applyNull() }; } - private static DruidDoublePredicate createDoublePredicate(final List sortedValues, ColumnType matchValueType) + private static DruidDoublePredicate createDoublePredicate(final List sortedValues, ColumnType matchValueType) { boolean matchNulls = sortedValues.get(0) == null; if (matchValueType.is(ValueType.DOUBLE)) { - final Comparator comparator = matchValueType.getNullableStrategy(); + final Comparator comparator = matchValueType.getNullableStrategy(); return new DruidDoublePredicate() { @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index 4fa1a4802489..cf6e499c67d8 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -58,6 +58,10 @@ import 
java.util.Objects; import java.util.Set; +/** + * @deprecated use {@link org.apache.druid.query.filter.RangeFilter} instead + */ +@Deprecated public class BoundFilter implements Filter { private final BoundDimFilter boundDimFilter; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java index 9a7e1c3abe79..9854665fd10d 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java @@ -51,7 +51,10 @@ * or {@code dimension IS NULL} when the value is null. * In default null handling mode, this filter is equivalent to {@code dimension = value} or * {@code dimension = ''} when the value is null. + * @deprecated use {@link org.apache.druid.query.filter.EqualityFilter} or + * {@link org.apache.druid.query.filter.NullFilter} instead. */ +@Deprecated public class SelectorFilter implements Filter { private final String dimension; diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java index c175640b99fa..35b0d78ba47a 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java @@ -133,18 +133,24 @@ public BitmapColumnIndex forSortedValues(SortedSet values) @SuppressFBWarnings("NP_NONNULL_PARAM_VIOLATION") @Override - public BitmapColumnIndex forSortedValuesUtf8(SortedSet valuesUtf8) + public BitmapColumnIndex forSortedValuesUtf8(List sortedValuesUtf8) { - final SortedSet tailSet; + final boolean matchNull = sortedValuesUtf8.get(0) == null; + final List tailSet; - if (valuesUtf8.size() >= SIZE_WORTH_CHECKING_MIN) { + if (sortedValuesUtf8.size() >= SIZE_WORTH_CHECKING_MIN) { final ByteBuffer 
minValueInColumn = dictionary.get(0); - tailSet = valuesUtf8.tailSet(minValueInColumn); + final int position = Collections.binarySearch( + sortedValuesUtf8, + minValueInColumn, + ByteBufferUtils.utf8Comparator() + ); + tailSet = sortedValuesUtf8.subList(position, sortedValuesUtf8.size()); } else { - tailSet = valuesUtf8; + tailSet = sortedValuesUtf8; } - return getBitmapColumnIndexForSortedIterableUtf8(tailSet, tailSet.size(), valuesUtf8.contains(null)); + return getBitmapColumnIndexForSortedIterableUtf8(tailSet, tailSet.size(), matchNull); } private ImmutableBitmap getBitmap(int idx) @@ -168,7 +174,7 @@ private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8( { // for large number of in-filter values in comparison to the dictionary size, use the sorted merge algorithm. if (size > SORTED_MERGE_RATIO_THRESHOLD * dictionary.size()) { - return ValueSetIndexes.getIndexFromSortedIteratorSortedMerged( + return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorScan( bitmapFactory, COMPARATOR, valuesUtf8, @@ -185,7 +191,7 @@ private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8( // if the size of in-filter values is less than the threshold percentage of dictionary size, then use binary search // based lookup per value. The algorithm works well for smaller number of values. 
- return ValueSetIndexes.getIndexFromSortedIterator( + return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch( bitmapFactory, valuesUtf8, dictionary, @@ -225,8 +231,8 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign } else { tailSet = baseSet; } - if (tailSet.size() > ValueSetIndexes.SORTED_MERGE_RATIO_THRESHOLD * dictionary.size()) { - return ValueSetIndexes.getIndexFromSortedIteratorSortedMerged( + if (tailSet.size() > ValueSetIndexes.SORTED_SCAN_RATIO_THRESHOLD * dictionary.size()) { + return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorScan( bitmapFactory, ByteBufferUtils.utf8Comparator(), Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer), @@ -236,7 +242,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign ); } // fall through to value iteration - return ValueSetIndexes.getIndexFromSortedIterator( + return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch( bitmapFactory, Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer), dictionary, @@ -244,7 +250,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign unknownsIndex ); } else { - return ValueSetIndexes.getIndexFromIterator( + return ValueSetIndexes.buildBitmapColumnIndexFromIteratorBinarySearch( bitmapFactory, Iterables.transform( sortedValues, diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java index e4cc9894ca8b..3e880ca42dcb 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java @@ -23,7 +23,7 @@ import org.apache.druid.segment.index.BitmapColumnIndex; import java.nio.ByteBuffer; -import java.util.SortedSet; +import java.util.List; public interface Utf8ValueSetIndexes { 
@@ -32,5 +32,5 @@ public interface Utf8ValueSetIndexes * underlying column). The set must be sorted using * {@link org.apache.druid.java.util.common.ByteBufferUtils#utf8Comparator()}. */ - BitmapColumnIndex forSortedValuesUtf8(SortedSet valuesUtf8); + BitmapColumnIndex forSortedValuesUtf8(List sortedValuesUtf8); } diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java index ed6df2597a28..0b0d42c27f13 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java @@ -39,7 +39,18 @@ public interface ValueSetIndexes { - double SORTED_MERGE_RATIO_THRESHOLD = 0.12D; + /** + * threshold of sorted match value iterator size compared to dictionary size to use + * {@link #buildBitmapColumnIndexFromSortedIteratorScan} instead of + * {@link #buildBitmapColumnIndexFromSortedIteratorBinarySearch}. + */ + double SORTED_SCAN_RATIO_THRESHOLD = 0.12D; + + /** + * minimum sorted match value iterator size to trim the initial values from the iterator to seek to the start of the + * value dictionary when using {@link #buildBitmapColumnIndexFromSortedIteratorScan} or + * {@link #buildBitmapColumnIndexFromSortedIteratorBinarySearch}. + */ int SIZE_WORTH_CHECKING_MIN = 8; /** @@ -55,7 +66,23 @@ public interface ValueSetIndexes @Nullable BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSignature matchValueType); - static BitmapColumnIndex getIndexFromSortedIteratorSortedMerged( + + /** + * Helper method for implementing {@link #forSortedValues} for a value set that is sorted the same as the column + * dictionary. + *

+ * Builds a {@link BitmapColumnIndex} from an {@link Iterable} that is sorted the same as the columns + * {@link Indexed} value dictionary. Uses a strategy that does zipping similar to the merge step of a sort-merge, + * where we step forward on both the iterator and the dictionary to find matches to build a + * {@link Iterable}. + *

+ * If sorted match value iterator size is greater than (dictionary size * {@link #SORTED_SCAN_RATIO_THRESHOLD}), + * consider using this method instead of {@link #buildBitmapColumnIndexFromSortedIteratorBinarySearch}. + *

+ * If the values in the iterator are NOT sorted the same as the dictionary, do NOT use this method, use + * {@link #buildBitmapColumnIndexFromIteratorBinarySearch} instead. + */ + static BitmapColumnIndex buildBitmapColumnIndexFromSortedIteratorScan( BitmapFactory bitmapFactory, Comparator comparator, Iterable values, @@ -122,7 +149,22 @@ private void findNext() }; } - static BitmapColumnIndex getIndexFromSortedIterator( + /** + * Helper method for implementing {@link #forSortedValues} for a value set that is sorted the same as the column + * dictionary. + *

+ * Builds a {@link BitmapColumnIndex} from an {@link Iterable} that is sorted the same as the columns + * {@link Indexed} value dictionary. This algorithm iterates the values to match and does a binary search for + * matching values using {@link Indexed#indexOf(Object)} to build a {@link Iterable} short-circuiting + * the iteration if we reach the end of the {@link Indexed} before the values to match are exhausted. + *

+ * If sorted match value iterator size is less than (dictionary size * {@link #SORTED_SCAN_RATIO_THRESHOLD}), + * consider using this method instead of {@link #buildBitmapColumnIndexFromSortedIteratorScan}. + *

+ * If the values in the iterator are not sorted the same as the dictionary, do not use this method, use + * {@link #buildBitmapColumnIndexFromIteratorBinarySearch} instead. + */ + static BitmapColumnIndex buildBitmapColumnIndexFromSortedIteratorBinarySearch( BitmapFactory bitmapFactory, Iterable values, Indexed dictionary, @@ -172,8 +214,8 @@ private void findNext() if (next == -dictionarySize - 1) { // nextValue is past the end of the dictionary so we can break early - // Note: we can rely on indexOf returning (-(insertion point) - 1), because of the earlier check - // for Indexed.isSorted(), which guarantees this behavior + // Note: we can rely on indexOf returning (-(insertion point) - 1), because the Indexed + // is sorted, which guarantees this behavior break; } } @@ -183,7 +225,20 @@ private void findNext() }; } - static BitmapColumnIndex getIndexFromIterator( + /** + * Helper method for implementing {@link #forSortedValues} for a value set that is NOT sorted the same as the column + * dictionary. + *

+ * Builds a {@link BitmapColumnIndex} from an {@link Iterable} that is NOT sorted the same as the columns + * {@link Indexed} value dictionary. This algorithm iterates the values to match and does a binary search for + * matching values using {@link Indexed#indexOf(Object)} to build a {@link Iterable} until the match + * values iterator is exhausted. + *

+ * If values of the iterator are sorted the same as the dictionary, use + * {@link #buildBitmapColumnIndexFromSortedIteratorScan} or + * {@link #buildBitmapColumnIndexFromSortedIteratorBinarySearch} instead. + */ + static BitmapColumnIndex buildBitmapColumnIndexFromIteratorBinarySearch( BitmapFactory bitmapFactory, Iterable values, Indexed dictionary, diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index 6e73ac530813..5ea3c9194eb3 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -298,8 +298,8 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign } else { tailSet = baseSet; } - if (tailSet.size() > ValueSetIndexes.SORTED_MERGE_RATIO_THRESHOLD * dictionary.size()) { - return ValueSetIndexes.getIndexFromSortedIteratorSortedMerged( + if (tailSet.size() > ValueSetIndexes.SORTED_SCAN_RATIO_THRESHOLD * dictionary.size()) { + return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorScan( bitmapFactory, ColumnType.DOUBLE.getNullableStrategy(), tailSet, @@ -309,7 +309,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign ); } // fall through to sorted value iteration - return ValueSetIndexes.getIndexFromSortedIterator( + return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch( bitmapFactory, tailSet, dictionary, @@ -318,7 +318,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign ); } else { // values in set are not sorted in double order, transform them on the fly and iterate them all - return ValueSetIndexes.getIndexFromIterator( + return ValueSetIndexes.buildBitmapColumnIndexFromIteratorBinarySearch( bitmapFactory, 
Iterables.transform(sortedValues, DimensionHandlerUtils::convertObjectToDouble), dictionary, diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index f3cb3f7d73f8..0c1bc1a05990 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -296,8 +296,8 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign } else { tailSet = baseSet; } - if (tailSet.size() > ValueSetIndexes.SORTED_MERGE_RATIO_THRESHOLD * dictionary.size()) { - return ValueSetIndexes.getIndexFromSortedIteratorSortedMerged( + if (tailSet.size() > ValueSetIndexes.SORTED_SCAN_RATIO_THRESHOLD * dictionary.size()) { + return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorScan( bitmapFactory, ColumnType.LONG.getNullableStrategy(), tailSet, @@ -307,7 +307,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign ); } // fall through to sort value iteration - return ValueSetIndexes.getIndexFromSortedIterator( + return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch( bitmapFactory, tailSet, dictionary, @@ -316,7 +316,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign ); } else { // values in set are not sorted in double order, transform them on the fly and iterate them all - return ValueSetIndexes.getIndexFromIterator( + return ValueSetIndexes.buildBitmapColumnIndexFromIteratorBinarySearch( bitmapFactory, Iterables.transform( sortedValues, diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java index 3916954906c2..6cba418a3117 100644 --- 
a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java @@ -46,6 +46,10 @@ import java.io.Closeable; import java.util.List; +/** + * Classic {@link BoundFilter} test. Consider adding tests to {@link RangeFilterTests} in addition to, or instead of + * here. + */ @RunWith(Parameterized.class) public class BoundFilterTest extends BaseFilterTest { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java index aeb408b7961c..c56d6c0e491e 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java @@ -53,6 +53,10 @@ import java.util.List; import java.util.Map; +/** + * Classic {@link InDimFilter} test. Consider adding tests to {@link TypedInFilterTests} in addition to, or instead of + * here. + */ @RunWith(Parameterized.class) public class InFilterTest extends BaseFilterTest { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java index bfe0248d9295..c51b94fc59a5 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java @@ -45,6 +45,10 @@ import java.util.Arrays; import java.util.Map; +/** + * Classic {@link SelectorFilter} test. Consider adding tests to {@link EqualityFilterTests} in addition to, or + * instead of here. 
+ */ @RunWith(Parameterized.class) public class SelectorFilterTest extends BaseFilterTest { From 1fdc549f8a68fe1601a70fe256e629a2d939dc64 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 21 Mar 2024 06:31:37 -0700 Subject: [PATCH 08/15] fix test --- .../org/apache/druid/segment/filter/TypedInFilterTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java index 9a0b52b48d60..bd05f1904661 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java @@ -474,7 +474,7 @@ public void test_equals() "column", "matchValueType", "unsortedValues", - "lazyMatchValues", + "sortedMatchValues", "optimizedFilterIncludeUnknown", "optimizedFilterNoIncludeUnknown" ) @@ -486,7 +486,7 @@ public void test_equals() ) .withIgnoredFields( "unsortedValues", - "lazyMatchValueBytes", + "sortedUtf8MatchValueBytes", "predicateFactorySupplier", "cacheKeySupplier", "optimizedFilterIncludeUnknown", From 7e280f1a89d6f9b0144c4cd53868781b24ac3e63 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 21 Mar 2024 16:53:39 -0700 Subject: [PATCH 09/15] unified in filter native test --- .../druid/query/filter/BoundDimFilter.java | 3 +- .../druid/query/filter/InDimFilter.java | 1 - .../druid/query/filter/SelectorDimFilter.java | 3 +- .../druid/query/filter/TypedInFilter.java | 24 +- .../druid/segment/filter/BoundFilter.java | 4 - .../druid/segment/filter/SelectorFilter.java | 3 - .../index/IndexedUtf8ValueIndexes.java | 8 +- .../ScalarDoubleColumnAndIndexSupplier.java | 3 + .../ScalarLongColumnAndIndexSupplier.java | 5 +- .../druid/segment/filter/InFilterTest.java | 589 ----------- .../druid/segment/filter/InFilterTests.java | 976 ++++++++++++++++++ .../segment/filter/TypedInFilterTests.java | 509 
--------- .../ArrayOverlapOperatorConversion.java | 21 +- .../filtration/ConvertSelectorsToIns.java | 6 - 14 files changed, 1021 insertions(+), 1134 deletions(-) delete mode 100644 processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java delete mode 100644 processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java diff --git a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java index 347940de991f..311595e99e87 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java @@ -48,9 +48,8 @@ import java.util.Set; /** - * @deprecated use {@link RangeFilter} instead + * Recommended to use {@link RangeFilter} instead */ -@Deprecated public class BoundDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 649a8dff4cfa..d678f4b53f12 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -83,7 +83,6 @@ * This filter specifies all match values as a sorted string set; matching against other column types must incur the * cost of converting values to check for matches. For the most part, {@link TypedInFilter} should be used instead. 
*/ -@Deprecated public class InDimFilter extends AbstractOptimizableDimFilter implements Filter { /** diff --git a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java index b8c01a215b72..05cd4730e4b9 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java @@ -39,9 +39,8 @@ import java.util.Set; /** - * @deprecated use {@link EqualityFilter} or {@link NullFilter} instead. + * Recommended to use {@link EqualityFilter} or {@link NullFilter} instead */ -@Deprecated public class SelectorDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index 1f33b84a377e..e76870d589a2 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -221,6 +221,7 @@ public byte[] getCacheKey() @Override public DimFilter optimize(final boolean mayIncludeUnknown) { + checkSqlCompatible(); final List matchValues = this.sortedMatchValues.get(); if (matchValues.isEmpty()) { return FalseDimFilter.instance(); @@ -241,11 +242,7 @@ public DimFilter optimize(final boolean mayIncludeUnknown) @Override public Filter toFilter() { - if (NullHandling.replaceWithDefault()) { - throw InvalidInput.exception( - "Invalid IN filter, typed in filter only supports SQL compatible null handling mode, set druid.generic.useDefaultValue=false to use this filter" - ); - } + checkSqlCompatible(); return this; } @@ -416,6 +413,15 @@ private byte[] computeCacheKey() .build(); } + private void checkSqlCompatible() + { + if (NullHandling.replaceWithDefault()) { + throw 
InvalidInput.exception( + "Invalid IN filter, typed in filter only supports SQL compatible null handling mode, set druid.generic.useDefaultValue=false to use this filter" + ); + } + } + private static boolean checkSorted(List unsortedValues, ColumnType matchValueType) { final Comparator comparator = matchValueType.getNullableStrategy(); @@ -505,7 +511,7 @@ private static DruidObjectPredicate createStringPredicate( ) { Preconditions.checkNotNull(sortedValues, "values"); - final boolean containsNull = sortedValues.get(0) == null; + final boolean containsNull = !sortedValues.isEmpty() && sortedValues.get(0) == null; final Comparator comparator = matchValueType.getNullableStrategy(); if (matchValueType.is(ValueType.STRING)) { return value -> { @@ -531,7 +537,7 @@ private static DruidObjectPredicate createStringPredicate( private static DruidLongPredicate createLongPredicate(final List sortedValues, ColumnType matchValueType) { - boolean matchNulls = sortedValues.get(0) == null; + boolean matchNulls = !sortedValues.isEmpty() && sortedValues.get(0) == null; if (matchValueType.is(ValueType.LONG)) { final Comparator comparator = matchValueType.getNullableStrategy(); return new DruidLongPredicate() @@ -576,7 +582,7 @@ public DruidPredicateMatch applyNull() private static DruidFloatPredicate createFloatPredicate(final List sortedValues, ColumnType matchValueType) { - boolean matchNulls = sortedValues.get(0) == null; + boolean matchNulls = !sortedValues.isEmpty() && sortedValues.get(0) == null; if (matchValueType.is(ValueType.FLOAT)) { final Comparator comparator = matchValueType.getNullableStrategy(); return new DruidFloatPredicate() @@ -621,7 +627,7 @@ public DruidPredicateMatch applyNull() private static DruidDoublePredicate createDoublePredicate(final List sortedValues, ColumnType matchValueType) { - boolean matchNulls = sortedValues.get(0) == null; + boolean matchNulls = !sortedValues.isEmpty() && sortedValues.get(0) == null; if (matchValueType.is(ValueType.DOUBLE)) { 
final Comparator comparator = matchValueType.getNullableStrategy(); return new DruidDoublePredicate() diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index cf6e499c67d8..4fa1a4802489 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -58,10 +58,6 @@ import java.util.Objects; import java.util.Set; -/** - * @deprecated use {@link org.apache.druid.query.filter.RangeFilter} instead - */ -@Deprecated public class BoundFilter implements Filter { private final BoundDimFilter boundDimFilter; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java index 9854665fd10d..9a7e1c3abe79 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java @@ -51,10 +51,7 @@ * or {@code dimension IS NULL} when the value is null. * In default null handling mode, this filter is equivalent to {@code dimension = value} or * {@code dimension = ''} when the value is null. - * @deprecated use {@link org.apache.druid.query.filter.EqualityFilter} or - * {@link org.apache.druid.query.filter.NullFilter} instead. 
*/ -@Deprecated public class SelectorFilter implements Filter { private final String dimension; diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java index 35b0d78ba47a..65395d148b26 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java @@ -135,6 +135,9 @@ public BitmapColumnIndex forSortedValues(SortedSet values) @Override public BitmapColumnIndex forSortedValuesUtf8(List sortedValuesUtf8) { + if (sortedValuesUtf8.isEmpty()) { + return new AllFalseBitmapColumnIndex(bitmapFactory); + } final boolean matchNull = sortedValuesUtf8.get(0) == null; final List tailSet; @@ -145,7 +148,7 @@ public BitmapColumnIndex forSortedValuesUtf8(List sortedValuesUtf8) minValueInColumn, ByteBufferUtils.utf8Comparator() ); - tailSet = sortedValuesUtf8.subList(position, sortedValuesUtf8.size()); + tailSet = sortedValuesUtf8.subList(position >= 0 ? 
position : -(position + 1), sortedValuesUtf8.size()); } else { tailSet = sortedValuesUtf8; } @@ -209,6 +212,9 @@ private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8( @Override public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSignature matchValueType) { + if (sortedValues.isEmpty()) { + return new AllFalseBitmapColumnIndex(bitmapFactory); + } final boolean matchNull = sortedValues.get(0) == null; final Supplier unknownsIndex = () -> { if (!matchNull && dictionary.get(0) == null) { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index 5ea3c9194eb3..ff0019a2b687 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -273,6 +273,9 @@ private final class DoubleValueSetIndexes implements ValueSetIndexes @Override public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSignature matchValueType) { + if (sortedValues.isEmpty()) { + return new AllFalseBitmapColumnIndex(bitmapFactory); + } final boolean matchNull = sortedValues.get(0) == null; final Supplier unknownsIndex = () -> { if (!matchNull && dictionary.get(0) == null) { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index 0c1bc1a05990..5f0f3a61efeb 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -274,6 +274,9 @@ private final class LongValueSetIndexes implements ValueSetIndexes @Override public BitmapColumnIndex 
forSortedValues(@Nonnull List sortedValues, TypeSignature matchValueType) { + if (sortedValues.isEmpty()) { + return new AllFalseBitmapColumnIndex(bitmapFactory); + } final boolean matchNull = sortedValues.get(0) == null; final Supplier unknownsIndex = () -> { if (!matchNull && dictionary.get(0) == null) { @@ -395,7 +398,7 @@ public Iterable getBitmapIterable() if (value == null) { needNullCheck = true; } else { - Long theValue = GuavaUtils.tryParseLong(value); + Long theValue = DimensionHandlerUtils.convertObjectToLong(value); if (theValue != null) { longs.add(theValue.longValue()); if (NullHandling.replaceWithDefault() && theValue.equals(NullHandling.defaultLongValue())) { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java deleted file mode 100644 index c56d6c0e491e..000000000000 --- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java +++ /dev/null @@ -1,589 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.filter; - -import com.google.common.base.Function; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import nl.jqno.equalsverifier.EqualsVerifier; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.Pair; -import org.apache.druid.js.JavaScriptConfig; -import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.query.extraction.JavaScriptExtractionFn; -import org.apache.druid.query.extraction.MapLookupExtractor; -import org.apache.druid.query.filter.DimFilter; -import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.InDimFilter; -import org.apache.druid.query.filter.NotDimFilter; -import org.apache.druid.query.lookup.LookupExtractionFn; -import org.apache.druid.query.lookup.LookupExtractor; -import org.apache.druid.segment.IndexBuilder; -import org.apache.druid.segment.StorageAdapter; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.io.Closeable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -/** - * Classic {@link InDimFilter} test. Consider adding tests to {@link TypedInFilterTests} in addition to, or instead of - * here. 
- */ -@RunWith(Parameterized.class) -public class InFilterTest extends BaseFilterTest -{ - static final List ROWS = ImmutableList.of( - makeDefaultSchemaRow("a", "", ImmutableList.of("a", "b"), "2017-07-25", "", 0.0, 0.0f, 0L), - makeDefaultSchemaRow("b", "10", ImmutableList.of(), "2017-07-25", "a", 10.1, 10.1f, 100L), - makeDefaultSchemaRow("c", "2", ImmutableList.of(""), "2017-05-25", null, null, 5.5f, 40L), - makeDefaultSchemaRow("d", "1", ImmutableList.of("a"), "2020-01-25", "b", 120.0245, 110.0f, null), - makeDefaultSchemaRow("e", "def", ImmutableList.of("c"), null, "c", 60.0, null, 9001L), - makeDefaultSchemaRow("f", "abc", null, "2020-01-25", "a", 765.432, 123.45f, 12345L) - ); - - public InFilterTest( - String testName, - IndexBuilder indexBuilder, - Function> finisher, - boolean cnf, - boolean optimize - ) - { - super(testName, ROWS, indexBuilder, finisher, cnf, optimize); - } - - - @AfterClass - public static void tearDown() throws Exception - { - BaseFilterTest.tearDown(InFilterTest.class.getName()); - } - - @Test - public void testSingleValueStringColumnWithoutNulls() - { - assertFilterMatches( - toInFilter("dim0"), - ImmutableList.of() - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("dim0")), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - - assertFilterMatches( - toInFilter("dim0", null), - ImmutableList.of() - ); - - assertFilterMatches( - toInFilter("dim0", "", ""), - ImmutableList.of() - ); - - assertFilterMatches( - toInFilter("dim0", "a", "c"), - ImmutableList.of("a", "c") - ); - - assertFilterMatches( - toInFilter("dim0", "e", "x"), - ImmutableList.of("e") - ); - - assertFilterMatches( - NotDimFilter.of(toInFilter("dim0", "e", "x")), - ImmutableList.of("a", "b", "c", "d", "f") - ); - } - - @Test - public void testSingleValueStringColumnWithNulls() - { - assertFilterMatches( - toInFilter("dim1", null, ""), - ImmutableList.of("a") - ); - - assertFilterMatches( - toInFilter("dim1", ""), - ImmutableList.of("a") - ); - - 
assertFilterMatches( - toInFilter("dim1", "-1", "ab", "de"), - ImmutableList.of() - ); - - assertFilterMatches( - toInFilter("s0", "a", "b"), - ImmutableList.of("b", "d", "f") - ); - assertFilterMatches( - toInFilter("s0", "noexist"), - ImmutableList.of() - ); - - if (NullHandling.replaceWithDefault()) { - assertFilterMatches( - toInFilter("dim1", null, "10", "abc"), - ImmutableList.of("a", "b", "f") - ); - assertFilterMatches( - toInFilter("dim1", null, "10", "abc"), - ImmutableList.of("a", "b", "f") - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("dim1", "-1", "ab", "de")), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("s0", "a", "b")), - ImmutableList.of("a", "c", "e") - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("s0", "noexist")), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - } else { - assertFilterMatches( - toInFilter("dim1", null, "10", "abc"), - ImmutableList.of("b", "f") - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("dim1", "-1", "ab", "de")), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("s0", "a", "b")), - ImmutableList.of("a", "e") - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("s0", "noexist")), - ImmutableList.of("a", "b", "d", "e", "f") - ); - } - } - - @Test - public void testMultiValueStringColumn() - { - if (isAutoSchema()) { - return; - } - if (NullHandling.replaceWithDefault()) { - assertFilterMatches( - toInFilter("dim2", "b", "d"), - ImmutableList.of("a") - ); - assertFilterMatches( - toInFilter("dim2", null), - ImmutableList.of("b", "c", "f") - ); - assertFilterMatches( - toInFilter("dim2", null, "a"), - ImmutableList.of("a", "b", "c", "d", "f") - ); - assertFilterMatches( - toInFilter("dim2", null, "b"), - ImmutableList.of("a", "b", "c", "f") - ); - assertFilterMatches( - toInFilter("dim2", ""), - ImmutableList.of("b", "c", "f") - ); - } else { - assertFilterMatches( - 
toInFilter("dim2", null), - ImmutableList.of("b", "f") - ); - assertFilterMatches( - toInFilter("dim2", null, "a"), - ImmutableList.of("a", "b", "d", "f") - ); - assertFilterMatches( - toInFilter("dim2", null, "b"), - ImmutableList.of("a", "b", "f") - ); - assertFilterMatches( - toInFilter("dim2", ""), - ImmutableList.of("c") - ); - } - - assertFilterMatches( - toInFilter("dim2", "", (String) null), - ImmutableList.of("b", "c", "f") - ); - - assertFilterMatches( - toInFilter("dim2", "c"), - ImmutableList.of("e") - ); - - assertFilterMatches( - toInFilter("dim2", "d"), - ImmutableList.of() - ); - } - - @Test - public void testMissingColumn() - { - assertFilterMatches( - toInFilter("dim3", null, (String) null), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("dim3", null, (String) null)), - ImmutableList.of() - ); - - if (NullHandling.replaceWithDefault()) { - assertFilterMatches( - toInFilter("dim3", ""), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - } else { - assertFilterMatches( - toInFilter("dim3", ""), - ImmutableList.of() - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("dim3", "")), - ImmutableList.of() - ); - } - - assertFilterMatches( - toInFilter("dim3", null, "a"), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("dim3", null, "a")), - ImmutableList.of() - ); - - assertFilterMatches( - toInFilter("dim3", "a"), - ImmutableList.of() - ); - assertFilterMatches( - NotDimFilter.of(toInFilter("dim3", "a")), - NullHandling.sqlCompatible() ? 
ImmutableList.of() : ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - - assertFilterMatches( - toInFilter("dim3", "b"), - ImmutableList.of() - ); - - assertFilterMatches( - toInFilter("dim3", "c"), - ImmutableList.of() - ); - } - - @Test - public void testMatchWithExtractionFn() - { - String extractionJsFn = "function(str) { return 'super-' + str; }"; - ExtractionFn superFn = new JavaScriptExtractionFn(extractionJsFn, false, JavaScriptConfig.getEnabledInstance()); - - String nullJsFn = "function(str) { if (str === null) { return 'YES'; } else { return 'NO';} }"; - ExtractionFn yesNullFn = new JavaScriptExtractionFn(nullJsFn, false, JavaScriptConfig.getEnabledInstance()); - - if (NullHandling.replaceWithDefault()) { - assertFilterMatchesSkipArrays( - toInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b"), - ImmutableList.of("a", "b", "c", "d", "f") - ); - assertFilterMatchesSkipArrays( - NotDimFilter.of(toInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b")), - ImmutableList.of("e") - ); - assertFilterMatchesSkipArrays( - toInFilterWithFn("dim2", yesNullFn, "YES"), - ImmutableList.of("b", "c", "f") - ); - assertFilterMatchesSkipArrays( - NotDimFilter.of(toInFilterWithFn("dim2", yesNullFn, "YES")), - ImmutableList.of("a", "d", "e") - ); - assertFilterMatches( - toInFilterWithFn("dim1", superFn, "super-null", "super-10", "super-def"), - ImmutableList.of("a", "b", "e") - ); - assertFilterMatches( - toInFilterWithFn("dim1", yesNullFn, "NO"), - ImmutableList.of("b", "c", "d", "e", "f") - ); - } else { - assertFilterMatchesSkipArrays( - toInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b"), - ImmutableList.of("a", "b", "d", "f") - ); - assertFilterMatchesSkipArrays( - NotDimFilter.of(toInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b")), - ImmutableList.of("c", "e") - ); - assertFilterMatchesSkipArrays( - toInFilterWithFn("dim2", yesNullFn, "YES"), - ImmutableList.of("b", "f") - ); - 
assertFilterMatchesSkipArrays( - NotDimFilter.of(toInFilterWithFn("dim2", yesNullFn, "YES")), - ImmutableList.of("a", "c", "d", "e") - ); - assertFilterMatches( - toInFilterWithFn("dim1", superFn, "super-null", "super-10", "super-def"), - ImmutableList.of("b", "e") - ); - - assertFilterMatches( - toInFilterWithFn("dim1", yesNullFn, "NO"), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - } - - - assertFilterMatches( - toInFilterWithFn("dim3", yesNullFn, "NO"), - ImmutableList.of() - ); - assertFilterMatches( - NotDimFilter.of(toInFilterWithFn("dim3", yesNullFn, "NO")), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - toInFilterWithFn("dim3", yesNullFn, "YES"), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - - } - - @Test - public void testMatchWithLookupExtractionFn() - { - final Map stringMap = ImmutableMap.of( - "a", "HELLO", - "10", "HELLO", - "def", "HELLO", - "c", "BYE" - ); - LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); - LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); - - assertFilterMatches(toInFilterWithFn("dim0", lookupFn, null, "HELLO"), ImmutableList.of("a")); - assertFilterMatches(toInFilterWithFn("dim0", lookupFn, "HELLO", "BYE"), ImmutableList.of("a", "c")); - assertFilterMatches(toInFilterWithFn("dim0", lookupFn, "UNKNOWN"), ImmutableList.of("b", "d", "e", "f")); - assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "HELLO"), ImmutableList.of("b", "e")); - assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.of()); - - if (optimize) { - // Arrays don't cause errors when the extractionFn is optimized, because the "IN" filter vanishes completely. 
- assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); - } else { - assertFilterMatchesSkipArrays(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); - } - - assertFilterMatchesSkipArrays(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d")); - assertFilterMatchesSkipArrays( - toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - - final Map stringMap2 = ImmutableMap.of( - "a", "e" - ); - LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false); - LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true); - - assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, null, "e"), ImmutableList.of("a", "e")); - assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, "a"), ImmutableList.of()); - - final Map stringMap3 = ImmutableMap.of( - "c", "500", - "100", "e" - ); - LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false); - LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true); - - assertFilterMatches(toInFilterWithFn("dim0", lookupFn3, null, "c"), ImmutableList.of("a", "b", "d", "e", "f")); - assertFilterMatches(toInFilterWithFn("dim0", lookupFn3, "e"), ImmutableList.of()); - - } - - @Test - public void testNumericColumnNullsAndDefaults() - { - if (canTestNumericNullsAsDefaultValues) { - assertFilterMatches(new InDimFilter("f0", Sets.newHashSet("0"), null), ImmutableList.of("a", "e")); - assertFilterMatches(new InDimFilter("d0", Sets.newHashSet("0"), null), ImmutableList.of("a", "c")); - assertFilterMatches(new InDimFilter("l0", Sets.newHashSet("0"), null), ImmutableList.of("a", "d")); - assertFilterMatches( - NotDimFilter.of(new InDimFilter("f0", Sets.newHashSet("0"), null)), - ImmutableList.of("b", "c", "d", "f") - ); - assertFilterMatches( - NotDimFilter.of(new InDimFilter("d0", Sets.newHashSet("0"), null)), - ImmutableList.of("b", 
"d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(new InDimFilter("l0", Sets.newHashSet("0"), null)), - ImmutableList.of("b", "c", "e", "f") - ); - assertFilterMatches(new InDimFilter("f0", Collections.singleton(null), null), ImmutableList.of()); - assertFilterMatches(new InDimFilter("d0", Collections.singleton(null), null), ImmutableList.of()); - assertFilterMatches(new InDimFilter("l0", Collections.singleton(null), null), ImmutableList.of()); - - assertFilterMatches(new InDimFilter("f0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a", "e")); - assertFilterMatches(new InDimFilter("d0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a", "c")); - assertFilterMatches(new InDimFilter("l0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a", "d")); - assertFilterMatches(new InDimFilter("f0", Sets.newHashSet(null, "999"), null), ImmutableList.of()); - assertFilterMatches(new InDimFilter("d0", Sets.newHashSet(null, "999"), null), ImmutableList.of()); - assertFilterMatches(new InDimFilter("l0", Sets.newHashSet(null, "999"), null), ImmutableList.of()); - } else { - assertFilterMatches(new InDimFilter("f0", Sets.newHashSet("0"), null), ImmutableList.of("a")); - assertFilterMatches(new InDimFilter("d0", Sets.newHashSet("0"), null), ImmutableList.of("a")); - assertFilterMatches(new InDimFilter("l0", Sets.newHashSet("0"), null), ImmutableList.of("a")); - assertFilterMatches( - NotDimFilter.of(new InDimFilter("f0", Sets.newHashSet("0"), null)), - NullHandling.sqlCompatible() - ? ImmutableList.of("b", "c", "d", "f") - : ImmutableList.of("b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(new InDimFilter("d0", Sets.newHashSet("0"), null)), - NullHandling.sqlCompatible() - ? ImmutableList.of("b", "d", "e", "f") - : ImmutableList.of("b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(new InDimFilter("l0", Sets.newHashSet("0"), null)), - NullHandling.sqlCompatible() - ? 
ImmutableList.of("b", "c", "e", "f") - : ImmutableList.of("b", "c", "d", "e", "f") - ); - assertFilterMatches(new InDimFilter("f0", Collections.singleton(null), null), ImmutableList.of("e")); - assertFilterMatches(new InDimFilter("d0", Collections.singleton(null), null), ImmutableList.of("c")); - assertFilterMatches(new InDimFilter("l0", Collections.singleton(null), null), ImmutableList.of("d")); - assertFilterMatches( - NotDimFilter.of(new InDimFilter("f0", Collections.singleton(null), null)), - ImmutableList.of("a", "b", "c", "d", "f") - ); - assertFilterMatches( - NotDimFilter.of(new InDimFilter("d0", Collections.singleton(null), null)), - ImmutableList.of("a", "b", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(new InDimFilter("l0", Collections.singleton(null), null)), - ImmutableList.of("a", "b", "c", "e", "f") - ); - - assertFilterMatches(new InDimFilter("f0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a")); - assertFilterMatches(new InDimFilter("d0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a")); - assertFilterMatches(new InDimFilter("l0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a")); - assertFilterMatches(new InDimFilter("f0", Sets.newHashSet(null, "999"), null), ImmutableList.of("e")); - assertFilterMatches(new InDimFilter("d0", Sets.newHashSet(null, "999"), null), ImmutableList.of("c")); - assertFilterMatches(new InDimFilter("l0", Sets.newHashSet(null, "999"), null), ImmutableList.of("d")); - } - } - - @Test - public void testRequiredColumnRewrite() - { - InDimFilter filter = (InDimFilter) toInFilter("dim0", "a", "c").toFilter(); - InDimFilter filter2 = (InDimFilter) toInFilter("dim1", "a", "c").toFilter(); - - Assert.assertTrue(filter.supportsRequiredColumnRewrite()); - Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); - - Filter rewrittenFilter = filter.rewriteRequiredColumns(ImmutableMap.of("dim0", "dim1")); - Assert.assertEquals(filter2, rewrittenFilter); - - Throwable t = 
Assert.assertThrows( - IAE.class, - () -> filter.rewriteRequiredColumns(ImmutableMap.of("invalidName", "dim1")) - ); - Assert.assertEquals( - "Received a non-applicable rewrite: {invalidName=dim1}, filter's dimension: dim0", - t.getMessage() - ); - } - - @Test - public void test_equals() - { - EqualsVerifier.forClass(InDimFilter.class) - .usingGetClass() - .withNonnullFields("dimension", "values") - .withIgnoredFields( - "cacheKeySupplier", - "predicateFactory", - "optimizedFilterIncludeUnknown", - "optimizedFilterNoIncludeUnknown", - "valuesUtf8" - ) - .verify(); - } - - @Test - public void test_equals_forInFilterDruidPredicateFactory() - { - EqualsVerifier.forClass(InDimFilter.InFilterDruidPredicateFactory.class) - .usingGetClass() - .withNonnullFields("values") - .withIgnoredFields( - "longPredicateSupplier", - "floatPredicateSupplier", - "doublePredicateSupplier", - "stringPredicateSupplier" - ) - .verify(); - } - - private DimFilter toInFilter(String dim) - { - List emptyList = new ArrayList<>(); - return new InDimFilter(dim, emptyList, null); - } - - private DimFilter toInFilter(String dim, String value, String... values) - { - return new InDimFilter(dim, Lists.asList(value, values), null); - } - - private DimFilter toInFilterWithFn(String dim, ExtractionFn fn, String value, String... values) - { - return new InDimFilter(dim, Lists.asList(value, values), fn); - } -} diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java new file mode 100644 index 000000000000..3e5d8852080e --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java @@ -0,0 +1,976 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.filter; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Function; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.RangeSet; +import com.google.common.collect.Sets; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.error.DruidException; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.js.JavaScriptConfig; +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.extraction.JavaScriptExtractionFn; +import org.apache.druid.query.extraction.MapLookupExtractor; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.InDimFilter; +import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.TypedInFilter; +import org.apache.druid.query.lookup.LookupExtractionFn; +import 
org.apache.druid.query.lookup.LookupExtractor; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +@RunWith(Enclosed.class) +public class InFilterTests +{ + @RunWith(Parameterized.class) + public static class InFilterTest extends BaseFilterTest + { + private static final List ROWS = ImmutableList.of( + makeDefaultSchemaRow("a", "", ImmutableList.of("a", "b"), "2017-07-25", "", 0.0, 0.0f, 0L), + makeDefaultSchemaRow("b", "10", ImmutableList.of(), "2017-07-25", "a", 10.1, 10.1f, 100L), + makeDefaultSchemaRow("c", "2", ImmutableList.of(""), "2017-05-25", null, null, 5.5f, 40L), + makeDefaultSchemaRow("d", "1", ImmutableList.of("a"), "2020-01-25", "b", 120.0245, 110.0f, null), + makeDefaultSchemaRow("e", "def", ImmutableList.of("c"), null, "c", 60.0, null, 9001L), + makeDefaultSchemaRow("f", "abc", null, "2020-01-25", "a", 765.432, 123.45f, 12345L) + ); + + private final ObjectMapper jsonMapper = new DefaultObjectMapper(); + + public InFilterTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean cnf, + boolean optimize + ) + { + super(testName, ROWS, indexBuilder, finisher, cnf, optimize); + } + + + @AfterClass + public static void tearDown() throws Exception + { + BaseFilterTest.tearDown(InFilterTest.class.getName()); + } + + @Test + public void testSingleValueStringColumnWithoutNulls() + { + 
assertFilterMatches( + inFilter("dim0", ColumnType.STRING, Collections.emptyList()), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim0", ColumnType.STRING, Collections.emptyList())), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + + assertFilterMatches( + inFilter("dim0", ColumnType.STRING, Collections.singletonList(null)), + ImmutableList.of() + ); + + assertFilterMatches( + inFilter("dim0", ColumnType.STRING, Arrays.asList("", "")), + ImmutableList.of() + ); + + assertFilterMatches( + inFilter("dim0", ColumnType.STRING, Arrays.asList("a", "c")), + ImmutableList.of("a", "c") + ); + + assertFilterMatches( + inFilter("dim0", ColumnType.STRING, Arrays.asList("e", "x")), + ImmutableList.of("e") + ); + + assertFilterMatches( + NotDimFilter.of(inFilter("dim0", ColumnType.STRING, Arrays.asList("e", "x"))), + ImmutableList.of("a", "b", "c", "d", "f") + ); + } + @Test + public void testSingleValueStringColumnWithNulls() + { + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "")), + ImmutableList.of("a") + ); + + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Collections.singletonList("")), + ImmutableList.of("a") + ); + + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de")), + ImmutableList.of() + ); + + assertFilterMatches( + inFilter("s0", ColumnType.STRING, Arrays.asList("a", "b")), + ImmutableList.of("b", "d", "f") + ); + + assertFilterMatches( + inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist")), + ImmutableList.of() + ); + + if (NullHandling.sqlCompatible()) { + assertFilterMatches( + inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "10", "abc")), + ImmutableList.of("b", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de"))), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("s0", ColumnType.STRING, 
Arrays.asList("a", "b"))), + ImmutableList.of("a", "e") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist"))), + ImmutableList.of("a", "b", "d", "e", "f") + ); + } else { + // TypedInFilter doesn't support default value mode, so only the classic InDimFilter is tested here + assertLegacyFilterMatches( + legacyInFilter("dim1", null, "10", "abc"), + ImmutableList.of("a", "b", "f") + ); + assertLegacyFilterMatches( + legacyInFilter("dim1", null, "10", "abc"), + ImmutableList.of("a", "b", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(legacyInFilter("dim1", "-1", "ab", "de")), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(legacyInFilter("s0", "a", "b")), + ImmutableList.of("a", "c", "e") + ); + assertLegacyFilterMatches( + NotDimFilter.of(legacyInFilter("s0", "noexist")), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + } + } + + @Test + public void testMultiValueStringColumn() + { + Assume.assumeFalse(isAutoSchema()); + + if (NullHandling.sqlCompatible()) { + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList(null)), + ImmutableList.of("b", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "a")), + ImmutableList.of("a", "b", "d", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "b")), + ImmutableList.of("a", "b", "f") + ); + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList("")), + ImmutableList.of("c") + ); + } else { + assertLegacyFilterMatches( + legacyInFilter("dim2", "b", "d"), + ImmutableList.of("a") + ); + assertLegacyFilterMatches( + legacyInFilter("dim2", null), + ImmutableList.of("b", "c", "f") + ); + assertLegacyFilterMatches( + legacyInFilter("dim2", null, "a"), + ImmutableList.of("a", "b", "c", "d", "f") + ); + assertLegacyFilterMatches( + legacyInFilter("dim2", null, "b"), +
ImmutableList.of("a", "b", "c", "f") + ); + assertLegacyFilterMatches( + legacyInFilter("dim2", ""), + ImmutableList.of("b", "c", "f") + ); + } + + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Arrays.asList("", null)), + ImmutableList.of("b", "c", "f") + ); + + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList("c")), + ImmutableList.of("e") + ); + + assertFilterMatches( + inFilter("dim2", ColumnType.STRING, Collections.singletonList("d")), + ImmutableList.of() + ); + } + + @Test + public void testMissingColumn() + { + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Arrays.asList(null, null)), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Arrays.asList(null, null))), + ImmutableList.of() + ); + + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Arrays.asList(null, "a")), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("a")), + ImmutableList.of() + ); + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("b")), + ImmutableList.of() + ); + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("c")), + ImmutableList.of() + ); + + + if (NullHandling.sqlCompatible()) { + assertFilterMatches( + inFilter("dim3", ColumnType.STRING, Collections.singletonList("")), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Collections.singletonList(""))), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Collections.singletonList("a"))), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Arrays.asList(null, "a"))), + ImmutableList.of() + ); + } else { + assertLegacyFilterMatches( + legacyInFilter("dim3", ""), + 
ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(legacyInFilter("dim3", "")), + ImmutableList.of() + ); + assertLegacyFilterMatches( + NotDimFilter.of(legacyInFilter("dim3", "a")), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(legacyInFilter("dim3", null, "a")), + ImmutableList.of() + ); + } + } + + @Test + public void testNumeric() + { + Assume.assumeTrue(NullHandling.sqlCompatible()); + assertFilterMatches( + inFilter("f0", ColumnType.FLOAT, Collections.singletonList(0f)), + ImmutableList.of("a") + ); + assertFilterMatches( + inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(0.0)), + ImmutableList.of("a") + ); + assertFilterMatches(inFilter("l0", ColumnType.LONG, Collections.singletonList(0L)), ImmutableList.of("a")); + assertFilterMatches( + NotDimFilter.of(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(0f))), + ImmutableList.of("b", "c", "d", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(0.0))), + ImmutableList.of("b", "d", "e", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("l0", ColumnType.LONG, Collections.singletonList(0L))), + ImmutableList.of("b", "c", "e", "f") + ); + assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(null)), ImmutableList.of("e")); + assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(null)), ImmutableList.of("c")); + assertFilterMatches(inFilter("l0", ColumnType.LONG, Collections.singletonList(null)), ImmutableList.of("d")); + assertFilterMatches( + NotDimFilter.of(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(null))), + ImmutableList.of("a", "b", "c", "d", "f") + ); + assertFilterMatches( + NotDimFilter.of(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(null))), + ImmutableList.of("a", "b", "d", "e", "f") + ); + assertFilterMatches( + 
NotDimFilter.of(inFilter("l0", ColumnType.LONG, Collections.singletonList(null))), + ImmutableList.of("a", "b", "c", "e", "f") + ); + + assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Arrays.asList("0", "999")), ImmutableList.of("a")); + assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Arrays.asList("0", "999")), ImmutableList.of("a")); + assertFilterMatches(inFilter("l0", ColumnType.LONG, Arrays.asList("0", "999")), ImmutableList.of("a")); + assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Arrays.asList(null, "999")), ImmutableList.of("e")); + assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Arrays.asList(null, "999")), ImmutableList.of("c")); + assertFilterMatches(inFilter("l0", ColumnType.LONG, Arrays.asList(null, "999")), ImmutableList.of("d")); + + assertFilterMatches( + inFilter("l0", ColumnType.LONG, Arrays.asList(100L, 9001L)), + ImmutableList.of("b", "e") + ); + assertFilterMatches( + inFilter("l0", ColumnType.FLOAT, Arrays.asList(100.0f, 110.0f)), + ImmutableList.of("b") + ); + assertFilterMatches( + inFilter("l0", ColumnType.DOUBLE, Arrays.asList(100.0, 110.0)), + ImmutableList.of("b") + ); + + assertFilterMatches( + inFilter("d0", ColumnType.DOUBLE, Arrays.asList(10.1, 120.0245)), + ImmutableList.of("b", "d") + ); + + // auto schema doesn't have float columns, so these get kind of funny + Assume.assumeFalse(isAutoSchema()); + assertFilterMatches( + inFilter("f0", ColumnType.FLOAT, Arrays.asList(10.1f, 110.0f)), + ImmutableList.of("b", "d") + ); + assertFilterMatches( + inFilter("f0", ColumnType.DOUBLE, Arrays.asList(10.1, 110.0)), + ImmutableList.of("b", "d") + ); + } + + @Test + public void testLegacyNumericDefaults() + { + if (canTestNumericNullsAsDefaultValues) { + assertLegacyFilterMatches(new InDimFilter("f0", Sets.newHashSet("0"), null), ImmutableList.of("a", "e")); + assertLegacyFilterMatches(new InDimFilter("d0", Sets.newHashSet("0"), null), ImmutableList.of("a", "c")); + assertLegacyFilterMatches(new InDimFilter("l0", 
Sets.newHashSet("0"), null), ImmutableList.of("a", "d")); + assertLegacyFilterMatches( + NotDimFilter.of(new InDimFilter("f0", Sets.newHashSet("0"), null)), + ImmutableList.of("b", "c", "d", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(new InDimFilter("d0", Sets.newHashSet("0"), null)), + ImmutableList.of("b", "d", "e", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(new InDimFilter("l0", Sets.newHashSet("0"), null)), + ImmutableList.of("b", "c", "e", "f") + ); + assertLegacyFilterMatches(new InDimFilter("f0", Collections.singleton(null), null), ImmutableList.of()); + assertLegacyFilterMatches(new InDimFilter("d0", Collections.singleton(null), null), ImmutableList.of()); + assertLegacyFilterMatches(new InDimFilter("l0", Collections.singleton(null), null), ImmutableList.of()); + + assertLegacyFilterMatches(new InDimFilter("f0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a", "e")); + assertLegacyFilterMatches(new InDimFilter("d0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a", "c")); + assertLegacyFilterMatches(new InDimFilter("l0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a", "d")); + assertLegacyFilterMatches(new InDimFilter("f0", Sets.newHashSet(null, "999"), null), ImmutableList.of()); + assertLegacyFilterMatches(new InDimFilter("d0", Sets.newHashSet(null, "999"), null), ImmutableList.of()); + assertLegacyFilterMatches(new InDimFilter("l0", Sets.newHashSet(null, "999"), null), ImmutableList.of()); + } else { + assertLegacyFilterMatches(new InDimFilter("f0", Sets.newHashSet("0"), null), ImmutableList.of("a")); + assertLegacyFilterMatches(new InDimFilter("d0", Sets.newHashSet("0"), null), ImmutableList.of("a")); + assertLegacyFilterMatches(new InDimFilter("l0", Sets.newHashSet("0"), null), ImmutableList.of("a")); + assertLegacyFilterMatches( + NotDimFilter.of(new InDimFilter("f0", Sets.newHashSet("0"), null)), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("b", "c", "d", "f") + : ImmutableList.of("b", "c", "d", "e", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(new InDimFilter("d0", Sets.newHashSet("0"), null)), + NullHandling.sqlCompatible() + ? ImmutableList.of("b", "d", "e", "f") + : ImmutableList.of("b", "c", "d", "e", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(new InDimFilter("l0", Sets.newHashSet("0"), null)), + NullHandling.sqlCompatible() + ? ImmutableList.of("b", "c", "e", "f") + : ImmutableList.of("b", "c", "d", "e", "f") + ); + assertLegacyFilterMatches(new InDimFilter("f0", Collections.singleton(null), null), ImmutableList.of("e")); + assertLegacyFilterMatches(new InDimFilter("d0", Collections.singleton(null), null), ImmutableList.of("c")); + assertLegacyFilterMatches(new InDimFilter("l0", Collections.singleton(null), null), ImmutableList.of("d")); + assertLegacyFilterMatches( + NotDimFilter.of(new InDimFilter("f0", Collections.singleton(null), null)), + ImmutableList.of("a", "b", "c", "d", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(new InDimFilter("d0", Collections.singleton(null), null)), + ImmutableList.of("a", "b", "d", "e", "f") + ); + assertLegacyFilterMatches( + NotDimFilter.of(new InDimFilter("l0", Collections.singleton(null), null)), + ImmutableList.of("a", "b", "c", "e", "f") + ); + + assertLegacyFilterMatches(new InDimFilter("f0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a")); + assertLegacyFilterMatches(new InDimFilter("d0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a")); + assertLegacyFilterMatches(new InDimFilter("l0", Sets.newHashSet("0", "999"), null), ImmutableList.of("a")); + assertLegacyFilterMatches(new InDimFilter("f0", Sets.newHashSet(null, "999"), null), ImmutableList.of("e")); + assertLegacyFilterMatches(new InDimFilter("d0", Sets.newHashSet(null, "999"), null), ImmutableList.of("c")); + assertLegacyFilterMatches(new InDimFilter("l0", Sets.newHashSet(null, "999"), null), ImmutableList.of("d")); + } + } 
+ @Test + public void testLegacyMatchWithExtractionFn() + { + String extractionJsFn = "function(str) { return 'super-' + str; }"; + ExtractionFn superFn = new JavaScriptExtractionFn(extractionJsFn, false, JavaScriptConfig.getEnabledInstance()); + + String nullJsFn = "function(str) { if (str === null) { return 'YES'; } else { return 'NO';} }"; + ExtractionFn yesNullFn = new JavaScriptExtractionFn(nullJsFn, false, JavaScriptConfig.getEnabledInstance()); + + if (NullHandling.replaceWithDefault()) { + assertFilterMatchesSkipArrays( + legacyInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b"), + ImmutableList.of("a", "b", "c", "d", "f") + ); + assertFilterMatchesSkipArrays( + NotDimFilter.of(legacyInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b")), + ImmutableList.of("e") + ); + assertFilterMatchesSkipArrays( + legacyInFilterWithFn("dim2", yesNullFn, "YES"), + ImmutableList.of("b", "c", "f") + ); + assertFilterMatchesSkipArrays( + NotDimFilter.of(legacyInFilterWithFn("dim2", yesNullFn, "YES")), + ImmutableList.of("a", "d", "e") + ); + assertLegacyFilterMatches( + legacyInFilterWithFn("dim1", superFn, "super-null", "super-10", "super-def"), + ImmutableList.of("a", "b", "e") + ); + assertLegacyFilterMatches( + legacyInFilterWithFn("dim1", yesNullFn, "NO"), + ImmutableList.of("b", "c", "d", "e", "f") + ); + } else { + assertFilterMatchesSkipArrays( + legacyInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b"), + ImmutableList.of("a", "b", "d", "f") + ); + assertFilterMatchesSkipArrays( + NotDimFilter.of(legacyInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b")), + ImmutableList.of("c", "e") + ); + assertFilterMatchesSkipArrays( + legacyInFilterWithFn("dim2", yesNullFn, "YES"), + ImmutableList.of("b", "f") + ); + assertFilterMatchesSkipArrays( + NotDimFilter.of(legacyInFilterWithFn("dim2", yesNullFn, "YES")), + ImmutableList.of("a", "c", "d", "e") + ); + assertLegacyFilterMatches( + legacyInFilterWithFn("dim1", 
superFn, "super-null", "super-10", "super-def"), + ImmutableList.of("b", "e") + ); + + assertLegacyFilterMatches( + legacyInFilterWithFn("dim1", yesNullFn, "NO"), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + } + + assertLegacyFilterMatches( + legacyInFilterWithFn("dim3", yesNullFn, "NO"), + ImmutableList.of() + ); + assertLegacyFilterMatches( + NotDimFilter.of(legacyInFilterWithFn("dim3", yesNullFn, "NO")), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + assertLegacyFilterMatches( + legacyInFilterWithFn("dim3", yesNullFn, "YES"), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + + } + + @Test + public void testLegacyMatchWithLookupExtractionFn() + { + final Map stringMap = ImmutableMap.of( + "a", "HELLO", + "10", "HELLO", + "def", "HELLO", + "c", "BYE" + ); + LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); + LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); + + assertLegacyFilterMatches(legacyInFilterWithFn("dim0", lookupFn, null, "HELLO"), ImmutableList.of("a")); + assertLegacyFilterMatches(legacyInFilterWithFn("dim0", lookupFn, "HELLO", "BYE"), ImmutableList.of("a", "c")); + assertLegacyFilterMatches(legacyInFilterWithFn("dim0", lookupFn, "UNKNOWN"), ImmutableList.of("b", "d", "e", "f")); + assertLegacyFilterMatches(legacyInFilterWithFn("dim1", lookupFn, "HELLO"), ImmutableList.of("b", "e")); + assertLegacyFilterMatches(legacyInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.of()); + + if (optimize) { + // Arrays don't cause errors when the extractionFn is optimized, because the "IN" filter vanishes completely. 
+ assertLegacyFilterMatches(legacyInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); + } else { + assertFilterMatchesSkipArrays(legacyInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); + } + + assertFilterMatchesSkipArrays(legacyInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d")); + assertFilterMatchesSkipArrays( + legacyInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + + final Map stringMap2 = ImmutableMap.of( + "a", "e" + ); + LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false); + LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true); + + assertLegacyFilterMatches(legacyInFilterWithFn("dim0", lookupFn2, null, "e"), ImmutableList.of("a", "e")); + assertLegacyFilterMatches(legacyInFilterWithFn("dim0", lookupFn2, "a"), ImmutableList.of()); + + final Map stringMap3 = ImmutableMap.of( + "c", "500", + "100", "e" + ); + LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false); + LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true); + + assertLegacyFilterMatches(legacyInFilterWithFn("dim0", lookupFn3, null, "c"), ImmutableList.of("a", "b", "d", "e", "f")); + assertLegacyFilterMatches(legacyInFilterWithFn("dim0", lookupFn3, "e"), ImmutableList.of()); + } + + @Override + protected void assertFilterMatches(DimFilter filter, List expectedRows) + { + assertTypedFilterMatches(filter, expectedRows); + assertLegacyFilterMatches(filter, expectedRows); + } + + private void assertTypedFilterMatches(DimFilter filter, List expectedRows) + { + // this filter only tests in sql compatible mode + if (NullHandling.sqlCompatible()) { + super.assertFilterMatches(filter, expectedRows); + try { + // make sure round trip json serde is cool + super.assertFilterMatches( + jsonMapper.readValue(jsonMapper.writeValueAsString(filter), DimFilter.class), + expectedRows + ); + } + 
catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } else { + Throwable t = Assert.assertThrows( + DruidException.class, + () -> super.assertFilterMatches(filter, expectedRows) + ); + Assert.assertEquals("Invalid IN filter, typed in filter only supports SQL compatible null handling mode, set druid.generic.useDefaultValue=false to use this filter", t.getMessage()); + } + } + + private void assertLegacyFilterMatches(DimFilter filter, List expectedRows) + { + DimFilter newFilter = rewriteToLegacyFilter(filter); + if (newFilter != null) { + super.assertFilterMatches(newFilter, expectedRows); + } + } + + @Nullable + private DimFilter rewriteToLegacyFilter(DimFilter filter) { + if (filter instanceof InDimFilter) { + return filter; + } else if (filter instanceof TypedInFilter) { + TypedInFilter theFilter = (TypedInFilter) filter; + return new InDimFilter( + theFilter.getColumn(), + InDimFilter.ValuesSet.copyOf( + theFilter.getSortedValues() + .stream() + .map(DimensionHandlerUtils::convertObjectToString) + .collect(Collectors.toList()) + ), + null + ); + } else if (filter instanceof NotDimFilter) { + DimFilter rewrite = rewriteToLegacyFilter(((NotDimFilter) filter).getField()); + if (rewrite != null) { + return NotDimFilter.of(rewrite); + } + } + return null; + } + } + + public static class TypedInFilterFilterNonParameterizedTests extends InitializedNullHandlingTest + { + @Test + public void testSerde() throws JsonProcessingException + { + Assume.assumeTrue(NullHandling.sqlCompatible()); + ObjectMapper mapper = new DefaultObjectMapper(); + TypedInFilter filter = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "c")); + String s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + + filter = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", null, "c")); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, 
TypedInFilter.class)); + + filter = inFilter("column", ColumnType.LONG, Arrays.asList(1L, 2L, 2L, null, 3L)); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + + filter = inFilter("column", ColumnType.DOUBLE, Arrays.asList(1.1, 2.2, 2.3, null, 3.3)); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + + filter = inFilter("column", ColumnType.FLOAT, Arrays.asList(1.1f, 2.2f, 2.2f, null, 3.3f)); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + + filter = inFilter("column", ColumnType.FLOAT, Arrays.asList(1.1, 2.2, 2.3, null, 3.3)); + s = mapper.writeValueAsString(filter); + Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + } + + @Test + public void testGetCacheKey() + { + Assume.assumeTrue(NullHandling.sqlCompatible()); + TypedInFilter filterUnsorted = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", null, "c")); + TypedInFilter filterDifferent = inFilter("column", ColumnType.STRING, Arrays.asList("a", "c", "b")); + TypedInFilter filterPresorted = new TypedInFilter( + "column", + ColumnType.STRING, + null, + Arrays.asList(null, "a", "b", "c"), + null + ); + + Assert.assertEquals(filterPresorted, filterUnsorted); + Assert.assertNotEquals(filterDifferent, filterPresorted); + Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); + Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); + + filterUnsorted = inFilter("column", ColumnType.LONG, Arrays.asList(2L, -2L, 1L, null, 3L)); + filterDifferent = inFilter("column", ColumnType.LONG, Arrays.asList(2L, -2L, 1L, 3L)); + filterPresorted = new TypedInFilter( + "column", + ColumnType.LONG, + null, + Arrays.asList(null, -2L, 1L, 2L, 3L), + null + ); + + Assert.assertEquals(filterPresorted, filterUnsorted); + 
Assert.assertNotEquals(filterDifferent, filterPresorted); + Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); + Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); + + filterUnsorted = inFilter("column", ColumnType.DOUBLE, Arrays.asList(2.2, -2.2, 1.1, null, 3.3)); + filterDifferent = inFilter("column", ColumnType.DOUBLE, Arrays.asList(2.2, -2.2, 1.1, 3.3)); + filterPresorted = new TypedInFilter( + "column", + ColumnType.DOUBLE, + null, + Arrays.asList(null, -2.2, 1.1, 2.2, 3.3), + null + ); + + Assert.assertEquals(filterPresorted, filterUnsorted); + Assert.assertNotEquals(filterDifferent, filterPresorted); + Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); + Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); + + filterUnsorted = inFilter("column", ColumnType.FLOAT, Arrays.asList(2.2f, -2.2f, 1.1f, null, 3.3f)); + filterDifferent = inFilter("column", ColumnType.FLOAT, Arrays.asList(2.2f, -2.2f, 1.1f, 3.3f)); + filterPresorted = new TypedInFilter( + "column", + ColumnType.FLOAT, + null, + Arrays.asList(null, -2.2f, 1.1f, 2.2f, 3.3f), + null + ); + + Assert.assertEquals(filterPresorted, filterUnsorted); + Assert.assertNotEquals(filterDifferent, filterPresorted); + Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); + Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); + } + + @Test + public void testInvalidParameters() + { + if (NullHandling.replaceWithDefault()) { + Throwable t = Assert.assertThrows( + DruidException.class, + () -> new TypedInFilter("column", ColumnType.STRING, Collections.emptyList(), null, null).toFilter() + ); + Assert.assertEquals("Invalid IN filter, typed in filter only supports SQL compatible null handling mode, set druid.generic.useDefaultValue=false to use this filter", t.getMessage()); + } + + 
Assume.assumeTrue(NullHandling.sqlCompatible()); + Throwable t = Assert.assertThrows( + DruidException.class, + () -> new TypedInFilter(null, ColumnType.STRING, null, null, null) + ); + Assert.assertEquals("Invalid IN filter, column cannot be null", t.getMessage()); + t = Assert.assertThrows( + DruidException.class, + () -> new TypedInFilter("dim0", null, null, null, null) + ); + Assert.assertEquals("Invalid IN filter on column [dim0], matchValueType cannot be null", t.getMessage()); + t = Assert.assertThrows( + DruidException.class, + () -> new TypedInFilter("dim0", ColumnType.STRING, null, null, null) + ); + Assert.assertEquals( + "Invalid IN filter on column [dim0], exactly one of values or sortedValues must be non-null", + t.getMessage() + ); + } + + @Test + public void testGetDimensionRangeSet() + { + Assume.assumeTrue(NullHandling.sqlCompatible()); + TypedInFilter filter = inFilter("x", ColumnType.STRING, Arrays.asList(null, "a", "b", "c")); + TypedInFilter filter2 = inFilter("x", ColumnType.STRING, Arrays.asList("a", "b", null, "c")); + + Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); + RangeSet range = filter.getDimensionRangeSet("x"); + Assert.assertTrue(range.contains("b")); + + filter = inFilter("x", ColumnType.LONG, Arrays.asList(null, 1L, 2L, 3L)); + filter2 = inFilter("x", ColumnType.LONG, Arrays.asList(3L, 1L, null, 2L)); + Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); + range = filter.getDimensionRangeSet("x"); + Assert.assertTrue(range.contains("2")); + + filter = inFilter("x", ColumnType.DOUBLE, Arrays.asList(null, 1.1, 2.2, 3.3)); + filter2 = inFilter("x", ColumnType.DOUBLE, Arrays.asList(3.3, 1.1, null, 2.2)); + range = filter.getDimensionRangeSet("x"); + Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); + Assert.assertTrue(range.contains("2.2")); + + filter = inFilter("x", ColumnType.FLOAT, Arrays.asList(null, 1.1f, 
2.2f, 3.3f)); + filter2 = inFilter("x", ColumnType.FLOAT, Arrays.asList(3.3f, 1.1f, null, 2.2f)); + range = filter.getDimensionRangeSet("x"); + Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); + Assert.assertTrue(range.contains("2.2")); + } + + @Test + public void testRequiredColumnRewrite() + { + Assume.assumeTrue(NullHandling.sqlCompatible()); + TypedInFilter filter = inFilter("dim0", ColumnType.STRING, Arrays.asList("a", "c")); + TypedInFilter filter2 = inFilter("dim1", ColumnType.STRING, Arrays.asList("a", "c")); + + Assert.assertTrue(filter.supportsRequiredColumnRewrite()); + Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); + + Filter rewrittenFilter = filter.rewriteRequiredColumns(ImmutableMap.of("dim0", "dim1")); + Assert.assertEquals(filter2, rewrittenFilter); + + Throwable t = Assert.assertThrows( + IAE.class, + () -> filter.rewriteRequiredColumns(ImmutableMap.of("invalidName", "dim1")) + ); + Assert.assertEquals( + "Received a non-applicable rewrite: {invalidName=dim1}, filter's dimension: dim0", + t.getMessage() + ); + } + + @Test + public void testEquals() + { + Assume.assumeTrue(NullHandling.sqlCompatible()); + EqualsVerifier.forClass(TypedInFilter.class).usingGetClass() + .withNonnullFields( + "column", + "matchValueType", + "unsortedValues", + "sortedMatchValues", + "optimizedFilterIncludeUnknown", + "optimizedFilterNoIncludeUnknown" + ) + .withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE) + .withPrefabValues( + Supplier.class, + Suppliers.ofInstance(ImmutableList.of("a", "b")), + Suppliers.ofInstance(ImmutableList.of("b", "c")) + ) + .withIgnoredFields( + "unsortedValues", + "sortedUtf8MatchValueBytes", + "predicateFactorySupplier", + "cacheKeySupplier", + "optimizedFilterIncludeUnknown", + "optimizedFilterNoIncludeUnknown" + ) + .verify(); + } + } + + public static class LegacyInDimFilterNonParameterizedTests extends InitializedNullHandlingTest + { + @Test + public void 
testRequiredColumnRewrite() + { + InDimFilter filter = (InDimFilter) legacyInFilter("dim0", "a", "c").toFilter(); + InDimFilter filter2 = (InDimFilter) legacyInFilter("dim1", "a", "c").toFilter(); + + Assert.assertTrue(filter.supportsRequiredColumnRewrite()); + Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); + + Filter rewrittenFilter = filter.rewriteRequiredColumns(ImmutableMap.of("dim0", "dim1")); + Assert.assertEquals(filter2, rewrittenFilter); + + Throwable t = Assert.assertThrows( + IAE.class, + () -> filter.rewriteRequiredColumns(ImmutableMap.of("invalidName", "dim1")) + ); + Assert.assertEquals( + "Received a non-applicable rewrite: {invalidName=dim1}, filter's dimension: dim0", + t.getMessage() + ); + } + + @Test + public void testEuals() + { + EqualsVerifier.forClass(InDimFilter.class) + .usingGetClass() + .withNonnullFields("dimension", "values") + .withIgnoredFields( + "cacheKeySupplier", + "predicateFactory", + "optimizedFilterIncludeUnknown", + "optimizedFilterNoIncludeUnknown", + "valuesUtf8" + ) + .verify(); + } + + @Test + public void testEqualsForInFilterDruidPredicateFactory() + { + EqualsVerifier.forClass(InDimFilter.InFilterDruidPredicateFactory.class) + .usingGetClass() + .withNonnullFields("values") + .withIgnoredFields( + "longPredicateSupplier", + "floatPredicateSupplier", + "doublePredicateSupplier", + "stringPredicateSupplier" + ) + .verify(); + } + } + + private static TypedInFilter inFilter(String columnName, ColumnType matchValueType, List values) + { + return new TypedInFilter( + columnName, + matchValueType, + values, + null, + null + ); + } + + private static InDimFilter legacyInFilter(String dim, String value, String... values) + { + return new InDimFilter(dim, Lists.asList(value, values), null); + } + + private static InDimFilter legacyInFilterWithFn(String dim, ExtractionFn fn, String value, String... 
values) + { + return new InDimFilter(dim, Lists.asList(value, values), fn); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java deleted file mode 100644 index bd05f1904661..000000000000 --- a/processing/src/test/java/org/apache/druid/segment/filter/TypedInFilterTests.java +++ /dev/null @@ -1,509 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.filter; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Function; -import com.google.common.base.Supplier; -import com.google.common.base.Suppliers; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.RangeSet; -import nl.jqno.equalsverifier.EqualsVerifier; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.error.DruidException; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.Pair; -import org.apache.druid.query.filter.DimFilter; -import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.NotDimFilter; -import org.apache.druid.query.filter.TypedInFilter; -import org.apache.druid.segment.IndexBuilder; -import org.apache.druid.segment.StorageAdapter; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.testing.InitializedNullHandlingTest; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; -import org.junit.experimental.runners.Enclosed; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.io.Closeable; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -@RunWith(Enclosed.class) -public class TypedInFilterTests -{ - @RunWith(Parameterized.class) - public static class TypedInFilterTest extends BaseFilterTest - { - private final ObjectMapper jsonMapper = new DefaultObjectMapper(); - - public TypedInFilterTest( - String testName, - IndexBuilder indexBuilder, - Function> finisher, - boolean cnf, - boolean optimize - ) - { - super(testName, InFilterTest.ROWS, indexBuilder, finisher, cnf, optimize); - } - - - @AfterClass - public static void tearDown() throws Exception - 
{ - BaseFilterTest.tearDown(TypedInFilterTest.class.getName()); - } - - @Test - public void testSingleValueStringColumnWithNulls() - { - Assume.assumeTrue(NullHandling.sqlCompatible()); - - assertFilterMatches( - inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "")), - ImmutableList.of("a") - ); - - assertFilterMatches( - inFilter("dim1", ColumnType.STRING, Collections.singletonList("")), - ImmutableList.of("a") - ); - - assertFilterMatches( - inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de")), - ImmutableList.of() - ); - - assertFilterMatches( - inFilter("s0", ColumnType.STRING, Arrays.asList("a", "b")), - ImmutableList.of("b", "d", "f") - ); - assertFilterMatches( - inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist")), - ImmutableList.of() - ); - - assertFilterMatches( - inFilter("dim1", ColumnType.STRING, Arrays.asList(null, "10", "abc")), - ImmutableList.of("b", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("dim1", ColumnType.STRING, Arrays.asList("-1", "ab", "de"))), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("s0", ColumnType.STRING, Arrays.asList("a", "b"))), - ImmutableList.of("a", "e") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("s0", ColumnType.STRING, Collections.singletonList("noexist"))), - ImmutableList.of("a", "b", "d", "e", "f") - ); - } - - @Test - public void testMultiValueStringColumn() - { - Assume.assumeTrue(NullHandling.sqlCompatible()); - Assume.assumeFalse(isAutoSchema()); - - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Collections.singletonList(null)), - ImmutableList.of("b", "f") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "a")), - ImmutableList.of("a", "b", "d", "f") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Arrays.asList(null, "b")), - ImmutableList.of("a", "b", "f") - ); - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, 
Collections.singletonList("")), - ImmutableList.of("c") - ); - - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Arrays.asList("", null)), - ImmutableList.of("b", "c", "f") - ); - - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Collections.singletonList("c")), - ImmutableList.of("e") - ); - - assertFilterMatches( - inFilter("dim2", ColumnType.STRING, Collections.singletonList("d")), - ImmutableList.of() - ); - } - - @Test - public void testMissingColumn() - { - Assume.assumeTrue(NullHandling.sqlCompatible()); - assertFilterMatches( - inFilter("dim3", ColumnType.STRING, Arrays.asList(null, null)), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Arrays.asList(null, null))), - ImmutableList.of() - ); - - assertFilterMatches( - inFilter("dim3", ColumnType.STRING, Collections.singletonList("")), - ImmutableList.of() - ); - assertFilterMatches( - NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Collections.singletonList(""))), - ImmutableList.of() - ); - - assertFilterMatches( - inFilter("dim3", ColumnType.STRING, Arrays.asList(null, "a")), - ImmutableList.of("a", "b", "c", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Arrays.asList(null, "a"))), - ImmutableList.of() - ); - - assertFilterMatches( - inFilter("dim3", ColumnType.STRING, Collections.singletonList("a")), - ImmutableList.of() - ); - assertFilterMatches( - NotDimFilter.of(inFilter("dim3", ColumnType.STRING, Collections.singletonList("a"))), - ImmutableList.of() - ); - - assertFilterMatches( - inFilter("dim3", ColumnType.STRING, Collections.singletonList("b")), - ImmutableList.of() - ); - - assertFilterMatches( - inFilter("dim3", ColumnType.STRING, Collections.singletonList("c")), - ImmutableList.of() - ); - } - - @Test - public void testNumeric() - { - Assume.assumeTrue(NullHandling.sqlCompatible()); - assertFilterMatches( - inFilter("f0", 
ColumnType.FLOAT, Collections.singletonList(0f)), - ImmutableList.of("a") - ); - assertFilterMatches( - inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(0.0)), - ImmutableList.of("a") - ); - assertFilterMatches(inFilter("l0", ColumnType.LONG, Collections.singletonList(0L)), ImmutableList.of("a")); - assertFilterMatches( - NotDimFilter.of(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(0f))), - ImmutableList.of("b", "c", "d", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(0.0))), - ImmutableList.of("b", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("l0", ColumnType.LONG, Collections.singletonList(0L))), - ImmutableList.of("b", "c", "e", "f") - ); - assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(null)), ImmutableList.of("e")); - assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(null)), ImmutableList.of("c")); - assertFilterMatches(inFilter("l0", ColumnType.LONG, Collections.singletonList(null)), ImmutableList.of("d")); - assertFilterMatches( - NotDimFilter.of(inFilter("f0", ColumnType.FLOAT, Collections.singletonList(null))), - ImmutableList.of("a", "b", "c", "d", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("d0", ColumnType.DOUBLE, Collections.singletonList(null))), - ImmutableList.of("a", "b", "d", "e", "f") - ); - assertFilterMatches( - NotDimFilter.of(inFilter("l0", ColumnType.LONG, Collections.singletonList(null))), - ImmutableList.of("a", "b", "c", "e", "f") - ); - - assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Arrays.asList("0", "999")), ImmutableList.of("a")); - assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Arrays.asList("0", "999")), ImmutableList.of("a")); - assertFilterMatches(inFilter("l0", ColumnType.LONG, Arrays.asList("0", "999")), ImmutableList.of("a")); - assertFilterMatches(inFilter("f0", ColumnType.FLOAT, Arrays.asList(null, "999")), 
ImmutableList.of("e")); - assertFilterMatches(inFilter("d0", ColumnType.DOUBLE, Arrays.asList(null, "999")), ImmutableList.of("c")); - assertFilterMatches(inFilter("l0", ColumnType.LONG, Arrays.asList(null, "999")), ImmutableList.of("d")); - } - - @Override - protected void assertFilterMatches(DimFilter filter, List expectedRows) - { - super.assertFilterMatches(filter, expectedRows); - try { - // make sure round trip json serde is cool - super.assertFilterMatches( - jsonMapper.readValue(jsonMapper.writeValueAsString(filter), DimFilter.class), - expectedRows - ); - } - catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - } - } - - public static class TypedInFilterFilterNonParameterizedTests extends InitializedNullHandlingTest - { - @Test - public void testSerde() throws JsonProcessingException - { - Assume.assumeTrue(NullHandling.sqlCompatible()); - ObjectMapper mapper = new DefaultObjectMapper(); - TypedInFilter filter = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "c")); - String s = mapper.writeValueAsString(filter); - Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); - - filter = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", null, "c")); - s = mapper.writeValueAsString(filter); - Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); - - filter = inFilter("column", ColumnType.LONG, Arrays.asList(1L, 2L, 2L, null, 3L)); - s = mapper.writeValueAsString(filter); - Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); - - filter = inFilter("column", ColumnType.DOUBLE, Arrays.asList(1.1, 2.2, 2.3, null, 3.3)); - s = mapper.writeValueAsString(filter); - Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); - - filter = inFilter("column", ColumnType.FLOAT, Arrays.asList(1.1f, 2.2f, 2.2f, null, 3.3f)); - s = mapper.writeValueAsString(filter); - Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); - - filter = 
inFilter("column", ColumnType.FLOAT, Arrays.asList(1.1, 2.2, 2.3, null, 3.3)); - s = mapper.writeValueAsString(filter); - Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); - } - - @Test - public void testGetCacheKey() - { - Assume.assumeTrue(NullHandling.sqlCompatible()); - TypedInFilter filterUnsorted = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", null, "c")); - TypedInFilter filterDifferent = inFilter("column", ColumnType.STRING, Arrays.asList("a", "c", "b")); - TypedInFilter filterPresorted = new TypedInFilter( - "column", - ColumnType.STRING, - null, - Arrays.asList(null, "a", "b", "c"), - null - ); - - Assert.assertEquals(filterPresorted, filterUnsorted); - Assert.assertNotEquals(filterDifferent, filterPresorted); - Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); - Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); - - filterUnsorted = inFilter("column", ColumnType.LONG, Arrays.asList(2L, -2L, 1L, null, 3L)); - filterDifferent = inFilter("column", ColumnType.LONG, Arrays.asList(2L, -2L, 1L, 3L)); - filterPresorted = new TypedInFilter( - "column", - ColumnType.LONG, - null, - Arrays.asList(null, -2L, 1L, 2L, 3L), - null - ); - - Assert.assertEquals(filterPresorted, filterUnsorted); - Assert.assertNotEquals(filterDifferent, filterPresorted); - Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); - Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); - - filterUnsorted = inFilter("column", ColumnType.DOUBLE, Arrays.asList(2.2, -2.2, 1.1, null, 3.3)); - filterDifferent = inFilter("column", ColumnType.DOUBLE, Arrays.asList(2.2, -2.2, 1.1, 3.3)); - filterPresorted = new TypedInFilter( - "column", - ColumnType.DOUBLE, - null, - Arrays.asList(null, -2.2, 1.1, 2.2, 3.3), - null - ); - - Assert.assertEquals(filterPresorted, filterUnsorted); - 
Assert.assertNotEquals(filterDifferent, filterPresorted); - Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); - Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); - - filterUnsorted = inFilter("column", ColumnType.FLOAT, Arrays.asList(2.2f, -2.2f, 1.1f, null, 3.3f)); - filterDifferent = inFilter("column", ColumnType.FLOAT, Arrays.asList(2.2f, -2.2f, 1.1f, 3.3f)); - filterPresorted = new TypedInFilter( - "column", - ColumnType.FLOAT, - null, - Arrays.asList(null, -2.2f, 1.1f, 2.2f, 3.3f), - null - ); - - Assert.assertEquals(filterPresorted, filterUnsorted); - Assert.assertNotEquals(filterDifferent, filterPresorted); - Assert.assertArrayEquals(filterPresorted.getCacheKey(), filterUnsorted.getCacheKey()); - Assert.assertFalse(Arrays.equals(filterDifferent.getCacheKey(), filterPresorted.getCacheKey())); - } - - @Test - public void testInvalidParameters() - { - if (NullHandling.replaceWithDefault()) { - Throwable t = Assert.assertThrows( - DruidException.class, - () -> new TypedInFilter(null, ColumnType.STRING, null, null, null) - ); - Assert.assertEquals("Invalid IN filter, typed in filter only supports SQL compatible null handling mode, set druid.generic.useDefaultValue=false to use this filter", t.getMessage()); - } - - Assume.assumeTrue(NullHandling.sqlCompatible()); - Throwable t = Assert.assertThrows( - DruidException.class, - () -> new TypedInFilter(null, ColumnType.STRING, null, null, null) - ); - Assert.assertEquals("Invalid IN filter, column cannot be null", t.getMessage()); - t = Assert.assertThrows( - DruidException.class, - () -> new TypedInFilter("dim0", null, null, null, null) - ); - Assert.assertEquals("Invalid IN filter on column [dim0], matchValueType cannot be null", t.getMessage()); - t = Assert.assertThrows( - DruidException.class, - () -> new TypedInFilter("dim0", ColumnType.STRING, null, null, null) - ); - Assert.assertEquals( - "Invalid IN filter on column [dim0], 
exactly one of values or sortedValues must be non-null", - t.getMessage() - ); - } - - @Test - public void testGetDimensionRangeSet() - { - Assume.assumeTrue(NullHandling.sqlCompatible()); - TypedInFilter filter = inFilter("x", ColumnType.STRING, Arrays.asList(null, "a", "b", "c")); - TypedInFilter filter2 = inFilter("x", ColumnType.STRING, Arrays.asList("a", "b", null, "c")); - - Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); - RangeSet range = filter.getDimensionRangeSet("x"); - Assert.assertTrue(range.contains("b")); - - filter = inFilter("x", ColumnType.LONG, Arrays.asList(null, 1L, 2L, 3L)); - filter2 = inFilter("x", ColumnType.LONG, Arrays.asList(3L, 1L, null, 2L)); - Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); - range = filter.getDimensionRangeSet("x"); - Assert.assertTrue(range.contains("2")); - - filter = inFilter("x", ColumnType.DOUBLE, Arrays.asList(null, 1.1, 2.2, 3.3)); - filter2 = inFilter("x", ColumnType.DOUBLE, Arrays.asList(3.3, 1.1, null, 2.2)); - range = filter.getDimensionRangeSet("x"); - Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); - Assert.assertTrue(range.contains("2.2")); - - filter = inFilter("x", ColumnType.FLOAT, Arrays.asList(null, 1.1f, 2.2f, 3.3f)); - filter2 = inFilter("x", ColumnType.FLOAT, Arrays.asList(3.3f, 1.1f, null, 2.2f)); - range = filter.getDimensionRangeSet("x"); - Assert.assertEquals(filter.getDimensionRangeSet("x"), filter2.getDimensionRangeSet("x")); - Assert.assertTrue(range.contains("2.2")); - } - - @Test - public void testRequiredColumnRewrite() - { - Assume.assumeTrue(NullHandling.sqlCompatible()); - TypedInFilter filter = inFilter("dim0", ColumnType.STRING, Arrays.asList("a", "c")); - TypedInFilter filter2 = inFilter("dim1", ColumnType.STRING, Arrays.asList("a", "c")); - - Assert.assertTrue(filter.supportsRequiredColumnRewrite()); - 
Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); - - Filter rewrittenFilter = filter.rewriteRequiredColumns(ImmutableMap.of("dim0", "dim1")); - Assert.assertEquals(filter2, rewrittenFilter); - - Throwable t = Assert.assertThrows( - IAE.class, - () -> filter.rewriteRequiredColumns(ImmutableMap.of("invalidName", "dim1")) - ); - Assert.assertEquals( - "Received a non-applicable rewrite: {invalidName=dim1}, filter's dimension: dim0", - t.getMessage() - ); - } - - @Test - public void test_equals() - { - Assume.assumeTrue(NullHandling.sqlCompatible()); - EqualsVerifier.forClass(TypedInFilter.class).usingGetClass() - .withNonnullFields( - "column", - "matchValueType", - "unsortedValues", - "sortedMatchValues", - "optimizedFilterIncludeUnknown", - "optimizedFilterNoIncludeUnknown" - ) - .withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE) - .withPrefabValues( - Supplier.class, - Suppliers.ofInstance(ImmutableList.of("a", "b")), - Suppliers.ofInstance(ImmutableList.of("b", "c")) - ) - .withIgnoredFields( - "unsortedValues", - "sortedUtf8MatchValueBytes", - "predicateFactorySupplier", - "cacheKeySupplier", - "optimizedFilterIncludeUnknown", - "optimizedFilterNoIncludeUnknown" - ) - .verify(); - } - } - - private static TypedInFilter inFilter(String columnName, ColumnType matchValueType, List values) - { - return new TypedInFilter( - columnName, - matchValueType, - values, - null, - null - ); - } -} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java index 534464590c5a..d3e73cf3d681 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java @@ -130,15 +130,22 @@ public DimFilter toDruidFilter( // to create an empty 
array with no argument, we just return null. return null; } else if (arrayElements.length == 1) { - if (plannerContext.isUseBoundsAndSelectors() || (!simpleExtractionExpr.isDirectColumnAccess() && virtualColumnRegistry == null)) { + if (plannerContext.isUseBoundsAndSelectors()) { return newSelectorDimFilter(simpleExtractionExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); } else { - final String column = simpleExtractionExpr.isDirectColumnAccess() - ? simpleExtractionExpr.getSimpleExtraction().getColumn() - : virtualColumnRegistry.getOrCreateVirtualColumnForExpression( - simpleExtractionExpr, - simpleExtractionExpr.getDruidType() - ); + final String column; + if (simpleExtractionExpr.isDirectColumnAccess()) { + column = simpleExtractionExpr.getDirectColumn(); + } else { + if (virtualColumnRegistry == null) { + // fall back to expression filter + return toExpressionFilter(plannerContext, druidExpressions); + } + column = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( + simpleExtractionExpr, + simpleExtractionExpr.getDruidType() + ); + } final Object elementValue = arrayElements[0]; if (elementValue == null) { return NullFilter.forColumn(column); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java index 1c29889e040c..413d75fbc6dc 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/ConvertSelectorsToIns.java @@ -31,7 +31,6 @@ import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.filter.TypedInFilter; import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.sql.calcite.expression.SimpleExtraction; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -296,11 +295,6 @@ protected 
InDimFilter.ValuesSet makeCollection() @Override protected RangeRefKey getCollectionKey(EqualityFilter selector) { - if (!selector.getMatchValueType().is(ValueType.STRING)) { - // skip non-string equality filters since InDimFilter uses a sorted string set, which is a different sort - // than numbers or other types might use - return null; - } return RangeRefKey.from(selector); } From b6ce3d1b4f82005e23ce868db3627ecca63feff5 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 21 Mar 2024 16:54:48 -0700 Subject: [PATCH 10/15] cache id name --- .../main/java/org/apache/druid/query/filter/DimFilterUtils.java | 2 +- .../main/java/org/apache/druid/query/filter/TypedInFilter.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java index af61cf1efa82..dad5cfd98b30 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java @@ -62,7 +62,7 @@ public class DimFilterUtils static final byte RANGE_CACHE_ID = 0x14; static final byte IS_FILTER_BOOLEAN_FILTER_CACHE_ID = 0x15; static final byte ARRAY_CONTAINS_CACHE_ID = 0x16; - static final byte NEW_IN_CACHE_ID = 0x17; + static final byte TYPED_IN_CACHE_ID = 0x17; public static final byte STRING_SEPARATOR = (byte) 0xFF; diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index e76870d589a2..018f23b5fcbd 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -404,7 +404,7 @@ private byte[] computeCacheKey() } } - return new CacheKeyBuilder(DimFilterUtils.NEW_IN_CACHE_ID) + return new CacheKeyBuilder(DimFilterUtils.TYPED_IN_CACHE_ID) 
.appendString(column) .appendByte(DimFilterUtils.STRING_SEPARATOR) .appendString(matchValueType.asTypeString()) From ce4d05bd4941e51e1530d5ce12524e2655929c00 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 21 Mar 2024 17:08:56 -0700 Subject: [PATCH 11/15] style --- .../java/org/apache/druid/segment/filter/InFilterTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java index 3e5d8852080e..317fc1aa6bad 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java @@ -659,7 +659,8 @@ private void assertLegacyFilterMatches(DimFilter filter, List expectedRo } @Nullable - private DimFilter rewriteToLegacyFilter(DimFilter filter) { + private DimFilter rewriteToLegacyFilter(DimFilter filter) + { if (filter instanceof InDimFilter) { return filter; } else if (filter instanceof TypedInFilter) { From 37ca67ca1ca7d597bd661509117fe0a17a014866 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 21 Mar 2024 20:17:12 -0700 Subject: [PATCH 12/15] javadoc --- .../apache/druid/segment/index/BitmapColumnIndex.java | 4 +++- .../segment/index/semantic/ArrayElementIndexes.java | 4 ++++ .../segment/index/semantic/DruidPredicateIndexes.java | 3 ++- .../druid/segment/index/semantic/NullValueIndex.java | 2 +- .../segment/index/semantic/StringValueSetIndexes.java | 2 ++ .../segment/index/semantic/Utf8ValueSetIndexes.java | 3 +++ .../druid/segment/index/semantic/ValueIndexes.java | 10 +++++++--- .../druid/segment/index/semantic/ValueSetIndexes.java | 3 +++ 8 files changed, 25 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java index 
1421e4fc16ef..154dd66e206e 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java @@ -27,7 +27,9 @@ /** * Common interface for bitmap indexes for use by {@link org.apache.druid.query.filter.Filter} for cursor creation, to - * allow fast row skipping during query processing. + * allow fast row skipping during query processing. Ideally implementations of this are 'lazy', and do not do any real + * work until {@link #computeBitmapResult(BitmapResultFactory, int, int, boolean)} or + * {@link #computeBitmapResult(BitmapResultFactory, boolean)} is called. */ public interface BitmapColumnIndex { diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/ArrayElementIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/ArrayElementIndexes.java index 5a8a9d841b55..90eb8ce8a8f0 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/ArrayElementIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/ArrayElementIndexes.java @@ -26,6 +26,10 @@ import javax.annotation.Nullable; +/** + * Construct a {@link BitmapColumnIndex} for any array element which might be present in an array contained in the + * column.
+ */ public interface ArrayElementIndexes { /** diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndexes.java index c7e87ec20f1e..f0e802f7fab3 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DruidPredicateIndexes.java @@ -25,7 +25,8 @@ import javax.annotation.Nullable; /** - * Uses a {@link DruidPredicateFactory} to construct a {@link BitmapColumnIndex} + * Uses a {@link DruidPredicateFactory} to construct a {@link BitmapColumnIndex} containing rows for all values which + * satisfy the predicate. */ public interface DruidPredicateIndexes { diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java index 8768caa54627..884866f95769 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/NullValueIndex.java @@ -22,7 +22,7 @@ import org.apache.druid.segment.index.BitmapColumnIndex; /** - * Provides index for all null rows in a column, to use with IS/IS NOT NULL filters + * Provides {@link BitmapColumnIndex} for all null rows in a column, to use with IS/IS NOT NULL filters */ public interface NullValueIndex { diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java index f25a16b7b106..09c88776e9d4 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/StringValueSetIndexes.java @@ -27,7 +27,9 @@ /** * Index on 
individual values, and provides bitmaps for the rows which contain these values + * @deprecated use {@link ValueIndexes}, {@link ValueSetIndexes}, or {@link Utf8ValueSetIndexes} instead */ +@Deprecated public interface StringValueSetIndexes { /** diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java index 3e880ca42dcb..2180c76d30f1 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/Utf8ValueSetIndexes.java @@ -25,6 +25,9 @@ import java.nio.ByteBuffer; import java.util.List; +/** + * Construct a {@link BitmapColumnIndex} for a set of utf8 byte values which might be present in the column. + */ public interface Utf8ValueSetIndexes { /** diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueIndexes.java index 9eee56896d95..4234a7b47bec 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueIndexes.java @@ -27,13 +27,17 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +/** + * Construct a {@link BitmapColumnIndex} for a specific value which might be present in the column. + */ public interface ValueIndexes { /** - * Get the {@link ImmutableBitmap} corresponding to rows matching the supplied value. Generates an empty bitmap when - * passed a value that doesn't exist. May return null if a value index cannot be computed for the supplied value type. - * + * Get a {@link BitmapColumnIndex} which can compute the {@link ImmutableBitmap} corresponding to rows matching the + * supplied value. Generates an empty bitmap when passed a value that doesn't exist. 
May return null if a value + * index cannot be computed for the supplied value type. + *

* Does not match null, use {@link NullValueIndex} for matching nulls. * * @param value value to match diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java index 0b0d42c27f13..56d6da9a19af 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java @@ -37,6 +37,9 @@ import java.util.List; import java.util.NoSuchElementException; +/** + * Construct a {@link BitmapColumnIndex} for a set of values which might be present in the column. + */ public interface ValueSetIndexes { /** From 9527adabb07e0e32557193561afb121886d1ae71 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 22 Mar 2024 02:55:32 -0700 Subject: [PATCH 13/15] more comments, dedupe test --- .../java/org/apache/druid/query/filter/TypedInFilter.java | 6 +++++- .../org/apache/druid/segment/filter/InFilterTests.java | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index 018f23b5fcbd..6ea06234d2b2 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -103,7 +103,9 @@ public class TypedInFilter extends AbstractOptimizableDimFilter implements Filte /** * Supplier for list of values sorted by {@link #matchValueType}. This is lazily computed if - * {@link #unsortedValues} is not null and previously sorted. + * {@link #unsortedValues} is not null and previously sorted. Data will be deduplicated upon sorting if computed. + * Manually set this value with unsorted or duplicated values at your own risk. 
Duplicated values are unlikely to + * cause a problem, but unsorted values can result in incorrect results. */ private final Supplier> sortedMatchValues; @@ -257,6 +259,8 @@ public RangeSet getDimensionRangeSet(String dimension) for (Object value : sortedMatchValues.get()) { String valueEquivalent = Evals.asString(value); if (valueEquivalent == null) { + // Range.singleton(null) is invalid, so use the fact that + // only null values are less than empty string. retSet.add(Range.lessThan("")); } else { retSet.add(Range.singleton(valueEquivalent)); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java index 317fc1aa6bad..46f120922392 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java @@ -56,7 +56,6 @@ import org.junit.Assert; import org.junit.Assume; import org.junit.Test; -import org.junit.experimental.runners.Enclosed; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -68,7 +67,7 @@ import java.util.Map; import java.util.stream.Collectors; -@RunWith(Enclosed.class) + public class InFilterTests { @RunWith(Parameterized.class) @@ -698,7 +697,9 @@ public void testSerde() throws JsonProcessingException filter = inFilter("column", ColumnType.STRING, Arrays.asList("a", "b", "b", null, "c")); s = mapper.writeValueAsString(filter); - Assert.assertEquals(filter, mapper.readValue(s, TypedInFilter.class)); + TypedInFilter deserialized = mapper.readValue(s, TypedInFilter.class); + Assert.assertEquals(Arrays.asList(null, "a", "b", "c"), deserialized.getSortedValues()); + Assert.assertEquals(filter, deserialized); filter = inFilter("column", ColumnType.LONG, Arrays.asList(1L, 2L, 2L, null, 3L)); s = mapper.writeValueAsString(filter); From 108c6cb19cc129f7b9311bad810a41e551067ad6 Mon Sep 17 00:00:00 2001 From: Clint Wylie 
Date: Fri, 22 Mar 2024 03:43:29 -0700 Subject: [PATCH 14/15] faster sort --- .../druid/query/filter/TypedInFilter.java | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index 6ea06234d2b2..44f3ec9c2901 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -39,7 +39,7 @@ import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet; import it.unimi.dsi.fastutil.floats.FloatOpenHashSet; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; -import it.unimi.dsi.fastutil.objects.ObjectRBTreeSet; +import it.unimi.dsi.fastutil.objects.ObjectArrays; import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; @@ -468,12 +468,21 @@ private static Object coerceValue(@Nullable Object o, ColumnType matchValueType) private static List sortValues(List unsortedValues, ColumnType matchValueType) { - final ObjectRBTreeSet sortedSet = new ObjectRBTreeSet<>(matchValueType.getNullableStrategy()); - for (Object value : unsortedValues) { - sortedSet.add(coerceValue(value, matchValueType)); + final Object[] array = unsortedValues.toArray(new Object[0]); + // check if values need coerced + for (int i = 0; i < array.length; i++) { + Object coerced = coerceValue(array[i], matchValueType); + array[i] = coerced; + } + Comparator comparator = matchValueType.getNullableStrategy(); + ObjectArrays.quickSort(array, comparator); + final List sortedList = Lists.newArrayListWithCapacity(array.length); + for (int i = 0; i < array.length; i++) { + if (i > 0 && comparator.compare(array[i - 1], array[i]) == 0) { + continue; + } + sortedList.add(array[i]); } - final List sortedList = 
Lists.newArrayListWithCapacity(unsortedValues.size()); - sortedList.addAll(sortedSet); return sortedList; } From 6d2299fe2c4f5adf78a77d327df8cafd7d662f06 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 22 Mar 2024 03:50:38 -0700 Subject: [PATCH 15/15] adjust --- .../java/org/apache/druid/query/filter/TypedInFilter.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index 44f3ec9c2901..d3b307ef2f42 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -469,13 +469,14 @@ private static Object coerceValue(@Nullable Object o, ColumnType matchValueType) private static List sortValues(List unsortedValues, ColumnType matchValueType) { final Object[] array = unsortedValues.toArray(new Object[0]); - // check if values need coerced + // coerce values to matchValueType for (int i = 0; i < array.length; i++) { Object coerced = coerceValue(array[i], matchValueType); array[i] = coerced; } - Comparator comparator = matchValueType.getNullableStrategy(); + final Comparator comparator = matchValueType.getNullableStrategy(); ObjectArrays.quickSort(array, comparator); + // dedupe values final List sortedList = Lists.newArrayListWithCapacity(array.length); for (int i = 0; i < array.length; i++) { if (i > 0 && comparator.compare(array[i - 1], array[i]) == 0) {