diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java new file mode 100644 index 000000000000..52745e62fb30 --- /dev/null +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java @@ -0,0 +1,385 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.benchmark.query; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.guice.NestedDataModule; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.math.expr.ExpressionProcessing; +import org.apache.druid.query.DruidProcessingConfig; +import org.apache.druid.query.QueryRunnerFactoryConglomerate; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.AutoTypeColumnSchema; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.column.StringEncodingStrategy; +import org.apache.druid.segment.generator.GeneratorBasicSchemas; +import org.apache.druid.segment.generator.GeneratorSchemaInfo; +import org.apache.druid.segment.generator.SegmentGenerator; +import org.apache.druid.segment.transform.ExpressionTransform; +import org.apache.druid.segment.transform.TransformSpec; +import org.apache.druid.server.QueryStackTests; +import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; +import org.apache.druid.server.security.AuthConfig; +import org.apache.druid.server.security.AuthTestUtils; +import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest; +import org.apache.druid.sql.calcite.planner.CalciteRulesManager; +import org.apache.druid.sql.calcite.planner.CatalogResolver; +import org.apache.druid.sql.calcite.planner.DruidPlanner; +import org.apache.druid.sql.calcite.planner.PlannerConfig; +import org.apache.druid.sql.calcite.planner.PlannerFactory; +import org.apache.druid.sql.calcite.planner.PlannerResult; +import org.apache.druid.sql.calcite.run.SqlEngine; +import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +@State(Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 3) +@Measurement(iterations = 5) +public class SqlGroupByBenchmark +{ + static { + NullHandling.initializeForTests(); + ExpressionProcessing.initializeForTests(); + NestedDataModule.registerHandlersAndSerde(); + } + + private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig() + { + @Override + public int intermediateComputeSizeBytes() + { + return 512 * 1024 * 1024; + } + + @Override + public int getNumMergeBuffers() + { + return 3; + } + + @Override + public int getNumThreads() + { + return 1; + } + + @Override + public String getFormatString() + { + return "benchmarks-processing-%s"; + } + }; + + @Param({ + "string-Sequential-100_000", + "string-Sequential-10_000_000", + // "string-Sequential-1_000_000_000", + "string-ZipF-1_000_000", + "string-Uniform-1_000_000", + + "multi-string-Sequential-100_000", + "multi-string-Sequential-10_000_000", + // "multi-string-Sequential-1_000_000_000", + "multi-string-ZipF-1_000_000", + "multi-string-Uniform-1_000_000", + + "long-Sequential-100_000", + "long-Sequential-10_000_000", + // "long-Sequential-1_000_000_000", + "long-ZipF-1_000_000", + "long-Uniform-1_000_000", + + "double-ZipF-1_000_000", + "double-Uniform-1_000_000", + + "float-ZipF-1_000_000", + "float-Uniform-1_000_000", + + "stringArray-Sequential-100_000", + "stringArray-Sequential-3_000_000", + // "stringArray-Sequential-1_000_000_000", + "stringArray-ZipF-1_000_000", + "stringArray-Uniform-1_000_000", + + "longArray-Sequential-100_000", + "longArray-Sequential-3_000_000", + // "longArray-Sequential-1_000_000_000", + "longArray-ZipF-1_000_000", + "longArray-Uniform-1_000_000", + + "nested-Sequential-100_000", + "nested-Sequential-3_000_000", + // "nested-Sequential-1_000_000_000", + "nested-ZipF-1_000_000", + "nested-Uniform-1_000_000", + }) + private String groupingDimension; + + private SqlEngine engine; + @Nullable + private PlannerFactory plannerFactory; + private Closer closer = Closer.create(); + + @Setup(Level.Trial) + public void setup() + { + final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("groupBy-testbench"); + + final DataSegment dataSegment = DataSegment.builder() + .dataSource("foo") + .interval(schemaInfo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + final DataSegment dataSegment2 = DataSegment.builder() + .dataSource("foo") + .interval(schemaInfo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(1)) + .size(0) + .build(); + + + final PlannerConfig plannerConfig = new PlannerConfig(); + + String columnCardinalityWithUnderscores = groupingDimension.substring(groupingDimension.lastIndexOf('-') + 1); + int rowsPerSegment = Integer.parseInt(StringUtils.replace(columnCardinalityWithUnderscores, "_", "")); + + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + + TransformSpec transformSpec = new TransformSpec( + null, + ImmutableList.of( + // string array dims + new ExpressionTransform( + "stringArray-Sequential-100_000", + "array(\"string-Sequential-100_000\")", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "stringArray-Sequential-3_000_000", + "array(\"string-Sequential-10_000_000\")", + TestExprMacroTable.INSTANCE + ), + /* + new ExpressionTransform( + "stringArray-Sequential-1_000_000_000", + "array(\"string-Sequential-1_000_000_000\")", + TestExprMacroTable.INSTANCE + ),*/ + new ExpressionTransform( + "stringArray-ZipF-1_000_000", + "array(\"string-ZipF-1_000_000\")", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "stringArray-Uniform-1_000_000", + "array(\"string-Uniform-1_000_000\")", + TestExprMacroTable.INSTANCE + ), + + // long array dims + new ExpressionTransform( + "longArray-Sequential-100_000", + "array(\"long-Sequential-100_000\")", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "longArray-Sequential-3_000_000", + "array(\"long-Sequential-10_000_000\")", + TestExprMacroTable.INSTANCE + ), + /* + new ExpressionTransform( + "longArray-Sequential-1_000_000_000", + "array(\"long-Sequential-1_000_000_000\")", + TestExprMacroTable.INSTANCE + ),*/ + new ExpressionTransform( + "longArray-ZipF-1_000_000", + "array(\"long-ZipF-1_000_000\")", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "longArray-Uniform-1_000_000", + "array(\"long-Uniform-1_000_000\")", + TestExprMacroTable.INSTANCE + ), + + // nested complex json dim + new ExpressionTransform( + "nested-Sequential-100_000", + "json_object('long1', \"long-Sequential-100_000\", 'nesteder', json_object('long1', \"long-Sequential-100_000\"))", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "nested-Sequential-3_000_000", + "json_object('long1', \"long-Sequential-10_000_000\", 'nesteder', json_object('long1', \"long-Sequential-10_000_000\"))", + TestExprMacroTable.INSTANCE + ), + /*new ExpressionTransform( + "nested-Sequential-1_000_000_000", + "json_object('long1', \"long-Sequential-1_000_000_000\", 'nesteder', json_object('long1', \"long-Sequential-1_000_000_000\"))", + TestExprMacroTable.INSTANCE + ),*/ + new ExpressionTransform( + "nested-ZipF-1_000_000", + "json_object('long1', \"long-ZipF-1_000_000\", 'nesteder', json_object('long1', \"long-ZipF-1_000_000\"))", + TestExprMacroTable.INSTANCE + ), + new ExpressionTransform( + "nested-Uniform-1_000_000", + "json_object('long1', \"long-Uniform-1_000_000\", 'nesteder', json_object('long1', \"long-Uniform-1_000_000\"))", + TestExprMacroTable.INSTANCE + ) + ) + ); + + List columnSchemas = schemaInfo.getDimensionsSpec() + .getDimensions() + .stream() + .map(x -> new AutoTypeColumnSchema(x.getName(), null)) + .collect(Collectors.toList()); + + List transformSchemas = transformSpec + .getTransforms() + .stream() + .map( + transform -> new AutoTypeColumnSchema(transform.getName(), null) + ) + .collect(Collectors.toList()); + + + + final QueryableIndex index = segmentGenerator.generate( + dataSegment, + schemaInfo, + DimensionsSpec.builder() + .setDimensions(ImmutableList.builder() + .addAll(columnSchemas) + .addAll(transformSchemas) + .build() + ) + .build(), + transformSpec, + IndexSpec.builder().withStringDictionaryEncoding(new StringEncodingStrategy.Utf8()).build(), + Granularities.NONE, + rowsPerSegment + ); + + final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate( + closer, + PROCESSING_CONFIG + ); + + final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate) + .add(dataSegment, index) + .add(dataSegment2, index); + closer.register(walker); + + // Hacky and pollutes global namespace, but it is fine since benchmarks are run in isolation. Wasn't able + // to work up a cleaner way of doing it by modifying the injector. + CalciteTests.getJsonMapper().registerModules(NestedDataModule.getJacksonModulesList()); + + final DruidSchemaCatalog rootSchema = + CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER); + engine = CalciteTests.createMockSqlEngine(walker, conglomerate); + plannerFactory = new PlannerFactory( + rootSchema, + CalciteTests.createOperatorTable(), + CalciteTests.createExprMacroTable(), + plannerConfig, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + CalciteTests.getJsonMapper(), + CalciteTests.DRUID_SCHEMA_NAME, + new CalciteRulesManager(ImmutableSet.of()), + CalciteTests.createJoinableFactoryWrapper(), + CatalogResolver.NULL_RESOLVER, + new AuthConfig() + ); + + try { + SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries( + plannerFactory, + sqlQuery(groupingDimension) + ); + } + catch (Throwable ignored) { + // the show must go on + } + } + + @TearDown(Level.Trial) + public void tearDown() throws Exception + { + closer.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void querySql(Blackhole blackhole) + { + final String sql = sqlQuery(groupingDimension); + try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, Collections.emptyMap())) { + final PlannerResult plannerResult = planner.plan(); + final Sequence resultSequence = plannerResult.run().getResults(); + final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); + blackhole.consume(lastRow); + } + } + + private static String sqlQuery(String groupingDimension) + { + return StringUtils.format("SELECT \"%s\", COUNT(*) FROM foo GROUP BY 1", groupingDimension); + } +} diff --git a/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnGroupByTest.java b/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnGroupByTest.java index ebbee1f8a547..aeab939f4ec8 100644 --- a/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnGroupByTest.java +++ b/extensions-contrib/virtual-columns/src/test/java/org/apache/druid/segment/MapVirtualColumnGroupByTest.java @@ -24,6 +24,7 @@ import org.apache.druid.collections.StupidPool; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.MapBasedRow; +import org.apache.druid.error.DruidException; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; @@ -134,10 +135,10 @@ public void testWithMapColumn() ); Throwable t = Assert.assertThrows( - UnsupportedOperationException.class, + DruidException.class, () -> runner.run(QueryPlus.wrap(query)).toList() ); - Assert.assertEquals("Map column doesn't support getRow()", t.getMessage()); + Assert.assertEquals("Unable to group on the column[params]", t.getMessage()); } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index c2d7e1af6c58..7c4af7389f6c 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -2028,7 +2028,7 @@ public void testGroupByWithComplexColumnThrowsUnsupportedException(String contex .setExpectedExecutionErrorMatcher(CoreMatchers.allOf( CoreMatchers.instanceOf(DruidException.class), ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString( - "SQL requires a group-by on a column of type COMPLEX that is unsupported")) + "SQL requires a group-by on a column with type [COMPLEX] that is unsupported.")) )) .verifyExecutionError(); } diff --git a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java index 7b4722052d47..747512cece53 100644 --- a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java +++ b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java @@ -142,6 +142,33 @@ public static String nullToEmptyIfNeeded(@Nullable String value) //CHECKSTYLE.ON: Regexp } + @Nullable + public static Long nullToEmptyIfNeeded(@Nullable Long value) + { + if (replaceWithDefault() && value == null) { + return defaultLongValue(); + } + return value; + } + + @Nullable + public static Float nullToEmptyIfNeeded(@Nullable Float value) + { + if (replaceWithDefault() && value == null) { + return defaultFloatValue(); + } + return value; + } + + @Nullable + public static Double nullToEmptyIfNeeded(@Nullable Double value) + { + if (replaceWithDefault() && value == null) { + return defaultDoubleValue(); + } + return value; + } + @Nullable public static String emptyToNullIfNeeded(@Nullable String value) { diff --git a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java index 864a9cc183e3..0bb78b2109b7 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java +++ b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java @@ -33,7 +33,6 @@ import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.segment.data.ComparableIntArray; import org.apache.druid.segment.data.IndexedInts; import javax.annotation.Nullable; @@ -195,10 +194,6 @@ public static List getNumericArrayFromObject(Object row) for (Object value : (Object[]) row) { retVal.add((Number) value); } - } else if (row instanceof ComparableIntArray) { - for (int value : ((ComparableIntArray) row).getDelegate()) { - retVal.add(value); - } } else { throw new ISE("Unexpected type %s found", row.getClass().getName()); } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java index 12ed3c5f04b4..ad22956efd41 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java @@ -30,7 +30,8 @@ public class CardinalityAggregatorColumnSelectorStrategyFactory @Override public CardinalityAggregatorColumnSelectorStrategy makeColumnSelectorStrategy( ColumnCapabilities capabilities, - ColumnValueSelector selector + ColumnValueSelector selector, + String dimension ) { switch (capabilities.getType()) { @@ -46,4 +47,10 @@ public CardinalityAggregatorColumnSelectorStrategy makeColumnSelectorStrategy( throw new IAE("Cannot create query type helper from invalid type [%s]", capabilities.asTypeString()); } } + + @Override + public boolean supportsComplexTypes() + { + return false; + } } diff --git a/processing/src/main/java/org/apache/druid/query/dimension/ColumnSelectorStrategyFactory.java b/processing/src/main/java/org/apache/druid/query/dimension/ColumnSelectorStrategyFactory.java index dc9304d26962..7e4db58a57df 100644 --- a/processing/src/main/java/org/apache/druid/query/dimension/ColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/org/apache/druid/query/dimension/ColumnSelectorStrategyFactory.java @@ -24,5 +24,11 @@ public interface ColumnSelectorStrategyFactory { - ColumnSelectorStrategyClass makeColumnSelectorStrategy(ColumnCapabilities capabilities, ColumnValueSelector selector); + ColumnSelectorStrategyClass makeColumnSelectorStrategy(ColumnCapabilities capabilities, ColumnValueSelector selector, String dimension); + + /** + * Whether the strategy supports complex types. If a strategy doesn't support the complex types, they can either throw an + * unsupported exception or treat them like strings. + */ + boolean supportsComplexTypes(); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/ArrayContainsElementFilter.java b/processing/src/main/java/org/apache/druid/query/filter/ArrayContainsElementFilter.java index 129c8570dbb9..6e6a418167d7 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/ArrayContainsElementFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/ArrayContainsElementFilter.java @@ -26,6 +26,7 @@ import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; import com.google.common.collect.RangeSet; +import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.ExprEval; @@ -108,7 +109,15 @@ public byte[] getCacheKey() final NullableTypeStrategy typeStrategy = elementMatchValueEval.type().getNullableStrategy(); final int size = typeStrategy.estimateSizeBytes(elementMatchValueEval.value()); final ByteBuffer valueBuffer = ByteBuffer.allocate(size); - typeStrategy.write(valueBuffer, elementMatchValueEval.value(), size); + if (typeStrategy.write(valueBuffer, elementMatchValueEval.value(), size) < 0) { + // Defensive check, since the size had already been estimated from the same type strategy + throw DruidException.defensive( + "Unable to write the for the column [%s] with value [%s] and size [%d]", + elementMatchValueEval.value(), + column, + size + ); + } return new CacheKeyBuilder(DimFilterUtils.ARRAY_CONTAINS_CACHE_ID) .appendByte(DimFilterUtils.STRING_SEPARATOR) .appendString(column) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java index 4ca5b096ec0c..cdcf9e3daf40 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java @@ -43,6 +43,7 @@ import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.DataSource; +import org.apache.druid.query.DimensionComparisonUtils; import org.apache.druid.query.Queries; import org.apache.druid.query.Query; import org.apache.druid.query.QueryDataSource; @@ -597,11 +598,7 @@ private Ordering getRowOrderingForPushDown( needsReverseList.add(false); final ColumnType type = dimensions.get(i).getOutputType(); dimensionTypes.add(type); - if (type.isNumeric()) { - comparators.add(StringComparators.NUMERIC); - } else { - comparators.add(StringComparators.LEXICOGRAPHIC); - } + comparators.add(StringComparators.NATURAL); } } @@ -762,6 +759,12 @@ private DateTime computeUniversalTimestamp() } } + /** + * Compares the dimensions for limit pushdown. + * + * Due to legacy reason, the provided StringComparator for the arrays isn't applied and must be changed once we + * get rid of the StringComparators for array types + */ private static int compareDimsForLimitPushDown( final IntList fields, final List needsReverseList, @@ -781,7 +784,7 @@ private static int compareDimsForLimitPushDown( final Object rhsObj = rhs.get(fieldNumber); if (dimensionType.isNumeric()) { - if (comparator.equals(StringComparators.NUMERIC)) { + if (DimensionComparisonUtils.isNaturalComparator(dimensionType.getType(), comparator)) { dimCompare = DimensionHandlerUtils.compareObjectsAsType(lhsObj, rhsObj, dimensionType); } else { dimCompare = comparator.compare(String.valueOf(lhsObj), String.valueOf(rhsObj)); @@ -795,6 +798,8 @@ private static int compareDimsForLimitPushDown( final Object[] lhsArr = DimensionHandlerUtils.convertToArray(lhsObj, dimensionType.getElementType()); final Object[] rhsArr = DimensionHandlerUtils.convertToArray(rhsObj, dimensionType.getElementType()); dimCompare = dimensionType.getNullableStrategy().compare(lhsArr, rhsArr); + } else if (DimensionComparisonUtils.isNaturalComparator(dimensionType.getType(), comparator)) { + dimCompare = DimensionHandlerUtils.compareObjectsAsType(lhsObj, rhsObj, dimensionType); } else { dimCompare = comparator.compare((String) lhsObj, (String) rhsObj); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/DictionaryBuilding.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/DictionaryBuildingUtils.java similarity index 50% rename from processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/DictionaryBuilding.java rename to processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/DictionaryBuildingUtils.java index a8f16d2b2ec5..243aedf22c24 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/DictionaryBuilding.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/DictionaryBuildingUtils.java @@ -22,20 +22,15 @@ import it.unimi.dsi.fastutil.Hash; import it.unimi.dsi.fastutil.objects.Object2IntMap; import it.unimi.dsi.fastutil.objects.Object2IntOpenCustomHashMap; -import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; -import org.apache.druid.error.DruidException; import org.apache.druid.segment.DimensionDictionary; -import org.apache.druid.segment.column.TypeSignature; -import org.apache.druid.segment.column.ValueType; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; /** * Utilities for parts of the groupBy engine that need to build dictionaries. */ -public class DictionaryBuilding +public class DictionaryBuildingUtils { // Entry in dictionary, node pointer in reverseDictionary, hash + k/v/next pointer in reverseDictionary nodes private static final int ROUGH_OVERHEAD_PER_DICTIONARY_ENTRY = Long.BYTES * 5 + Integer.BYTES; @@ -49,53 +44,23 @@ public static List createDictionary() } /** - * Creates a reverse dictionary (value -> dictionary ID). If a value is not present in the reverse dictionary, - * {@link Object2IntMap#getInt} will return {@link DimensionDictionary#ABSENT_VALUE_ID}. + * Create reverse dictionary (value -> dictionary ID) that relies on the given {@link Hash.Strategy} for + * hashing and comparing equality. It explicitly requires a hashing strategy, so that callers are aware of the + * correct implementation of the .hashCode and the .equals method used to store and address the objects * - * WARNING: This assumes that the .hashCode and the .equals of the method are implemented correctly. This does not - * apply for primitive array types, which donot consider new Object[]{1L, 2L} = new Object[]{1, 2}. For such objects, - * (especially arrays), a custom hash strategy must be passed. + * If a value is not present in the reverse dictionary, {@link Object2IntMap#getInt} will + * return {@link DimensionDictionary#ABSENT_VALUE_ID}. + * + * The object's {@link org.apache.druid.segment.column.NullableTypeStrategy} is often enough to create a reverse + * dictionary for those objects */ - public static Object2IntMap createReverseDictionary() - { - final Object2IntOpenHashMap m = new Object2IntOpenHashMap<>(); - m.defaultReturnValue(DimensionDictionary.ABSENT_VALUE_ID); - return m; - } - - private static Object2IntMap createReverseDictionary(final Hash.Strategy hashStrategy) + public static Object2IntMap createReverseDictionary(final Hash.Strategy hashStrategy) { final Object2IntOpenCustomHashMap m = new Object2IntOpenCustomHashMap<>(hashStrategy); m.defaultReturnValue(DimensionDictionary.ABSENT_VALUE_ID); return m; } - /** - * Creates a reverse dictionary for arrays of primitive types. - */ - public static Object2IntMap createReverseDictionaryForPrimitiveArray(TypeSignature arrayType) - { - if (!arrayType.isPrimitiveArray()) { - throw DruidException.defensive("Dictionary building function expected an array of a primitive type"); - } - return createReverseDictionary(new Hash.Strategy() - { - @Override - public int hashCode(Object[] o) - { - // We don't do a deep comparison, because the array type is primitive, therefore we don't need to incur the extra - // overhead of checking the nestings - return Arrays.hashCode(o); - } - - @Override - public boolean equals(Object[] a, Object[] b) - { - return arrayType.getNullableStrategy().compare(a, b) == 0; - } - }); - } - /** * Estimated footprint of a new entry. */ diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByColumnSelectorStrategyFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByColumnSelectorStrategyFactory.java new file mode 100644 index 000000000000..53ae47d41196 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByColumnSelectorStrategyFactory.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae; + +import org.apache.druid.error.DruidException; +import org.apache.druid.error.InvalidInput; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.dimension.ColumnSelectorStrategyFactory; +import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy; +import org.apache.druid.query.groupby.epinephelinae.column.FixedWidthGroupByColumnSelectorStrategy; +import org.apache.druid.query.groupby.epinephelinae.column.GroupByColumnSelectorStrategy; +import org.apache.druid.query.groupby.epinephelinae.column.KeyMappingMultiValueGroupByColumnSelectorStrategy; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; + +/** + * Creates {@link org.apache.druid.query.dimension.ColumnSelectorStrategy}s for grouping dimensions + * If the type is STRING, then it delegates the group by handling to {@link KeyMappingMultiValueGroupByColumnSelectorStrategy} + * which is specialized for {@link DimensionSelector}s and multi-value dimensions. + * If the type is numeric, then it delegates the handling to the {@link FixedWidthGroupByColumnSelectorStrategy} + * Else, it delegates the handling to {@link DictionaryBuildingGroupByColumnSelectorStrategy} which is a generic strategy + * and builds dictionaries on the fly. + */ +public class GroupByColumnSelectorStrategyFactory implements ColumnSelectorStrategyFactory +{ + @Override + public GroupByColumnSelectorStrategy makeColumnSelectorStrategy( + ColumnCapabilities capabilities, + ColumnValueSelector selector, + String dimension + ) + { + if (capabilities == null || capabilities.getType() == null) { + throw DruidException.defensive("Unable to deduce type for the grouping dimension"); + } + try { + if (!capabilities.toColumnType().getNullableStrategy().groupable()) { + // InvalidInput because the SQL planner would have already flagged these dimensions, therefore this will only happen + // if native queries have been submitted. + throw InvalidInput.exception( + "Unable to group on the column[%s] with type[%s]", + dimension, + capabilities.toColumnType() + ); + } + } + catch (Exception e) { + throw InvalidInput.exception(e, "Unable to group on the column[%s]", dimension); + } + + switch (capabilities.getType()) { + case STRING: + return KeyMappingMultiValueGroupByColumnSelectorStrategy.create(capabilities, (DimensionSelector) selector); + case LONG: + return new FixedWidthGroupByColumnSelectorStrategy<>( + Byte.BYTES + Long.BYTES, + ColumnType.LONG, + ColumnValueSelector::getLong, + ColumnValueSelector::isNull + ); + case FLOAT: + return new FixedWidthGroupByColumnSelectorStrategy<>( + Byte.BYTES + Float.BYTES, + ColumnType.FLOAT, + ColumnValueSelector::getFloat, + ColumnValueSelector::isNull + ); + case DOUBLE: + return new FixedWidthGroupByColumnSelectorStrategy<>( + Byte.BYTES + Double.BYTES, + ColumnType.DOUBLE, + ColumnValueSelector::getDouble, + ColumnValueSelector::isNull + ); + case ARRAY: + switch (capabilities.getElementType().getType()) { + case LONG: + case STRING: + case DOUBLE: + return DictionaryBuildingGroupByColumnSelectorStrategy.forType(capabilities.toColumnType()); + case FLOAT: + // Array not supported in expressions, ingestion + default: + throw new IAE("Cannot create query type helper from invalid type [%s]", capabilities.asTypeString()); + + } + case COMPLEX: + return DictionaryBuildingGroupByColumnSelectorStrategy.forType(capabilities.toColumnType()); + default: + throw new IAE("Cannot create query type helper from invalid type [%s]", capabilities.asTypeString()); + } + } + + @Override + public boolean supportsComplexTypes() + { + return true; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java index 9ffc006799ce..05a5084e7365 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java @@ -33,7 +33,6 @@ import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.query.dimension.ColumnSelectorStrategyFactory; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.groupby.GroupByQuery; @@ -41,23 +40,13 @@ import org.apache.druid.query.groupby.GroupByQueryMetrics; import org.apache.druid.query.groupby.GroupingEngine; import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.column.ArrayDoubleGroupByColumnSelectorStrategy; -import org.apache.druid.query.groupby.epinephelinae.column.ArrayLongGroupByColumnSelectorStrategy; -import org.apache.druid.query.groupby.epinephelinae.column.ArrayStringGroupByColumnSelectorStrategy; -import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingStringGroupByColumnSelectorStrategy; -import org.apache.druid.query.groupby.epinephelinae.column.DoubleGroupByColumnSelectorStrategy; -import org.apache.druid.query.groupby.epinephelinae.column.FloatGroupByColumnSelectorStrategy; import org.apache.druid.query.groupby.epinephelinae.column.GroupByColumnSelectorPlus; import org.apache.druid.query.groupby.epinephelinae.column.GroupByColumnSelectorStrategy; -import org.apache.druid.query.groupby.epinephelinae.column.LongGroupByColumnSelectorStrategy; -import org.apache.druid.query.groupby.epinephelinae.column.NullableNumericGroupByColumnSelectorStrategy; -import org.apache.druid.query.groupby.epinephelinae.column.StringGroupByColumnSelectorStrategy; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; import org.apache.druid.query.ordering.StringComparator; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; @@ -92,7 +81,7 @@ */ public class GroupByQueryEngine { - private static final GroupByStrategyFactory STRATEGY_FACTORY = new GroupByStrategyFactory(); + private static final GroupByColumnSelectorStrategyFactory STRATEGY_FACTORY = new GroupByColumnSelectorStrategyFactory(); private GroupByQueryEngine() { @@ -142,7 +131,7 @@ public GroupByEngineIterator make() curPos, query.getResultRowDimensionStart() + i ); - curPos += dims[i].getColumnSelectorStrategy().getGroupingKeySize(); + curPos += dims[i].getColumnSelectorStrategy().getGroupingKeySizeBytes(); } final int cardinalityForArrayAggregation = GroupingEngine.getCardinalityForArrayAggregation( @@ -239,57 +228,6 @@ private static boolean hasNoImplicitUnnestDimensions( }); } - private static class GroupByStrategyFactory implements ColumnSelectorStrategyFactory - { - @Override - public GroupByColumnSelectorStrategy makeColumnSelectorStrategy( - ColumnCapabilities capabilities, - ColumnValueSelector selector - ) - { - switch (capabilities.getType()) { - case STRING: - DimensionSelector dimSelector = (DimensionSelector) selector; - if (dimSelector.getValueCardinality() >= 0) { - return new StringGroupByColumnSelectorStrategy(dimSelector::lookupName, capabilities); - } else { - return new DictionaryBuildingStringGroupByColumnSelectorStrategy(); - } - case LONG: - return makeNullableNumericStrategy(new LongGroupByColumnSelectorStrategy()); - case FLOAT: - return makeNullableNumericStrategy(new FloatGroupByColumnSelectorStrategy()); - case DOUBLE: - return makeNullableNumericStrategy(new DoubleGroupByColumnSelectorStrategy()); - case ARRAY: - switch (capabilities.getElementType().getType()) { - case LONG: - return new ArrayLongGroupByColumnSelectorStrategy(); - case STRING: - return new ArrayStringGroupByColumnSelectorStrategy(); - case DOUBLE: - return new ArrayDoubleGroupByColumnSelectorStrategy(); - case FLOAT: - // Array not supported in expressions, ingestion - default: - throw new IAE("Cannot create query type helper from invalid type [%s]", capabilities.asTypeString()); - - } - default: - throw new IAE("Cannot create query type helper from invalid type [%s]", capabilities.asTypeString()); - } - } - - private GroupByColumnSelectorStrategy makeNullableNumericStrategy(GroupByColumnSelectorStrategy delegate) - { - if (NullHandling.sqlCompatible()) { - return new NullableNumericGroupByColumnSelectorStrategy(delegate); - } else { - return delegate; - } - } - } - private abstract static class GroupByEngineIterator implements Iterator, Closeable { protected final GroupByQuery query; @@ -843,7 +781,7 @@ private GroupByEngineKeySerde(final GroupByColumnSelectorPlus[] dims, GroupByQue this.dims = dims; int keySize = 0; for (GroupByColumnSelectorPlus selectorPlus : dims) { - keySize += selectorPlus.getColumnSelectorStrategy().getGroupingKeySize(); + keySize += selectorPlus.getColumnSelectorStrategy().getGroupingKeySizeBytes(); } this.keySize = keySize; diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java index e98af6af84fb..491c28d41427 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java @@ -89,8 +89,10 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.function.Function; import java.util.function.Predicate; @@ -743,7 +745,8 @@ private static class InputRawSupplierColumnSelectorStrategyFactory @Override public InputRawSupplierColumnSelectorStrategy makeColumnSelectorStrategy( ColumnCapabilities capabilities, - ColumnValueSelector selector + ColumnValueSelector selector, + String dimension ) { switch (capabilities.getType()) { @@ -759,28 +762,20 @@ public InputRawSupplierColumnSelectorStrategy makeColumnSelectorStrategy( return (InputRawSupplierColumnSelectorStrategy) columnSelector -> () -> columnSelector.isNull() ? null : columnSelector.getDouble(); case ARRAY: - switch (capabilities.getElementType().getType()) { - case STRING: - return (InputRawSupplierColumnSelectorStrategy) - columnSelector -> - () -> DimensionHandlerUtils.coerceToStringArray(columnSelector.getObject()); - case FLOAT: - case LONG: - case DOUBLE: - return (InputRawSupplierColumnSelectorStrategy) - columnSelector -> - () -> DimensionHandlerUtils.convertToArray(columnSelector.getObject(), - capabilities.getElementType()); - default: - throw new IAE( - "Cannot create query type helper from invalid type [%s]", - capabilities.asTypeString() - ); - } + case COMPLEX: + return (InputRawSupplierColumnSelectorStrategy) + columnSelector -> + () -> DimensionHandlerUtils.convertObjectToType(columnSelector.getObject(), capabilities.toColumnType()); default: throw new IAE("Cannot create query type helper from invalid type [%s]", capabilities.asTypeString()); } } + + @Override + public boolean supportsComplexTypes() + { + return true; + } } @SuppressWarnings("unchecked") @@ -1157,7 +1152,7 @@ private static int compareDimsInRowsWithAggs( static long estimateStringKeySize(@Nullable String key) { - return DictionaryBuilding.estimateEntryFootprint((key == null ? 0 : key.length()) * Character.BYTES); + return DictionaryBuildingUtils.estimateEntryFootprint((key == null ? 0 : key.length()) * Character.BYTES); } private static class RowBasedKeySerde implements Grouper.KeySerde @@ -1172,7 +1167,6 @@ private static class RowBasedKeySerde implements Grouper.KeySerde valueTypes; - private final boolean enableRuntimeDictionaryGeneration; private final List dictionary; @@ -1190,6 +1184,8 @@ private static class RowBasedKeySerde implements Grouper.KeySerde doubleArrayDictionary; private final Object2IntMap reverseDoubleArrayDictionary; + private final Map> genericDictionaries = new HashMap<>(); + private final Map> genericReverseDictionaries = new HashMap<>(); // Size limiting for the dictionary, in (roughly estimated) bytes. private final long maxDictionarySize; @@ -1219,20 +1215,21 @@ private static class RowBasedKeySerde implements Grouper.KeySerde elementType; + final String columnTypeName; + + final List dictionary; + final Object2IntMap reverseDictionary; + + public GenericRowBasedKeySerdeHelper( + int keyBufferPosition, + ColumnType columnType + ) + { + super(keyBufferPosition); + validateColumnType(columnType); + this.columnTypeName = columnType.asTypeString(); + this.dictionary = genericDictionaries.computeIfAbsent( + columnTypeName, + ignored -> DictionaryBuildingUtils.createDictionary() + ); + this.reverseDictionary = genericReverseDictionaries.computeIfAbsent( + columnTypeName, + ignored -> DictionaryBuildingUtils.createReverseDictionary(columnType.getNullableStrategy()) + ); + this.bufferComparator = (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> + columnType.getNullableStrategy().compare( + dictionary.get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)), + dictionary.get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)) + ); + } + + // Asserts that we don't entertain any complex types without a typename, to prevent intermixing dictionaries of + // different types. + private void validateColumnType(TypeSignature columnType) + { + if (columnType.isArray()) { + validateColumnType(columnType.getElementType()); + } else if (columnType.is(ValueType.COMPLEX)) { + if (columnType.getComplexTypeName() == null) { + throw DruidException.defensive("complex type name expected"); + } + } + } + + @Override + public BufferComparator getBufferComparator() + { + return bufferComparator; + } + + @Override + public List getDictionary() + { + return dictionary; + } + + @Override + public Object2IntMap getReverseDictionary() + { + return reverseDictionary; + } + } + + + private class ArrayNumericRowBasedKeySerdeHelper extends DictionaryBuildingSingleValuedRowBasedKeySerdeHelper + { + private final BufferComparator bufferComparator; + private final List dictionary; + private final Object2IntMap reverseDictionary; public ArrayNumericRowBasedKeySerdeHelper( int keyBufferPosition, @@ -1543,20 +1660,22 @@ public ArrayNumericRowBasedKeySerdeHelper( ColumnType arrayType ) { - this.keyBufferPosition = keyBufferPosition; - this.elementType = arrayType.getElementType(); + super(keyBufferPosition); + final TypeSignature elementType = arrayType.getElementType(); + this.dictionary = getDictionaryForType(elementType); + this.reverseDictionary = getReverseDictionaryForType(elementType); this.bufferComparator = (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> { if (stringComparator == null || StringComparators.NUMERIC.equals(stringComparator) || StringComparators.NATURAL.equals(stringComparator)) { return arrayType.getNullableStrategy().compare( - getDictionaryForType(elementType).get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)), - getDictionaryForType(elementType).get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)) + this.dictionary.get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)), + this.dictionary.get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)) ); } else { return new DimensionComparisonUtils.ArrayComparatorForUnnaturalStringComparator(stringComparator).compare( - getDictionaryForType(elementType).get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)), - getDictionaryForType(elementType).get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)) + this.dictionary.get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)), + this.dictionary.get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)) ); } }; @@ -1590,43 +1709,27 @@ private Object2IntMap getReverseDictionaryForType(TypeSignature getDictionary() { - dimValues[dimValIdx] = getDictionaryForType(elementType).get(buffer.getInt(initialOffset + keyBufferPosition)); + return dictionary; } @Override - public BufferComparator getBufferComparator() + public Object2IntMap getReverseDictionary() { - return bufferComparator; + return reverseDictionary; } } - private class ArrayStringRowBasedKeySerdeHelper implements RowBasedKeySerdeHelper + private class ArrayStringRowBasedKeySerdeHelper extends DictionaryBuildingSingleValuedRowBasedKeySerdeHelper { - final int keyBufferPosition; final BufferComparator bufferComparator; ArrayStringRowBasedKeySerdeHelper( @@ -1634,9 +1737,10 @@ private class ArrayStringRowBasedKeySerdeHelper implements RowBasedKeySerdeHelpe @Nullable StringComparator stringComparator ) { - this.keyBufferPosition = keyBufferPosition; + super(keyBufferPosition); bufferComparator = (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> - new DimensionComparisonUtils.ArrayComparator(stringComparator == null ? StringComparators.LEXICOGRAPHIC : stringComparator) + new DimensionComparisonUtils.ArrayComparator<>( + stringComparator == null ? StringComparators.LEXICOGRAPHIC : stringComparator) .compare( stringArrayDictionary.get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)), stringArrayDictionary.get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)) @@ -1650,38 +1754,21 @@ public int getKeyBufferValueSize() } @Override - public boolean putToKeyBuffer(RowBasedKey key, int idx) + public BufferComparator getBufferComparator() { - Object[] stringArray = (Object[]) key.getKey()[idx]; - final int id = addToArrayDictionary(stringArray); - if (id < 0) { - return false; - } - keyBuffer.putInt(id); - return true; + return bufferComparator; } @Override - public void getFromByteBuffer(ByteBuffer buffer, int initialOffset, int dimValIdx, Object[] dimValues) + public List getDictionary() { - dimValues[dimValIdx] = stringArrayDictionary.get(buffer.getInt(initialOffset + keyBufferPosition)); + return stringArrayDictionary; } @Override - public BufferComparator getBufferComparator() + public Object2IntMap getReverseDictionary() { - return bufferComparator; - } - - private int addToArrayDictionary(final Object[] s) - { - int idx = reverseStringArrayDictionary.getInt(s); - if (idx == DimensionDictionary.ABSENT_VALUE_ID) { - idx = stringArrayDictionary.size(); - reverseStringArrayDictionary.put(s, idx); - stringArrayDictionary.add(s); - } - return idx; + return reverseStringArrayDictionary; } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayDoubleGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayDoubleGroupByColumnSelectorStrategy.java deleted file mode 100644 index e1a7e940de26..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayDoubleGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.ValueType; - -import java.util.Arrays; -import java.util.List; - -public class ArrayDoubleGroupByColumnSelectorStrategy extends ArrayNumericGroupByColumnSelectorStrategy -{ - public ArrayDoubleGroupByColumnSelectorStrategy() - { - super(Double.BYTES, ColumnType.DOUBLE_ARRAY); - } - - @Override - protected int computeDictionaryId(ColumnValueSelector selector) - { - Object object = selector.getObject(); - if (object == null) { - return GROUP_BY_MISSING_VALUE; - } else if (object instanceof Double) { - return addToIndexedDictionary(new Object[]{object}); - } else if (object instanceof List) { - return addToIndexedDictionary(((List) object).toArray()); - } else if (object instanceof Double[]) { - // Defensive check, since we don't usually expect to encounter Double[] objects from selectors - return addToIndexedDictionary(Arrays.stream((Double[]) object).toArray()); - } else if (object instanceof Object[]) { - return addToIndexedDictionary((Object[]) object); - } else { - throw new ISE("Found unexpected object type [%s] in %s array.", object.getClass().getName(), ValueType.DOUBLE); - } - } -} - - diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayLongGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayLongGroupByColumnSelectorStrategy.java deleted file mode 100644 index 49cd91baedf9..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayLongGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.ValueType; - -import java.util.Arrays; -import java.util.List; - -public class ArrayLongGroupByColumnSelectorStrategy extends ArrayNumericGroupByColumnSelectorStrategy -{ - public ArrayLongGroupByColumnSelectorStrategy() - { - super(Long.BYTES, ColumnType.LONG_ARRAY); - } - - @Override - protected int computeDictionaryId(ColumnValueSelector selector) - { - Object object = selector.getObject(); - if (object == null) { - return GROUP_BY_MISSING_VALUE; - } else if (object instanceof Long) { - return addToIndexedDictionary(new Object[]{object}); - } else if (object instanceof List) { - return addToIndexedDictionary(((List) object).toArray()); - } else if (object instanceof Long[]) { - // Defensive check, since we don't usually expect to encounter Long[] objects from selectors - return addToIndexedDictionary(Arrays.stream((Long[]) object).toArray()); - } else if (object instanceof Object[]) { - return addToIndexedDictionary((Object[]) object); - } else { - throw new ISE("Found unexpected object type [%s] in %s array.", object.getClass().getName(), ValueType.LONG); - } - } -} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayNumericGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayNumericGroupByColumnSelectorStrategy.java deleted file mode 100644 index 62c479885021..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayNumericGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import com.google.common.annotations.VisibleForTesting; -import it.unimi.dsi.fastutil.objects.Object2IntMap; -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.DictionaryBuilding; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.ordering.StringComparator; -import org.apache.druid.query.ordering.StringComparators; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.column.ColumnType; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; -import java.util.List; - -public abstract class ArrayNumericGroupByColumnSelectorStrategy - implements GroupByColumnSelectorStrategy -{ - protected static final int GROUP_BY_MISSING_VALUE = -1; - - private final List dictionary; - private final Object2IntMap reverseDictionary; - private long estimatedFootprint = 0L; - private final int valueFootprint; - - public ArrayNumericGroupByColumnSelectorStrategy(final int valueFootprint, final ColumnType arrayType) - { - this.dictionary = DictionaryBuilding.createDictionary(); - this.reverseDictionary = DictionaryBuilding.createReverseDictionaryForPrimitiveArray(arrayType); - this.valueFootprint = valueFootprint; - } - - @Override - public int getGroupingKeySize() - { - return Integer.BYTES; - } - - @Override - public void processValueFromGroupingKey( - GroupByColumnSelectorPlus selectorPlus, - ByteBuffer key, - ResultRow resultRow, - int keyBufferPosition - ) - { - final int id = key.getInt(keyBufferPosition); - - // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. - if (id != GROUP_BY_MISSING_VALUE) { - final Object[] value = dictionary.get(id); - resultRow.set(selectorPlus.getResultRowPosition(), value); - } else { - resultRow.set(selectorPlus.getResultRowPosition(), null); - } - } - - @Override - public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) - { - final long priorFootprint = estimatedFootprint; - valuess[columnIndex] = computeDictionaryId(selector); - return (int) (estimatedFootprint - priorFootprint); - } - - @Override - public void initGroupingKeyColumnValue( - int keyBufferPosition, - int dimensionIndex, - Object rowObj, - ByteBuffer keyBuffer, - int[] stack - ) - { - final int groupingKey = (int) rowObj; - writeToKeyBuffer(keyBufferPosition, groupingKey, keyBuffer); - if (groupingKey == GROUP_BY_MISSING_VALUE) { - stack[dimensionIndex] = 0; - } else { - stack[dimensionIndex] = 1; - } - - } - - @Override - public boolean checkRowIndexAndAddValueToGroupingKey( - int keyBufferPosition, - Object rowObj, - int rowValIdx, - ByteBuffer keyBuffer - ) - { - return false; - } - - protected abstract int computeDictionaryId(ColumnValueSelector selector); - - @Override - public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) - { - final long priorFootprint = estimatedFootprint; - - // computeDictionaryId updates estimatedFootprint - keyBuffer.putInt(keyBufferPosition, computeDictionaryId(selector)); - - return (int) (estimatedFootprint - priorFootprint); - } - - protected int addToIndexedDictionary(Object[] t) - { - final int dictId = reverseDictionary.getInt(t); - if (dictId < 0) { - final int size = dictionary.size(); - dictionary.add(t); - reverseDictionary.put(t, size); - - // Footprint estimate: one pointer, one value per list entry. - estimatedFootprint += DictionaryBuilding.estimateEntryFootprint(t.length * (Long.BYTES + valueFootprint)); - return size; - } - return dictId; - } - - @Override - public Grouper.BufferComparator bufferComparator(int keyBufferPosition, @Nullable StringComparator stringComparator) - { - StringComparator comparator = stringComparator == null ? StringComparators.NUMERIC : stringComparator; - return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> { - Object[] lhs = dictionary.get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)); - Object[] rhs = dictionary.get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)); - - int minLength = Math.min(lhs.length, rhs.length); - //noinspection ArrayEquality - if (lhs == rhs) { - return 0; - } else { - for (int i = 0; i < minLength; i++) { - final Object left = lhs[i]; - final Object right = rhs[i]; - final int cmp; - if (left == null && right == null) { - cmp = 0; - } else if (left == null) { - cmp = -1; - } else { - cmp = comparator.compare(String.valueOf(left), String.valueOf(right)); - } - if (cmp == 0) { - continue; - } - return cmp; - } - if (lhs.length == rhs.length) { - return 0; - } else if (lhs.length < rhs.length) { - return -1; - } - return 1; - } - }; - } - - @Override - public void reset() - { - dictionary.clear(); - reverseDictionary.clear(); - estimatedFootprint = 0; - } - - @VisibleForTesting - void writeToKeyBuffer(int keyBufferPosition, int groupingKey, ByteBuffer keyBuffer) - { - keyBuffer.putInt(keyBufferPosition, groupingKey); - } -} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayStringGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayStringGroupByColumnSelectorStrategy.java deleted file mode 100644 index 375496671fa2..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayStringGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.DictionaryBuilding; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.ordering.StringComparator; -import org.apache.druid.query.ordering.StringComparators; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.column.ValueType; -import org.apache.druid.segment.data.ComparableIntArray; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; -import java.util.List; - -public class ArrayStringGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy -{ - private static final int GROUP_BY_MISSING_VALUE = -1; - - - // contains string <-> id for each element of the multi value grouping column - // for eg : [a,b,c] is the col value. dictionaryToInt will contain { a <-> 1, b <-> 2, c <-> 3} - private final BiMap dictionaryToInt; - - // stores each row as an integer array where the int represents the value in dictionaryToInt - // for eg : [a,b,c] would be converted to [1,2,3] and assigned a integer value 1. - // [1,2,3] <-> 1 - private final BiMap intListToInt; - - private long estimatedFootprint = 0L; - - @Override - public int getGroupingKeySize() - { - return Integer.BYTES; - } - - public ArrayStringGroupByColumnSelectorStrategy() - { - dictionaryToInt = HashBiMap.create(); - intListToInt = HashBiMap.create(); - } - - @VisibleForTesting - ArrayStringGroupByColumnSelectorStrategy( - BiMap dictionaryToInt, - BiMap intArrayToInt - ) - { - this.dictionaryToInt = dictionaryToInt; - this.intListToInt = intArrayToInt; - } - - @Override - public void processValueFromGroupingKey( - GroupByColumnSelectorPlus selectorPlus, - ByteBuffer key, - ResultRow resultRow, - int keyBufferPosition - ) - { - final int id = key.getInt(keyBufferPosition); - - // GROUP_BY_MISSING_VALUE is used to indicate empty rows - if (id != GROUP_BY_MISSING_VALUE) { - final int[] intRepresentation = intListToInt.inverse() - .get(id) - .getDelegate(); - final Object[] stringRepresentaion = new Object[intRepresentation.length]; - for (int i = 0; i < intRepresentation.length; i++) { - stringRepresentaion[i] = dictionaryToInt.inverse().get(intRepresentation[i]); - } - resultRow.set(selectorPlus.getResultRowPosition(), stringRepresentaion); - } else { - resultRow.set(selectorPlus.getResultRowPosition(), null); - } - - } - - @Override - public int initColumnValues( - ColumnValueSelector selector, - int columnIndex, - Object[] valuess - ) - { - final long priorFootprint = estimatedFootprint; - final int groupingKey = computeDictionaryId(selector); - valuess[columnIndex] = groupingKey; - return (int) (estimatedFootprint - priorFootprint); - } - - @Override - public void initGroupingKeyColumnValue( - int keyBufferPosition, - int dimensionIndex, - Object rowObj, - ByteBuffer keyBuffer, - int[] stack - ) - { - final int groupingKey = (int) rowObj; - writeToKeyBuffer(keyBufferPosition, groupingKey, keyBuffer); - if (groupingKey == GROUP_BY_MISSING_VALUE) { - stack[dimensionIndex] = 0; - } else { - stack[dimensionIndex] = 1; - } - } - - @Override - public boolean checkRowIndexAndAddValueToGroupingKey( - int keyBufferPosition, - Object rowObj, - int rowValIdx, - ByteBuffer keyBuffer - ) - { - return false; - } - - /** - * Compute dictionary ID for the given selector. Updates {@link #estimatedFootprint} as necessary. - */ - @VisibleForTesting - int computeDictionaryId(ColumnValueSelector selector) - { - final int[] intRepresentation; - Object object = selector.getObject(); - if (object == null) { - return GROUP_BY_MISSING_VALUE; - } else if (object instanceof String) { - intRepresentation = new int[1]; - intRepresentation[0] = addToIndexedDictionary((String) object); - } else if (object instanceof List) { - final int size = ((List) object).size(); - intRepresentation = new int[size]; - for (int i = 0; i < size; i++) { - intRepresentation[i] = addToIndexedDictionary((String) ((List) object).get(i)); - } - } else if (object instanceof String[]) { - final int size = ((String[]) object).length; - intRepresentation = new int[size]; - for (int i = 0; i < size; i++) { - intRepresentation[i] = addToIndexedDictionary(((String[]) object)[i]); - } - } else if (object instanceof Object[]) { - final int size = ((Object[]) object).length; - intRepresentation = new int[size]; - for (int i = 0; i < size; i++) { - intRepresentation[i] = addToIndexedDictionary((String) ((Object[]) object)[i]); - } - } else { - throw new ISE("Found unexpected object type [%s] in %s array.", object.getClass().getName(), ValueType.STRING); - } - - final ComparableIntArray comparableIntArray = ComparableIntArray.of(intRepresentation); - final int dictId = intListToInt.getOrDefault(comparableIntArray, GROUP_BY_MISSING_VALUE); - if (dictId == GROUP_BY_MISSING_VALUE) { - final int nextId = intListToInt.keySet().size(); - intListToInt.put(comparableIntArray, nextId); - - // We're not using the dictionary and reverseDictionary from DictionaryBuilding, but the BiMap is close enough - // that we expect this footprint calculation to still be useful. (It doesn't have to be exact.) - estimatedFootprint += - DictionaryBuilding.estimateEntryFootprint(comparableIntArray.getDelegate().length * Integer.BYTES); - - return nextId; - } else { - return dictId; - } - } - - private int addToIndexedDictionary(String value) - { - final Integer dictId = dictionaryToInt.get(value); - if (dictId == null) { - final int nextId = dictionaryToInt.size(); - dictionaryToInt.put(value, nextId); - - // We're not using the dictionary and reverseDictionary from DictionaryBuilding, but the BiMap is close enough - // that we expect this footprint calculation to still be useful. (It doesn't have to be exact.) - estimatedFootprint += - DictionaryBuilding.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES); - - return nextId; - } else { - return dictId; - } - } - - @Override - public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) - { - final long priorFootprint = estimatedFootprint; - - // computeDictionaryId updates estimatedFootprint - keyBuffer.putInt(keyBufferPosition, computeDictionaryId(selector)); - - return (int) (estimatedFootprint - priorFootprint); - } - - @Override - public Grouper.BufferComparator bufferComparator(int keyBufferPosition, @Nullable StringComparator stringComparator) - { - final StringComparator comparator = stringComparator == null ? StringComparators.LEXICOGRAPHIC : stringComparator; - return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> { - int[] lhs = intListToInt.inverse().get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)).getDelegate(); - int[] rhs = intListToInt.inverse().get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)).getDelegate(); - - int minLength = Math.min(lhs.length, rhs.length); - //noinspection ArrayEquality - if (lhs == rhs) { - return 0; - } else { - for (int i = 0; i < minLength; i++) { - final int cmp = comparator.compare( - dictionaryToInt.inverse().get(lhs[i]), - dictionaryToInt.inverse().get(rhs[i]) - ); - if (cmp == 0) { - continue; - } - return cmp; - } - if (lhs.length == rhs.length) { - return 0; - } else if (lhs.length < rhs.length) { - return -1; - } - return 1; - } - }; - } - - @Override - public void reset() - { - dictionaryToInt.clear(); - intListToInt.clear(); - estimatedFootprint = 0; - } - - @VisibleForTesting - void writeToKeyBuffer(int keyBufferPosition, int groupingKey, ByteBuffer keyBuffer) - { - keyBuffer.putInt(keyBufferPosition, groupingKey); - } -} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java new file mode 100644 index 000000000000..cf033eaa65d6 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; +import it.unimi.dsi.fastutil.objects.Object2IntMap; +import org.apache.druid.error.DruidException; +import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.NullableTypeStrategy; + +import javax.annotation.concurrent.NotThreadSafe; +import java.util.ArrayList; +import java.util.List; + +/** + * Strategy for grouping dimensions which can have variable-width objects, and aren't backed by prebuilt dictionaries. It + * encapsulates the dictionary building logic, along with providing the implementations for dimension to dictionary id + * encoding-decoding. + *

+ * This strategy can handle any dimension that can be addressed on a reverse-dictionary. Reverse dictionary uses + * a sorted map, rather than a hashmap. + *

+ * This is the most expensive of all the strategies, and hence must be used only when other strategies aren't valid. + */ +@NotThreadSafe +public class DictionaryBuildingGroupByColumnSelectorStrategy + extends KeyMappingGroupByColumnSelectorStrategy +{ + + private DictionaryBuildingGroupByColumnSelectorStrategy( + DimensionIdCodec dimensionIdCodec, + ColumnType columnType, + NullableTypeStrategy nullableTypeStrategy, + DimensionType defaultValue + ) + { + super(dimensionIdCodec, columnType, nullableTypeStrategy, defaultValue); + } + + /** + * Creates an implementation of the strategy for the given type + */ + public static GroupByColumnSelectorStrategy forType(final ColumnType columnType) + { + if (columnType.equals(ColumnType.STRING)) { + // String types are handled specially because they can have multi-value dimensions + throw DruidException.defensive("Should use special variant which handles multi-value dimensions"); + } else if ( + // Defensive check, primitives should be using a faster fixed-width strategy + columnType.equals(ColumnType.DOUBLE) + || columnType.equals(ColumnType.FLOAT) + || columnType.equals(ColumnType.LONG)) { + throw DruidException.defensive("Could used a fixed width strategy"); + } + + if (ColumnType.STRING_ARRAY.equals(columnType)) { + forStringArrays(); + } + + // Catch-all for all other types, that can only have single-valued dimensions + return forArrayAndComplexTypes(columnType); + } + + /** + * Implemenatation of dictionary building strategy for types other than strings (since they can be multi-valued and need + * to be handled separately) and numeric primitives (since they can be handled by fixed-width strategy). + * This also means that we handle array and complex types here, which simplifies the generics a lot, as everything can be + * treated as Object in this class. + *

+ * Also, there isn't any concept of multi-values here, therefore Dimension == DimensionHolderType == Object. We still + * homogenize rogue selectors which can return non-standard implementation of arrays (like Long[] for long arrays instead of + * Object[]) to what the callers would expect (i.e. Object[] in this case). + */ + private static GroupByColumnSelectorStrategy forArrayAndComplexTypes(final ColumnType columnType) + { + return new DictionaryBuildingGroupByColumnSelectorStrategy<>( + new UniValueDimensionIdCodec(columnType.getNullableStrategy()), + columnType, + columnType.getNullableStrategy(), + null + ); + } + + private static GroupByColumnSelectorStrategy forStringArrays() + { + return new DictionaryBuildingGroupByColumnSelectorStrategy<>( + new StringArrayDimensionIdCodec(), + ColumnType.STRING_ARRAY, + ColumnType.STRING_ARRAY.getNullableStrategy(), + null + ); + } + + private static class UniValueDimensionIdCodec implements DimensionIdCodec + { + /** + * Dictionary for mapping the dimension value to an index. i-th position in the dictionary holds the value represented + * by the dictionaryId "i". + * Therefore, if a value has a dictionary id "i", dictionary.get(i) = value + * If a -> 0, b -> 1, c -> 2 (value -> dictionaryId), then the dictionary would be laid out like: [a, b, c] + */ + private final List dictionary; + + /** + * Reverse dictionary for faster lookup into the dictionary, and reusing pre-existing dictionary ids. + *

+ * An entry of form (value, i) in the reverse dictionary represents that "value" is present at the i-th location in the + * {@link #dictionary}. + * Absence of mapping of a "value" (denoted by returning {@link GroupByColumnSelectorStrategy#GROUP_BY_MISSING_VALUE}) + * represents that the value is absent in the dictionary + * If a -> 0, b -> 1, c -> 2 (value -> dictionaryId), then the reverse dictionary would have the entries (a, 0), (b, 1), + * (c, 2) + */ + private final Object2IntMap reverseDictionary; + + @SuppressWarnings("rawtypes") + private final NullableTypeStrategy nullableTypeStrategy; + + public UniValueDimensionIdCodec(final NullableTypeStrategy nullableTypeStrategy) + { + this.dictionary = DictionaryBuildingUtils.createDictionary(); + this.reverseDictionary = DictionaryBuildingUtils.createReverseDictionary(nullableTypeStrategy); + this.nullableTypeStrategy = nullableTypeStrategy; + } + + @Override + public MemoryFootprint lookupId(Object dimension) + { + int dictId = reverseDictionary.getInt(dimension); + int footprintIncrease = 0; + // Even if called again, then this is no-op + if (dictId < 0) { + final int size = dictionary.size(); + dictionary.add(dimension); + reverseDictionary.put(dimension, size); + dictId = size; + // MultiValueHOlder is always expected to handle the type, once the coercion is complete + //noinspection unchecked + footprintIncrease = DictionaryBuildingUtils.estimateEntryFootprint( + nullableTypeStrategy.estimateSizeBytes(dimension) + ); + } + return new MemoryFootprint<>(dictId, footprintIncrease); + } + + @Override + public Object idToKey(int id) + { + if (id >= dictionary.size()) { + throw DruidException.defensive("Unknown dictionary id [%d]", id); + } + // No need to handle GROUP_BY_MISSING_VALUE, by contract + return dictionary.get(id); + } + + @Override + public boolean canCompareIds() + { + // Dictionaries are built on the fly, and ids are assigned in the order in which the value is added to the + // dictionary. + return false; + } + + @Override + public void reset() + { + dictionary.clear(); + reverseDictionary.clear(); + } + } + + /** + * {@link DimensionIdCodec} for string arrays. Dictionary building for string arrays is optimised to have a dual + * dictionary - one that maps the string values to an id, and another which maps an array of these ids, to the returned + * dictionary id. This reduces the amount of heap memory required to build the dictionaries + */ + private static class StringArrayDimensionIdCodec implements DimensionIdCodec + { + // contains string <-> id for each element of the multi value grouping column + // for eg : [a,b,c] is the col value. dictionaryToInt will contain { a <-> 1, b <-> 2, c <-> 3} + private final BiMap elementBiDictionary = HashBiMap.create(); + + // stores each row as an integer array where the int represents the value in dictionaryToInt + // for eg : [a,b,c] would be converted to [1,2,3] and assigned a integer value 1. + // [1,2,3] <-> 1 + private final BiMap, Integer> arrayBiDictionary = HashBiMap.create(); + + @Override + public MemoryFootprint lookupId(Object dimension) + { + // dimension IS non-null, by contract of this method + Object[] stringArray = (Object[]) dimension; + ArrayList dictionaryEncodedStringArray = new ArrayList<>(); + int estimatedFootprint = 0; + for (Object element : stringArray) { + String elementCasted = (String) element; + Integer elementDictId = elementBiDictionary.get(elementCasted); + if (elementDictId == null) { + elementDictId = elementBiDictionary.size(); + elementBiDictionary.put(elementCasted, elementDictId); + // We're not using the dictionary and reverseDictionary from DictionaryBuilding, but the BiMap is close enough + // that we expect this footprint calculation to still be useful. + estimatedFootprint += + DictionaryBuildingUtils.estimateEntryFootprint(elementCasted == null + ? 0 + : elementCasted.length() * Character.BYTES); + } + dictionaryEncodedStringArray.add(elementDictId); + } + + Integer arrayDictId = arrayBiDictionary.get(dictionaryEncodedStringArray); + if (arrayDictId == null) { + arrayDictId = arrayBiDictionary.size(); + arrayBiDictionary.put(dictionaryEncodedStringArray, arrayDictId); + estimatedFootprint += + DictionaryBuildingUtils.estimateEntryFootprint(dictionaryEncodedStringArray.size() * Integer.BYTES); + } + return new MemoryFootprint<>(arrayDictId, estimatedFootprint); + } + + @Override + public Object idToKey(int id) + { + ArrayList dictionaryEncodedStringArray = arrayBiDictionary.inverse().get(id); + final Object[] stringRepresentation = new Object[dictionaryEncodedStringArray.size()]; + for (int i = 0; i < dictionaryEncodedStringArray.size(); ++i) { + stringRepresentation[i] = elementBiDictionary.inverse().get(dictionaryEncodedStringArray.get(i)); + } + return stringRepresentation; + } + + @Override + public boolean canCompareIds() + { + return false; + } + + @Override + public void reset() + { + arrayBiDictionary.clear(); + elementBiDictionary.clear(); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingStringGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingStringGroupByColumnSelectorStrategy.java deleted file mode 100644 index dfc5149d52a5..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingStringGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import com.google.common.base.Preconditions; -import it.unimi.dsi.fastutil.objects.Object2IntMap; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.DictionaryBuilding; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.ordering.StringComparator; -import org.apache.druid.query.ordering.StringComparators; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.DimensionDictionary; -import org.apache.druid.segment.DimensionSelector; -import org.apache.druid.segment.data.ArrayBasedIndexedInts; -import org.apache.druid.segment.data.IndexedInts; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; -import java.util.List; - -/** - * A String strategy that builds an internal String<->Integer dictionary for - * DimensionSelectors that return false for nameLookupPossibleInAdvance() - */ -public class DictionaryBuildingStringGroupByColumnSelectorStrategy extends StringGroupByColumnSelectorStrategy -{ - private static final int GROUP_BY_MISSING_VALUE = -1; - - private final List dictionary = DictionaryBuilding.createDictionary(); - private final Object2IntMap reverseDictionary = DictionaryBuilding.createReverseDictionary(); - - public DictionaryBuildingStringGroupByColumnSelectorStrategy() - { - super(null, null); - } - - @Override - public void processValueFromGroupingKey( - GroupByColumnSelectorPlus selectorPlus, - ByteBuffer key, - ResultRow resultRow, - int keyBufferPosition - ) - { - final int id = key.getInt(keyBufferPosition); - - // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. - if (id != GROUP_BY_MISSING_VALUE) { - final String value = dictionary.get(id); - resultRow.set(selectorPlus.getResultRowPosition(), value); - } else { - resultRow.set(selectorPlus.getResultRowPosition(), NullHandling.defaultStringValue()); - } - } - - @Override - public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) - { - final DimensionSelector dimSelector = (DimensionSelector) selector; - final IndexedInts row = dimSelector.getRow(); - int stateFootprintIncrease = 0; - ArrayBasedIndexedInts newRow = (ArrayBasedIndexedInts) valuess[columnIndex]; - if (newRow == null) { - newRow = new ArrayBasedIndexedInts(); - valuess[columnIndex] = newRow; - } - int rowSize = row.size(); - newRow.ensureSize(rowSize); - for (int i = 0; i < rowSize; i++) { - final String value = dimSelector.lookupName(row.get(i)); - final int dictId = reverseDictionary.getInt(value); - if (dictId < 0) { - final int nextId = dictionary.size(); - dictionary.add(value); - reverseDictionary.put(value, nextId); - newRow.setValue(i, nextId); - stateFootprintIncrease += - DictionaryBuilding.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES); - } else { - newRow.setValue(i, dictId); - } - } - newRow.setSize(rowSize); - return stateFootprintIncrease; - } - - @Override - public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) - { - final DimensionSelector dimSelector = (DimensionSelector) selector; - final IndexedInts row = dimSelector.getRow(); - - Preconditions.checkState(row.size() < 2, "Not supported for multi-value dimensions"); - - if (row.size() == 0) { - writeToKeyBuffer(keyBufferPosition, GROUP_BY_MISSING_VALUE, keyBuffer); - return 0; - } - - final String value = dimSelector.lookupName(row.get(0)); - final int dictId = reverseDictionary.getInt(value); - if (dictId == DimensionDictionary.ABSENT_VALUE_ID) { - final int nextId = dictionary.size(); - dictionary.add(value); - reverseDictionary.put(value, nextId); - writeToKeyBuffer(keyBufferPosition, nextId, keyBuffer); - return DictionaryBuilding.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES); - } else { - writeToKeyBuffer(keyBufferPosition, dictId, keyBuffer); - return 0; - } - } - - @Override - public Grouper.BufferComparator bufferComparator(int keyBufferPosition, @Nullable StringComparator stringComparator) - { - final StringComparator realComparator = stringComparator == null ? - StringComparators.LEXICOGRAPHIC : - stringComparator; - return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> { - String lhsStr = dictionary.get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)); - String rhsStr = dictionary.get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)); - return realComparator.compare(lhsStr, rhsStr); - }; - } - - @Override - public void reset() - { - dictionary.clear(); - reverseDictionary.clear(); - } -} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DimensionIdCodec.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DimensionIdCodec.java new file mode 100644 index 000000000000..3f5a2074453b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DimensionIdCodec.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +/** + * Dimension to integer id encoder - decoder i.e. it is an interface for converters of dimension to dictionary id and back. + * It only handles single value dimensions. Handle multi-value dimensions (i.e. strings) using the {@link KeyMappingMultiValueGroupByColumnSelectorStrategy}. + *

+ * Encoding
+ * The caller is expected to handle non-null values. Null values must be filtered by the caller, and assigned {@link GroupByColumnSelectorStrategy#GROUP_BY_MISSING_VALUE} + * 1. {@link DimensionIdCodec} extracts the dimension from the selector + * 2. The value gets encoded into a dictionaryId, using {@link DimensionIdCodec#lookupId} + * 3. The callers can use this integer dictionaryID to represent the grouping key + *

+ * Decoding
+ * Converts back the dictionaryId to the dimension value. The implementations are not expected to handle {@link GroupByColumnSelectorStrategy#GROUP_BY_MISSING_VALUE}. + * The callers should handle those values appropriately ontheir own, and filter those out before trying to convert + * the dictionary id back to value. + * + * @param Type of the dimension holder + */ +public interface DimensionIdCodec +{ + /** + * @return DictionaryId of the object at the given index and the memory increase associated with it + */ + MemoryFootprint lookupId(DimensionType dimension); + + DimensionType idToKey(int id); + + /** + * Returns if the object comparison can be optimised directly by comparing the dictionaryIds, instead of decoding the + * objects and comparing those. Therefore, it returns true iff the "dict" function defined by dict(id) = value is + * monotonically increasing. + * + * Ids backed by dictionaries built on the fly can never be compared, therefore those should always return false. + */ + boolean canCompareIds(); + + void reset(); +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DoubleGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DoubleGroupByColumnSelectorStrategy.java deleted file mode 100644 index 6fcbf9ca9d57..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DoubleGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.groupby.epinephelinae.GrouperBufferComparatorUtils; -import org.apache.druid.query.ordering.StringComparator; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.DimensionHandlerUtils; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; - -public class DoubleGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy -{ - @Override - public int getGroupingKeySize() - { - return Double.BYTES; - } - - @Override - public void processValueFromGroupingKey( - GroupByColumnSelectorPlus selectorPlus, - ByteBuffer key, - ResultRow resultRow, - int keyBufferPosition - ) - { - final double val = key.getDouble(keyBufferPosition); - resultRow.set(selectorPlus.getResultRowPosition(), val); - } - - @Override - public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] values) - { - values[columnIndex] = selector.getDouble(); - return 0; - } - - @Override - public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) - { - keyBuffer.putDouble(keyBufferPosition, selector.getDouble()); - return 0; - } - - @Override - public void initGroupingKeyColumnValue( - int keyBufferPosition, - int dimensionIndex, - Object rowObj, - ByteBuffer keyBuffer, - int[] stack - ) - { - writeToKeyBuffer(keyBufferPosition, DimensionHandlerUtils.nullToZero((Double) rowObj), keyBuffer); - stack[dimensionIndex] = 1; - } - - @Override - public boolean checkRowIndexAndAddValueToGroupingKey( - int keyBufferPosition, - Object rowObj, - int rowValIdx, - ByteBuffer keyBuffer - ) - { - // rows from a double column always have a single value, multi-value is not currently supported - // this method handles row values after the first in a multivalued row, so just return false - return false; - } - - @Override - public Grouper.BufferComparator bufferComparator(int keyBufferPosition, @Nullable StringComparator stringComparator) - { - return GrouperBufferComparatorUtils.makeBufferComparatorForDouble( - keyBufferPosition, - true, - stringComparator - ); - } - - @Override - public void reset() - { - // Nothing to do. - } - - private void writeToKeyBuffer(int keyBufferPosition, double value, ByteBuffer keyBuffer) - { - keyBuffer.putDouble(keyBufferPosition, value); - } -} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/FixedWidthGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/FixedWidthGroupByColumnSelectorStrategy.java new file mode 100644 index 000000000000..378689a42059 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/FixedWidthGroupByColumnSelectorStrategy.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +import org.apache.druid.error.DruidException; +import org.apache.druid.query.DimensionComparisonUtils; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.groupby.epinephelinae.Grouper; +import org.apache.druid.query.ordering.StringComparator; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.NullableTypeStrategy; + +import javax.annotation.Nullable; +import javax.annotation.concurrent.NotThreadSafe; +import java.nio.ByteBuffer; +import java.util.function.Function; + +/** + * Strategy for grouping dimensions which have fixed-width objects. It is only used for numeric primitive types, + * however complex types can reuse this strategy if they can hint the engine that they are always fixed width + * (for e.g. IP types). Such types donot need to be backed by a dictionary, and hence are faster to group by. + * + * @param Class of the dimension + */ +@NotThreadSafe +public class FixedWidthGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy +{ + /** + * Size of the key when materialized as bytes + */ + final int keySizeBytes; + + /** + * Type of the dimension on which the grouping strategy is being used + */ + final ColumnType columnType; + + /** + * Nullable type strategy of the dimension + */ + final NullableTypeStrategy nullableTypeStrategy; + + final Function, T> valueGetter; + final Function, Boolean> nullityGetter; + + public FixedWidthGroupByColumnSelectorStrategy( + int keySizeBytes, + ColumnType columnType, + Function, T> valueGetter, + Function, Boolean> nullityGetter + ) + { + this.keySizeBytes = keySizeBytes; + this.columnType = columnType; + this.nullableTypeStrategy = columnType.getNullableStrategy(); + this.valueGetter = valueGetter; + this.nullityGetter = nullityGetter; + } + + @Override + public int getGroupingKeySizeBytes() + { + return keySizeBytes; + } + + @Override + public void processValueFromGroupingKey( + GroupByColumnSelectorPlus selectorPlus, + ByteBuffer key, + ResultRow resultRow, + int keyBufferPosition + ) + { + resultRow.set( + selectorPlus.getResultRowPosition(), + nullableTypeStrategy.read(key, keyBufferPosition) + ); + } + + @Override + public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) + { + valuess[columnIndex] = getValue(selector); + return 0; + } + + @Override + public void initGroupingKeyColumnValue( + int keyBufferPosition, + int dimensionIndex, + Object rowObj, + ByteBuffer keyBuffer, + int[] stack + ) + { + int written; + if (rowObj == null) { + written = nullableTypeStrategy.write(keyBuffer, keyBufferPosition, null, keySizeBytes); + stack[dimensionIndex] = 0; + } else { + written = nullableTypeStrategy.write(keyBuffer, keyBufferPosition, (T) rowObj, keySizeBytes); + stack[dimensionIndex] = 1; + } + // Since this is a fixed width strategy, the caller should already have allocated enough space to materialize the + // key object, and the type strategy should always be able to write to the buffer + if (written < 0) { + throw DruidException.defensive("Unable to serialize the value [%s] to buffer", rowObj); + } + } + + /** + * This is used for multi-valued dimensions, for values after the first one. None of the current types supported by + * this strategy handle multi-valued dimensions, therefore this short circuits and returns false + */ + @Override + public boolean checkRowIndexAndAddValueToGroupingKey( + int keyBufferPosition, + Object rowObj, + int rowValIdx, + ByteBuffer keyBuffer + ) + { + return false; + } + + @Override + public int writeToKeyBuffer( + int keyBufferPosition, + ColumnValueSelector selector, + ByteBuffer keyBuffer + ) + { + T value = getValue(selector); + int written = nullableTypeStrategy.write(keyBuffer, keyBufferPosition, value, keySizeBytes); + if (written < 0) { + throw DruidException.defensive("Unable to serialize the value [%s] to buffer", value); + } + // This strategy doesn't use dictionary building and doesn't hold any internal state, therefore size increase is nil. + return 0; + } + + @Override + public Grouper.BufferComparator bufferComparator( + int keyBufferPosition, + @Nullable StringComparator stringComparator + ) + { + return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> { + T lhs = nullableTypeStrategy.read(lhsBuffer, lhsPosition + keyBufferPosition); + T rhs = nullableTypeStrategy.read(rhsBuffer, rhsPosition + keyBufferPosition); + if (stringComparator != null + && !DimensionComparisonUtils.isNaturalComparator(columnType.getType(), stringComparator)) { + return stringComparator.compare(String.valueOf(lhs), String.valueOf(rhs)); + } + // Nulls are allowed while comparing + //noinspection ConstantConditions + return nullableTypeStrategy.compare(lhs, rhs); + }; + } + + + @Override + public void reset() + { + // Nothing to reset + } + + /** + * Returns the value of the selector. It handles nullity of the value and casts it to the proper type so that the + * upstream callers donot need to worry about handling incorrect types (for example, if a double column value selector + * returns a long) + */ + @Nullable + private T getValue(ColumnValueSelector columnValueSelector) + { + if (nullityGetter.apply(columnValueSelector)) { + return null; + } + // Convert the object to the desired type + return valueGetter.apply(columnValueSelector); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/FloatGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/FloatGroupByColumnSelectorStrategy.java deleted file mode 100644 index a01c3c3bd1b8..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/FloatGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.groupby.epinephelinae.GrouperBufferComparatorUtils; -import org.apache.druid.query.ordering.StringComparator; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.DimensionHandlerUtils; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; - -public class FloatGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy -{ - - @Override - public int getGroupingKeySize() - { - return Float.BYTES; - } - - @Override - public void processValueFromGroupingKey( - GroupByColumnSelectorPlus selectorPlus, - ByteBuffer key, - ResultRow resultRow, - int keyBufferPosition - ) - { - final float val = key.getFloat(keyBufferPosition); - resultRow.set(selectorPlus.getResultRowPosition(), val); - } - - @Override - public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) - { - valuess[columnIndex] = selector.getFloat(); - return 0; - } - - @Override - public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) - { - keyBuffer.putFloat(keyBufferPosition, selector.getFloat()); - return 0; - } - - @Override - public Grouper.BufferComparator bufferComparator( - int keyBufferPosition, - @Nullable StringComparator stringComparator - ) - { - return GrouperBufferComparatorUtils.makeBufferComparatorForFloat( - keyBufferPosition, - true, - stringComparator - ); - } - - @Override - public void initGroupingKeyColumnValue( - int keyBufferPosition, - int dimensionIndex, - Object rowObj, - ByteBuffer keyBuffer, - int[] stack - ) - { - writeToKeyBuffer(keyBufferPosition, DimensionHandlerUtils.nullToZero((Float) rowObj), keyBuffer); - stack[dimensionIndex] = 1; - } - - @Override - public boolean checkRowIndexAndAddValueToGroupingKey( - int keyBufferPosition, - Object rowObj, - int rowValIdx, - ByteBuffer keyBuffer - ) - { - // rows from a float column always have a single value, multi-value is not currently supported - // this method handles row values after the first in a multivalued row, so just return false - return false; - } - - @Override - public void reset() - { - // Nothing to do. - } - - private void writeToKeyBuffer(int keyBufferPosition, float value, ByteBuffer keyBuffer) - { - keyBuffer.putFloat(keyBufferPosition, value); - } -} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/GroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/GroupByColumnSelectorStrategy.java index 26095b5a2b29..bb7ac01aafc1 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/GroupByColumnSelectorStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/GroupByColumnSelectorStrategy.java @@ -36,7 +36,7 @@ * Each GroupByColumnSelectorStrategy is associated with a single dimension. * * Strategies may have internal state, such as the dictionary maintained by - * {@link DictionaryBuildingStringGroupByColumnSelectorStrategy}. Callers should assume that the internal + * {@link DictionaryBuildingGroupByColumnSelectorStrategy}. Callers should assume that the internal * state footprint starts out empty (zero bytes) and is also reset to zero on each call to {@link #reset()}. Each call * to {@link #initColumnValues} or {@link #writeToKeyBuffer(int, ColumnValueSelector, ByteBuffer)} returns the * incremental increase in internal state footprint that happened as a result of that particular call. @@ -45,6 +45,9 @@ */ public interface GroupByColumnSelectorStrategy extends ColumnSelectorStrategy { + /** + * Index to indicate the absence of a key in the dictionary + */ int GROUP_BY_MISSING_VALUE = -1; /** @@ -54,7 +57,7 @@ public interface GroupByColumnSelectorStrategy extends ColumnSelectorStrategy * * @return size, in bytes, of this dimension's values in the grouping key. */ - int getGroupingKeySize(); + int getGroupingKeySizeBytes(); /** * Read a value from a grouping key and add it to the group by query result row, using the output name specified @@ -117,7 +120,9 @@ void initGroupingKeyColumnValue( /** * If rowValIdx is less than the size of rowObj (haven't handled all of the row values): * First, read the value at rowValIdx from a rowObj and write that value to the keyBuffer at keyBufferPosition. - * Then return true + * Then return true. + * This method assumes that the size increase associated with the dictionary building has occurred already when calling + * {@link #initColumnValues} * * Otherwise, return false. * diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/KeyMappingGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/KeyMappingGroupByColumnSelectorStrategy.java new file mode 100644 index 000000000000..350118c95cb8 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/KeyMappingGroupByColumnSelectorStrategy.java @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +import org.apache.druid.query.DimensionComparisonUtils; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.groupby.epinephelinae.Grouper; +import org.apache.druid.query.ordering.StringComparator; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.NullableTypeStrategy; + +import javax.annotation.Nullable; +import javax.annotation.concurrent.NotThreadSafe; +import java.nio.ByteBuffer; + +/** + * Strategy for grouping single value dimensions which can have variable-width objects. Materializing such objects on the buffer + * require an additional step of mapping them to an integer index. The integer index can be materialized on the buffer within + * a fixed width, and is often backed by a dictionary representing the actual dimension object. It is used for arrays, + * and complex types. + *

+ * The visibility of the class is limited, and the callers must use the following variant of the mapping strategy: + * 2. {@link DictionaryBuildingGroupByColumnSelectorStrategy} + *

+ * {@code null} can be represented by either -1 or the position of null in the dictionary it was stored when it was + * encountered. This is fine, because most of the time, the dictionary id has no value of its own, and is converted back to + * the value it represents, before doing further operations. The only place where it would matter would be when + * {@link DimensionIdCodec#canCompareIds()} is true, and we compare directly on the dictionary ids for prebuilt + * dictionaries (we can't compare ids for the dictionaries built on the fly in the grouping strategy). However, in that case, + * it is guaranteed that the dictionaryId of null represented by the pre-built dictionary would be the lowest (most likely 0) + * and therefore nulls (-1) would be adjacent to nulls (represented by the lowest non-negative dictionary id), and would get + * grouped in the later merge stages. + *

+ * It only handles single value dimensions, i.e. every type except for strings. Strings are handled by the implementations + * of {@link KeyMappingMultiValueGroupByColumnSelectorStrategy} + *

+ * It only handles non-primitive types, because numeric primitives are handled by the {@link FixedWidthGroupByColumnSelectorStrategy} + * and the string primitives are handled by the {@link KeyMappingMultiValueGroupByColumnSelectorStrategy} + * + * @param Class of the dimension + * @see DimensionIdCodec encoding decoding logic for converting value to dictionary + */ +@NotThreadSafe +class KeyMappingGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy +{ + /** + * Converts the dimension to equivalent dictionaryId. + */ + final DimensionIdCodec dimensionIdCodec; + + /** + * Type of the dimension on which the grouping strategy is used + */ + final ColumnType columnType; + + /** + * Nullable type strategy of the dimension + */ + final NullableTypeStrategy nullableTypeStrategy; + + /** + * Default value of the dimension + */ + final DimensionType defaultValue; + + KeyMappingGroupByColumnSelectorStrategy( + final DimensionIdCodec dimensionIdCodec, + final ColumnType columnType, + final NullableTypeStrategy nullableTypeStrategy, + final DimensionType defaultValue + ) + { + this.dimensionIdCodec = dimensionIdCodec; + this.columnType = columnType; + this.nullableTypeStrategy = nullableTypeStrategy; + this.defaultValue = defaultValue; + } + + /** + * Strategy maps to integer dictionary ids + */ + @Override + public int getGroupingKeySizeBytes() + { + return Integer.BYTES; + } + + @Override + public void processValueFromGroupingKey( + GroupByColumnSelectorPlus selectorPlus, + ByteBuffer key, + ResultRow resultRow, + int keyBufferPosition + ) + { + final int id = key.getInt(keyBufferPosition); + if (id != GROUP_BY_MISSING_VALUE) { + resultRow.set(selectorPlus.getResultRowPosition(), dimensionIdCodec.idToKey(id)); + } else { + resultRow.set(selectorPlus.getResultRowPosition(), defaultValue); + } + } + + @Override + public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) + { + //noinspection unchecked + final DimensionType value = (DimensionType) DimensionHandlerUtils.convertObjectToType( + selector.getObject(), + columnType + ); + if (value == null) { + valuess[columnIndex] = GROUP_BY_MISSING_VALUE; + return 0; + } else { + MemoryFootprint idAndMemoryFootprint = dimensionIdCodec.lookupId(value); + valuess[columnIndex] = idAndMemoryFootprint.value(); + return idAndMemoryFootprint.memoryIncrease(); + } + } + + @Override + public void initGroupingKeyColumnValue( + int keyBufferPosition, + int dimensionIndex, + Object rowObj, + ByteBuffer keyBuffer, + int[] stack + ) + { + // It is always called with the dictionaryId that we'd have initialized + //noinspection unchecked + int dictId = (int) rowObj; + keyBuffer.putInt(keyBufferPosition, dictId); + if (dictId == GROUP_BY_MISSING_VALUE) { + stack[dimensionIndex] = 0; + } else { + stack[dimensionIndex] = 1; + } + } + + // The method is only used for single value dimensions, therefore doesn't have any actual implementation of this + // method, which is only called for multi-value dimensions + @Override + public boolean checkRowIndexAndAddValueToGroupingKey( + int keyBufferPosition, + Object rowObj, + int rowValIdx, + ByteBuffer keyBuffer + ) + { + return false; + } + + @Override + public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) + { + //noinspection unchecked + final DimensionType value = (DimensionType) DimensionHandlerUtils.convertObjectToType( + selector.getObject(), + columnType + ); + final int memoryIncrease; + if (value == null) { + keyBuffer.putInt(keyBufferPosition, GROUP_BY_MISSING_VALUE); + return 0; + } else { + MemoryFootprint idAndMemoryIncrease = dimensionIdCodec.lookupId(value); + keyBuffer.putInt(keyBufferPosition, idAndMemoryIncrease.value()); + memoryIncrease = idAndMemoryIncrease.memoryIncrease(); + } + return memoryIncrease; + } + + @Override + public Grouper.BufferComparator bufferComparator(int keyBufferPosition, @Nullable StringComparator stringComparator) + { + boolean usesNaturalComparator = + stringComparator == null + || DimensionComparisonUtils.isNaturalComparator(columnType.getType(), stringComparator); + if (dimensionIdCodec.canCompareIds() && usesNaturalComparator) { + return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> Integer.compare( + lhsBuffer.getInt(lhsPosition + keyBufferPosition), + rhsBuffer.getInt(rhsPosition + keyBufferPosition) + ); + } else { + return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> { + int lhsDictId = lhsBuffer.getInt(lhsPosition + keyBufferPosition); + int rhsDictId = rhsBuffer.getInt(rhsPosition + keyBufferPosition); + + Object lhsObject = lhsDictId == GROUP_BY_MISSING_VALUE ? null : dimensionIdCodec.idToKey(lhsDictId); + Object rhsObject = rhsDictId == GROUP_BY_MISSING_VALUE ? null : dimensionIdCodec.idToKey(rhsDictId); + if (usesNaturalComparator) { + return nullableTypeStrategy.compare( + (DimensionType) DimensionHandlerUtils.convertObjectToType(lhsObject, columnType), + (DimensionType) DimensionHandlerUtils.convertObjectToType(rhsObject, columnType) + ); + } else { + return stringComparator.compare(String.valueOf(lhsObject), String.valueOf(rhsObject)); + } + }; + } + } + + @Override + public void reset() + { + dimensionIdCodec.reset(); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/KeyMappingMultiValueGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/KeyMappingMultiValueGroupByColumnSelectorStrategy.java new file mode 100644 index 000000000000..aac9f2f14e7a --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/KeyMappingMultiValueGroupByColumnSelectorStrategy.java @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +import com.google.common.base.Preconditions; +import it.unimi.dsi.fastutil.objects.Object2IntMap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils; +import org.apache.druid.query.groupby.epinephelinae.Grouper; +import org.apache.druid.query.ordering.StringComparator; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionDictionary; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.data.ArrayBasedIndexedInts; +import org.apache.druid.segment.data.IndexedInts; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.function.IntFunction; + +/** + * Like {@link KeyMappingGroupByColumnSelectorStrategy}, but for multi-value dimensions, i.e. strings. It can only handle + * {@link DimensionSelector} + */ +public abstract class KeyMappingMultiValueGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy +{ + + public static GroupByColumnSelectorStrategy create( + ColumnCapabilities capabilities, + DimensionSelector dimensionSelector + ) + { + if (dimensionSelector.getValueCardinality() >= 0 && dimensionSelector.nameLookupPossibleInAdvance()) { + return new PrebuiltDictionary(capabilities, dimensionSelector::lookupName); + } + return new DictionaryBuilding(); + } + + @Override + public int getGroupingKeySizeBytes() + { + return Integer.BYTES; + } + + + @Override + public void initGroupingKeyColumnValue( + int keyBufferPosition, + int dimensionIndex, + Object rowObj, + ByteBuffer keyBuffer, + int[] stack + ) + { + IndexedInts row = (IndexedInts) rowObj; + int rowSize = row.size(); + + keyBuffer.putInt( + keyBufferPosition, + rowSize == 0 ? GROUP_BY_MISSING_VALUE : row.get(0) + ); + + stack[dimensionIndex] = rowSize == 0 ? 0 : 1; + } + + @Override + public boolean checkRowIndexAndAddValueToGroupingKey( + int keyBufferPosition, + Object rowObj, + int rowValIdx, + ByteBuffer keyBuffer + ) + { + IndexedInts row = (IndexedInts) rowObj; + int rowSize = row.size(); + + if (rowValIdx < rowSize) { + keyBuffer.putInt( + keyBufferPosition, + row.get(rowValIdx) + ); + return true; + } else { + return false; + } + } + + public static class PrebuiltDictionary extends KeyMappingMultiValueGroupByColumnSelectorStrategy + { + private final ColumnCapabilities capabilities; + private final IntFunction dictionaryLookup; + + public PrebuiltDictionary( + ColumnCapabilities capabilities, + IntFunction dictionaryLookup + ) + { + this.capabilities = capabilities; + this.dictionaryLookup = dictionaryLookup; + } + + @Override + public void processValueFromGroupingKey( + GroupByColumnSelectorPlus selectorPlus, + ByteBuffer key, ResultRow resultRow, + int keyBufferPosition + ) + { + final int id = key.getInt(keyBufferPosition); + if (id != GROUP_BY_MISSING_VALUE) { + resultRow.set( + selectorPlus.getResultRowPosition(), + ((DimensionSelector) selectorPlus.getSelector()).lookupName(id) + ); + } else { + // Since this is used for String dimensions only, we can directly put the default string value here + resultRow.set(selectorPlus.getResultRowPosition(), NullHandling.defaultStringValue()); + } + } + + @Override + public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) + { + DimensionSelector dimSelector = (DimensionSelector) selector; + IndexedInts row = dimSelector.getRow(); + valuess[columnIndex] = row; + return 0; + } + + @Override + public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) + { + final DimensionSelector dimSelector = (DimensionSelector) selector; + final IndexedInts row = dimSelector.getRow(); + Preconditions.checkState(row.size() < 2, "Not supported for multi-value dimensions"); + final int dictId = row.size() == 1 ? row.get(0) : GROUP_BY_MISSING_VALUE; + keyBuffer.putInt(keyBufferPosition, dictId); + return 0; + } + + @Override + public Grouper.BufferComparator bufferComparator( + int keyBufferPosition, + @Nullable StringComparator stringComparator + ) + { + final boolean canCompareInts = + capabilities != null && + capabilities.isDictionaryEncoded().and( + capabilities.areDictionaryValuesSorted().and( + capabilities.areDictionaryValuesUnique() + ) + ).isTrue(); + + final StringComparator comparator = stringComparator == null ? StringComparators.LEXICOGRAPHIC : stringComparator; + if (canCompareInts && StringComparators.LEXICOGRAPHIC.equals(comparator)) { + return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> Integer.compare( + lhsBuffer.getInt(lhsPosition + keyBufferPosition), + rhsBuffer.getInt(rhsPosition + keyBufferPosition) + ); + } else { + Preconditions.checkState(dictionaryLookup != null, "null dictionary lookup"); + return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> { + String lhsStr = dictionaryLookup.apply(lhsBuffer.getInt(lhsPosition + keyBufferPosition)); + String rhsStr = dictionaryLookup.apply(rhsBuffer.getInt(rhsPosition + keyBufferPosition)); + return comparator.compare(lhsStr, rhsStr); + }; + } + } + + @Override + public void reset() + { + // Nothing to do. + } + } + + public static class DictionaryBuilding extends KeyMappingMultiValueGroupByColumnSelectorStrategy + { + + private final List dictionary = DictionaryBuildingUtils.createDictionary(); + private final Object2IntMap reverseDictionary = DictionaryBuildingUtils.createReverseDictionary(ColumnType.STRING.getNullableStrategy()); + + @Override + public void processValueFromGroupingKey( + GroupByColumnSelectorPlus selectorPlus, + ByteBuffer key, + ResultRow resultRow, + int keyBufferPosition + ) + { + final int id = key.getInt(keyBufferPosition); + + // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. + if (id != GROUP_BY_MISSING_VALUE) { + final String value = dictionary.get(id); + resultRow.set(selectorPlus.getResultRowPosition(), value); + } else { + resultRow.set(selectorPlus.getResultRowPosition(), NullHandling.defaultStringValue()); + } + } + + @Override + public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) + { + final DimensionSelector dimSelector = (DimensionSelector) selector; + final IndexedInts row = dimSelector.getRow(); + int stateFootprintIncrease = 0; + ArrayBasedIndexedInts newRow = (ArrayBasedIndexedInts) valuess[columnIndex]; + if (newRow == null) { + newRow = new ArrayBasedIndexedInts(); + valuess[columnIndex] = newRow; + } + int rowSize = row.size(); + newRow.ensureSize(rowSize); + for (int i = 0; i < rowSize; i++) { + final String value = dimSelector.lookupName(row.get(i)); + final int dictId = reverseDictionary.getInt(value); + if (dictId < 0) { + final int nextId = dictionary.size(); + dictionary.add(value); + reverseDictionary.put(value, nextId); + newRow.setValue(i, nextId); + stateFootprintIncrease += + DictionaryBuildingUtils.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES); + } else { + newRow.setValue(i, dictId); + } + } + newRow.setSize(rowSize); + return stateFootprintIncrease; + } + + @Override + public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) + { + final DimensionSelector dimSelector = (DimensionSelector) selector; + final IndexedInts row = dimSelector.getRow(); + + Preconditions.checkState(row.size() < 2, "Not supported for multi-value dimensions"); + + if (row.size() == 0) { + keyBuffer.putInt(keyBufferPosition, GROUP_BY_MISSING_VALUE); + return 0; + } + + final String value = dimSelector.lookupName(row.get(0)); + final int dictId = reverseDictionary.getInt(value); + if (dictId == DimensionDictionary.ABSENT_VALUE_ID) { + final int nextId = dictionary.size(); + dictionary.add(value); + reverseDictionary.put(value, nextId); + keyBuffer.putInt(keyBufferPosition, nextId); + return DictionaryBuildingUtils.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES); + } else { + keyBuffer.putInt(keyBufferPosition, dictId); + return 0; + } + } + + @Override + public Grouper.BufferComparator bufferComparator(int keyBufferPosition, @Nullable StringComparator stringComparator) + { + final StringComparator realComparator = stringComparator == null ? + StringComparators.LEXICOGRAPHIC : + stringComparator; + return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> { + String lhsStr = dictionary.get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)); + String rhsStr = dictionary.get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)); + return realComparator.compare(lhsStr, rhsStr); + }; + } + + @Override + public void reset() + { + dictionary.clear(); + reverseDictionary.clear(); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/LongGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/LongGroupByColumnSelectorStrategy.java deleted file mode 100644 index 95d57e03da15..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/LongGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.groupby.epinephelinae.GrouperBufferComparatorUtils; -import org.apache.druid.query.ordering.StringComparator; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.DimensionHandlerUtils; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; - -public class LongGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy -{ - - @Override - public int getGroupingKeySize() - { - return Long.BYTES; - } - - @Override - public void processValueFromGroupingKey( - GroupByColumnSelectorPlus selectorPlus, - ByteBuffer key, - ResultRow resultRow, - int keyBufferPosition - ) - { - final long val = key.getLong(keyBufferPosition); - resultRow.set(selectorPlus.getResultRowPosition(), val); - } - - @Override - public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) - { - valuess[columnIndex] = selector.getLong(); - return 0; - } - - @Override - public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) - { - keyBuffer.putLong(keyBufferPosition, selector.getLong()); - return 0; - } - - @Override - public Grouper.BufferComparator bufferComparator( - int keyBufferPosition, - @Nullable StringComparator stringComparator - ) - { - return GrouperBufferComparatorUtils.makeBufferComparatorForLong( - keyBufferPosition, - true, - stringComparator - ); - } - - @Override - public void initGroupingKeyColumnValue( - int keyBufferPosition, - int dimensionIndex, - Object rowObj, - ByteBuffer keyBuffer, - int[] stack - ) - { - writeToKeyBuffer(keyBufferPosition, DimensionHandlerUtils.nullToZero((Long) rowObj), keyBuffer); - stack[dimensionIndex] = 1; - } - - @Override - public boolean checkRowIndexAndAddValueToGroupingKey( - int keyBufferPosition, - Object rowObj, - int rowValIdx, - ByteBuffer keyBuffer - ) - { - // rows from a long column always have a single value, multi-value is not currently supported - // this method handles row values after the first in a multivalued row, so just return false - return false; - } - - @Override - public void reset() - { - // Nothing to do. - } - - public void writeToKeyBuffer(int keyBufferPosition, long value, ByteBuffer keyBuffer) - { - keyBuffer.putLong(keyBufferPosition, value); - } -} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java new file mode 100644 index 000000000000..64303770d623 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +/** + * Holder for a value and the memory increase in the internal dictionary associated with the increase + */ +public class MemoryFootprint +{ + private final T value; + private final int footprintIncrease; + + // Reduced visibility + MemoryFootprint(T value, int footprintIncrease) + { + this.value = value; + this.footprintIncrease = footprintIncrease; + } + + public T value() + { + return value; + } + + public int memoryIncrease() + { + return footprintIncrease; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/NullableNumericGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/NullableNumericGroupByColumnSelectorStrategy.java deleted file mode 100644 index 34af76211397..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/NullableNumericGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.groupby.epinephelinae.GrouperBufferComparatorUtils; -import org.apache.druid.query.ordering.StringComparator; -import org.apache.druid.segment.ColumnValueSelector; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; - -/** - * A wrapper around a numeric {@link GroupByColumnSelectorStrategy} that makes it null-aware. Should only be used - * for numeric strategies, not for string strategies. - * - * @see org.apache.druid.segment.BaseNullableColumnValueSelector#isNull() for why this only works in the numeric case - */ -public class NullableNumericGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy -{ - private final GroupByColumnSelectorStrategy delegate; - private final byte[] nullKeyBytes; - - public NullableNumericGroupByColumnSelectorStrategy(GroupByColumnSelectorStrategy delegate) - { - this.delegate = delegate; - this.nullKeyBytes = new byte[delegate.getGroupingKeySize() + 1]; - this.nullKeyBytes[0] = NullHandling.IS_NULL_BYTE; - } - - @Override - public int getGroupingKeySize() - { - return delegate.getGroupingKeySize() + Byte.BYTES; - } - - @Override - public void processValueFromGroupingKey( - GroupByColumnSelectorPlus selectorPlus, - ByteBuffer key, - ResultRow resultRow, - int keyBufferPosition - ) - { - if (key.get(keyBufferPosition) == NullHandling.IS_NULL_BYTE) { - resultRow.set(selectorPlus.getResultRowPosition(), null); - } else { - delegate.processValueFromGroupingKey(selectorPlus, key, resultRow, keyBufferPosition + Byte.BYTES); - } - } - - @Override - public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] values) - { - if (selector.isNull()) { - values[columnIndex] = null; - return 0; - } else { - return delegate.initColumnValues(selector, columnIndex, values); - } - } - - @Override - public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) - { - if (selector.isNull()) { - keyBuffer.position(keyBufferPosition); - keyBuffer.put(nullKeyBytes); - return 0; - } else { - keyBuffer.put(keyBufferPosition, NullHandling.IS_NOT_NULL_BYTE); - return delegate.writeToKeyBuffer(keyBufferPosition + Byte.BYTES, selector, keyBuffer); - } - } - - @Override - public Grouper.BufferComparator bufferComparator( - int keyBufferPosition, - @Nullable StringComparator stringComparator - ) - { - return GrouperBufferComparatorUtils.makeNullHandlingBufferComparatorForNumericData( - keyBufferPosition, - delegate.bufferComparator(keyBufferPosition + Byte.BYTES, stringComparator) - ); - } - - @Override - public void initGroupingKeyColumnValue( - int keyBufferPosition, - int dimensionIndex, - Object rowObj, - ByteBuffer keyBuffer, - int[] stack - ) - { - if (rowObj == null) { - keyBuffer.position(keyBufferPosition); - keyBuffer.put(nullKeyBytes); - } else { - keyBuffer.put(keyBufferPosition, NullHandling.IS_NOT_NULL_BYTE); - - // No need to update stack ourselves; we expect the delegate to do this. - delegate.initGroupingKeyColumnValue( - keyBufferPosition + Byte.BYTES, - dimensionIndex, - rowObj, - keyBuffer, - stack - ); - } - } - - @Override - public boolean checkRowIndexAndAddValueToGroupingKey( - int keyBufferPosition, - Object rowObj, - int rowValIdx, - ByteBuffer keyBuffer - ) - { - // rows from a nullable column always have a single value, multi-value is not currently supported - // this method handles row values after the first in a multivalued row, so just return false - return false; - } - - @Override - public void reset() - { - delegate.reset(); - } -} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/StringGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/StringGroupByColumnSelectorStrategy.java deleted file mode 100644 index 8c25c775d71d..000000000000 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/StringGroupByColumnSelectorStrategy.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import com.google.common.base.Preconditions; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.ordering.StringComparator; -import org.apache.druid.query.ordering.StringComparators; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.DimensionSelector; -import org.apache.druid.segment.column.ColumnCapabilities; -import org.apache.druid.segment.data.IndexedInts; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; -import java.util.function.IntFunction; - -public class StringGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy -{ - @Nullable - private final ColumnCapabilities capabilities; - - @Nullable - private final IntFunction dictionaryLookup; - - public StringGroupByColumnSelectorStrategy(IntFunction dictionaryLookup, ColumnCapabilities capabilities) - { - this.dictionaryLookup = dictionaryLookup; - this.capabilities = capabilities; - } - - @Override - public int getGroupingKeySize() - { - return Integer.BYTES; - } - - @Override - public void processValueFromGroupingKey( - GroupByColumnSelectorPlus selectorPlus, - ByteBuffer key, - ResultRow resultRow, - int keyBufferPosition - ) - { - final int id = key.getInt(keyBufferPosition); - - // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. - if (id != GROUP_BY_MISSING_VALUE) { - resultRow.set( - selectorPlus.getResultRowPosition(), - ((DimensionSelector) selectorPlus.getSelector()).lookupName(id) - ); - } else { - resultRow.set(selectorPlus.getResultRowPosition(), NullHandling.defaultStringValue()); - } - } - - @Override - public int initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) - { - DimensionSelector dimSelector = (DimensionSelector) selector; - IndexedInts row = dimSelector.getRow(); - valuess[columnIndex] = row; - return 0; - } - - /** - * Writes a dictionary ID to the grouping key. - * - * Protected so subclasses can access it, like {@link DictionaryBuildingStringGroupByColumnSelectorStrategy}. - */ - protected void writeToKeyBuffer(int keyBufferPosition, int dictId, ByteBuffer keyBuffer) - { - keyBuffer.putInt(keyBufferPosition, dictId); - } - - @Override - public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer) - { - final DimensionSelector dimSelector = (DimensionSelector) selector; - final IndexedInts row = dimSelector.getRow(); - Preconditions.checkState(row.size() < 2, "Not supported for multi-value dimensions"); - final int dictId = row.size() == 1 ? row.get(0) : GROUP_BY_MISSING_VALUE; - keyBuffer.putInt(keyBufferPosition, dictId); - return 0; - } - - @Override - public void initGroupingKeyColumnValue( - int keyBufferPosition, - int dimensionIndex, - Object rowObj, - ByteBuffer keyBuffer, - int[] stack - ) - { - IndexedInts row = (IndexedInts) rowObj; - int rowSize = row.size(); - - initializeGroupingKeyV2Dimension(row, rowSize, keyBuffer, keyBufferPosition); - stack[dimensionIndex] = rowSize == 0 ? 0 : 1; - } - - @Override - public boolean checkRowIndexAndAddValueToGroupingKey( - int keyBufferPosition, - Object rowObj, - int rowValIdx, - ByteBuffer keyBuffer - ) - { - IndexedInts row = (IndexedInts) rowObj; - int rowSize = row.size(); - - if (rowValIdx < rowSize) { - keyBuffer.putInt( - keyBufferPosition, - row.get(rowValIdx) - ); - return true; - } else { - return false; - } - } - - private void initializeGroupingKeyV2Dimension( - final IndexedInts values, - final int rowSize, - final ByteBuffer keyBuffer, - final int keyBufferPosition - ) - { - if (rowSize == 0) { - keyBuffer.putInt(keyBufferPosition, GROUP_BY_MISSING_VALUE); - } else { - keyBuffer.putInt(keyBufferPosition, values.get(0)); - } - } - - @Override - public Grouper.BufferComparator bufferComparator(int keyBufferPosition, @Nullable StringComparator stringComparator) - { - final boolean canCompareInts = - capabilities != null && - capabilities.hasBitmapIndexes() && - capabilities.areDictionaryValuesSorted().and(capabilities.areDictionaryValuesUnique()).isTrue(); - final StringComparator comparator = stringComparator == null ? StringComparators.LEXICOGRAPHIC : stringComparator; - if (canCompareInts && StringComparators.LEXICOGRAPHIC.equals(comparator)) { - return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> Integer.compare( - lhsBuffer.getInt(lhsPosition + keyBufferPosition), - rhsBuffer.getInt(rhsPosition + keyBufferPosition) - ); - } else { - Preconditions.checkState(dictionaryLookup != null, "null dictionary lookup"); - return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> { - String lhsStr = dictionaryLookup.apply(lhsBuffer.getInt(lhsPosition + keyBufferPosition)); - String rhsStr = dictionaryLookup.apply(rhsBuffer.getInt(rhsPosition + keyBufferPosition)); - return comparator.compare(lhsStr, rhsStr); - }; - } - } - - @Override - public void reset() - { - // Nothing to do. - } -} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java index 83f49e1c834e..393c7173ebb4 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java @@ -23,7 +23,7 @@ import org.apache.datasketches.memory.WritableMemory; import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.DictionaryBuilding; +import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils; import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -36,7 +36,8 @@ * single-valued STRING columns which are not natively dictionary encoded, e.g. expression virtual columns. * * This is effectively the {@link VectorGroupByEngine} analog of - * {@link org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingStringGroupByColumnSelectorStrategy} + * {@link org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy} for + * String columns */ public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector implements GroupByVectorColumnSelector { @@ -82,7 +83,7 @@ public int writeKeys( // Use same ROUGH_OVERHEAD_PER_DICTIONARY_ENTRY as the nonvectorized version; dictionary structure is the same. stateFootprintIncrease += - DictionaryBuilding.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES); + DictionaryBuildingUtils.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES); } else { keySpace.putInt(j, dictId); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index a87723732c69..a2fc9cec8a6e 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -219,8 +219,8 @@ private static boolean canVectorizeDimensions( return false; } - if (dimension.getOutputType().isArray()) { - // group by on arrays is not currently supported in the vector processing engine + if (!dimension.getOutputType().isPrimitive()) { + // group by on arrays and complex types is not currently supported in the vector processing engine return false; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/orderby/DefaultLimitSpec.java b/processing/src/main/java/org/apache/druid/query/groupby/orderby/DefaultLimitSpec.java index 6f7fab563810..a34d58b6d140 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/orderby/DefaultLimitSpec.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/orderby/DefaultLimitSpec.java @@ -478,16 +478,33 @@ private Ordering dimensionOrdering( throw new ISE("Cannot create comparator for array type %s.", columnType.toString()); } } + final Comparator comparatorToUse; + if (arrayComparator != null) { + comparatorToUse = arrayComparator; + } else { + comparatorToUse = DimensionComparisonUtils.isNaturalComparator(columnType.getType(), stringComparator) + ? columnType.getNullableStrategy() + : stringComparator; + } + return Ordering.from( Comparator.comparing( (ResultRow row) -> { if (columnType.isArray()) { + // Arrays have a specialized comparator, that applies the ordering per element. That will handle the casting + // and the comparison + return row.get(column); + } else if (DimensionComparisonUtils.isNaturalComparator(columnType.getType(), stringComparator)) { + // If the natural comparator is used, we can directly extract the dimension value, and the type strategy's comparison + // function will handle it, without casting return row.get(column); } else { + // If the comparator is not natural, we will be using the string comparator, and we need to cast the dimension to string + // before comparison return getDimensionValue(row, column); } }, - Comparator.nullsFirst(arrayComparator == null ? stringComparator : arrayComparator) + Comparator.nullsFirst(comparatorToUse) ) ); } diff --git a/processing/src/main/java/org/apache/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/org/apache/druid/query/search/SearchQueryRunner.java index 776c115408e9..49d9a5ac3e24 100644 --- a/processing/src/main/java/org/apache/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/org/apache/druid/query/search/SearchQueryRunner.java @@ -68,7 +68,8 @@ private static class SearchColumnSelectorStrategyFactory @Override public SearchColumnSelectorStrategy makeColumnSelectorStrategy( ColumnCapabilities capabilities, - ColumnValueSelector selector + ColumnValueSelector selector, + String dimension ) { switch (capabilities.getType()) { @@ -84,6 +85,12 @@ public SearchColumnSelectorStrategy makeColumnSelectorStrategy( throw new IAE("Cannot create query type helper from invalid type [%s]", capabilities.asTypeString()); } } + + @Override + public boolean supportsComplexTypes() + { + return false; + } } public interface SearchColumnSelectorStrategy extends ColumnSelectorStrategy diff --git a/processing/src/main/java/org/apache/druid/query/topn/types/TopNColumnAggregatesProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/topn/types/TopNColumnAggregatesProcessorFactory.java index 0eafee26af72..e921e1231f83 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/types/TopNColumnAggregatesProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/topn/types/TopNColumnAggregatesProcessorFactory.java @@ -43,7 +43,8 @@ public TopNColumnAggregatesProcessorFactory(final ColumnType dimensionType) @Override public TopNColumnAggregatesProcessor makeColumnSelectorStrategy( ColumnCapabilities capabilities, - ColumnValueSelector selector + ColumnValueSelector selector, + String dimension ) { if (capabilities.is(ValueType.STRING)) { @@ -78,4 +79,10 @@ public TopNColumnAggregatesProcessor makeColumnSelectorStrategy( throw new IAE("Cannot create query type helper from invalid type [%s]", capabilities.asTypeString()); } + + @Override + public boolean supportsComplexTypes() + { + return false; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 1ca911a95448..da63413cb4ba 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -25,6 +25,7 @@ import com.google.common.primitives.Floats; import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.parsers.ParseException; @@ -201,7 +202,8 @@ public static ColumnSelectorPlus selector = getColumnValueSelectorFromDimensionSpec( dimSpec, - columnSelectorFactory + columnSelectorFactory, + strategyFactory.supportsComplexTypes() ); Strategy strategy = makeStrategy( strategyFactory, @@ -222,12 +224,13 @@ public static ColumnSelectorPlus getColumnValueSelectorFromDimensionSpec( DimensionSpec dimSpec, - ColumnSelectorFactory columnSelectorFactory + ColumnSelectorFactory columnSelectorFactory, + boolean supportsComplexTypes ) { String dimName = dimSpec.getDimension(); ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(dimName); - capabilities = getEffectiveCapabilities(dimSpec, capabilities); + capabilities = getEffectiveCapabilities(dimSpec, capabilities, supportsComplexTypes); if (capabilities.is(ValueType.STRING)) { return columnSelectorFactory.makeDimensionSelector(dimSpec); } @@ -241,7 +244,8 @@ private static ColumnValueSelector getColumnValueSelectorFromDimensionSpec( */ private static ColumnCapabilities getEffectiveCapabilities( DimensionSpec dimSpec, - @Nullable ColumnCapabilities capabilities + @Nullable ColumnCapabilities capabilities, + boolean supportsComplexTypes ) { if (capabilities == null) { @@ -249,7 +253,7 @@ private static ColumnCapabilities getEffectiveCapabilities( } // Complex dimension type is not supported - if (capabilities.is(ValueType.COMPLEX)) { + if (!supportsComplexTypes && capabilities.is(ValueType.COMPLEX)) { capabilities = DEFAULT_STRING_CAPABILITIES; } @@ -289,8 +293,8 @@ private static Strategy makeStrategy( ColumnValueSelector selector ) { - capabilities = getEffectiveCapabilities(dimSpec, capabilities); - return strategyFactory.makeColumnSelectorStrategy(capabilities, selector); + capabilities = getEffectiveCapabilities(dimSpec, capabilities, strategyFactory.supportsComplexTypes()); + return strategyFactory.makeColumnSelectorStrategy(capabilities, selector, dimSpec.getDimension()); } @Nullable @@ -403,19 +407,16 @@ public static Object convertObjectToType( case STRING: return convertObjectToString(obj); case ARRAY: - switch (type.getElementType().getType()) { - case STRING: - return coerceToStringArray(obj); - case LONG: - return coerceToObjectArrayWithElementCoercionFunction(obj, DimensionHandlerUtils::convertObjectToLong); - case FLOAT: - return coerceToObjectArrayWithElementCoercionFunction(obj, DimensionHandlerUtils::convertObjectToFloat); - case DOUBLE: - return coerceToObjectArrayWithElementCoercionFunction(obj, DimensionHandlerUtils::convertObjectToDouble); - } - + return coerceToObjectArrayWithElementCoercionFunction( + obj, + x -> DimensionHandlerUtils.convertObjectToType(x, type.getElementType()) + ); + case COMPLEX: + // Can't coerce complex objects, and we shouldn't need to. If in future selectors behave weirdly, or we need to + // cast them (for some unknown reason), we can have that casting knowledge in the type strategy + return obj; default: - throw new IAE("Type[%s] is not supported for dimensions!", type); + throw DruidException.defensive("Type[%s] is not supported for dimensions!", type); } } @@ -426,8 +427,9 @@ public static Object[] convertToArray(Object obj, TypeSignature eleme } + @Nullable public static Object[] coerceToObjectArrayWithElementCoercionFunction( - Object obj, + @Nullable Object obj, Function coercionFunction ) { diff --git a/processing/src/main/java/org/apache/druid/segment/column/NullableTypeStrategy.java b/processing/src/main/java/org/apache/druid/segment/column/NullableTypeStrategy.java index f7ed0298cc55..044a092a3020 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/NullableTypeStrategy.java +++ b/processing/src/main/java/org/apache/druid/segment/column/NullableTypeStrategy.java @@ -19,8 +19,10 @@ package org.apache.druid.segment.column; +import it.unimi.dsi.fastutil.Hash; import org.apache.druid.common.config.NullHandling; +import javax.annotation.CheckReturnValue; import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.Comparator; @@ -37,7 +39,7 @@ * * @see TypeStrategy */ -public final class NullableTypeStrategy implements Comparator +public final class NullableTypeStrategy implements Comparator, Hash.Strategy { private final TypeStrategy delegate; private final Comparator delegateComparator; @@ -66,6 +68,7 @@ public T read(ByteBuffer buffer) return delegate.read(buffer); } + @CheckReturnValue public int write(ByteBuffer buffer, @Nullable T value, int maxSizeBytes) { final int max = Math.min(buffer.limit() - buffer.position(), maxSizeBytes); @@ -112,6 +115,7 @@ public boolean readRetainsBufferReference() return delegate.readRetainsBufferReference(); } + @CheckReturnValue public int write(ByteBuffer buffer, int offset, @Nullable T value, int maxSizeBytes) { final int oldPosition = buffer.position(); @@ -129,4 +133,24 @@ public int compare(T o1, T o2) { return delegateComparator.compare(o1, o2); } + + public boolean groupable() + { + return delegate.groupable(); + } + + @Override + public int hashCode(@Nullable T o) + { + return o == null ? 0 : delegate.hashCode(o); + } + + @Override + public boolean equals(@Nullable T a, @Nullable T b) + { + if (a == null) { + return b == null; + } + return b != null && delegate.equals(a, b); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ObjectStrategyComplexTypeStrategy.java b/processing/src/main/java/org/apache/druid/segment/column/ObjectStrategyComplexTypeStrategy.java index d40ee5dee8e1..267477e52319 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ObjectStrategyComplexTypeStrategy.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ObjectStrategyComplexTypeStrategy.java @@ -19,6 +19,8 @@ package org.apache.druid.segment.column; +import it.unimi.dsi.fastutil.Hash; +import org.apache.druid.error.DruidException; import org.apache.druid.segment.data.ObjectStrategy; import javax.annotation.Nullable; @@ -27,7 +29,7 @@ /** * Default implementation of {@link TypeStrategy} for all {@link org.apache.druid.segment.serde.ComplexMetricSerde} * implementations that just wraps the {@link ObjectStrategy} they are required to implement. - * + *

* This is not likely to be the most efficient way to do things, especially since writing must first produce a byte * array before it can be written to the buffer, but it is cheap and should work correctly, which is important. */ @@ -35,11 +37,24 @@ public class ObjectStrategyComplexTypeStrategy implements TypeStrategy { private final ObjectStrategy objectStrategy; private final TypeSignature typeSignature; + @Nullable + private final Hash.Strategy hashStrategy; public ObjectStrategyComplexTypeStrategy(ObjectStrategy objectStrategy, TypeSignature signature) + { + this(objectStrategy, signature, null); + } + + public ObjectStrategyComplexTypeStrategy( + ObjectStrategy objectStrategy, + TypeSignature signature, + @Nullable final Hash.Strategy hashStrategy + ) { this.objectStrategy = objectStrategy; this.typeSignature = signature; + this.hashStrategy = hashStrategy; + } @Override @@ -94,4 +109,28 @@ public T fromBytes(byte[] value) { return objectStrategy.fromByteBufferSafe(ByteBuffer.wrap(value), value.length); } + + @Override + public boolean groupable() + { + return hashStrategy != null; + } + + @Override + public int hashCode(T o) + { + if (hashStrategy == null) { + throw DruidException.defensive("hashStrategy not provided"); + } + return hashStrategy.hashCode(o); + } + + @Override + public boolean equals(T a, T b) + { + if (hashStrategy == null) { + throw DruidException.defensive("hashStrategy not provided"); + } + return hashStrategy.equals(a, b); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java index 3afcfdb7074c..bae29179b4d5 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java +++ b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java @@ -257,6 +257,12 @@ public Long read(ByteBuffer buffer, int offset) return buffer.getLong(offset); } + @Override + public boolean groupable() + { + return true; + } + @Override public boolean readRetainsBufferReference() { @@ -281,6 +287,18 @@ public int compare(Object o1, Object o2) { return Longs.compare(((Number) o1).longValue(), ((Number) o2).longValue()); } + + @Override + public int hashCode(Long o) + { + return o.hashCode(); + } + + @Override + public boolean equals(Long a, Long b) + { + return a.equals(b); + } } /** @@ -308,6 +326,12 @@ public Float read(ByteBuffer buffer, int offset) return buffer.getFloat(offset); } + @Override + public boolean groupable() + { + return true; + } + @Override public boolean readRetainsBufferReference() { @@ -332,6 +356,18 @@ public int compare(Object o1, Object o2) { return Floats.compare(((Number) o1).floatValue(), ((Number) o2).floatValue()); } + + @Override + public int hashCode(Float o) + { + return o.hashCode(); + } + + @Override + public boolean equals(Float a, Float b) + { + return a.equals(b); + } } /** @@ -360,6 +396,12 @@ public Double read(ByteBuffer buffer, int offset) return buffer.getDouble(offset); } + @Override + public boolean groupable() + { + return true; + } + @Override public boolean readRetainsBufferReference() { @@ -384,6 +426,18 @@ public int compare(Object o1, Object o2) { return Double.compare(((Number) o1).doubleValue(), ((Number) o2).doubleValue()); } + + @Override + public int hashCode(Double o) + { + return o.hashCode(); + } + + @Override + public boolean equals(Double a, Double b) + { + return a.equals(b); + } } /** @@ -434,6 +488,12 @@ public int write(ByteBuffer buffer, String value, int maxSizeBytes) return remaining; } + @Override + public boolean groupable() + { + return true; + } + @Override public int compare(Object s, Object s2) { @@ -447,6 +507,18 @@ public int compare(Object s, Object s2) return ORDERING.compare((String) s, (String) s2); } + + @Override + public int hashCode(String o) + { + return o.hashCode(); + } + + @Override + public boolean equals(String a, String b) + { + return a.equals(b); + } } /** @@ -518,6 +590,12 @@ public int write(ByteBuffer buffer, Object[] value, int maxSizeBytes) return extraNeeded < 0 ? extraNeeded : sizeBytes; } + @Override + public boolean groupable() + { + return elementStrategy.groupable(); + } + @Override public int compare(@Nullable Object o1Obj, @Nullable Object o2Obj) { @@ -544,5 +622,47 @@ public int compare(@Nullable Object o1Obj, @Nullable Object o2Obj) } return Integer.compare(o1.length, o2.length); } + + /** + * Implements {@link Arrays#hashCode(Object[])} but the element hashing uses the element's type strategy + */ + @Override + public int hashCode(Object[] o) + { + if (o == null) { + return 0; + } else { + int result = 1; + for (Object element : o) { + result = 31 * result + (element == null ? 0 : elementStrategy.hashCode(element)); + } + return result; + } + } + /** + * Implements {@link Arrays#equals} but the element equality uses the element's type strategy + */ + @Override + public boolean equals(@Nullable Object[] a, @Nullable Object[] b) + { + //noinspection ArrayEquality + if (a == b) { + return true; + } else if (a != null && b != null) { + int length = a.length; + if (b.length != length) { + return false; + } else { + for (int i = 0; i < length; ++i) { + if (!elementStrategy.equals(a[i], b[i])) { + return false; + } + } + return true; + } + } else { + return false; + } + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategy.java b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategy.java index e4856f889714..3d2493bc80c5 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategy.java +++ b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategy.java @@ -19,7 +19,9 @@ package org.apache.druid.segment.column; +import it.unimi.dsi.fastutil.Hash; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; import java.nio.ByteBuffer; import java.util.Comparator; @@ -63,7 +65,7 @@ * {@code Comparator}. So, we fall back to effectively erasing the generic type and having them all be * {@code Comparator}. */ -public interface TypeStrategy extends Comparator +public interface TypeStrategy extends Comparator, Hash.Strategy { /** * Estimate the size in bytes that writing this value to memory would require. This method is not required to be @@ -170,4 +172,30 @@ default T fromBytes(byte[] value) { throw new IllegalStateException("Not supported"); } + + /** + * Whether the type is groupable or not. This is always true for all the primitive types, arrays, and nested arrays + * therefore the SQL and the native layer might ignore this flag for those types. For complex types, this flag can be + * true or false, depending on whether the semantics and implementation of the type naturally leads to groupability + * or not. For example, it makes sense for JSON columns to be groupable, however there is little sense in grouping + * sketches (before finalizing). + * + * If a type is groupable, it MUST implement the {@link #hashCode} and {@link #equals} correctly + */ + default boolean groupable() + { + return false; + } + + @Override + default int hashCode(T o) + { + throw DruidException.defensive("Not implemented. Check groupable() first"); + } + + @Override + default boolean equals(T a, T b) + { + throw DruidException.defensive("Not implemented. Check groupable() first"); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/ComparableIntArray.java b/processing/src/main/java/org/apache/druid/segment/data/ComparableIntArray.java deleted file mode 100644 index 7769e98fda92..000000000000 --- a/processing/src/main/java/org/apache/druid/segment/data/ComparableIntArray.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.data; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonValue; - -import java.util.Arrays; - -public class ComparableIntArray implements Comparable -{ - public static final ComparableIntArray EMPTY_ARRAY = new ComparableIntArray(new int[0]); - - final int[] delegate; - private int hashCode; - private boolean hashCodeComputed; - - private ComparableIntArray(int[] array) - { - delegate = array; - } - - @JsonCreator - public static ComparableIntArray of(int... array) - { - if (array.length == 0) { - return EMPTY_ARRAY; - } else { - return new ComparableIntArray(array); - } - } - - @JsonValue - public int[] getDelegate() - { - return delegate; - } - - @Override - public int hashCode() - { - // Check is not thread-safe, but that's fine. Even if used by multiple threads, it's ok to write these primitive - // fields more than once. - // As ComparableIntArray is used in hot loop caching the hashcode - if (!hashCodeComputed) { - hashCode = Arrays.hashCode(delegate); - hashCodeComputed = true; - } - - return hashCode; - } - - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (obj == null || getClass() != obj.getClass()) { - return false; - } - - return Arrays.equals(delegate, ((ComparableIntArray) obj).getDelegate()); - } - - @Override - public int compareTo(ComparableIntArray rhs) - { - // rhs.getDelegate() cannot be null - if (rhs == null) { - return 1; - } - final int minSize = Math.min(this.getDelegate().length, rhs.getDelegate().length); - //noinspection ArrayEquality - if (this.delegate == rhs.getDelegate()) { - return 0; - } else { - for (int i = 0; i < minSize; i++) { - //int's cant be null - final int cmp = Integer.compare(delegate[i], rhs.getDelegate()[i]); - if (cmp == 0) { - continue; - } - return cmp; - } - if (this.getDelegate().length == rhs.getDelegate().length) { - return 0; - } else if (this.getDelegate().length < rhs.getDelegate().length) { - return -1; - } else { - return 1; - } - } - } - - @Override - public String toString() - { - return Arrays.toString(delegate); - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/generator/GeneratorBasicSchemas.java b/processing/src/main/java/org/apache/druid/segment/generator/GeneratorBasicSchemas.java index c6b05a4dda7d..0727309654b5 100644 --- a/processing/src/main/java/org/apache/druid/segment/generator/GeneratorBasicSchemas.java +++ b/processing/src/main/java/org/apache/druid/segment/generator/GeneratorBasicSchemas.java @@ -35,6 +35,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -364,6 +365,50 @@ public class GeneratorBasicSchemas SCHEMA_INFO_BUILDER.put("expression-testbench", expressionsTestsSchema); } + static { + // schema for benchmarking group-by + List expressionsTestsSchemaColumns = ImmutableList.of( + // string dims + GeneratorColumnSchema.makeSequential("string-Sequential-100_000", ValueType.STRING, false, 1, null, 0, 100_000), + GeneratorColumnSchema.makeSequential("string-Sequential-10_000_000", ValueType.STRING, false, 1, null, 0, 10_000_000), + // GeneratorColumnSchema.makeSequential("string-Sequential-1_000_000_000", ValueType.STRING, false, 1, null, 0, 1_000_000_000), + GeneratorColumnSchema.makeLazyZipf("string-ZipF-1_000_000", ValueType.STRING, false, 1, 0.1, 0, 1_000_000, 2.0), + GeneratorColumnSchema.makeLazyDiscreteUniform("string-Uniform-1_000_000", ValueType.STRING, false, 1, 0.3, 0, 1_000_000), + + // multi string dims + GeneratorColumnSchema.makeSequential("multi-string-Sequential-100_000", ValueType.STRING, false, 8, null, 0, 100_000), + GeneratorColumnSchema.makeSequential("multi-string-Sequential-10_000_000", ValueType.STRING, false, 8, null, 0, 10_000_000), + // GeneratorColumnSchema.makeSequential("multi-string-Sequential-1_000_000_000", ValueType.STRING, false, 8, null, 0, 1_000_000_000), + GeneratorColumnSchema.makeLazyZipf("multi-string-ZipF-1_000_000", ValueType.STRING, false, 16, 0.1, 0, 1_000_000, 2.0), + GeneratorColumnSchema.makeLazyDiscreteUniform("multi-string-Uniform-1_000_000", ValueType.STRING, false, 4, null, 0, 1_000_000), + + // numeric dims + GeneratorColumnSchema.makeSequential("long-Sequential-100_000", ValueType.LONG, false, 1, null, 0, 100_000), + GeneratorColumnSchema.makeSequential("long-Sequential-10_000_000", ValueType.LONG, false, 1, null, 0, 10_000_000), + // GeneratorColumnSchema.makeSequential("long-Sequential-1_000_000_000", ValueType.LONG, false, 1, null, 0, 1_000_000_000), + GeneratorColumnSchema.makeLazyZipf("long-ZipF-1_000_000", ValueType.LONG, false, 1, 0.1, 0, 1_000_000, 2.0), + GeneratorColumnSchema.makeLazyDiscreteUniform("long-Uniform-1_000_000", ValueType.LONG, false, 1, 0.3, 0, 1_000_000), + + GeneratorColumnSchema.makeLazyZipf("double-ZipF-1_000_000", ValueType.DOUBLE, false, 1, 0.1, 0, 1_000_000, 2.0), + GeneratorColumnSchema.makeContinuousUniform("double-Uniform-1_000_000", ValueType.DOUBLE, false, 1, null, 0.0, 1_000_000.0), + + GeneratorColumnSchema.makeLazyZipf("float-ZipF-1_000_000", ValueType.FLOAT, false, 1, 0.1, 0, 1_000_000, 2.0), + GeneratorColumnSchema.makeContinuousUniform("float-Uniform-1_000_000", ValueType.FLOAT, false, 1, null, 0.0, 1_000_000.0) + // Generate the array dims, and the complex value dims by wrapping the pre-existing primitive dims within simple expressions + ); + + Interval interval = Intervals.of("2000-01-01/P1D"); + + GeneratorSchemaInfo groupByTestsSchema = new GeneratorSchemaInfo( + expressionsTestsSchemaColumns, + Collections.emptyList(), + interval, + false + ); + + SCHEMA_INFO_BUILDER.put("groupBy-testbench", groupByTestsSchema); + } + static { List inTestsSchemaColumns = ImmutableList.of( GeneratorColumnSchema.makeSequential("long1", ValueType.LONG, false, 1, null, 0, 40000), diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java index c7c569545e15..4d1bb347f218 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.smile.SmileFactory; import com.fasterxml.jackson.dataformat.smile.SmileGenerator; +import it.unimi.dsi.fastutil.Hash; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.guice.NestedDataModule; import org.apache.druid.jackson.DefaultObjectMapper; @@ -37,6 +38,8 @@ import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnFormat; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ObjectStrategyComplexTypeStrategy; +import org.apache.druid.segment.column.TypeStrategy; import org.apache.druid.segment.data.ObjectStrategy; import org.apache.druid.segment.serde.ComplexMetricExtractor; import org.apache.druid.segment.serde.ComplexMetricSerde; @@ -157,6 +160,29 @@ public byte[] toBytes(@Nullable Object val) }; } + @Override + public > TypeStrategy getTypeStrategy() + { + return new ObjectStrategyComplexTypeStrategy<>( + getObjectStrategy(), + ColumnType.ofComplex(TYPE_NAME), + new Hash.Strategy() + { + @Override + public int hashCode(Object o) + { + return StructuredData.wrap(o).equalityHash(); + } + + @Override + public boolean equals(Object a, Object b) + { + return StructuredData.wrap(a).compareTo(StructuredData.wrap(b)) == 0; + } + } + ); + } + public static class NestedColumnFormatV4 implements ColumnFormat { @Override diff --git a/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java b/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java index 32513d62e025..9dfa79b57d95 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.core.JsonProcessingException; +import com.google.common.primitives.Longs; import net.jpountz.xxhash.XXHash64; import net.jpountz.xxhash.XXHashFactory; import org.apache.druid.java.util.common.guava.Comparators; @@ -185,6 +186,12 @@ public int hashCode() return Objects.hash(value); } + // hashCode that relies on the object equality. Translates the hashcode to an integer as well + public int equalityHash() + { + return Longs.hashCode(hash.getAsLong()); + } + @Override public String toString() { diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 6f6538f6fd50..e1b7e4085962 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -9856,31 +9856,22 @@ public void testGroupByLongColumn() @Test public void testGroupByComplexColumn() { + cannotVectorize(); GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) - .setDimensions(new DefaultDimensionSpec("quality_uniques", "quality_uniques")) + .setDimensions(new DefaultDimensionSpec( + "quality_uniques", + "quality_uniques", + HyperUniquesAggregatorFactory.TYPE + )) .setDimFilter(new SelectorDimFilter("quality_uniques", null, null)) .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.ALL_GRAN) .build(); - Assert.assertEquals(Functions.>identity(), query.getLimitSpec().build(query)); - - List expectedResults = Collections.singletonList( - makeRow( - query, - "2011-04-01", - "quality_uniques", - null, - "rows", - 26L, - "idx", - 12446L - ) - ); - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); - TestHelper.assertExpectedObjects(expectedResults, results, "long"); + expectedException.expect(RuntimeException.class); + GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); } @Test diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayDoubleGroupByColumnSelectorStrategyTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayDoubleGroupByColumnSelectorStrategyTest.java deleted file mode 100644 index ad0c96c4673f..000000000000 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayDoubleGroupByColumnSelectorStrategyTest.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import com.google.common.collect.ImmutableList; -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.ordering.StringComparators; -import org.apache.druid.segment.ColumnValueSelector; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.mockito.Mockito; - -import java.nio.ByteBuffer; - -public class ArrayDoubleGroupByColumnSelectorStrategyTest -{ - private final ByteBuffer buffer1 = ByteBuffer.allocate(4); - private final ByteBuffer buffer2 = ByteBuffer.allocate(4); - - private ArrayNumericGroupByColumnSelectorStrategy strategy; - - @Before - public void setup() - { - strategy = new ArrayDoubleGroupByColumnSelectorStrategy(); - addToStrategy(new Object[]{1.0, 2.0}); - addToStrategy(ImmutableList.of(2.0, 3.0)); - addToStrategy(new Double[]{1.0}); - } - - @Test - public void testKeySize() - { - Assert.assertEquals(Integer.BYTES, strategy.getGroupingKeySize()); - } - - @Test - public void testWriteKey() - { - strategy.writeToKeyBuffer(0, 1, buffer1); - Assert.assertEquals(1, buffer1.getInt(0)); - } - - @Test - public void testBufferComparatorsWithNullAndNonNullStringComprators() - { - buffer1.putInt(1); - buffer2.putInt(2); - Grouper.BufferComparator comparator = strategy.bufferComparator(0, null); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - Assert.assertTrue(comparator.compare(buffer2, buffer1, 0, 0) < 0); - - comparator = strategy.bufferComparator(0, StringComparators.LEXICOGRAPHIC); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - Assert.assertTrue(comparator.compare(buffer2, buffer1, 0, 0) < 0); - - comparator = strategy.bufferComparator(0, StringComparators.STRLEN); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - Assert.assertTrue(comparator.compare(buffer2, buffer1, 0, 0) < 0); - } - - @Test - public void testBufferComparator() - { - buffer1.putInt(0); - buffer2.putInt(2); - Grouper.BufferComparator comparator = strategy.bufferComparator(0, null); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - - } - - @Test - public void testSanity() - { - testSanity(new Object[]{1.0, 2.0}, 0); - testSanity(new Object[]{2.0, 3.0}, 1); - testSanity(new Object[]{1.0}, 2); - } - - private void testSanity(Object[] storedValue, int expectedIndex) - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(storedValue); - Assert.assertEquals(expectedIndex, strategy.computeDictionaryId(columnValueSelector)); - - GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); - Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); - ResultRow row = ResultRow.create(1); - - buffer1.putInt(0, expectedIndex); - strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, buffer1, row, 0); - Assert.assertArrayEquals(storedValue, (Object[]) row.get(0)); - } - - @Test - public void testAddingInDictionary() - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(ImmutableList.of(4.0, 2.0)); - Assert.assertEquals(3, strategy.computeDictionaryId(columnValueSelector)); - - GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); - Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); - ResultRow row = ResultRow.create(1); - - buffer1.putInt(3); - strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, buffer1, row, 0); - Assert.assertArrayEquals(new Object[]{4.0, 2.0}, (Object[]) row.get(0)); - } - - @Test - public void testAddingInDictionaryWithObjects() - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(new Object[]{4.0D, 2.0D}); - Assert.assertEquals(3, strategy.computeDictionaryId(columnValueSelector)); - - GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); - Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); - ResultRow row = ResultRow.create(1); - buffer1.putInt(3); - strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, buffer1, row, 0); - Assert.assertArrayEquals(new Object[]{4.0, 2.0}, (Object[]) row.get(0)); - } - - private void addToStrategy(Object value) - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(value); - strategy.computeDictionaryId(columnValueSelector); - } - - @After - public void tearDown() - { - buffer1.clear(); - buffer2.clear(); - } -} diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayLongGroupByColumnSelectorStrategyTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayLongGroupByColumnSelectorStrategyTest.java deleted file mode 100644 index d8bc5372cd70..000000000000 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayLongGroupByColumnSelectorStrategyTest.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import com.google.common.collect.ImmutableList; -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.ordering.StringComparators; -import org.apache.druid.segment.ColumnValueSelector; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mockito; -import org.mockito.junit.MockitoJUnitRunner; - -import java.nio.ByteBuffer; - -@RunWith(MockitoJUnitRunner.class) -public class ArrayLongGroupByColumnSelectorStrategyTest -{ - private final ByteBuffer buffer1 = ByteBuffer.allocate(4); - private final ByteBuffer buffer2 = ByteBuffer.allocate(4); - - private ArrayNumericGroupByColumnSelectorStrategy strategy; - - @Before - public void setup() - { - strategy = new ArrayLongGroupByColumnSelectorStrategy(); - addToStrategy(new Object[]{1L, 2L}); - addToStrategy(ImmutableList.of(2L, 3L)); - addToStrategy(new Long[]{1L}); - } - - @Test - public void testKeySize() - { - Assert.assertEquals(Integer.BYTES, strategy.getGroupingKeySize()); - } - - @Test - public void testWriteKey() - { - strategy.writeToKeyBuffer(0, 1, buffer1); - Assert.assertEquals(1, buffer1.getInt(0)); - } - - @Test - public void testBufferComparatorsWithNullAndNonNullStringComprators() - { - buffer1.putInt(1); - buffer2.putInt(2); - Grouper.BufferComparator comparator = strategy.bufferComparator(0, null); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - Assert.assertTrue(comparator.compare(buffer2, buffer1, 0, 0) < 0); - - comparator = strategy.bufferComparator(0, StringComparators.LEXICOGRAPHIC); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - Assert.assertTrue(comparator.compare(buffer2, buffer1, 0, 0) < 0); - - comparator = strategy.bufferComparator(0, StringComparators.STRLEN); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - Assert.assertTrue(comparator.compare(buffer2, buffer1, 0, 0) < 0); - } - - @Test - public void testBufferComparator() - { - buffer1.putInt(0); - buffer2.putInt(2); - Grouper.BufferComparator comparator = strategy.bufferComparator(0, null); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - - } - - @Test - public void testSanity() - { - testSanity(new Object[]{1L, 2L}, 0); - testSanity(new Object[]{2L, 3L}, 1); - testSanity(new Object[]{1L}, 2); - } - - private void testSanity(Object[] storedValue, int expectedIndex) - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(storedValue); - Assert.assertEquals(expectedIndex, strategy.computeDictionaryId(columnValueSelector)); - - GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); - Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); - ResultRow row = ResultRow.create(1); - - buffer1.putInt(0, expectedIndex); - strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, buffer1, row, 0); - Assert.assertArrayEquals(storedValue, (Object[]) row.get(0)); - } - - @Test - public void testAddingInDictionary() - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(ImmutableList.of(4L, 2L)); - Assert.assertEquals(3, strategy.computeDictionaryId(columnValueSelector)); - - GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); - Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); - ResultRow row = ResultRow.create(1); - - buffer1.putInt(3); - strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, buffer1, row, 0); - Assert.assertArrayEquals(new Object[]{4L, 2L}, (Object[]) row.get(0)); - } - - @Test - public void testAddingInDictionaryWithObjects() - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(new Object[]{4L, 2L}); - Assert.assertEquals(3, strategy.computeDictionaryId(columnValueSelector)); - - GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); - Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); - ResultRow row = ResultRow.create(1); - - buffer1.putInt(3); - strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, buffer1, row, 0); - Assert.assertArrayEquals(new Object[]{4L, 2L}, (Object[]) row.get(0)); - } - - private void addToStrategy(Object value) - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(value); - strategy.computeDictionaryId(columnValueSelector); - } - - @After - public void tearDown() - { - buffer1.clear(); - buffer2.clear(); - } -} diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayStringGroupByColumnSelectorStrategyTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayStringGroupByColumnSelectorStrategyTest.java deleted file mode 100644 index b3ed04605fda..000000000000 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayStringGroupByColumnSelectorStrategyTest.java +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; -import com.google.common.collect.ImmutableList; -import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.ordering.StringComparators; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.data.ComparableIntArray; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mockito; -import org.mockito.junit.MockitoJUnitRunner; - -import java.nio.ByteBuffer; - -@RunWith(MockitoJUnitRunner.class) -public class ArrayStringGroupByColumnSelectorStrategyTest -{ - private final BiMap dictionaryInt = HashBiMap.create(); - - // The dictionary has been constructed such that the values are not sorted lexicographically - // so we can tell when the comparator uses a lexicographic comparison and when it uses the indexes. - private final BiMap indexedIntArrays = HashBiMap.create(); - - private final ByteBuffer buffer1 = ByteBuffer.allocate(4); - private final ByteBuffer buffer2 = ByteBuffer.allocate(4); - - private ArrayStringGroupByColumnSelectorStrategy strategy; - - @Before - public void setup() - { - strategy = new ArrayStringGroupByColumnSelectorStrategy(dictionaryInt, indexedIntArrays); - - dictionaryInt.put("a", 0); - dictionaryInt.put("b", 1); - dictionaryInt.put("bd", 2); - dictionaryInt.put("d", 3); - dictionaryInt.put("e", 4); - - indexedIntArrays.put(ComparableIntArray.of(0, 1), 0); - indexedIntArrays.put(ComparableIntArray.of(2, 4), 1); - indexedIntArrays.put(ComparableIntArray.of(0, 2), 2); - } - - @Test - public void testKeySize() - { - Assert.assertEquals(Integer.BYTES, strategy.getGroupingKeySize()); - } - - @Test - public void testWriteKey() - { - strategy.writeToKeyBuffer(0, 1, buffer1); - Assert.assertEquals(1, buffer1.getInt(0)); - } - - @Test - public void testBufferComparatorCanCompareIntsAndNullStringComparatorShouldUseLexicographicComparator() - { - buffer1.putInt(1); - buffer2.putInt(2); - Grouper.BufferComparator comparator = strategy.bufferComparator(0, null); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - Assert.assertTrue(comparator.compare(buffer2, buffer1, 0, 0) < 0); - } - - @Test - public void testBufferComparatorCanCompareIntsAndLexicographicStringComparatorShouldUseLexicographicComparator() - { - buffer1.putInt(1); - buffer2.putInt(2); - Grouper.BufferComparator comparator = strategy.bufferComparator(0, StringComparators.LEXICOGRAPHIC); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - Assert.assertTrue(comparator.compare(buffer2, buffer1, 0, 0) < 0); - } - - @Test - public void testBufferComparatorCanCompareIntsAndStrLenStringComparatorShouldUseLexicographicComparator() - { - buffer1.putInt(1); - buffer2.putInt(2); - Grouper.BufferComparator comparator = strategy.bufferComparator(0, StringComparators.STRLEN); - Assert.assertTrue(comparator.compare(buffer1, buffer2, 0, 0) > 0); - Assert.assertTrue(comparator.compare(buffer2, buffer1, 0, 0) < 0); - } - - @Test - public void testSanity() - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(ImmutableList.of("a", "b")); - Assert.assertEquals(0, strategy.computeDictionaryId(columnValueSelector)); - - GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); - Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); - ResultRow row = ResultRow.create(1); - - buffer1.putInt(0); - strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, buffer1, row, 0); - Assert.assertArrayEquals(new Object[]{"a", "b"}, (Object[]) row.get(0)); - } - - - @Test - public void testAddingInDictionary() - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(ImmutableList.of("f", "a")); - Assert.assertEquals(3, strategy.computeDictionaryId(columnValueSelector)); - - GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); - Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); - ResultRow row = ResultRow.create(1); - - buffer1.putInt(3); - strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, buffer1, row, 0); - Assert.assertArrayEquals(new Object[]{"f", "a"}, (Object[]) row.get(0)); - } - - @Test - public void testAddingInDictionaryWithObjects() - { - ColumnValueSelector columnValueSelector = Mockito.mock(ColumnValueSelector.class); - Mockito.when(columnValueSelector.getObject()).thenReturn(new Object[]{"f", "a"}); - Assert.assertEquals(3, strategy.computeDictionaryId(columnValueSelector)); - - GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); - Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); - ResultRow row = ResultRow.create(1); - - buffer1.putInt(3); - strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, buffer1, row, 0); - Assert.assertArrayEquals(new Object[]{"f", "a"}, (Object[]) row.get(0)); - } - - @After - public void tearDown() - { - buffer1.clear(); - buffer2.clear(); - } -} diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/FixedWidthGroupByColumnSelectorStrategyTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/FixedWidthGroupByColumnSelectorStrategyTest.java new file mode 100644 index 000000000000..d427cb88bef6 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/FixedWidthGroupByColumnSelectorStrategyTest.java @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.IterableRowsCursorHelper; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.groupby.epinephelinae.GroupByColumnSelectorStrategyFactory; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.List; + +public class FixedWidthGroupByColumnSelectorStrategyTest extends InitializedNullHandlingTest +{ + private static final List DATASOURCE_ROWS = ImmutableList.of( + new Object[]{1L, 1.0f, 1.0d}, + new Object[]{2L, 2.0f, 2.0d}, + new Object[]{null, null, null}, + new Object[]{3L, 3.0f, 3.0d} + ); + private static final GroupByColumnSelectorStrategyFactory STRATEGY_FACTORY = new GroupByColumnSelectorStrategyFactory(); + private static final ByteBuffer BUFFER1 = ByteBuffer.allocate(10); + private static final ByteBuffer BUFFER2 = ByteBuffer.allocate(10); + private static final String LONG_COLUMN = "long"; + private static final String FLOAT_COLUMN = "float"; + private static final String DOUBLE_COLUMN = "double"; + + public static class LongGroupByColumnSelectorStrategyTest + { + private static final GroupByColumnSelectorStrategy STRATEGY = + STRATEGY_FACTORY.makeColumnSelectorStrategy( + createCursor().getColumnSelectorFactory().getColumnCapabilities(LONG_COLUMN), + createCursor().getColumnSelectorFactory().makeColumnValueSelector(LONG_COLUMN), + "dimension" + ); + + @Test + public void testKeySize() + { + Assert.assertEquals(Byte.BYTES + Long.BYTES, STRATEGY.getGroupingKeySizeBytes()); + } + + @Test + public void testWriteToKeyBuffer() + { + Cursor cursor = createCursor(); + ResultRow resultRow = ResultRow.create(1); + ColumnValueSelector columnValueSelector = cursor.getColumnSelectorFactory().makeColumnValueSelector(LONG_COLUMN); + GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); + Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); + + int rowNum = 0; + while (!cursor.isDone()) { + // Check if the round trip serde produces the same results + int sizeIncrease = STRATEGY.writeToKeyBuffer(0, columnValueSelector, BUFFER1); + STRATEGY.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + // There shouldn't be any internal size increase associated with the fixed width types + Assert.assertEquals(0, sizeIncrease); + Assert.assertEquals(NullHandling.nullToEmptyIfNeeded((Long) DATASOURCE_ROWS.get(rowNum)[0]), resultRow.get(0)); + cursor.advance(); + ++rowNum; + } + } + + @Test + public void testInitColumnValues() + { + Cursor cursor = createCursor(); + ColumnValueSelector columnValueSelector = cursor.getColumnSelectorFactory().makeColumnValueSelector(LONG_COLUMN); + Object[] valuess = new Object[1]; + + int rowNum = 0; + while (!cursor.isDone()) { + int sizeIncrease = STRATEGY.initColumnValues(columnValueSelector, 0, valuess); + Assert.assertEquals(0, sizeIncrease); + Assert.assertEquals(NullHandling.nullToEmptyIfNeeded((Long) DATASOURCE_ROWS.get(rowNum)[0]), valuess[0]); + cursor.advance(); + ++rowNum; + } + } + + @Test + public void testBufferComparator() + { + // lhs < rhs + writeGroupingKeyToBuffer(BUFFER1, 100L); + writeGroupingKeyToBuffer(BUFFER2, 200L); + Assert.assertEquals(-1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs == rhs + writeGroupingKeyToBuffer(BUFFER1, 100L); + writeGroupingKeyToBuffer(BUFFER2, 100L); + Assert.assertEquals(0, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs > rhs + writeGroupingKeyToBuffer(BUFFER1, 200L); + writeGroupingKeyToBuffer(BUFFER2, 100L); + Assert.assertEquals(1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs is null + writeGroupingKeyToBuffer(BUFFER1, null); + writeGroupingKeyToBuffer(BUFFER2, 0L); + Assert.assertEquals(-1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // rhs is null + writeGroupingKeyToBuffer(BUFFER1, 0L); + writeGroupingKeyToBuffer(BUFFER2, null); + Assert.assertEquals(1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs and rhs are null + writeGroupingKeyToBuffer(BUFFER1, null); + writeGroupingKeyToBuffer(BUFFER2, null); + Assert.assertEquals(0, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // stringComparator is provided, for lexicographic comparator "2" > "100" + writeGroupingKeyToBuffer(BUFFER1, 2L); + writeGroupingKeyToBuffer(BUFFER2, 100L); + Assert.assertEquals( + 1, + STRATEGY.bufferComparator(0, StringComparators.LEXICOGRAPHIC) + .compare(BUFFER1, BUFFER2, 0, 0) + ); + + // stringComparator is provided, for alphanumeric comparator number("2") < number("100") + writeGroupingKeyToBuffer(BUFFER1, 2L); + writeGroupingKeyToBuffer(BUFFER2, 100L); + Assert.assertEquals( + -1, + STRATEGY.bufferComparator(0, StringComparators.ALPHANUMERIC) + .compare(BUFFER1, BUFFER2, 0, 0) + ); + } + + private static void writeGroupingKeyToBuffer(final ByteBuffer buffer, @Nullable Long key) + { + ColumnValueSelector columnValueSelector1 = Mockito.mock(ColumnValueSelector.class); + + Mockito.when(columnValueSelector1.getObject()).thenReturn(key); + Mockito.when(columnValueSelector1.getLong()).thenReturn(key == null ? 0 : key); + Mockito.when(columnValueSelector1.isNull()).thenReturn(key == null); + + Assert.assertEquals(0, STRATEGY.writeToKeyBuffer(0, columnValueSelector1, buffer)); + } + + @Test + public void testMultiValueHandling() + { + // Returns false, because fixed width strategy doesn't handle multi-value dimensions, therefore it returns false + Assert.assertFalse(STRATEGY.checkRowIndexAndAddValueToGroupingKey(0, 1L, 0, BUFFER1)); + Assert.assertFalse(STRATEGY.checkRowIndexAndAddValueToGroupingKey(0, 1L, 10, BUFFER1)); + } + + @Test + public void testInitGroupingKeyColumnValue() + { + GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); + Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); + int[] stack = new int[1]; + ResultRow resultRow = ResultRow.create(1); + + STRATEGY.initGroupingKeyColumnValue(0, 0, 1001L, BUFFER1, stack); + Assert.assertEquals(1, stack[0]); + STRATEGY.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertEquals(1001L, resultRow.get(0)); + + + STRATEGY.initGroupingKeyColumnValue(0, 0, null, BUFFER1, stack); + Assert.assertEquals(0, stack[0]); + STRATEGY.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertEquals(null, resultRow.get(0)); + } + } + + public static class FloatGroupByColumnSelectorStrategyTest + { + private static final GroupByColumnSelectorStrategy STRATEGY = + STRATEGY_FACTORY.makeColumnSelectorStrategy( + createCursor().getColumnSelectorFactory().getColumnCapabilities(FLOAT_COLUMN), + createCursor().getColumnSelectorFactory().makeColumnValueSelector(FLOAT_COLUMN), + "dimension" + ); + + @Test + public void testKeySize() + { + Assert.assertEquals(Byte.BYTES + Float.BYTES, STRATEGY.getGroupingKeySizeBytes()); + } + + @Test + public void testWriteToKeyBuffer() + { + Cursor cursor = createCursor(); + ResultRow resultRow = ResultRow.create(1); + ColumnValueSelector columnValueSelector = cursor.getColumnSelectorFactory().makeColumnValueSelector(FLOAT_COLUMN); + GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); + Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); + + int rowNum = 0; + while (!cursor.isDone()) { + int sizeIncrease = STRATEGY.writeToKeyBuffer(0, columnValueSelector, BUFFER1); + STRATEGY.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertEquals(0, sizeIncrease); + Assert.assertEquals(NullHandling.nullToEmptyIfNeeded((Float) DATASOURCE_ROWS.get(rowNum)[1]), resultRow.get(0)); + cursor.advance(); + ++rowNum; + } + } + + @Test + public void testInitColumnValues() + { + Cursor cursor = createCursor(); + ColumnValueSelector columnValueSelector = cursor.getColumnSelectorFactory().makeColumnValueSelector(FLOAT_COLUMN); + Object[] valuess = new Object[1]; + + int rowNum = 0; + while (!cursor.isDone()) { + int sizeIncrease = STRATEGY.initColumnValues(columnValueSelector, 0, valuess); + Assert.assertEquals(0, sizeIncrease); + Assert.assertEquals(NullHandling.nullToEmptyIfNeeded((Float) DATASOURCE_ROWS.get(rowNum)[1]), valuess[0]); + cursor.advance(); + ++rowNum; + } + } + + @Test + public void testBufferComparator() + { + // lhs < rhs + writeGroupingKeyToBuffer(BUFFER1, 100.0F); + writeGroupingKeyToBuffer(BUFFER2, 200.0F); + Assert.assertEquals(-1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs == rhs + writeGroupingKeyToBuffer(BUFFER1, 100.0F); + writeGroupingKeyToBuffer(BUFFER2, 100.0F); + Assert.assertEquals(0, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs > rhs + writeGroupingKeyToBuffer(BUFFER1, 200.0F); + writeGroupingKeyToBuffer(BUFFER2, 100.0F); + Assert.assertEquals(1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs is null + writeGroupingKeyToBuffer(BUFFER1, null); + writeGroupingKeyToBuffer(BUFFER2, 0.0F); + Assert.assertEquals(-1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // rhs is null + writeGroupingKeyToBuffer(BUFFER1, 0.0F); + writeGroupingKeyToBuffer(BUFFER2, null); + Assert.assertEquals(1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs and rhs are null + writeGroupingKeyToBuffer(BUFFER1, null); + writeGroupingKeyToBuffer(BUFFER2, null); + Assert.assertEquals(0, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // stringComparator is provided, for lexicographic comparator "2.0" > "100.0" + writeGroupingKeyToBuffer(BUFFER1, 2.0F); + writeGroupingKeyToBuffer(BUFFER2, 100.0F); + Assert.assertEquals( + 1, + STRATEGY.bufferComparator(0, StringComparators.LEXICOGRAPHIC) + .compare(BUFFER1, BUFFER2, 0, 0) + ); + + // stringComparator is provided, for alphanumeric comparator number("2") < number("100") + writeGroupingKeyToBuffer(BUFFER1, 2.0F); + writeGroupingKeyToBuffer(BUFFER2, 100.0F); + Assert.assertEquals( + -1, + STRATEGY.bufferComparator(0, StringComparators.ALPHANUMERIC) + .compare(BUFFER1, BUFFER2, 0, 0) + ); + } + + private static void writeGroupingKeyToBuffer(final ByteBuffer buffer, @Nullable Float key) + { + ColumnValueSelector columnValueSelector1 = Mockito.mock(ColumnValueSelector.class); + + Mockito.when(columnValueSelector1.getObject()).thenReturn(key); + Mockito.when(columnValueSelector1.getFloat()).thenReturn(key == null ? 0.0f : key); + Mockito.when(columnValueSelector1.isNull()).thenReturn(key == null); + + Assert.assertEquals(0, STRATEGY.writeToKeyBuffer(0, columnValueSelector1, buffer)); + } + + @Test + public void testMultiValueHandling() + { + // Returns false, because fixed width strategy doesn't handle multi-value dimensions, therefore it returns false + Assert.assertFalse(STRATEGY.checkRowIndexAndAddValueToGroupingKey(0, 1.0F, 0, BUFFER1)); + Assert.assertFalse(STRATEGY.checkRowIndexAndAddValueToGroupingKey(0, 1.0F, 10, BUFFER1)); + } + + @Test + public void testInitGroupingKeyColumnValue() + { + GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); + Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); + int[] stack = new int[1]; + ResultRow resultRow = ResultRow.create(1); + + STRATEGY.initGroupingKeyColumnValue(0, 0, 1001.0F, BUFFER1, stack); + Assert.assertEquals(1, stack[0]); + STRATEGY.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertEquals(1001.0F, resultRow.get(0)); + + + STRATEGY.initGroupingKeyColumnValue(0, 0, null, BUFFER1, stack); + Assert.assertEquals(0, stack[0]); + STRATEGY.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertEquals(null, resultRow.get(0)); + } + } + + public static class DoubleGroupByColumnSelectorStrategyTest + { + private static final GroupByColumnSelectorStrategy STRATEGY = + STRATEGY_FACTORY.makeColumnSelectorStrategy( + createCursor().getColumnSelectorFactory().getColumnCapabilities(DOUBLE_COLUMN), + createCursor().getColumnSelectorFactory().makeColumnValueSelector(DOUBLE_COLUMN), + "dimension" + ); + + @Test + public void testKeySize() + { + Assert.assertEquals(Byte.BYTES + Double.BYTES, STRATEGY.getGroupingKeySizeBytes()); + } + + @Test + public void testWriteToKeyBuffer() + { + Cursor cursor = createCursor(); + ResultRow resultRow = ResultRow.create(1); + ColumnValueSelector columnValueSelector = cursor.getColumnSelectorFactory() + .makeColumnValueSelector(DOUBLE_COLUMN); + GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); + Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); + + int rowNum = 0; + while (!cursor.isDone()) { + int sizeIncrease = STRATEGY.writeToKeyBuffer(0, columnValueSelector, BUFFER1); + STRATEGY.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertEquals(0, sizeIncrease); + Assert.assertEquals( + NullHandling.nullToEmptyIfNeeded((Double) DATASOURCE_ROWS.get(rowNum)[2]), + resultRow.get(0) + ); + cursor.advance(); + ++rowNum; + } + } + + @Test + public void testInitColumnValues() + { + Cursor cursor = createCursor(); + ColumnValueSelector columnValueSelector = cursor.getColumnSelectorFactory() + .makeColumnValueSelector(DOUBLE_COLUMN); + Object[] valuess = new Object[1]; + + int rowNum = 0; + while (!cursor.isDone()) { + int sizeIncrease = STRATEGY.initColumnValues(columnValueSelector, 0, valuess); + Assert.assertEquals(0, sizeIncrease); + Assert.assertEquals(NullHandling.nullToEmptyIfNeeded((Double) DATASOURCE_ROWS.get(rowNum)[2]), valuess[0]); + cursor.advance(); + ++rowNum; + } + } + + @Test + public void testBufferComparator() + { + // lhs < rhs + writeGroupingKeyToBuffer(BUFFER1, 100.0D); + writeGroupingKeyToBuffer(BUFFER2, 200.0D); + Assert.assertEquals(-1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs == rhs + writeGroupingKeyToBuffer(BUFFER1, 100.0D); + writeGroupingKeyToBuffer(BUFFER2, 100.0D); + Assert.assertEquals(0, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs > rhs + writeGroupingKeyToBuffer(BUFFER1, 200.0D); + writeGroupingKeyToBuffer(BUFFER2, 100.0D); + Assert.assertEquals(1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs is null + writeGroupingKeyToBuffer(BUFFER1, null); + writeGroupingKeyToBuffer(BUFFER2, 0.0D); + Assert.assertEquals(-1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // rhs is null + writeGroupingKeyToBuffer(BUFFER1, 0.0D); + writeGroupingKeyToBuffer(BUFFER2, null); + Assert.assertEquals(1, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // lhs and rhs are null + writeGroupingKeyToBuffer(BUFFER1, null); + writeGroupingKeyToBuffer(BUFFER2, null); + Assert.assertEquals(0, STRATEGY.bufferComparator(0, null).compare(BUFFER1, BUFFER2, 0, 0)); + + // stringComparator is provided, for lexicographic comparator "2.0" > "100.0" + writeGroupingKeyToBuffer(BUFFER1, 2.0D); + writeGroupingKeyToBuffer(BUFFER2, 100.0D); + Assert.assertEquals( + 1, + STRATEGY.bufferComparator(0, StringComparators.LEXICOGRAPHIC) + .compare(BUFFER1, BUFFER2, 0, 0) + ); + + // stringComparator is provided, for alphanumeric comparator number("2.0D") < number("100.0D") + writeGroupingKeyToBuffer(BUFFER1, 2.0D); + writeGroupingKeyToBuffer(BUFFER2, 100.0D); + Assert.assertEquals( + -1, + STRATEGY.bufferComparator(0, StringComparators.ALPHANUMERIC) + .compare(BUFFER1, BUFFER2, 0, 0) + ); + } + + private static void writeGroupingKeyToBuffer(final ByteBuffer buffer, @Nullable Double key) + { + ColumnValueSelector columnValueSelector1 = Mockito.mock(ColumnValueSelector.class); + + Mockito.when(columnValueSelector1.getObject()).thenReturn(key); + Mockito.when(columnValueSelector1.getDouble()).thenReturn(key == null ? 0.0d : key); + Mockito.when(columnValueSelector1.isNull()).thenReturn(key == null); + + Assert.assertEquals(0, STRATEGY.writeToKeyBuffer(0, columnValueSelector1, buffer)); + } + + @Test + public void testMultiValueHandling() + { + // Returns false, because fixed width strategy doesn't handle multi-value dimensions, therefore it returns false + Assert.assertFalse(STRATEGY.checkRowIndexAndAddValueToGroupingKey(0, 1.0D, 0, BUFFER1)); + Assert.assertFalse(STRATEGY.checkRowIndexAndAddValueToGroupingKey(0, 1.0D, 10, BUFFER1)); + } + + @Test + public void testInitGroupingKeyColumnValue() + { + GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); + Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); + int[] stack = new int[1]; + ResultRow resultRow = ResultRow.create(1); + + STRATEGY.initGroupingKeyColumnValue(0, 0, 1001.0D, BUFFER1, stack); + Assert.assertEquals(1, stack[0]); + STRATEGY.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertEquals(1001.0D, resultRow.get(0)); + + + STRATEGY.initGroupingKeyColumnValue(0, 0, null, BUFFER1, stack); + Assert.assertEquals(0, stack[0]); + STRATEGY.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertEquals(null, resultRow.get(0)); + } + } + + private static Cursor createCursor() + { + return IterableRowsCursorHelper.getCursorFromIterable( + DATASOURCE_ROWS, + RowSignature.builder() + .add(LONG_COLUMN, ColumnType.LONG) + .add(FLOAT_COLUMN, ColumnType.FLOAT) + .add(DOUBLE_COLUMN, ColumnType.DOUBLE) + .build() + ).lhs; + } +} diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/NestedColumnGroupByColumnSelectorStrategyTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/NestedColumnGroupByColumnSelectorStrategyTest.java new file mode 100644 index 000000000000..a35432cc80e3 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/NestedColumnGroupByColumnSelectorStrategyTest.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.guice.NestedDataModule; +import org.apache.druid.query.IterableRowsCursorHelper; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.groupby.epinephelinae.GroupByColumnSelectorStrategyFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.nested.StructuredData; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +import java.nio.ByteBuffer; +import java.util.List; + +/** + * Serves as tests for class {@link DictionaryBuildingGroupByColumnSelectorStrategy} when a complex type is specified + */ +public class NestedColumnGroupByColumnSelectorStrategyTest extends InitializedNullHandlingTest +{ + static { + NestedDataModule.registerHandlersAndSerde(); + } + + private static final GroupByColumnSelectorStrategyFactory STRATEGY_FACTORY = new GroupByColumnSelectorStrategyFactory(); + + // No datasource would exist like this, however the inline datasource is an easy way to create the required column value selectors + private static final List DATASOURCE_ROWS = ImmutableList.of( + new Object[]{StructuredData.wrap(ImmutableList.of("x", "y", "z"))}, + new Object[]{StructuredData.wrap(ImmutableMap.of("x", 1.1, "y", 2L))}, + new Object[]{null}, + new Object[]{StructuredData.wrap("hello")} + ); + + // Dictionary ids alloted to each object, in the column-0 of the DATASOURCE_ROWS, when building from scratch. + // null's dictionary id would be -1 + private static final int[] DICT_IDS = new int[]{0, 1, -1, 2}; + + private static final String NESTED_COLUMN = "nested"; + /** + * Row with null value in the column + */ + private static final int NULL_ROW_NUMBER = 2; + private static final ByteBuffer BUFFER1 = ByteBuffer.allocate(10); + private static final ByteBuffer BUFFER2 = ByteBuffer.allocate(10); + + @Test + public void testKeySize() + { + Assert.assertEquals(Integer.BYTES, createStrategy().getGroupingKeySizeBytes()); + } + + @Test + public void testInitColumnValues() + { + GroupByColumnSelectorStrategy strategy = createStrategy(); + Cursor cursor = createCursor(); + ColumnValueSelector columnValueSelector = cursor.getColumnSelectorFactory().makeColumnValueSelector(NESTED_COLUMN); + GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); + Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); + Object[] valuess = new Object[1]; + int rowNum = 0; + while (!cursor.isDone()) { + int sz = strategy.initColumnValues(columnValueSelector, 0, valuess); + // While adding the values for the first time, the initialisation should have a non-zero footprint, apart from the + // row with the null value + if (DATASOURCE_ROWS.get(rowNum)[0] == null) { + Assert.assertEquals(0, sz); + } else { + Assert.assertTrue(sz > 0); + } + Assert.assertEquals(DICT_IDS[rowNum], valuess[0]); + + cursor.advance(); + ++rowNum; + } + + cursor = createCursor(); + columnValueSelector = cursor.getColumnSelectorFactory().makeColumnValueSelector(NESTED_COLUMN); + rowNum = 0; + while (!cursor.isDone()) { + int sz = strategy.initColumnValues(columnValueSelector, 0, valuess); + // While adding the values for the first time, the initialisation should have a non-zero footprint + Assert.assertEquals(0, sz); + Assert.assertEquals(DICT_IDS[rowNum], valuess[0]); + + cursor.advance(); + ++rowNum; + } + } + + @Test + public void testWriteToKeyBuffer() + { + GroupByColumnSelectorStrategy strategy = createStrategy(); + ResultRow resultRow = ResultRow.create(1); + GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); + Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); + Cursor cursor = createCursor(); + ColumnValueSelector columnValueSelector = cursor.getColumnSelectorFactory().makeColumnValueSelector(NESTED_COLUMN); + + int rowNum = 0; + while (!cursor.isDone()) { + int sz = strategy.writeToKeyBuffer(0, columnValueSelector, BUFFER1); + if (DATASOURCE_ROWS.get(rowNum)[0] == null) { + Assert.assertEquals(0, sz); + } else { + Assert.assertTrue(sz > 0); + } + // null is represented by GROUP_BY_MISSING_VALUE on the buffer, even though it gets its own dictionaryId in the dictionary + Assert.assertEquals(DICT_IDS[rowNum], BUFFER1.getInt(0)); + // Readback the value + strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertEquals(DATASOURCE_ROWS.get(rowNum)[0], resultRow.get(0)); + + cursor.advance(); + ++rowNum; + } + } + + @Test + public void testInitGroupingKeyColumnValue() + { + GroupByColumnSelectorStrategy strategy = createStrategy(); + GroupByColumnSelectorPlus groupByColumnSelectorPlus = Mockito.mock(GroupByColumnSelectorPlus.class); + Mockito.when(groupByColumnSelectorPlus.getResultRowPosition()).thenReturn(0); + int[] stack = new int[1]; + ResultRow resultRow = ResultRow.create(1); + + // Test nulls + strategy.initGroupingKeyColumnValue(0, 0, -1, BUFFER1, stack); + Assert.assertEquals(0, stack[0]); + strategy.processValueFromGroupingKey(groupByColumnSelectorPlus, BUFFER1, resultRow, 0); + Assert.assertNull(resultRow.get(0)); + } + + // test reset works fine + + private static GroupByColumnSelectorStrategy createStrategy() + { + return STRATEGY_FACTORY.makeColumnSelectorStrategy( + createCursor().getColumnSelectorFactory().getColumnCapabilities(NESTED_COLUMN), + createCursor().getColumnSelectorFactory().makeColumnValueSelector(NESTED_COLUMN), + "dimension" + ); + } + + + private static Cursor createCursor() + { + return IterableRowsCursorHelper.getCursorFromIterable( + DATASOURCE_ROWS, + RowSignature.builder() + .add(NESTED_COLUMN, ColumnType.NESTED_DATA) + .build() + ).lhs; + } +} diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/StringGroupByColumnSelectorStrategyTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/StringGroupByColumnSelectorStrategyTest.java deleted file mode 100644 index 86524bb91cb4..000000000000 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/StringGroupByColumnSelectorStrategyTest.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.groupby.epinephelinae.column; - -import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import org.apache.druid.query.groupby.epinephelinae.Grouper; -import org.apache.druid.query.ordering.StringComparators; -import org.apache.druid.segment.column.ColumnCapabilities; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.junit.MockitoJUnitRunner; - -import java.nio.ByteBuffer; -import java.util.function.IntFunction; - -@RunWith(MockitoJUnitRunner.class) -public class StringGroupByColumnSelectorStrategyTest -{ - // The dictionary has been constructed such that the values are not sorted lexicographically - // so we can tell when the comparator uses a lexicographic comparison and when it uses the indexes. - private static final Int2ObjectMap DICTIONARY = new Int2ObjectArrayMap<>( - new int[] {0, 1, 2}, - new String[] {"A", "F1", "D"} - ); - - private final ByteBuffer lhsBuffer = ByteBuffer.allocate(4); - private final ByteBuffer rhsBuffer = ByteBuffer.allocate(4); - - @Mock - private ColumnCapabilities capabilities; - private final IntFunction dictionaryLookup = DICTIONARY::get; - - private StringGroupByColumnSelectorStrategy target; - - @Before - public void setUp() - { - lhsBuffer.putInt(1); - rhsBuffer.putInt(2); - Mockito.doReturn(true).when(capabilities).hasBitmapIndexes(); - Mockito.doReturn(ColumnCapabilities.Capable.TRUE).when(capabilities).areDictionaryValuesSorted(); - Mockito.doReturn(ColumnCapabilities.Capable.TRUE).when(capabilities).areDictionaryValuesUnique(); - target = new StringGroupByColumnSelectorStrategy(dictionaryLookup, capabilities); - } - - @Test - public void testBufferComparatorCannotCompareIntsAndNullStringComparatorShouldUseLexicographicComparator() - { - Mockito.when(capabilities.areDictionaryValuesSorted()).thenReturn(ColumnCapabilities.Capable.FALSE); - // The comparator is not using the short circuit so it isn't comparing indexes. - Grouper.BufferComparator comparator = target.bufferComparator(0, null); - Assert.assertTrue(comparator.compare(lhsBuffer, rhsBuffer, 0, 0) > 0); - Assert.assertTrue(comparator.compare(rhsBuffer, lhsBuffer, 0, 0) < 0); - } - - @Test - public void testBufferComparatorCanCompareIntsAndNullStringComparatorShouldUseLexicographicComparator() - { - Grouper.BufferComparator comparator = target.bufferComparator(0, null); - Assert.assertTrue(comparator.compare(lhsBuffer, rhsBuffer, 0, 0) < 0); - Assert.assertTrue(comparator.compare(rhsBuffer, lhsBuffer, 0, 0) > 0); - } - - @Test - public void testBufferComparatorCanCompareIntsAndLexicographicStringComparatorShouldUseLexicographicComparator() - { - Grouper.BufferComparator comparator = target.bufferComparator(0, StringComparators.LEXICOGRAPHIC); - Assert.assertTrue(comparator.compare(lhsBuffer, rhsBuffer, 0, 0) < 0); - Assert.assertTrue(comparator.compare(rhsBuffer, lhsBuffer, 0, 0) > 0); - } - - @Test - public void testBufferComparatorCanCompareIntsAndStrLenStringComparatorShouldUseLexicographicComparator() - { - Grouper.BufferComparator comparator = target.bufferComparator(0, StringComparators.STRLEN); - Assert.assertTrue(comparator.compare(lhsBuffer, rhsBuffer, 0, 0) > 0); - Assert.assertTrue(comparator.compare(rhsBuffer, lhsBuffer, 0, 0) < 0); - } - - @After - public void tearDown() - { - lhsBuffer.clear(); - rhsBuffer.clear(); - } -} diff --git a/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java b/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java index 19b49212ecf0..670da7a5cf66 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java @@ -110,6 +110,12 @@ public int compare(Object o1, Object o2) { return 0; } + + @Override + public boolean groupable() + { + return false; + } }); } @@ -693,6 +699,12 @@ public NullableLongPair fromBytes(byte[] value) { return read(ByteBuffer.wrap(value)); } + + @Override + public boolean groupable() + { + return false; + } } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/data/ComparableIntArrayTest.java b/processing/src/test/java/org/apache/druid/segment/data/ComparableIntArrayTest.java deleted file mode 100644 index cfc4e34440f2..000000000000 --- a/processing/src/test/java/org/apache/druid/segment/data/ComparableIntArrayTest.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.data; - -import org.junit.Assert; -import org.junit.Test; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; - -public class ComparableIntArrayTest -{ - private final int[] array = new int[]{1, 2, 3}; - private final ComparableIntArray comparableIntArray = ComparableIntArray.of(1, 2, 3); - - @Test - public void testDelegate() - { - Assert.assertArrayEquals(array, comparableIntArray.getDelegate()); - Assert.assertEquals(0, ComparableIntArray.of(new int[0]).getDelegate().length); - Assert.assertEquals(0, ComparableIntArray.of().getDelegate().length); - } - - @Test - public void testHashCode() - { - Assert.assertEquals(Arrays.hashCode(array), comparableIntArray.hashCode()); - Set set = new HashSet<>(); - set.add(comparableIntArray); - set.add(ComparableIntArray.of(array)); - Assert.assertEquals(1, set.size()); - } - - @Test - public void testEquals() - { - Assert.assertTrue(comparableIntArray.equals(ComparableIntArray.of(array))); - Assert.assertFalse(comparableIntArray.equals(ComparableIntArray.of(1, 2, 5))); - Assert.assertFalse(comparableIntArray.equals(ComparableIntArray.EMPTY_ARRAY)); - Assert.assertFalse(comparableIntArray.equals(null)); - } - - @Test - public void testCompareTo() - { - Assert.assertEquals(0, comparableIntArray.compareTo(ComparableIntArray.of(array))); - Assert.assertEquals(1, comparableIntArray.compareTo(null)); - Assert.assertEquals(1, comparableIntArray.compareTo(ComparableIntArray.of(1, 2))); - Assert.assertEquals(-1, comparableIntArray.compareTo(ComparableIntArray.of(1, 2, 3, 4))); - Assert.assertTrue(comparableIntArray.compareTo(ComparableIntArray.of(2)) < 0); - } -} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index 1bd3f8021cb6..95a337921896 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -90,8 +90,6 @@ import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.segment.column.Types; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.join.JoinableFactoryWrapper; import org.apache.druid.sql.calcite.aggregation.Aggregation; import org.apache.druid.sql.calcite.aggregation.DimensionExpression; @@ -486,15 +484,16 @@ private static List computeDimensions( final RelDataType dataType = rexNode.getType(); final ColumnType outputType = Calcites.getColumnTypeForRelDataType(dataType); - if (Types.isNullOr(outputType, ValueType.COMPLEX)) { - // Can't group on unknown or COMPLEX types. - plannerContext.setPlanningError( - "SQL requires a group-by on a column of type %s that is unsupported.", - outputType - ); + if (outputType == null) { + // Can't group on unknown types. + plannerContext.setPlanningError("SQL requires a group-by on a column with unknown type that is unsupported."); + throw new CannotBuildQueryException(aggregate, rexNode); + } + if (!outputType.getNullableStrategy().groupable()) { + // Can't group on 'ungroupable' types. + plannerContext.setPlanningError("SQL requires a group-by on a column with type [%s] that is unsupported.", outputType); throw new CannotBuildQueryException(aggregate, rexNode); } - final String dimOutputName = outputNamePrefix + outputNameCounter++; if (!druidExpression.isSimpleExtraction()) { final String virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( @@ -1250,8 +1249,9 @@ private TopNQuery toTopNQuery() } final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions()).toDimensionSpec(); - // grouping col cannot be type array - if (dimensionSpec.getOutputType().isArray()) { + // TopN queries can't handle arrays or complex dimensions. Return's null so that they get planned as a group by query + // which does support complex and array dimensions + if (!dimensionSpec.getOutputType().isPrimitive()) { return null; } final OrderByColumnSpec limitColumn; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteGroupByQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteGroupByQueryTest.java new file mode 100644 index 000000000000..4595de28fcac --- /dev/null +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteGroupByQueryTest.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite; + +public class CalciteGroupByQueryTest +{ +} diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 07e68984b6e4..dceddbaff436 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -54,6 +54,8 @@ import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.LikeDimFilter; import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.query.topn.DimensionTopNMetricSpec; @@ -538,6 +540,212 @@ public void testTopNPath() ); } + @Test + public void testGroupByOnNestedColumn() + { + cannotVectorize(); + testQuery( + "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) + ) + .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{null, 9L}, + new Object[]{"\"hello\"", 3L}, + new Object[]{"2", 3L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 3L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L} + ) + ); + } + + @Test + public void testGroupByOnNestedColumnWithOrderBy() + { + cannotVectorize(); + testQuery( + "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) + ) + .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{null, 9L}, + new Object[]{"\"hello\"", 3L}, + new Object[]{"2", 3L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 3L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L} + ) + ); + } + + @Test + public void testGroupByOnNestedColumnWithOrderByAndLimit() + { + cannotVectorize(); + testQuery( + "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 ORDER BY 1 LIMIT 100", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) + ) + .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) + .setLimitSpec(new DefaultLimitSpec( + ImmutableList.of(new OrderByColumnSpec( + "d0", + OrderByColumnSpec.Direction.ASCENDING, + StringComparators.NATURAL + )), + 100 + )) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{null, 9L}, + new Object[]{"\"hello\"", 3L}, + new Object[]{"2", 3L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 3L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L} + ) + ); + } + + @Test + public void testGroupByOnNestedColumnWithOrderByAndLimit2() + { + cannotVectorize(); + testQuery( + "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 ORDER BY 1 LIMIT 2", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) + ) + .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) + .setLimitSpec(new DefaultLimitSpec( + ImmutableList.of(new OrderByColumnSpec( + "d0", + OrderByColumnSpec.Direction.ASCENDING, + StringComparators.NATURAL + )), + 2 + )) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{null, 9L}, + new Object[]{"\"hello\"", 3L} + ) + ); + } + + @Test + public void testGroupByOnNestedColumnWithLimit() + { + cannotVectorize(); + testQuery( + "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 LIMIT 100", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) + ) + .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) + .setLimitSpec(new DefaultLimitSpec(null, 100)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{null, 9L}, + new Object[]{"\"hello\"", 3L}, + new Object[]{"2", 3L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 3L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L} + ) + ); + } + + @Test + public void testGroupByOnNestedColumnWithLimit2() + { + cannotVectorize(); + testQuery( + "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 LIMIT 2", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) + ) + .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) + .setLimitSpec(new DefaultLimitSpec(null, 2)) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{null, 9L}, + new Object[]{"\"hello\"", 3L} + ) + ); + } + @Test public void testGroupByRootPath() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index d851ee027a9b..661785cb9db5 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -5847,8 +5847,8 @@ public void testUnplannableExactCountDistinctOnSketch() // COUNT DISTINCT on a sketch cannot be exact. assertQueryIsUnplannable( PLANNER_CONFIG_NO_HLL, - "SELECT COUNT(distinct unique_dim1) FROM druid.foo", - "SQL requires a group-by on a column of type COMPLEX that is unsupported." + "SELECT unique_dim1, COUNT(*) FROM druid.foo GROUP BY 1", + "SQL requires a group-by on a column with type [COMPLEX] that is unsupported." ); }